Merge tag 'x86_kdump_for_v5.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 24 May 2022 02:14:17 +0000 (19:14 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 24 May 2022 02:14:17 +0000 (19:14 -0700)
Pull x86 kdump fixlet from Borislav Petkov:

 - A single debug message fix

* tag 'x86_kdump_for_v5.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/crash: Fix minor typo/bug in debug message
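
For context: the lone commit above ("x86/crash: Fix minor typo/bug in debug message") is from a common class of bug in which a debug print's wording and its arguments drift apart. The sketch below is a hypothetical user-space illustration of that class, not the actual kernel change; the function name and the values are invented for the example.

    #include <stdio.h>

    /* Hypothetical illustration (not the kernel's code): a debug
     * message whose text and arguments have drifted apart. */
    static void report_crash_region(unsigned long long base,
                                    unsigned long long size)
    {
        /* Buggy: the text claims to print the size, but the
         * argument actually passed is the base address. */
        printf("crashkernel: region size: 0x%llx\n", base);

        /* Fixed: the message and its arguments agree. */
        printf("crashkernel: region base: 0x%llx, size: 0x%llx\n",
               base, size);
    }

    int main(void)
    {
        report_crash_region(0x1000000ULL, 0x4000000ULL);
        return 0;
    }

A fix of this kind changes only the diagnostic output, which is why the tag carries a lone "fixlet" rather than any functional change.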

2275 files changed:
.mailmap
Documentation/ABI/testing/securityfs-secrets-coco [new file with mode: 0644]
Documentation/ABI/testing/sysfs-class-firmware-attributes
Documentation/ABI/testing/sysfs-driver-intel_sdsi
Documentation/ABI/testing/sysfs-fs-erofs
Documentation/RCU/Design/Data-Structures/Data-Structures.rst
Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
Documentation/RCU/Design/Requirements/Requirements.rst
Documentation/RCU/arrayRCU.rst
Documentation/RCU/checklist.rst
Documentation/RCU/rcu.rst
Documentation/RCU/rculist_nulls.rst
Documentation/RCU/stallwarn.rst
Documentation/RCU/whatisRCU.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/arm64/memory-tagging-extension.rst
Documentation/arm64/silicon-errata.rst
Documentation/cdrom/cdrom-standard.rst
Documentation/core-api/timekeeping.rst
Documentation/dev-tools/kunit/start.rst
Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml
Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
Documentation/devicetree/bindings/bus/ti-sysc.yaml
Documentation/devicetree/bindings/clock/imx8m-clock.yaml
Documentation/devicetree/bindings/clock/microchip,mpfs.yaml
Documentation/devicetree/bindings/clock/samsung,exynos-audss-clock.yaml
Documentation/devicetree/bindings/clock/samsung,exynos-clock.yaml
Documentation/devicetree/bindings/clock/samsung,exynos-ext-clock.yaml
Documentation/devicetree/bindings/clock/samsung,exynos4412-isp-clock.yaml
Documentation/devicetree/bindings/clock/samsung,exynos5260-clock.yaml
Documentation/devicetree/bindings/clock/samsung,exynos5410-clock.yaml
Documentation/devicetree/bindings/clock/samsung,exynos5433-clock.yaml
Documentation/devicetree/bindings/clock/samsung,exynos7-clock.yaml
Documentation/devicetree/bindings/clock/samsung,exynos7885-clock.yaml
Documentation/devicetree/bindings/clock/samsung,exynos850-clock.yaml
Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml
Documentation/devicetree/bindings/clock/samsung,s5pv210-audss-clock.yaml
Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.yaml
Documentation/devicetree/bindings/devfreq/event/samsung,exynos-nocp.yaml
Documentation/devicetree/bindings/devfreq/event/samsung,exynos-ppmu.yaml
Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml
Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml
Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml
Documentation/devicetree/bindings/display/msm/dpu-qcm2290.yaml
Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml
Documentation/devicetree/bindings/display/panel/panel-timing.yaml
Documentation/devicetree/bindings/display/renesas,du.yaml
Documentation/devicetree/bindings/display/samsung/samsung,exynos-hdmi-ddc.yaml
Documentation/devicetree/bindings/display/samsung/samsung,exynos-hdmi.yaml
Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml
Documentation/devicetree/bindings/display/samsung/samsung,exynos5433-decon.yaml
Documentation/devicetree/bindings/display/samsung/samsung,exynos5433-mic.yaml
Documentation/devicetree/bindings/display/samsung/samsung,exynos7-decon.yaml
Documentation/devicetree/bindings/display/samsung/samsung,fimd.yaml
Documentation/devicetree/bindings/dma/qcom,gpi.yaml
Documentation/devicetree/bindings/extcon/maxim,max77843.yaml
Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml
Documentation/devicetree/bindings/hwmon/lltc,ltc4151.yaml
Documentation/devicetree/bindings/hwmon/microchip,mcp3021.yaml
Documentation/devicetree/bindings/hwmon/sensirion,sht15.yaml
Documentation/devicetree/bindings/hwmon/ti,tmp102.yaml
Documentation/devicetree/bindings/hwmon/ti,tmp108.yaml
Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml
Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml
Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml
Documentation/devicetree/bindings/i2c/samsung,s3c2410-i2c.yaml
Documentation/devicetree/bindings/iio/adc/adi,ad7476.yaml
Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
Documentation/devicetree/bindings/iio/dac/adi,ad5360.yaml
Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml
Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml
Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml
Documentation/devicetree/bindings/interrupt-controller/samsung,exynos4210-combiner.yaml
Documentation/devicetree/bindings/leds/leds-mt6360.yaml
Documentation/devicetree/bindings/leds/maxim,max77693.yaml
Documentation/devicetree/bindings/media/coda.yaml
Documentation/devicetree/bindings/media/mediatek,vcodec-decoder.yaml
Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml
Documentation/devicetree/bindings/media/mediatek,vcodec-subdev-decoder.yaml
Documentation/devicetree/bindings/memory-controllers/brcm,dpfe-cpu.yaml
Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr2-timings.yaml
Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr2.yaml
Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr3-timings.yaml
Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr3.yaml
Documentation/devicetree/bindings/memory-controllers/fsl/fsl,ddr.yaml
Documentation/devicetree/bindings/memory-controllers/marvell,mvebu-sdram-controller.yaml
Documentation/devicetree/bindings/memory-controllers/qca,ath79-ddr-controller.yaml
Documentation/devicetree/bindings/memory-controllers/renesas,h8300-bsc.yaml
Documentation/devicetree/bindings/memory-controllers/samsung,exynos5422-dmc.yaml
Documentation/devicetree/bindings/memory-controllers/synopsys,ddrc-ecc.yaml
Documentation/devicetree/bindings/memory-controllers/ti,da8xx-ddrctl.yaml
Documentation/devicetree/bindings/mfd/atmel-flexcom.txt
Documentation/devicetree/bindings/mfd/maxim,max14577.yaml
Documentation/devicetree/bindings/mfd/maxim,max77686.yaml
Documentation/devicetree/bindings/mfd/maxim,max77693.yaml
Documentation/devicetree/bindings/mfd/maxim,max77802.yaml
Documentation/devicetree/bindings/mfd/maxim,max77843.yaml
Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.yaml
Documentation/devicetree/bindings/mfd/samsung,s2mpa01.yaml
Documentation/devicetree/bindings/mfd/samsung,s2mps11.yaml
Documentation/devicetree/bindings/mfd/samsung,s5m8767.yaml
Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml
Documentation/devicetree/bindings/mtd/gpmi-nand.yaml
Documentation/devicetree/bindings/net/can/bosch,c_can.yaml
Documentation/devicetree/bindings/net/dsa/realtek.yaml
Documentation/devicetree/bindings/net/ethernet-controller.yaml
Documentation/devicetree/bindings/net/micrel.txt
Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml
Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml
Documentation/devicetree/bindings/net/nfc/nxp,pn532.yaml
Documentation/devicetree/bindings/net/nfc/nxp,pn544.yaml
Documentation/devicetree/bindings/net/nfc/st,st-nci.yaml
Documentation/devicetree/bindings/net/nfc/st,st21nfca.yaml
Documentation/devicetree/bindings/net/nfc/st,st95hf.yaml
Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml
Documentation/devicetree/bindings/net/snps,dwmac.yaml
Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml
Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml
Documentation/devicetree/bindings/net/xilinx_axienet.txt
Documentation/devicetree/bindings/pci/apple,pcie.yaml
Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml
Documentation/devicetree/bindings/phy/nvidia,tegra20-usb-phy.yaml
Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml
Documentation/devicetree/bindings/phy/samsung,dp-video-phy.yaml
Documentation/devicetree/bindings/phy/samsung,exynos-hdmi-phy.yaml
Documentation/devicetree/bindings/phy/samsung,exynos5250-sata-phy.yaml
Documentation/devicetree/bindings/phy/samsung,mipi-video-phy.yaml
Documentation/devicetree/bindings/phy/samsung,usb2-phy.yaml
Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml
Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml
Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml
Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-gpio-bank.yaml
Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-pins-cfg.yaml
Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-wakeup-interrupt.yaml
Documentation/devicetree/bindings/pinctrl/samsung,pinctrl.yaml
Documentation/devicetree/bindings/power/renesas,apmu.yaml
Documentation/devicetree/bindings/power/supply/bq2415x.yaml
Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml
Documentation/devicetree/bindings/power/supply/maxim,max77693.yaml
Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
Documentation/devicetree/bindings/regulator/maxim,max14577.yaml
Documentation/devicetree/bindings/regulator/maxim,max77686.yaml
Documentation/devicetree/bindings/regulator/maxim,max77693.yaml
Documentation/devicetree/bindings/regulator/maxim,max77802.yaml
Documentation/devicetree/bindings/regulator/maxim,max77843.yaml
Documentation/devicetree/bindings/regulator/maxim,max8952.yaml
Documentation/devicetree/bindings/regulator/maxim,max8973.yaml
Documentation/devicetree/bindings/regulator/maxim,max8997.yaml
Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml
Documentation/devicetree/bindings/regulator/samsung,s2mpa01.yaml
Documentation/devicetree/bindings/regulator/samsung,s2mps11.yaml
Documentation/devicetree/bindings/regulator/samsung,s2mps13.yaml
Documentation/devicetree/bindings/regulator/samsung,s2mps14.yaml
Documentation/devicetree/bindings/regulator/samsung,s2mps15.yaml
Documentation/devicetree/bindings/regulator/samsung,s2mpu02.yaml
Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml
Documentation/devicetree/bindings/remoteproc/qcom,sc7280-wpss-pil.yaml
Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.yaml
Documentation/devicetree/bindings/reset/socionext,uniphier-reset.yaml
Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.yaml
Documentation/devicetree/bindings/rng/timeriomem_rng.yaml
Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml
Documentation/devicetree/bindings/serial/samsung_uart.yaml
Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml
Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
Documentation/devicetree/bindings/sound/samsung,arndale.yaml
Documentation/devicetree/bindings/sound/samsung,smdk5250.yaml
Documentation/devicetree/bindings/sound/samsung,snow.yaml
Documentation/devicetree/bindings/sound/samsung,tm2.yaml
Documentation/devicetree/bindings/sound/st,stm32-sai.yaml
Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml
Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml
Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml
Documentation/devicetree/bindings/spi/samsung,spi.yaml
Documentation/devicetree/bindings/sram/sram.yaml
Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml
Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml
Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml
Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml
Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml
Documentation/driver-api/dma-buf.rst
Documentation/driver-api/gpio/driver.rst
Documentation/driver-api/libata.rst
Documentation/filesystems/caching/backend-api.rst
Documentation/filesystems/caching/netfs-api.rst
Documentation/filesystems/ext4/attributes.rst
Documentation/filesystems/f2fs.rst
Documentation/filesystems/proc.rst
Documentation/filesystems/zonefs.rst
Documentation/networking/bonding.rst
Documentation/networking/dsa/dsa.rst
Documentation/networking/ip-sysctl.rst
Documentation/process/embargoed-hardware-issues.rst
Documentation/process/maintainer-tip.rst
Documentation/security/index.rst
Documentation/security/secrets/coco.rst [new file with mode: 0644]
Documentation/security/secrets/index.rst [new file with mode: 0644]
Documentation/security/siphash.rst
Documentation/userspace-api/ioctl/cdrom.rst
Documentation/virt/coco/sev-guest.rst [new file with mode: 0644]
Documentation/virt/index.rst
Documentation/virt/kvm/api.rst
Documentation/virt/kvm/vcpu-requests.rst
Documentation/virt/kvm/x86/amd-memory-encryption.rst
Documentation/virt/kvm/x86/errata.rst
Documentation/virt/kvm/x86/running-nested-guests.rst
Documentation/vm/page_owner.rst
Documentation/x86/cpuinfo.rst
Documentation/x86/index.rst
Documentation/x86/tdx.rst [new file with mode: 0644]
Documentation/x86/x86_64/boot-options.rst
Documentation/x86/zero-page.rst
MAINTAINERS
Makefile
arch/Kconfig
arch/arc/boot/dts/hsdk.dts
arch/arc/include/asm/atomic-llsc.h
arch/arc/include/asm/pgtable-levels.h
arch/arc/kernel/disasm.c
arch/arc/kernel/entry.S
arch/arc/kernel/signal.c
arch/arc/kernel/smp.c
arch/arc/kernel/unaligned.c
arch/arc/mm/cache.c
arch/arm/boot/dts/am33xx-l4.dtsi
arch/arm/boot/dts/am3517-evm.dts
arch/arm/boot/dts/am3517-som.dtsi
arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts
arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
arch/arm/boot/dts/aspeed-g6.dtsi
arch/arm/boot/dts/at91-dvk_su60_somc.dtsi
arch/arm/boot/dts/at91-q5xr5.dts
arch/arm/boot/dts/at91-sam9_l9260.dts
arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi
arch/arm/boot/dts/at91-sama5d27_wlsom1_ek.dts
arch/arm/boot/dts/at91-sama5d2_xplained.dts
arch/arm/boot/dts/at91-sama5d3_xplained.dts
arch/arm/boot/dts/at91-sama5d4_ma5d4.dtsi
arch/arm/boot/dts/at91-sama5d4_xplained.dts
arch/arm/boot/dts/at91-sama5d4ek.dts
arch/arm/boot/dts/at91-sama7g5ek.dts
arch/arm/boot/dts/at91-vinco.dts
arch/arm/boot/dts/at91rm9200ek.dts
arch/arm/boot/dts/at91sam9260ek.dts
arch/arm/boot/dts/at91sam9261ek.dts
arch/arm/boot/dts/at91sam9263ek.dts
arch/arm/boot/dts/at91sam9g20ek_common.dtsi
arch/arm/boot/dts/at91sam9m10g45ek.dts
arch/arm/boot/dts/at91sam9n12ek.dts
arch/arm/boot/dts/at91sam9rlek.dts
arch/arm/boot/dts/at91sam9x5ek.dtsi
arch/arm/boot/dts/da850-evm.dts
arch/arm/boot/dts/dm8168-evm.dts
arch/arm/boot/dts/dra7-l4.dtsi
arch/arm/boot/dts/imx28-ts4600.dts
arch/arm/boot/dts/imx6qdl-apalis.dtsi
arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi
arch/arm/boot/dts/imx6ul-phytec-segin-peb-av-02.dtsi
arch/arm/boot/dts/imx6ull-colibri.dtsi
arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
arch/arm/boot/dts/logicpd-som-lv.dtsi
arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi
arch/arm/boot/dts/omap3-gta04.dtsi
arch/arm/boot/dts/qcom-apq8064-pins.dtsi
arch/arm/boot/dts/qcom-ipq8064.dtsi
arch/arm/boot/dts/sama5d3xmb.dtsi
arch/arm/boot/dts/sama5d3xmb_cmp.dtsi
arch/arm/boot/dts/sama7g5.dtsi
arch/arm/boot/dts/spear1310-evb.dts
arch/arm/boot/dts/spear1340-evb.dts
arch/arm/boot/dts/stm32mp157c-emstamp-argon.dtsi
arch/arm/boot/dts/stm32mp157c-ev1.dts
arch/arm/boot/dts/usb_a9263.dts
arch/arm/configs/gemini_defconfig
arch/arm/configs/imote2_defconfig [deleted file]
arch/arm/configs/multi_v7_defconfig
arch/arm/configs/tegra_defconfig
arch/arm/configs/u8500_defconfig
arch/arm/include/asm/arch_gicv3.h
arch/arm/include/asm/io.h
arch/arm/kernel/entry-armv.S
arch/arm/mach-davinci/board-da850-evm.c
arch/arm/mach-ep93xx/clock.c
arch/arm/mach-exynos/Kconfig
arch/arm/mach-iop32x/cp6.c
arch/arm/mach-omap2/omap4-common.c
arch/arm/mach-sunxi/Kconfig
arch/arm/mach-vexpress/spc.c
arch/arm/mm/ioremap.c
arch/arm/mm/proc-v7-bugs.c
arch/arm/xen/enlighten.c
arch/arm64/Kconfig
arch/arm64/Kconfig.platforms
arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi
arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi
arch/arm64/boot/dts/amlogic/meson-s4.dtsi
arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts
arch/arm64/boot/dts/amlogic/meson-sm1.dtsi
arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi
arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi
arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi
arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
arch/arm64/boot/dts/freescale/imx8mn.dtsi
arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi
arch/arm64/boot/dts/freescale/imx8qm.dtsi
arch/arm64/boot/dts/qcom/msm8996.dtsi
arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi
arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi
arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts
arch/arm64/boot/dts/qcom/sm8250-mtp.dts
arch/arm64/boot/dts/qcom/sm8250.dtsi
arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
arch/arm64/include/asm/arch_gicv3.h
arch/arm64/include/asm/asm-bug.h
arch/arm64/include/asm/cputype.h
arch/arm64/include/asm/el2_setup.h
arch/arm64/include/asm/io.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/pgtable.h
arch/arm64/kernel/Makefile
arch/arm64/kernel/alternative.c
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/elfcore.c
arch/arm64/kernel/hw_breakpoint.c
arch/arm64/kernel/module-plts.c
arch/arm64/kernel/mte.c
arch/arm64/kernel/paravirt.c
arch/arm64/kernel/patching.c
arch/arm64/kernel/proton-pack.c
arch/arm64/kernel/relocate_kernel.S
arch/arm64/kernel/smp.c
arch/arm64/kernel/suspend.c
arch/arm64/kernel/vdso/Makefile
arch/arm64/kernel/vdso32/Makefile
arch/arm64/kvm/arm.c
arch/arm64/kvm/hyp/nvhe/host.S
arch/arm64/kvm/inject_fault.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/pmu-emul.c
arch/arm64/kvm/psci.c
arch/arm64/kvm/reset.c
arch/arm64/kvm/sys_regs.c
arch/arm64/kvm/vgic/vgic-debug.c
arch/arm64/kvm/vgic/vgic-its.c
arch/arm64/mm/init.c
arch/arm64/mm/ioremap.c
arch/mips/include/asm/timex.h
arch/mips/kernel/time.c
arch/parisc/Kconfig
arch/parisc/configs/generic-32bit_defconfig
arch/parisc/configs/generic-64bit_defconfig
arch/parisc/include/asm/cacheflush.h
arch/parisc/include/asm/page.h
arch/parisc/include/asm/pgtable.h
arch/parisc/kernel/cache.c
arch/parisc/kernel/kprobes.c
arch/parisc/kernel/processor.c
arch/parisc/kernel/setup.c
arch/parisc/kernel/time.c
arch/parisc/kernel/traps.c
arch/parisc/math-emu/dfadd.c
arch/parisc/math-emu/dfsub.c
arch/parisc/math-emu/sfadd.c
arch/parisc/math-emu/sfsub.c
arch/parisc/mm/fault.c
arch/powerpc/include/asm/bug.h
arch/powerpc/include/asm/kvm_book3s_64.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/page.h
arch/powerpc/include/asm/setup.h
arch/powerpc/include/asm/static_call.h
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/fadump.c
arch/powerpc/kernel/module.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/time.c
arch/powerpc/kernel/vdso/gettimeofday.S
arch/powerpc/kvm/Kconfig
arch/powerpc/kvm/book3s_32_sr.S
arch/powerpc/kvm/book3s_64_entry.S
arch/powerpc/kvm/book3s_64_mmu_radix.c
arch/powerpc/kvm/book3s_64_vio.c
arch/powerpc/kvm/book3s_64_vio_hv.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_nested.c
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/kvm/book3s_pr_papr.c
arch/powerpc/kvm/book3s_rtas.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/mm/mem.c
arch/powerpc/mm/numa.c
arch/powerpc/perf/Makefile
arch/powerpc/perf/power10-pmu.c
arch/powerpc/perf/power9-pmu.c
arch/powerpc/platforms/powernv/opal-core.c
arch/powerpc/platforms/pseries/papr_scm.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/platforms/pseries/vas-sysfs.c
arch/powerpc/platforms/pseries/vas.c
arch/powerpc/platforms/pseries/vas.h
arch/riscv/Kconfig.socs
arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi
arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
arch/riscv/boot/dts/sifive/fu540-c000.dtsi
arch/riscv/configs/defconfig
arch/riscv/configs/rv32_defconfig
arch/riscv/include/asm/bug.h
arch/riscv/include/asm/kvm_host.h
arch/riscv/kernel/patch.c
arch/riscv/kvm/vcpu.c
arch/riscv/kvm/vcpu_exit.c
arch/riscv/kvm/vcpu_fp.c
arch/riscv/kvm/vcpu_sbi.c
arch/riscv/mm/init.c
arch/s390/Kconfig
arch/s390/Makefile
arch/s390/configs/debug_defconfig
arch/s390/configs/defconfig
arch/s390/configs/zfcpdump_defconfig
arch/s390/include/asm/bug.h
arch/s390/include/asm/entry-common.h
arch/s390/include/asm/processor.h
arch/s390/include/asm/stacktrace.h
arch/s390/kernel/machine_kexec.c
arch/s390/kernel/processor.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/pv.c
arch/s390/kvm/vsie.c
arch/s390/lib/test_unwind.c
arch/s390/mm/gmap.c
arch/sparc/include/asm/cacheflush_32.h
arch/um/drivers/ubd_kern.c
arch/x86/Kconfig
arch/x86/Makefile
arch/x86/boot/boot.h
arch/x86/boot/compressed/Makefile
arch/x86/boot/compressed/acpi.c
arch/x86/boot/compressed/early_serial_console.c
arch/x86/boot/compressed/efi.c [new file with mode: 0644]
arch/x86/boot/compressed/efi.h [new file with mode: 0644]
arch/x86/boot/compressed/head_64.S
arch/x86/boot/compressed/ident_map_64.c
arch/x86/boot/compressed/idt_64.c
arch/x86/boot/compressed/kaslr.c
arch/x86/boot/compressed/mem_encrypt.S
arch/x86/boot/compressed/misc.c
arch/x86/boot/compressed/misc.h
arch/x86/boot/compressed/pgtable.h
arch/x86/boot/compressed/pgtable_64.c
arch/x86/boot/compressed/sev.c
arch/x86/boot/compressed/tdcall.S [new file with mode: 0644]
arch/x86/boot/compressed/tdx.c [new file with mode: 0644]
arch/x86/boot/compressed/tdx.h [new file with mode: 0644]
arch/x86/boot/cpucheck.c
arch/x86/boot/cpuflags.c
arch/x86/boot/cpuflags.h
arch/x86/boot/header.S
arch/x86/boot/io.h [new file with mode: 0644]
arch/x86/boot/main.c
arch/x86/boot/msr.h [new file with mode: 0644]
arch/x86/coco/Makefile
arch/x86/coco/core.c
arch/x86/coco/tdx/Makefile [new file with mode: 0644]
arch/x86/coco/tdx/tdcall.S [new file with mode: 0644]
arch/x86/coco/tdx/tdx.c [new file with mode: 0644]
arch/x86/entry/calling.h
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/events/intel/core.c
arch/x86/events/intel/cstate.c
arch/x86/events/intel/uncore.c
arch/x86/events/intel/uncore_snb.c
arch/x86/events/msr.c
arch/x86/ia32/Makefile
arch/x86/ia32/ia32_aout.c [deleted file]
arch/x86/include/asm/acenv.h
arch/x86/include/asm/apic.h
arch/x86/include/asm/asm.h
arch/x86/include/asm/bootparam_utils.h
arch/x86/include/asm/bug.h
arch/x86/include/asm/compat.h
arch/x86/include/asm/cpu.h
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/cpuid.h [new file with mode: 0644]
arch/x86/include/asm/disabled-features.h
arch/x86/include/asm/efi.h
arch/x86/include/asm/elf.h
arch/x86/include/asm/fpu/api.h
arch/x86/include/asm/fpu/internal.h [deleted file]
arch/x86/include/asm/idtentry.h
arch/x86/include/asm/intel-family.h
arch/x86/include/asm/io.h
arch/x86/include/asm/irqflags.h
arch/x86/include/asm/kvm-x86-ops.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvm_para.h
arch/x86/include/asm/mem_encrypt.h
arch/x86/include/asm/microcode.h
arch/x86/include/asm/mmu_context.h
arch/x86/include/asm/mmx.h [deleted file]
arch/x86/include/asm/msi.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/msr.h
arch/x86/include/asm/nmi.h
arch/x86/include/asm/pci_x86.h
arch/x86/include/asm/percpu.h
arch/x86/include/asm/perf_event.h
arch/x86/include/asm/pgtable_types.h
arch/x86/include/asm/pkeys.h
arch/x86/include/asm/proto.h
arch/x86/include/asm/ptrace.h
arch/x86/include/asm/realmode.h
arch/x86/include/asm/segment.h
arch/x86/include/asm/setup.h
arch/x86/include/asm/sev-common.h
arch/x86/include/asm/sev.h
arch/x86/include/asm/shared/io.h [new file with mode: 0644]
arch/x86/include/asm/shared/msr.h [new file with mode: 0644]
arch/x86/include/asm/shared/tdx.h [new file with mode: 0644]
arch/x86/include/asm/smap.h
arch/x86/include/asm/special_insns.h
arch/x86/include/asm/static_call.h
arch/x86/include/asm/suspend_32.h
arch/x86/include/asm/suspend_64.h
arch/x86/include/asm/svm.h
arch/x86/include/asm/tdx.h [new file with mode: 0644]
arch/x86/include/asm/topology.h
arch/x86/include/asm/traps.h
arch/x86/include/uapi/asm/bootparam.h
arch/x86/include/uapi/asm/svm.h
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/acpi/cppc.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/asm-offsets.c
arch/x86/kernel/cpu/aperfmperf.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cpu.h
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/mce/amd.c
arch/x86/kernel/cpu/mce/core.c
arch/x86/kernel/cpu/mce/severity.c
arch/x86/kernel/cpu/microcode/core.c
arch/x86/kernel/cpu/proc.c
arch/x86/kernel/cpu/resctrl/rdtgroup.c
arch/x86/kernel/cpu/tsx.c
arch/x86/kernel/crash_dump_64.c
arch/x86/kernel/fpu/core.c
arch/x86/kernel/fpu/xstate.c
arch/x86/kernel/fpu/xstate.h
arch/x86/kernel/head64.c
arch/x86/kernel/head_64.S
arch/x86/kernel/idt.c
arch/x86/kernel/kvm.c
arch/x86/kernel/nmi.c
arch/x86/kernel/probe_roms.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/ptrace.c
arch/x86/kernel/setup.c
arch/x86/kernel/sev-shared.c
arch/x86/kernel/sev.c
arch/x86/kernel/signal.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/static_call.c
arch/x86/kernel/traps.c
arch/x86/kernel/unwind_orc.c
arch/x86/kernel/vm86_32.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/hyperv.h
arch/x86/kvm/mmu.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/spte.c
arch/x86/kvm/mmu/spte.h
arch/x86/kvm/mmu/tdp_iter.h
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/pmu.c
arch/x86/kvm/pmu.h
arch/x86/kvm/svm/avic.c
arch/x86/kvm/svm/pmu.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/pmu_intel.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/lib/copy_user_64.S
arch/x86/lib/insn-eval.c
arch/x86/lib/kaslr.c
arch/x86/lib/mmx_32.c [deleted file]
arch/x86/lib/putuser.S
arch/x86/lib/retpoline.S
arch/x86/lib/usercopy_64.c
arch/x86/math-emu/get_address.c
arch/x86/mm/Makefile
arch/x86/mm/amdtopology.c
arch/x86/mm/fault.c
arch/x86/mm/init_64.c
arch/x86/mm/ioremap.c
arch/x86/mm/mem_encrypt.c
arch/x86/mm/mem_encrypt_amd.c
arch/x86/mm/mem_encrypt_identity.c
arch/x86/mm/mmio-mod.c
arch/x86/mm/numa_emulation.c
arch/x86/mm/pat/set_memory.c
arch/x86/mm/setup_nx.c [deleted file]
arch/x86/mm/tlb.c
arch/x86/net/bpf_jit_comp.c
arch/x86/pci/irq.c
arch/x86/pci/xen.c
arch/x86/platform/efi/efi.c
arch/x86/platform/pvh/head.S
arch/x86/platform/uv/uv_nmi.c
arch/x86/power/cpu.c
arch/x86/realmode/init.c
arch/x86/realmode/rm/header.S
arch/x86/realmode/rm/trampoline_64.S
arch/x86/realmode/rm/trampoline_common.S
arch/x86/realmode/rm/wakemain.c
arch/x86/virt/vmx/tdx/tdxcall.S [new file with mode: 0644]
arch/x86/xen/smp_pv.c
arch/x86/xen/xen-head.S
arch/xtensa/kernel/coprocessor.S
arch/xtensa/kernel/jump_label.c
arch/xtensa/platforms/iss/console.c
block/Makefile
block/badblocks.c
block/bdev.c
block/bfq-cgroup.c
block/bfq-iosched.c
block/bfq-iosched.h
block/bio.c
block/blk-cgroup-fc-appid.c [new file with mode: 0644]
block/blk-cgroup.c
block/blk-cgroup.h
block/blk-core.c
block/blk-crypto-fallback.c
block/blk-iocost.c
block/blk-iolatency.c
block/blk-lib.c
block/blk-map.c
block/blk-mq-debugfs.c
block/blk-mq.c
block/blk-settings.c
block/blk-throttle.c
block/blk.h
block/bounce.c
block/fops.c
block/genhd.c
block/ioctl.c
block/mq-deadline.c
block/partitions/acorn.c
block/partitions/atari.c
block/partitions/core.c
block/partitions/ldm.c
drivers/acpi/processor_idle.c
drivers/acpi/scan.c
drivers/android/binder.c
drivers/ata/Kconfig
drivers/ata/ahci.c
drivers/ata/ahci.h
drivers/ata/ahci_brcm.c
drivers/ata/libata-core.c
drivers/ata/libata-sata.c
drivers/ata/libata-scsi.c
drivers/ata/libata-sff.c
drivers/ata/libata.h
drivers/ata/pata_ftide010.c
drivers/ata/pata_marvell.c
drivers/ata/pata_mpc52xx.c
drivers/ata/pata_sil680.c
drivers/ata/pata_via.c
drivers/ata/sata_dwc_460ex.c
drivers/ata/sata_gemini.c
drivers/base/arch_topology.c
drivers/base/dd.c
drivers/base/firmware_loader/main.c
drivers/base/topology.c
drivers/block/Kconfig
drivers/block/aoe/aoe.h
drivers/block/aoe/aoeblk.c
drivers/block/aoe/aoecmd.c
drivers/block/aoe/aoedev.c
drivers/block/aoe/aoemain.c
drivers/block/ataflop.c
drivers/block/drbd/drbd_bitmap.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_req.c
drivers/block/drbd/drbd_state.c
drivers/block/drbd/drbd_state_change.h
drivers/block/drbd/drbd_worker.c
drivers/block/floppy.c
drivers/block/loop.c
drivers/block/loop.h [deleted file]
drivers/block/mtip32xx/mtip32xx.c
drivers/block/nbd.c
drivers/block/null_blk/main.c
drivers/block/null_blk/null_blk.h
drivers/block/null_blk/zoned.c
drivers/block/pktcdvd.c
drivers/block/rbd.c
drivers/block/rnbd/rnbd-clt.c
drivers/block/rnbd/rnbd-srv-dev.h
drivers/block/rnbd/rnbd-srv.c
drivers/block/virtio_blk.c
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c
drivers/block/zram/zram_drv.c
drivers/bus/fsl-mc/fsl-mc-msi.c
drivers/bus/imx-weim.c
drivers/bus/mhi/host/pci_generic.c
drivers/bus/sunxi-rsb.c
drivers/bus/ti-sysc.c
drivers/cdrom/cdrom.c
drivers/char/ipmi/ipmi_msghandler.c
drivers/char/ipmi/ipmi_si_intf.c
drivers/char/random.c
drivers/clk/at91/clk-generated.c
drivers/clk/bcm/clk-bcm2835.c
drivers/clk/microchip/clk-mpfs.c
drivers/clk/qcom/clk-rcg2.c
drivers/clk/sunxi-ng/ccu-sun6i-rtc.c
drivers/clk/sunxi/clk-sun9i-mmc.c
drivers/cpufreq/qcom-cpufreq-hw.c
drivers/cpufreq/sun50i-cpufreq-nvmem.c
drivers/cpuidle/cpuidle-riscv-sbi.c
drivers/crypto/qcom-rng.c
drivers/cxl/pci.c
drivers/dma-buf/Makefile
drivers/dma-buf/dma-buf.c
drivers/dma-buf/dma-fence-array.c
drivers/dma-buf/selftests.h
drivers/dma-buf/st-dma-fence-unwrap.c [new file with mode: 0644]
drivers/dma-buf/sync_file.c
drivers/dma/at_xdmac.c
drivers/dma/dw-edma/dw-edma-v0-core.c
drivers/dma/idxd/device.c
drivers/dma/idxd/submit.c
drivers/dma/idxd/sysfs.c
drivers/dma/imx-sdma.c
drivers/dma/mediatek/mtk-uart-apdma.c
drivers/edac/Kconfig
drivers/edac/armada_xp_edac.c
drivers/edac/dmc520_edac.c
drivers/edac/edac_device.c
drivers/edac/edac_device.h
drivers/edac/edac_device_sysfs.c
drivers/edac/edac_mc.c
drivers/edac/edac_module.h
drivers/edac/edac_pci.c
drivers/edac/ghes_edac.c
drivers/edac/i5100_edac.c
drivers/edac/mpc85xx_edac.c
drivers/edac/synopsys_edac.c
drivers/edac/xgene_edac.c
drivers/firewire/core-card.c
drivers/firewire/core-cdev.c
drivers/firewire/core-topology.c
drivers/firewire/core-transaction.c
drivers/firewire/sbp2.c
drivers/firmware/arm_scmi/clock.c
drivers/firmware/arm_scmi/driver.c
drivers/firmware/arm_scmi/optee.c
drivers/firmware/cirrus/cs_dsp.c
drivers/firmware/efi/Kconfig
drivers/firmware/efi/cper.c
drivers/firmware/efi/efi.c
drivers/firmware/efi/libstub/arm32-stub.c
drivers/firmware/efi/libstub/arm64-stub.c
drivers/firmware/efi/libstub/efi-stub.c
drivers/firmware/efi/libstub/efistub.h
drivers/firmware/efi/libstub/randomalloc.c
drivers/firmware/efi/libstub/riscv-stub.c
drivers/firmware/efi/libstub/x86-stub.c
drivers/gpio/TODO
drivers/gpio/gpio-mvebu.c
drivers/gpio/gpio-pca953x.c
drivers/gpio/gpio-pl061.c
drivers/gpio/gpio-sim.c
drivers/gpio/gpio-tegra186.c
drivers/gpio/gpio-vf610.c
drivers/gpio/gpio-visconti.c
drivers/gpio/gpiolib-acpi.c
drivers/gpio/gpiolib-of.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/amdgpu/ObjectID.h
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_events.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
drivers/gpu/drm/amd/display/dc/core/dc_resource.c
drivers/gpu/drm/amd/display/dc/dc.h
drivers/gpu/drm/amd/display/dc/dc_stream.h
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
drivers/gpu/drm/amd/pm/amdgpu_dpm.c
drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
drivers/gpu/drm/bridge/Kconfig
drivers/gpu/drm/dp/drm_dp_mst_topology.c
drivers/gpu/drm/drm_of.c
drivers/gpu/drm/i915/display/intel_dmc.c
drivers/gpu/drm/i915/display/intel_dp.c
drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
drivers/gpu/drm/i915/display/intel_fbc.c
drivers/gpu/drm/i915/display/intel_psr.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gem/i915_gem_mman.c
drivers/gpu/drm/i915/gt/intel_reset.c
drivers/gpu/drm/i915/gt/uc/intel_guc.h
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gt/uc/intel_uc.c
drivers/gpu/drm/i915/gt/uc/intel_uc.h
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_vma.c
drivers/gpu/drm/imx/dw_hdmi-imx.c
drivers/gpu/drm/imx/imx-ldb.c
drivers/gpu/drm/imx/parallel-display.c
drivers/gpu/drm/msm/adreno/a6xx_gpu.c
drivers/gpu/drm/msm/adreno/adreno_device.c
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
drivers/gpu/drm/msm/dp/dp_panel.c
drivers/gpu/drm/msm/dsi/dsi_manager.c
drivers/gpu/drm/msm/msm_gem.c
drivers/gpu/drm/nouveau/nouveau_backlight.c
drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c
drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c
drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
drivers/gpu/drm/panel/panel-ilitek-ili9341.c
drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
drivers/gpu/drm/radeon/radeon_sync.c
drivers/gpu/drm/sun4i/sun4i_frontend.c
drivers/gpu/drm/vc4/Kconfig
drivers/gpu/drm/vc4/vc4_dsi.c
drivers/gpu/drm/vc4/vc4_hdmi.c
drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
drivers/gpu/ipu-v3/ipu-di.c
drivers/hv/channel_mgmt.c
drivers/hv/hv_balloon.c
drivers/hv/hv_common.c
drivers/hv/ring_buffer.c
drivers/hv/vmbus_drv.c
drivers/hwmon/Kconfig
drivers/hwmon/adt7470.c
drivers/hwmon/asus_wmi_sensors.c
drivers/hwmon/f71882fg.c
drivers/hwmon/pmbus/delta-ahe50dc-fan.c
drivers/hwmon/pmbus/pmbus_core.c
drivers/hwmon/pmbus/xdpe12284.c
drivers/hwmon/tmp401.c
drivers/i2c/busses/i2c-imx.c
drivers/i2c/busses/i2c-ismt.c
drivers/i2c/busses/i2c-mt7621.c
drivers/i2c/busses/i2c-pasemi-core.c
drivers/i2c/busses/i2c-qcom-geni.c
drivers/i2c/busses/i2c-thunderx-pcidrv.c
drivers/i2c/i2c-dev.c
drivers/idle/intel_idle.c
drivers/iio/adc/ad7280a.c
drivers/iio/chemical/scd4x.c
drivers/iio/dac/ad3552r.c
drivers/iio/dac/ad5446.c
drivers/iio/dac/ad5592r-base.c
drivers/iio/dac/ltc2688.c
drivers/iio/dac/ti-dac5571.c
drivers/iio/filter/Kconfig
drivers/iio/imu/bmi160/bmi160_core.c
drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
drivers/iio/magnetometer/ak8975.c
drivers/iio/proximity/sx9324.c
drivers/iio/proximity/sx_common.c
drivers/infiniband/core/cm.c
drivers/infiniband/hw/hfi1/mmu_rb.c
drivers/infiniband/hw/irdma/cm.c
drivers/infiniband/hw/irdma/utils.c
drivers/infiniband/hw/irdma/verbs.c
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/sw/rdmavt/qp.c
drivers/infiniband/sw/rxe/rxe_mcast.c
drivers/infiniband/sw/rxe/rxe_resp.c
drivers/infiniband/sw/siw/siw_cm.c
drivers/input/keyboard/cypress-sf.c
drivers/input/keyboard/omap4-keypad.c
drivers/input/touchscreen/ili210x.c
drivers/interconnect/core.c
drivers/interconnect/qcom/sc7180.c
drivers/interconnect/qcom/sdx55.c
drivers/iommu/apple-dart.c
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
drivers/iommu/intel/iommu.c
drivers/iommu/intel/svm.c
drivers/iommu/iommu.c
drivers/iommu/omap-iommu.c
drivers/irqchip/Kconfig
drivers/irqchip/Makefile
drivers/irqchip/irq-armada-370-xp.c
drivers/irqchip/irq-aspeed-i2c-ic.c
drivers/irqchip/irq-aspeed-scu-ic.c
drivers/irqchip/irq-bcm6345-l1.c
drivers/irqchip/irq-csky-apb-intc.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-gic.c
drivers/irqchip/irq-imx-irqsteer.c
drivers/irqchip/irq-qcom-mpm.c
drivers/irqchip/irq-sni-exiu.c
drivers/irqchip/irq-sun6i-r.c
drivers/irqchip/irq-xtensa-mx.c
drivers/md/bcache/alloc.c
drivers/md/bcache/debug.c
drivers/md/bcache/journal.c
drivers/md/bcache/request.c
drivers/md/bcache/super.c
drivers/md/bcache/sysfs.c
drivers/md/dm-bufio.c
drivers/md/dm-cache-target.c
drivers/md/dm-clone-target.c
drivers/md/dm-integrity.c
drivers/md/dm-io.c
drivers/md/dm-log-writes.c
drivers/md/dm-ps-historical-service-time.c
drivers/md/dm-raid.c
drivers/md/dm-table.c
drivers/md/dm-thin.c
drivers/md/dm-zone.c
drivers/md/dm-zoned-target.c
drivers/md/dm.c
drivers/md/md-bitmap.c
drivers/md/md-cluster.c
drivers/md/md-linear.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5-cache.c
drivers/md/raid5-ppl.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/media/platform/nxp/Kconfig
drivers/media/platform/rockchip/rga/rga.c
drivers/media/tuners/si2157.c
drivers/memory/atmel-ebi.c
drivers/memory/fsl_ifc.c
drivers/memory/renesas-rpc-if.c
drivers/message/fusion/mptbase.c
drivers/misc/eeprom/at25.c
drivers/misc/habanalabs/common/memory.c
drivers/mmc/core/block.c
drivers/mmc/core/core.c
drivers/mmc/core/mmc.c
drivers/mmc/core/mmc_ops.c
drivers/mmc/core/mmc_test.c
drivers/mmc/core/queue.c
drivers/mmc/host/mmci_stm32_sdmmc.c
drivers/mmc/host/renesas_sdhi_core.c
drivers/mmc/host/sdhci-msm.c
drivers/mmc/host/sdhci-xenon.c
drivers/mmc/host/sunxi-mmc.c
drivers/mtd/mtd_blkdevs.c
drivers/mtd/nand/raw/mtk_ecc.c
drivers/mtd/nand/raw/qcom_nandc.c
drivers/mtd/nand/raw/sh_flctl.c
drivers/net/bonding/bond_main.c
drivers/net/can/grcan.c
drivers/net/can/m_can/m_can.c
drivers/net/can/m_can/m_can.h
drivers/net/can/m_can/m_can_pci.c
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/b53/b53_priv.h
drivers/net/dsa/b53/b53_serdes.c
drivers/net/dsa/b53/b53_serdes.h
drivers/net/dsa/b53/b53_srab.c
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/lantiq_gswip.c
drivers/net/dsa/microchip/ksz9477.c
drivers/net/dsa/mt7530.c
drivers/net/dsa/mv88e6xxx/port_hidden.c
drivers/net/dsa/ocelot/felix.c
drivers/net/dsa/ocelot/felix_vsc9959.c
drivers/net/dsa/realtek/Kconfig
drivers/net/dsa/realtek/realtek-mdio.c
drivers/net/dsa/realtek/realtek-smi.c
drivers/net/ethernet/Kconfig
drivers/net/ethernet/aquantia/atlantic/aq_nic.c
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
drivers/net/ethernet/aquantia/atlantic/aq_ring.c
drivers/net/ethernet/aquantia/atlantic/aq_vec.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/cavium/thunder/nic_main.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/dec/tulip/tulip_core.c
drivers/net/ethernet/faraday/ftgmac100.c
drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
drivers/net/ethernet/freescale/enetc/enetc_qos.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/fungible/funcore/fun_dev.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/ibm/ibmvnic.h
drivers/net/ethernet/intel/e1000e/ich8lan.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/iavf/iavf_main.c
drivers/net/ethernet/intel/ice/ice.h
drivers/net/ethernet/intel/ice/ice_arfs.c
drivers/net/ethernet/intel/ice/ice_eswitch.c
drivers/net/ethernet/intel/ice/ice_eswitch.h
drivers/net/ethernet/intel/ice/ice_fltr.c
drivers/net/ethernet/intel/ice/ice_idc.c
drivers/net/ethernet/intel/ice/ice_lib.c
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_nvm.c
drivers/net/ethernet/intel/ice/ice_ptp.c
drivers/net/ethernet/intel/ice/ice_sriov.c
drivers/net/ethernet/intel/ice/ice_txrx.h
drivers/net/ethernet/intel/ice/ice_virtchnl.c
drivers/net/ethernet/intel/ice/ice_xsk.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igc/igc_i225.c
drivers/net/ethernet/intel/igc/igc_phy.c
drivers/net/ethernet/intel/igc/igc_ptp.c
drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
drivers/net/ethernet/marvell/mv643xx_eth.c
drivers/net/ethernet/mediatek/mtk_ppe.c
drivers/net/ethernet/mediatek/mtk_sgmii.c
drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
drivers/net/ethernet/mellanox/mlxsw/i2c.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
drivers/net/ethernet/micrel/Kconfig
drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
drivers/net/ethernet/microchip/lan966x/lan966x_main.c
drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c
drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/mscc/ocelot_flower.c
drivers/net/ethernet/mscc/ocelot_vcap.c
drivers/net/ethernet/myricom/myri10ge/myri10ge.c
drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
drivers/net/ethernet/qlogic/qed/qed_debug.c
drivers/net/ethernet/qlogic/qede/qede_fp.c
drivers/net/ethernet/qlogic/qla3xxx.c
drivers/net/ethernet/sfc/ef10.c
drivers/net/ethernet/sfc/efx_channels.c
drivers/net/ethernet/sfc/ptp.c
drivers/net/ethernet/sfc/ptp.h
drivers/net/ethernet/sfc/rx_common.c
drivers/net/ethernet/sfc/tx.c
drivers/net/ethernet/sfc/tx_common.c
drivers/net/ethernet/smsc/smsc911x.c
drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/ethernet/ti/cpsw_new.c
drivers/net/ethernet/xilinx/xilinx_axienet.h
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/ethernet/xilinx/xilinx_emaclite.c
drivers/net/hippi/rrunner.c
drivers/net/ipa/gsi.c
drivers/net/ipa/ipa_endpoint.c
drivers/net/ipa/ipa_qmi.c
drivers/net/macvlan.c
drivers/net/mctp/mctp-i2c.c
drivers/net/mdio/fwnode_mdio.c
drivers/net/mdio/mdio-mscc-miim.c
drivers/net/mdio/mdio-mux-bcm6368.c
drivers/net/phy/marvell10g.c
drivers/net/phy/micrel.c
drivers/net/phy/microchip_t1.c
drivers/net/phy/phy.c
drivers/net/phy/sfp.c
drivers/net/ppp/pppoe.c
drivers/net/slip/slip.c
drivers/net/tun.c
drivers/net/usb/aqc111.c
drivers/net/veth.c
drivers/net/virtio_net.c
drivers/net/vmxnet3/vmxnet3_drv.c
drivers/net/vrf.c
drivers/net/vxlan/vxlan_core.c
drivers/net/wan/cosa.c
drivers/net/wireguard/device.c
drivers/net/wireless/ath/ath10k/sdio.c
drivers/net/wireless/ath/ath11k/core.c
drivers/net/wireless/ath/ath11k/core.h
drivers/net/wireless/ath/ath11k/mac.c
drivers/net/wireless/ath/ath11k/mac.h
drivers/net/wireless/ath/ath11k/reg.c
drivers/net/wireless/ath/ath11k/reg.h
drivers/net/wireless/ath/ath11k/wmi.c
drivers/net/wireless/ath/ath9k/main.c
drivers/net/wireless/ath/ath9k/xmit.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
drivers/net/wireless/mac80211_hwsim.c
drivers/net/wireless/marvell/mwifiex/sdio.c
drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
drivers/net/wireless/ti/wlcore/sdio.c
drivers/nfc/nfcmrvl/main.c
drivers/nfc/pn533/pn533.c
drivers/nvme/host/constants.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/fc.c
drivers/nvme/host/ioctl.c
drivers/nvme/host/multipath.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c
drivers/nvme/target/io-cmd-bdev.c
drivers/nvme/target/zns.c
drivers/pci/controller/dwc/pcie-qcom.c
drivers/pci/controller/pci-aardvark.c
drivers/pci/controller/pci-hyperv.c
drivers/pci/pci.c
drivers/perf/Kconfig
drivers/perf/arm_pmu.c
drivers/perf/fsl_imx8_ddr_perf.c
drivers/perf/qcom_l2_pmu.c
drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c
drivers/phy/motorola/phy-mapphone-mdm6600.c
drivers/phy/samsung/phy-exynos5250-sata.c
drivers/phy/ti/phy-am654-serdes.c
drivers/phy/ti/phy-omap-usb2.c
drivers/phy/ti/phy-ti-pipe3.c
drivers/phy/ti/phy-tusb1210.c
drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
drivers/pinctrl/intel/pinctrl-alderlake.c
drivers/pinctrl/mediatek/Kconfig
drivers/pinctrl/mediatek/pinctrl-mt8365.c
drivers/pinctrl/pinctrl-amd.c
drivers/pinctrl/pinctrl-apple-gpio.c
drivers/pinctrl/pinctrl-ocelot.c
drivers/pinctrl/pinctrl-pistachio.c
drivers/pinctrl/pinctrl-rockchip.c
drivers/pinctrl/qcom/pinctrl-msm.c
drivers/pinctrl/qcom/pinctrl-sm6350.c
drivers/pinctrl/samsung/Kconfig
drivers/pinctrl/samsung/pinctrl-exynos-arm64.c
drivers/pinctrl/stm32/pinctrl-stm32.c
drivers/pinctrl/sunplus/sppctl_sp7021.c
drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c
drivers/platform/surface/aggregator/core.c
drivers/platform/surface/surface_gpe.c
drivers/platform/x86/acerhdf.c
drivers/platform/x86/amd-pmc.c
drivers/platform/x86/asus-wmi.c
drivers/platform/x86/barco-p50-gpio.c
drivers/platform/x86/dell/dell-laptop.c
drivers/platform/x86/gigabyte-wmi.c
drivers/platform/x86/intel/pmc/core.h
drivers/platform/x86/intel/pmt/telemetry.c
drivers/platform/x86/intel/sdsi.c
drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c
drivers/platform/x86/samsung-laptop.c
drivers/platform/x86/think-lmi.c
drivers/platform/x86/think-lmi.h
drivers/platform/x86/thinkpad_acpi.c
drivers/power/supply/power_supply_core.c
drivers/power/supply/samsung-sdi-battery.c
drivers/ptp/ptp_ocp.c
drivers/regulator/atc260x-regulator.c
drivers/regulator/rtq2134-regulator.c
drivers/regulator/wm8994-regulator.c
drivers/reset/reset-rzg2l-usbphy-ctrl.c
drivers/reset/tegra/reset-bpmp.c
drivers/rtc/rtc-sun6i.c
drivers/s390/block/dasd.c
drivers/s390/block/dasd_eckd.c
drivers/s390/block/dasd_fba.c
drivers/s390/block/dasd_int.h
drivers/s390/net/ctcm_mpc.c
drivers/s390/net/ctcm_sysfs.c
drivers/s390/net/lcs.c
drivers/scsi/aha152x.c
drivers/scsi/aic7xxx/aic79xx_osm.h
drivers/scsi/aic7xxx/aic79xx_pci.c
drivers/scsi/aic7xxx/aic7xxx_osm.h
drivers/scsi/aic7xxx/aic7xxx_pci.c
drivers/scsi/bnx2fc/bnx2fc_hwi.c
drivers/scsi/bnx2i/bnx2i_hwi.c
drivers/scsi/bnx2i/bnx2i_iscsi.c
drivers/scsi/cxgbi/libcxgbi.c
drivers/scsi/device_handler/scsi_dh_alua.c
drivers/scsi/hisi_sas/hisi_sas_main.c
drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
drivers/scsi/isci/host.c
drivers/scsi/libiscsi.c
drivers/scsi/libiscsi_tcp.c
drivers/scsi/lpfc/lpfc.h
drivers/scsi/lpfc/lpfc_crtn.h
drivers/scsi/lpfc/lpfc_els.c
drivers/scsi/lpfc/lpfc_hbadisc.c
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_nvme.c
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/lpfc/lpfc_version.h
drivers/scsi/megaraid/megaraid_sas.h
drivers/scsi/megaraid/megaraid_sas_base.c
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/mpt3sas/mpt3sas_config.c
drivers/scsi/mpt3sas/mpt3sas_scsih.c
drivers/scsi/mvsas/mv_init.c
drivers/scsi/pcmcia/sym53c500_cs.c
drivers/scsi/pm8001/pm80xx_hwi.c
drivers/scsi/pmcraid.c
drivers/scsi/pmcraid.h
drivers/scsi/qedi/qedi_iscsi.c
drivers/scsi/qla2xxx/qla_target.c
drivers/scsi/scsi_debug.c
drivers/scsi/scsi_logging.c
drivers/scsi/scsi_scan.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/scsi_transport_iscsi.c
drivers/scsi/sd.c
drivers/scsi/sr.c
drivers/scsi/sr_ioctl.c
drivers/scsi/ufs/ufs-qcom.c
drivers/scsi/ufs/ufshcd-pci.c
drivers/scsi/ufs/ufshcd.h
drivers/scsi/ufs/ufshpb.c
drivers/scsi/virtio_scsi.c
drivers/scsi/zorro7xx.c
drivers/slimbus/qcom-ctrl.c
drivers/soc/imx/imx8m-blk-ctrl.c
drivers/spi/atmel-quadspi.c
drivers/spi/spi-bcm-qspi.c
drivers/spi/spi-cadence-quadspi.c
drivers/spi/spi-intel-pci.c
drivers/spi/spi-mtk-nor.c
drivers/spi/spi-mxic.c
drivers/spi/spi-rpc-if.c
drivers/spi/spi.c
drivers/staging/r8188eu/core/rtw_br_ext.c
drivers/target/iscsi/iscsi_target.c
drivers/target/iscsi/iscsi_target_configfs.c
drivers/target/target_core_device.c
drivers/target/target_core_file.c
drivers/target/target_core_iblock.c
drivers/target/target_core_pscsi.c
drivers/target/target_core_user.c
drivers/tee/optee/ffa_abi.c
drivers/thermal/Kconfig
drivers/thermal/gov_user_space.c
drivers/thermal/intel/int340x_thermal/int3400_thermal.c
drivers/thermal/thermal_sysfs.c
drivers/tty/n_gsm.c
drivers/tty/serial/8250/8250_mtk.c
drivers/tty/serial/8250/8250_pci.c
drivers/tty/serial/8250/8250_port.c
drivers/tty/serial/amba-pl011.c
drivers/tty/serial/digicolor-usart.c
drivers/tty/serial/fsl_lpuart.c
drivers/tty/serial/imx.c
drivers/tty/serial/mpc52xx_uart.c
drivers/tty/serial/sc16is7xx.c
drivers/usb/cdns3/cdns3-gadget.c
drivers/usb/class/cdc-wdm.c
drivers/usb/core/devio.c
drivers/usb/core/quirks.c
drivers/usb/dwc3/core.c
drivers/usb/dwc3/drd.c
drivers/usb/dwc3/dwc3-pci.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/configfs.c
drivers/usb/gadget/function/f_uvc.c
drivers/usb/gadget/function/uvc.h
drivers/usb/gadget/function/uvc_queue.c
drivers/usb/gadget/function/uvc_v4l2.c
drivers/usb/gadget/legacy/raw_gadget.c
drivers/usb/host/ehci-hcd.c
drivers/usb/host/ehci-pci.c
drivers/usb/host/ehci.h
drivers/usb/host/xhci-hub.c
drivers/usb/host/xhci-mtk-sch.c
drivers/usb/host/xhci-mtk.h
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci-tegra.c
drivers/usb/host/xhci.c
drivers/usb/misc/qcom_eud.c
drivers/usb/misc/uss720.c
drivers/usb/mtu3/mtu3_dr.c
drivers/usb/phy/phy-generic.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/option.c
drivers/usb/serial/pl2303.c
drivers/usb/serial/pl2303.h
drivers/usb/serial/qcserial.c
drivers/usb/serial/whiteheat.c
drivers/usb/typec/Kconfig
drivers/usb/typec/tcpm/tcpci.c
drivers/usb/typec/tcpm/tcpci_mt6360.c
drivers/usb/typec/ucsi/ucsi.c
drivers/vdpa/mlx5/net/mlx5_vnet.c
drivers/vfio/pci/vfio_pci_core.c
drivers/vhost/net.c
drivers/video/fbdev/arkfb.c
drivers/video/fbdev/aty/aty128fb.c
drivers/video/fbdev/aty/atyfb_base.c
drivers/video/fbdev/aty/radeon_pm.c
drivers/video/fbdev/aty/radeonfb.h
drivers/video/fbdev/clps711x-fb.c
drivers/video/fbdev/controlfb.c
drivers/video/fbdev/core/fbmem.c
drivers/video/fbdev/core/fbsysfs.c
drivers/video/fbdev/efifb.c
drivers/video/fbdev/i740fb.c
drivers/video/fbdev/imxfb.c
drivers/video/fbdev/kyro/fbdev.c
drivers/video/fbdev/matrox/matroxfb_base.h
drivers/video/fbdev/mb862xx/mb862xxfbdrv.c
drivers/video/fbdev/mmp/core.c
drivers/video/fbdev/neofb.c
drivers/video/fbdev/omap/hwa742.c
drivers/video/fbdev/omap/lcdc.c
drivers/video/fbdev/omap/sossi.c
drivers/video/fbdev/platinumfb.c
drivers/video/fbdev/pm2fb.c
drivers/video/fbdev/pxafb.c
drivers/video/fbdev/s3fb.c
drivers/video/fbdev/sh_mobile_lcdcfb.c
drivers/video/fbdev/simplefb.c
drivers/video/fbdev/sis/sis_main.c
drivers/video/fbdev/tridentfb.c
drivers/video/fbdev/udlfb.c
drivers/video/fbdev/valkyriefb.c
drivers/video/fbdev/vesafb.c
drivers/video/fbdev/vt8623fb.c
drivers/video/of_display_timing.c
drivers/virt/Kconfig
drivers/virt/Makefile
drivers/virt/coco/efi_secret/Kconfig [new file with mode: 0644]
drivers/virt/coco/efi_secret/Makefile [new file with mode: 0644]
drivers/virt/coco/efi_secret/efi_secret.c [new file with mode: 0644]
drivers/virt/coco/sev-guest/Kconfig [new file with mode: 0644]
drivers/virt/coco/sev-guest/Makefile [new file with mode: 0644]
drivers/virt/coco/sev-guest/sev-guest.c [new file with mode: 0644]
drivers/virt/coco/sev-guest/sev-guest.h [new file with mode: 0644]
drivers/virtio/virtio.c
drivers/xen/balloon.c
drivers/xen/gntalloc.c
drivers/xen/unpopulated-alloc.c
fs/afs/inode.c
fs/afs/write.c
fs/binfmt_elf.c
fs/btrfs/block-group.c
fs/btrfs/block-group.h
fs/btrfs/btrfs_inode.h
fs/btrfs/compression.c
fs/btrfs/ctree.h
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/props.c
fs/btrfs/props.h
fs/btrfs/scrub.c
fs/btrfs/sysfs.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/btrfs/xattr.c
fs/btrfs/zoned.c
fs/btrfs/zoned.h
fs/cachefiles/namei.c
fs/cachefiles/xattr.c
fs/ceph/addr.c
fs/ceph/caps.c
fs/ceph/file.c
fs/ceph/mds_client.c
fs/cifs/cifsfs.c
fs/cifs/cifsfs.h
fs/cifs/connect.c
fs/cifs/dfs_cache.c
fs/cifs/link.c
fs/cifs/netmisc.c
fs/cifs/smb2misc.c
fs/cifs/smb2ops.c
fs/cifs/trace.h
fs/cifs/transport.c
fs/direct-io.c
fs/erofs/zdata.c
fs/erofs/zdata.h
fs/exfat/file.c
fs/exfat/super.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/namei.c
fs/ext4/page-io.c
fs/ext4/super.c
fs/f2fs/checkpoint.c
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/inode.c
fs/f2fs/segment.c
fs/f2fs/super.c
fs/fat/file.c
fs/fat/inode.c
fs/file_table.c
fs/fs-writeback.c
fs/fscache/Kconfig
fs/fscache/cache.c
fs/fscache/cookie.c
fs/fscache/internal.h
fs/fscache/io.c
fs/gfs2/bmap.c
fs/gfs2/file.c
fs/gfs2/rgrp.c
fs/hugetlbfs/inode.c
fs/internal.h
fs/io-wq.c
fs/io-wq.h
fs/io_uring.c
fs/iomap/direct-io.c
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jfs/ioctl.c
fs/jfs/super.c
fs/kernfs/dir.c
fs/ksmbd/misc.c
fs/ksmbd/misc.h
fs/ksmbd/oplock.c
fs/ksmbd/oplock.h
fs/ksmbd/smb2pdu.c
fs/ksmbd/vfs.c
fs/ksmbd/vfs_cache.c
fs/ksmbd/vfs_cache.h
fs/namei.c
fs/namespace.c
fs/nfs/Kconfig
fs/nfs/dir.c
fs/nfs/fs_context.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/nfs42xattr.c
fs/nfs/nfs4file.c
fs/nfs/nfs4proc.c
fs/nfs/unlink.c
fs/nfsd/filecache.c
fs/nfsd/nfs2acl.c
fs/nilfs2/ioctl.c
fs/nilfs2/sufile.c
fs/nilfs2/the_nilfs.c
fs/notify/fanotify/fanotify_user.c
fs/ntfs3/file.c
fs/ntfs3/super.c
fs/ocfs2/ioctl.c
fs/pipe.c
fs/posix_acl.c
fs/proc/cpuinfo.c
fs/proc/fd.c
fs/squashfs/block.c
fs/stat.c
fs/super.c
fs/sysfs/file.c
fs/udf/namei.c
fs/xattr.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf.h
fs/xfs/xfs_discard.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_super.c
fs/xfs/xfs_trans.h
fs/zonefs/Makefile
fs/zonefs/super.c
fs/zonefs/sysfs.c [new file with mode: 0644]
fs/zonefs/zonefs.h
include/acpi/acpi_bus.h
include/asm-generic/bug.h
include/asm-generic/mshyperv.h
include/asm-generic/tlb.h
include/asm-generic/unaligned.h
include/dt-bindings/clock/microchip,mpfs-clock.h
include/linux/audit.h
include/linux/backing-dev.h
include/linux/bio.h
include/linux/blk-cgroup.h
include/linux/blk-mq.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/blktrace_api.h
include/linux/bpf.h
include/linux/bpf_verifier.h
include/linux/cc_platform.h
include/linux/cdrom.h
include/linux/ceph/osd_client.h
include/linux/cper.h
include/linux/cpu.h
include/linux/cpufreq.h
include/linux/dma-buf-map.h [deleted file]
include/linux/dma-fence-array.h
include/linux/dma-fence-chain.h
include/linux/dma-fence-unwrap.h [new file with mode: 0644]
include/linux/efi.h
include/linux/elfcore.h
include/linux/fs.h
include/linux/fscache.h
include/linux/gfp.h
include/linux/gpio/consumer.h
include/linux/gpio/driver.h
include/linux/hugetlb.h
include/linux/io_uring.h
include/linux/irq.h
include/linux/irqchip/arm-gic-v3.h
include/linux/kernel.h
include/linux/kfence.h
include/linux/kobject.h
include/linux/kthread.h
include/linux/kvm_host.h
include/linux/libata.h
include/linux/linkage.h
include/linux/local_lock_internal.h
include/linux/memcontrol.h
include/linux/mm.h
include/linux/mmc/core.h
include/linux/mmzone.h
include/linux/mtd/mtd.h
include/linux/netdev_features.h
include/linux/netdevice.h
include/linux/nfs_xdr.h
include/linux/nvme.h
include/linux/objtool.h
include/linux/posix_acl_xattr.h
include/linux/rcupdate.h
include/linux/sched.h
include/linux/sched/mm.h
include/linux/sched/signal.h
include/linux/socket.h
include/linux/srcutree.h
include/linux/static_call.h
include/linux/stmmac.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/svc.h
include/linux/sunrpc/xprt.h
include/linux/t10-pi.h
include/linux/task_work.h
include/linux/timekeeping.h
include/linux/timer.h
include/linux/timex.h
include/linux/torture.h
include/linux/usb/pd_bdo.h
include/linux/vfio_pci_core.h
include/linux/virtio_config.h
include/linux/vmalloc.h
include/memory/renesas-rpc-if.h
include/net/bluetooth/hci.h
include/net/bluetooth/hci_core.h
include/net/esp.h
include/net/flow_dissector.h
include/net/inet_hashtables.h
include/net/inet_timewait_sock.h
include/net/ip.h
include/net/ip6_tunnel.h
include/net/ip_tunnels.h
include/net/mctp.h
include/net/netns/ipv6.h
include/net/secure_seq.h
include/net/tc_act/tc_pedit.h
include/net/tcp.h
include/net/xfrm.h
include/net/xsk_buff_pool.h
include/scsi/libiscsi.h
include/scsi/scsi_transport_iscsi.h
include/soc/mscc/ocelot_vcap.h
include/sound/core.h
include/sound/memalloc.h
include/sound/soc-component.h
include/target/target_core_backend.h
include/trace/events/io_uring.h
include/trace/events/sched.h
include/trace/events/sunrpc.h
include/trace/events/timer.h
include/uapi/linux/cdrom.h
include/uapi/linux/dma-buf.h
include/uapi/linux/elf.h
include/uapi/linux/fb.h
include/uapi/linux/input-event-codes.h
include/uapi/linux/io_uring.h
include/uapi/linux/kvm.h
include/uapi/linux/loop.h
include/uapi/linux/nvme_ioctl.h
include/uapi/linux/rfkill.h
include/uapi/linux/sev-guest.h [new file with mode: 0644]
include/uapi/linux/stddef.h
include/uapi/linux/virtio_ids.h
kernel/Makefile
kernel/auditsc.c
kernel/bpf/Kconfig
kernel/cgroup/cpuset.c
kernel/configs/x86_debug.config [new file with mode: 0644]
kernel/cpu.c
kernel/dma/direct.h
kernel/entry/common.c
kernel/events/core.c
kernel/events/internal.h
kernel/events/ring_buffer.c
kernel/fork.c
kernel/irq/affinity.c
kernel/irq/chip.c
kernel/irq/debugfs.c
kernel/irq/internals.h
kernel/irq/irq_sim.c
kernel/irq/irqdesc.c
kernel/irq/manage.c
kernel/irq/matrix.c
kernel/irq/msi.c
kernel/irq_work.c
kernel/kcov.c
kernel/kexec_core.c
kernel/kprobes.c
kernel/kthread.c
kernel/rcu/Kconfig
kernel/rcu/Kconfig.debug
kernel/rcu/rcu.h
kernel/rcu/rcu_segcblist.c
kernel/rcu/rcuscale.c
kernel/rcu/rcutorture.c
kernel/rcu/refscale.c
kernel/rcu/srcutree.c
kernel/rcu/sync.c
kernel/rcu/tasks.h
kernel/rcu/tree.c
kernel/rcu/tree.h
kernel/rcu/tree_exp.h
kernel/rcu/tree_nocb.h
kernel/rcu/tree_plugin.h
kernel/rcu/tree_stall.h
kernel/rcu/update.c
kernel/scftorture.c
kernel/sched/core.c
kernel/sched/fair.c
kernel/sched/idle.c
kernel/sched/sched.h
kernel/smp.c
kernel/smpboot.c
kernel/static_call.c
kernel/static_call_inline.c [new file with mode: 0644]
kernel/sysctl.c
kernel/task_work.c
kernel/time/clockevents.c
kernel/time/clocksource.c
kernel/time/sched_clock.c
kernel/time/tick-sched.c
kernel/time/timekeeping.c
kernel/time/timer.c
kernel/trace/Kconfig
kernel/trace/blktrace.c
kernel/trace/bpf_trace.c
kernel/trace/fgraph.c
kernel/trace/ftrace.c
kernel/trace/rethook.c
kernel/trace/trace_events.c
kernel/trace/trace_osnoise.c
kernel/trace/trace_sched_switch.c
kernel/trace/trace_sched_wakeup.c
lib/bug.c
lib/debugobjects.c
lib/dim/net_dim.c
lib/hexdump.c
lib/irq_poll.c
lib/kobject.c
lib/lz4/lz4_decompress.c
lib/percpu-refcount.c
lib/strncpy_from_user.c
lib/strnlen_user.c
lib/xarray.c
mm/backing-dev.c
mm/compaction.c
mm/filemap.c
mm/highmem.c
mm/huge_memory.c
mm/hugetlb.c
mm/kasan/hw_tags.c
mm/kasan/kasan.h
mm/kasan/quarantine.c
mm/kfence/core.c
mm/kfence/kfence.h
mm/kfence/report.c
mm/kmemleak.c
mm/list_lru.c
mm/memcontrol.c
mm/memory-failure.c
mm/mempolicy.c
mm/migrate.c
mm/mmap.c
mm/mmu_notifier.c
mm/mremap.c
mm/nommu.c
mm/oom_kill.c
mm/page_alloc.c
mm/page_io.c
mm/page_vma_mapped.c
mm/readahead.c
mm/secretmem.c
mm/shmem.c
mm/slab.c
mm/slab.h
mm/slab_common.c
mm/slob.c
mm/slub.c
mm/swapfile.c
mm/userfaultfd.c
mm/util.c
mm/vmalloc.c
mm/workingset.c
net/batman-adv/fragmentation.c
net/bluetooth/hci_conn.c
net/bluetooth/hci_core.c
net/bluetooth/hci_event.c
net/bluetooth/hci_sync.c
net/bpf/test_run.c
net/bridge/br_input.c
net/bridge/br_switchdev.c
net/can/isotp.c
net/ceph/osd_client.c
net/core/dev.c
net/core/filter.c
net/core/flow_dissector.c
net/core/lwt_bpf.c
net/core/rtnetlink.c
net/core/secure_seq.c
net/core/skbuff.c
net/dccp/ipv4.c
net/dccp/ipv6.c
net/decnet/dn_dev.c
net/decnet/dn_neigh.c
net/decnet/dn_route.c
net/dsa/dsa2.c
net/dsa/master.c
net/dsa/port.c
net/dsa/slave.c
net/dsa/tag_hellcreek.c
net/ipv4/esp4.c
net/ipv4/fib_semantics.c
net/ipv4/igmp.c
net/ipv4/inet_hashtables.c
net/ipv4/inet_timewait_sock.c
net/ipv4/ip_gre.c
net/ipv4/ip_tunnel.c
net/ipv4/netfilter/nf_flow_table_ipv4.c [deleted file]
net/ipv4/ping.c
net/ipv4/route.c
net/ipv4/syncookies.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/ipv4/tcp_rate.c
net/ipv6/esp6.c
net/ipv6/inet6_hashtables.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/ip6mr.c
net/ipv6/mcast.c
net/ipv6/netfilter.c
net/ipv6/route.c
net/ipv6/syncookies.c
net/ipv6/tcp_ipv6.c
net/key/af_key.c
net/l3mdev/l3mdev.c
net/mac80211/debugfs_sta.c
net/mac80211/mlme.c
net/mac80211/rx.c
net/mctp/af_mctp.c
net/mctp/device.c
net/mctp/route.c
net/mptcp/options.c
net/mptcp/pm.c
net/mptcp/protocol.h
net/mptcp/subflow.c
net/netfilter/ipvs/ip_vs_conn.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_conntrack_standalone.c
net/netfilter/nf_flow_table_core.c
net/netfilter/nf_flow_table_ip.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_bitwise.c
net/netfilter/nft_connlimit.c
net/netfilter/nft_counter.c
net/netfilter/nft_flow_offload.c
net/netfilter/nft_last.c
net/netfilter/nft_limit.c
net/netfilter/nft_quota.c
net/netfilter/nft_set_rbtree.c
net/netfilter/nft_socket.c
net/netlink/af_netlink.c
net/nfc/core.c
net/nfc/nci/core.c
net/nfc/nci/data.c
net/nfc/nci/hci.c
net/nfc/netlink.c
net/openvswitch/actions.c
net/openvswitch/flow_netlink.c
net/packet/af_packet.c
net/rds/tcp.c
net/rds/tcp.h
net/rds/tcp_connect.c
net/rds/tcp_listen.c
net/rxrpc/local_object.c
net/rxrpc/net_ns.c
net/sched/act_pedit.c
net/sched/cls_api.c
net/sched/cls_flower.c
net/sched/cls_u32.c
net/sched/sch_taprio.c
net/sctp/outqueue.c
net/sctp/sm_sideeffect.c
net/sctp/sm_statefuns.c
net/sctp/socket.c
net/smc/af_smc.c
net/smc/smc.h
net/smc/smc_clc.c
net/smc/smc_close.c
net/smc/smc_pnet.c
net/smc/smc_rx.c
net/socket.c
net/sunrpc/auth_gss/gss_rpc_upcall.c
net/sunrpc/clnt.c
net/sunrpc/sched.c
net/sunrpc/socklib.c
net/sunrpc/svc_xprt.c
net/sunrpc/svcsock.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtsock.c
net/tls/tls_device.c
net/tls/tls_sw.c
net/wireless/nl80211.c
net/wireless/scan.c
net/xdp/xsk.c
net/xdp/xsk_buff_pool.c
net/xfrm/xfrm_policy.c
samples/trace_events/trace_custom_sched.h
scripts/Makefile.build
scripts/gcc-plugins/latent_entropy_plugin.c
scripts/link-vmlinux.sh
security/selinux/ss/hashtab.c
sound/core/init.c
sound/core/memalloc.c
sound/core/pcm_misc.c
sound/drivers/mtpav.c
sound/firewire/fireworks/fireworks_hwdep.c
sound/hda/hdac_i915.c
sound/hda/intel-dsp-config.c
sound/isa/galaxy/galaxy.c
sound/isa/sc6000.c
sound/isa/wavefront/wavefront_synth.c
sound/oss/dmasound/dmasound.h
sound/oss/dmasound/dmasound_core.c
sound/pci/ad1889.c
sound/pci/ali5451/ali5451.c
sound/pci/als300.c
sound/pci/als4000.c
sound/pci/atiixp.c
sound/pci/atiixp_modem.c
sound/pci/au88x0/au88x0.c
sound/pci/aw2/aw2-alsa.c
sound/pci/azt3328.c
sound/pci/bt87x.c
sound/pci/ca0106/ca0106_main.c
sound/pci/cmipci.c
sound/pci/cs4281.c
sound/pci/cs5535audio/cs5535audio.c
sound/pci/echoaudio/echoaudio.c
sound/pci/emu10k1/emu10k1x.c
sound/pci/ens1370.c
sound/pci/es1938.c
sound/pci/es1968.c
sound/pci/fm801.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/pci/ice1712/ice1724.c
sound/pci/intel8x0.c
sound/pci/intel8x0m.c
sound/pci/korg1212/korg1212.c
sound/pci/lola/lola.c
sound/pci/lx6464es/lx6464es.c
sound/pci/maestro3.c
sound/pci/nm256/nm256.c
sound/pci/oxygen/oxygen_lib.c
sound/pci/riptide/riptide.c
sound/pci/rme32.c
sound/pci/rme96.c
sound/pci/rme9652/hdsp.c
sound/pci/rme9652/hdspm.c
sound/pci/rme9652/rme9652.c
sound/pci/sis7019.c
sound/pci/sonicvibes.c
sound/pci/via82xx.c
sound/pci/via82xx_modem.c
sound/soc/atmel/mchp-pdmc.c
sound/soc/atmel/sam9g20_wm8731.c
sound/soc/codecs/cs35l41-lib.c
sound/soc/codecs/da7219.c
sound/soc/codecs/lpass-rx-macro.c
sound/soc/codecs/lpass-tx-macro.c
sound/soc/codecs/lpass-va-macro.c
sound/soc/codecs/max98090.c
sound/soc/codecs/msm8916-wcd-digital.c
sound/soc/codecs/rk817_codec.c
sound/soc/codecs/rt5682.c
sound/soc/codecs/rt5682s.c
sound/soc/codecs/rt711.c
sound/soc/codecs/rt9120.c
sound/soc/codecs/wcd934x.c
sound/soc/codecs/wm8731.c
sound/soc/codecs/wm8958-dsp2.c
sound/soc/fsl/fsl_sai.c
sound/soc/generic/simple-card-utils.c
sound/soc/intel/boards/sof_es8336.c
sound/soc/intel/boards/sof_rt5682.c
sound/soc/intel/common/soc-acpi-intel-tgl-match.c
sound/soc/meson/aiu-acodec-ctrl.c
sound/soc/meson/aiu-codec-ctrl.c
sound/soc/meson/aiu.c
sound/soc/meson/axg-card.c
sound/soc/meson/axg-tdm-interface.c
sound/soc/meson/g12a-tohdmitx.c
sound/soc/soc-core.c
sound/soc/soc-dapm.c
sound/soc/soc-generic-dmaengine-pcm.c
sound/soc/soc-ops.c
sound/soc/soc-pcm.c
sound/soc/soc-topology.c
sound/soc/sof/sof-pci-dev.c
sound/soc/sof/topology.c
sound/usb/midi.c
sound/usb/mixer_maps.c
sound/usb/pcm.c
sound/usb/quirks-table.h
sound/usb/quirks.c
sound/usb/usbaudio.h
sound/x86/intel_hdmi_audio.c
tools/Makefile
tools/arch/arm64/include/asm/cputype.h
tools/arch/x86/include/asm/disabled-features.h
tools/arch/x86/include/asm/msr-index.h
tools/bpf/bpftool/gen.c
tools/build/Makefile.feature
tools/build/feature/Makefile
tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c [new file with mode: 0644]
tools/include/linux/objtool.h
tools/include/linux/slab.h
tools/include/nolibc/Makefile [new file with mode: 0644]
tools/include/nolibc/arch-aarch64.h [new file with mode: 0644]
tools/include/nolibc/arch-arm.h [new file with mode: 0644]
tools/include/nolibc/arch-i386.h [new file with mode: 0644]
tools/include/nolibc/arch-mips.h [new file with mode: 0644]
tools/include/nolibc/arch-riscv.h [new file with mode: 0644]
tools/include/nolibc/arch-x86_64.h [new file with mode: 0644]
tools/include/nolibc/arch.h [new file with mode: 0644]
tools/include/nolibc/ctype.h [new file with mode: 0644]
tools/include/nolibc/errno.h [new file with mode: 0644]
tools/include/nolibc/nolibc.h
tools/include/nolibc/signal.h [new file with mode: 0644]
tools/include/nolibc/std.h [new file with mode: 0644]
tools/include/nolibc/stdio.h [new file with mode: 0644]
tools/include/nolibc/stdlib.h [new file with mode: 0644]
tools/include/nolibc/string.h [new file with mode: 0644]
tools/include/nolibc/sys.h [new file with mode: 0644]
tools/include/nolibc/time.h [new file with mode: 0644]
tools/include/nolibc/types.h [new file with mode: 0644]
tools/include/nolibc/unistd.h [new file with mode: 0644]
tools/include/uapi/linux/kvm.h
tools/include/uapi/linux/vhost.h
tools/lib/perf/evlist.c
tools/memory-model/README
tools/objtool/check.c
tools/objtool/elf.c
tools/objtool/include/objtool/elf.h
tools/objtool/include/objtool/objtool.h
tools/objtool/objtool.c
tools/perf/Documentation/perf.txt
tools/perf/Makefile.config
tools/perf/arch/arm64/util/arm-spe.c
tools/perf/arch/arm64/util/machine.c
tools/perf/arch/powerpc/util/Build
tools/perf/arch/powerpc/util/machine.c [deleted file]
tools/perf/arch/s390/util/machine.c
tools/perf/arch/x86/util/perf_regs.c
tools/perf/bench/epoll-ctl.c
tools/perf/bench/epoll-wait.c
tools/perf/bench/futex-hash.c
tools/perf/bench/futex-lock-pi.c
tools/perf/bench/futex-requeue.c
tools/perf/bench/futex-wake-parallel.c
tools/perf/bench/futex-wake.c
tools/perf/bench/numa.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-script.c
tools/perf/perf.c
tools/perf/tests/attr/README
tools/perf/tests/attr/test-record-spe-physical-address [new file with mode: 0644]
tools/perf/tests/bpf.c
tools/perf/tests/builtin-test.c
tools/perf/tests/dwarf-unwind.c
tools/perf/tests/perf-time-to-tsc.c
tools/perf/tests/shell/stat_all_pmu.sh
tools/perf/tests/shell/test_arm_coresight.sh
tools/perf/tests/topology.c
tools/perf/util/annotate.c
tools/perf/util/arm-spe.c
tools/perf/util/arm64-frame-pointer-unwind-support.c
tools/perf/util/bpf-event.c
tools/perf/util/c++/clang.cpp
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/machine.c
tools/perf/util/parse-events.c
tools/perf/util/session.c
tools/perf/util/setup.py
tools/perf/util/stat.c
tools/perf/util/symbol-elf.c
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/unwind-libdw.c
tools/perf/util/unwind-libdw.h
tools/perf/util/unwind-libunwind-local.c
tools/perf/util/unwind-libunwind.c
tools/perf/util/unwind.h
tools/power/x86/intel-speed-select/Makefile
tools/testing/nvdimm/test/nfit.c
tools/testing/radix-tree/linux.c
tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
tools/testing/selftests/bpf/progs/map_ptr_kern.c
tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
tools/testing/selftests/kselftest_harness.h
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/aarch64/arch_timer.c
tools/testing/selftests/kvm/aarch64/get-reg-list.c
tools/testing/selftests/kvm/aarch64/vcpu_width_config.c [new file with mode: 0644]
tools/testing/selftests/kvm/dirty_log_perf_test.c
tools/testing/selftests/kvm/include/riscv/processor.h
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/kvm_page_table_test.c
tools/testing/selftests/kvm/lib/riscv/processor.c
tools/testing/selftests/kvm/lib/x86_64/processor.c
tools/testing/selftests/kvm/x86_64/amx_test.c
tools/testing/selftests/kvm/x86_64/emulator_error_test.c
tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
tools/testing/selftests/kvm/x86_64/smm_test.c
tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
tools/testing/selftests/mqueue/mq_perf_tests.c
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/bpf/Makefile [new file with mode: 0644]
tools/testing/selftests/net/bpf/nat6to4.c [new file with mode: 0644]
tools/testing/selftests/net/fcnal-test.sh
tools/testing/selftests/net/fib_nexthops.sh
tools/testing/selftests/net/forwarding/Makefile
tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/net/so_txtime.c
tools/testing/selftests/net/udpgro_frglist.sh [new file with mode: 0755]
tools/testing/selftests/pid_namespace/Makefile
tools/testing/selftests/pidfd/pidfd_wait.c
tools/testing/selftests/proc/proc-pid-vm.c
tools/testing/selftests/rcutorture/bin/functions.sh
tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
tools/testing/selftests/rcutorture/bin/kvm-remote.sh
tools/testing/selftests/rcutorture/bin/kvm.sh
tools/testing/selftests/rcutorture/bin/torture.sh
tools/testing/selftests/rcutorture/configs/rcu/RUDE01
tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
tools/testing/selftests/rcutorture/configs/rcu/TASKS01
tools/testing/selftests/rcutorture/configs/rcu/TASKS02
tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
tools/testing/selftests/rcutorture/configs/rcu/TASKS03
tools/testing/selftests/rcutorture/configs/rcu/TRACE01
tools/testing/selftests/rcutorture/configs/rcu/TRACE02
tools/testing/selftests/rcutorture/configs/rcu/TREE04
tools/testing/selftests/rcutorture/configs/rcu/TREE07
tools/testing/selftests/rcutorture/configs/rcu/TREE09
tools/testing/selftests/rcutorture/configs/rcu/TREE10
tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
tools/testing/selftests/rcutorture/configs/rcuscale/TREE
tools/testing/selftests/rcutorture/configs/refscale/CFcommon
tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
tools/testing/selftests/rcutorture/configs/scf/PREEMPT
tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
tools/testing/selftests/seccomp/seccomp_bpf.c
tools/testing/selftests/vDSO/vdso_test_correctness.c
tools/testing/selftests/vm/Makefile
tools/testing/selftests/vm/mremap_test.c
tools/testing/selftests/vm/run_vmtests.sh
tools/testing/selftests/wireguard/netns.sh
tools/testing/selftests/wireguard/qemu/.gitignore
tools/testing/selftests/wireguard/qemu/Makefile
tools/testing/selftests/wireguard/qemu/arch/aarch64.config
tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
tools/testing/selftests/wireguard/qemu/arch/arm.config
tools/testing/selftests/wireguard/qemu/arch/armeb.config
tools/testing/selftests/wireguard/qemu/arch/i686.config
tools/testing/selftests/wireguard/qemu/arch/m68k.config
tools/testing/selftests/wireguard/qemu/arch/mips.config
tools/testing/selftests/wireguard/qemu/arch/mips64.config
tools/testing/selftests/wireguard/qemu/arch/mips64el.config
tools/testing/selftests/wireguard/qemu/arch/mipsel.config
tools/testing/selftests/wireguard/qemu/arch/powerpc.config
tools/testing/selftests/wireguard/qemu/arch/powerpc64.config [new file with mode: 0644]
tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
tools/testing/selftests/wireguard/qemu/arch/riscv32.config [new file with mode: 0644]
tools/testing/selftests/wireguard/qemu/arch/riscv64.config [new file with mode: 0644]
tools/testing/selftests/wireguard/qemu/arch/s390x.config [new file with mode: 0644]
tools/testing/selftests/wireguard/qemu/arch/x86_64.config
tools/testing/selftests/wireguard/qemu/init.c
tools/testing/selftests/x86/Makefile
virt/kvm/dirty_ring.c
virt/kvm/eventfd.c
virt/kvm/kvm_main.c
virt/kvm/kvm_mm.h

index b9d35821758643a0f53fa6561e3f5f49bccdf63d..0b04aa20c431db7b6007bb2863513c445e6a5c7f 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -45,6 +45,7 @@ Andrey Konovalov <andreyknvl@gmail.com> <andreyknvl@google.com>
 Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com>
 Andrey Ryabinin <ryabinin.a.a@gmail.com> <aryabinin@virtuozzo.com>
 Andrzej Hajda <andrzej.hajda@intel.com> <a.hajda@samsung.com>
+André Almeida <andrealmeid@igalia.com> <andrealmeid@collabora.com>
 Andy Adamson <andros@citi.umich.edu>
 Antoine Tenart <atenart@kernel.org> <antoine.tenart@bootlin.com>
 Antoine Tenart <atenart@kernel.org> <antoine.tenart@free-electrons.com>
@@ -204,6 +205,7 @@ Juha Yrjola <at solidboot.com>
 Juha Yrjola <juha.yrjola@nokia.com>
 Juha Yrjola <juha.yrjola@solidboot.com>
 Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
+Kalle Valo <kvalo@kernel.org> <kvalo@codeaurora.org>
 Kalyan Thota <quic_kalyant@quicinc.com> <kalyan_t@codeaurora.org>
 Kay Sievers <kay.sievers@vrfy.org>
 Kees Cook <keescook@chromium.org> <kees.cook@canonical.com>
@@ -249,6 +251,7 @@ Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@puri.sm>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
+Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com> <martyna.szapar-mudlaw@intel.com>
 Mathieu Othacehe <m.othacehe@gmail.com>
 Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
 Matthew Wilcox <willy@infradead.org> <matthew@wil.cx>
@@ -391,6 +394,10 @@ Uwe Kleine-König <ukleinek@strlen.de>
 Uwe Kleine-König <ukl@pengutronix.de>
 Uwe Kleine-König <Uwe.Kleine-Koenig@digi.com>
 Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
+Vasily Averin <vasily.averin@linux.dev> <vvs@virtuozzo.com>
+Vasily Averin <vasily.averin@linux.dev> <vvs@openvz.org>
+Vasily Averin <vasily.averin@linux.dev> <vvs@parallels.com>
+Vasily Averin <vasily.averin@linux.dev> <vvs@sw.ru>
 Vinod Koul <vkoul@kernel.org> <vinod.koul@intel.com>
 Vinod Koul <vkoul@kernel.org> <vinod.koul@linux.intel.com>
 Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org>
diff --git a/Documentation/ABI/testing/securityfs-secrets-coco b/Documentation/ABI/testing/securityfs-secrets-coco
new file mode 100644 (file)
index 0000000..f2b6909
--- /dev/null
+++ b/Documentation/ABI/testing/securityfs-secrets-coco
@@ -0,0 +1,51 @@
+What:          security/secrets/coco
+Date:          February 2022
+Contact:       Dov Murik <dovmurik@linux.ibm.com>
+Description:
+               Exposes confidential computing (coco) EFI secrets to
+               userspace via securityfs.
+
+               EFI can declare a memory area used by confidential computing
+               platforms (such as AMD SEV and SEV-ES) for secret injection by
+               the Guest Owner during the VM's launch.  The secrets are encrypted
+               by the Guest Owner and decrypted inside the trusted enclave,
+               and therefore are not readable by the untrusted host.
+
+               The efi_secret module exposes the secrets to userspace.  Each
+               secret appears as a file under <securityfs>/secrets/coco,
+               where the filename is the GUID of the entry in the secrets
+               table.  This module is loaded automatically by the EFI driver
+               if the EFI secret area is populated.
+
+               Two operations are supported for the files: read and unlink.
+               Reading the file returns the content of the secret entry.
+               Unlinking the file overwrites the secret data with zeroes and
+               removes the entry from the filesystem.  A secret cannot be read
+               after it has been unlinked.
+
+               For example, listing the available secrets::
+
+                 # modprobe efi_secret
+                 # ls -l /sys/kernel/security/secrets/coco
+                 -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+                 -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+                 -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+                 -r--r----- 1 root root 0 Jun 28 11:54 e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+               Reading the secret data by reading a file::
+
+                 # cat /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+                 the-content-of-the-secret-data
+
+               Wiping a secret by unlinking a file::
+
+                 # rm /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+                 # ls -l /sys/kernel/security/secrets/coco
+                 -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+                 -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+                 -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+
+               Note: The binary format of the secrets table injected by the
+               Guest Owner is described in
+               drivers/virt/coco/efi_secret/efi_secret.c under "Structure of
+               the EFI secret area".
diff --git a/Documentation/ABI/testing/sysfs-class-firmware-attributes b/Documentation/ABI/testing/sysfs-class-firmware-attributes
index 05820365f1ec689147d89ab596aeab05e8d7a901..4cdba3477176fd89d8e03e67595843d6f7091aef 100644 (file)
@@ -116,7 +116,7 @@ Description:
                                            <value>[ForceIf:<attribute>=<value>]
                                            <value>[ForceIfNot:<attribute>=<value>]
 
-                                       For example:
+                                       For example::
 
                                            LegacyOrom/dell_value_modifier has value:
                                                    Disabled[ForceIf:SecureBoot=Enabled]
@@ -212,7 +212,7 @@ Description:
                the next boot.
 
                Lenovo specific class extensions
-               ------------------------------
+               --------------------------------
 
                On Lenovo systems the following additional settings are available:
 
@@ -246,9 +246,7 @@ Description:
                                        that is being referenced (e.g. hdd0, hdd1 etc.)
                                        This attribute defaults to device 0.
 
-               certificate:
-               signature:
-               save_signature:
+               certificate, signature, save_signature:
                                        These attributes are used for certificate based authentication. This is
                                        used in conjunction with a signing server as an alternative to password
                                        based authentication.
@@ -257,22 +255,27 @@ Description:
                                        The attributes can be displayed to check the stored value.
 
                                        Some usage examples:
-                                       Installing a certificate to enable feature:
-                                               echo <supervisor password > authentication/Admin/current_password
-                                               echo <signed certificate> > authentication/Admin/certificate
 
-                                       Updating the installed certificate:
-                                               echo <signature> > authentication/Admin/signature
-                                               echo <signed certificate> > authentication/Admin/certificate
+                                               Installing a certificate to enable feature::
+
+                                                       echo "supervisor password" > authentication/Admin/current_password
+                                                       echo "signed certificate" > authentication/Admin/certificate
+
+                                               Updating the installed certificate::
+
+                                                       echo "signature" > authentication/Admin/signature
+                                                       echo "signed certificate" > authentication/Admin/certificate
 
-                                       Removing the installed certificate:
-                                               echo <signature> > authentication/Admin/signature
-                                               echo '' > authentication/Admin/certificate
+                                               Removing the installed certificate::
 
-                                       Changing a BIOS setting:
-                                               echo <signature> > authentication/Admin/signature
-                                               echo <save signature> > authentication/Admin/save_signature
-                                               echo Enable > attribute/PasswordBeep/current_value
+                                                       echo "signature" > authentication/Admin/signature
+                                                       echo "" > authentication/Admin/certificate
+
+                                               Changing a BIOS setting::
+
+                                                       echo "signature" > authentication/Admin/signature
+                                                       echo "save signature" > authentication/Admin/save_signature
+                                                       echo Enable > attribute/PasswordBeep/current_value
 
                                        You cannot enable certificate authentication if a supervisor password
                                        has not been set.
@@ -288,9 +291,10 @@ Description:
                certificate_to_password:
                                        Write only attribute used to switch from certificate based authentication
                                        back to password based.
-                                       Usage:
-                                               echo <signature> > authentication/Admin/signature
-                                               echo <password> > authentication/Admin/certificate_to_password
+                                       Usage::
+
+                                               echo "signature" > authentication/Admin/signature
+                                               echo "password" > authentication/Admin/certificate_to_password
 
 
 What:          /sys/class/firmware-attributes/*/attributes/pending_reboot
@@ -345,7 +349,7 @@ Description:
 
                    # echo "factory" > /sys/class/firmware-attributes/*/device/attributes/reset_bios
                    # cat /sys/class/firmware-attributes/*/device/attributes/reset_bios
-                   builtinsafe lastknowngood [factory] custom
+                   builtinsafe lastknowngood [factory] custom
 
                Note that any change to this attribute requires a reboot
                for it to take effect.
diff --git a/Documentation/ABI/testing/sysfs-driver-intel_sdsi b/Documentation/ABI/testing/sysfs-driver-intel_sdsi
index ab122125ff9aed4f46089fd329deed3193b0ac4f..96b92c105ec49d9b47edc351195d2678c90324e1 100644 (file)
@@ -13,17 +13,19 @@ Description:
                Should the operation fail, one of the following error codes
                may be returned:
 
+               ==========      =====
                Error Code      Cause
-               ----------      -----
-               EIO             General mailbox failure. Log may indicate cause.
-               EBUSY           Mailbox is owned by another agent.
-               EPERM           SDSI capability is not enabled in hardware.
-               EPROTO          Failure in mailbox protocol detected by driver.
+               ==========      =====
+               EIO             General mailbox failure. Log may indicate cause.
+               EBUSY           Mailbox is owned by another agent.
+               EPERM           SDSI capability is not enabled in hardware.
+               EPROTO          Failure in mailbox protocol detected by driver.
                                See log for details.
-               EOVERFLOW       For provision commands, the size of the data
+               EOVERFLOW       For provision commands, the size of the data
                                exceeds what may be written.
-               ESPIPE          Seeking is not allowed.
-               ETIMEDOUT       Failure to complete mailbox transaction in time.
+               ESPIPE          Seeking is not allowed.
+               ETIMEDOUT       Failure to complete mailbox transaction in time.
+               ==========      =====
 
 What:          /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/guid
 Date:          Feb 2022
diff --git a/Documentation/ABI/testing/sysfs-fs-erofs b/Documentation/ABI/testing/sysfs-fs-erofs
index 05482374a741bd100c62a062a3686e8811199526..bb4681a01811603caad50466790d1bd4794c98f0 100644 (file)
@@ -9,8 +9,9 @@ Description:    Shows all enabled kernel features.
 What:          /sys/fs/erofs/<disk>/sync_decompress
 Date:          November 2021
 Contact:       "Huang Jianan" <huangjianan@oppo.com>
-Description:   Control strategy of sync decompression
+Description:   Control strategy of sync decompression:
+
                - 0 (default, auto): enable for readpage, and enable for
-                                    readahead on atomic contexts only,
+                 readahead on atomic contexts only.
                - 1 (force on): enable for readpage and readahead.
                - 2 (force off): disable for all situations.
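
For example, synchronous decompression can be forced on at runtime through this attribute (``sda1`` below is only a placeholder for an actual EROFS-backed device)::

	# echo 1 > /sys/fs/erofs/sda1/sync_decompress
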
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
index f4efd6897b0914520a615f4f2779c850c35cc0da..b34990c7c3778d7c5658615e7a18d6ec88682e4e 100644 (file)
@@ -973,7 +973,7 @@ The ``->dynticks`` field counts the corresponding CPU's transitions to
 and from either dyntick-idle or user mode, so that this counter has an
 even value when the CPU is in dyntick-idle mode or user mode and an odd
 value otherwise. The transitions to/from user mode need to be counted
-for user mode adaptive-ticks support (see timers/NO_HZ.txt).
+for user mode adaptive-ticks support (see Documentation/timers/no_hz.rst).
 
 The ``->rcu_need_heavy_qs`` field is used to record the fact that the
 RCU core code would really like to see a quiescent state from the
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
index 6f89cf1e567d099aa3c4f963a7b8c628fe19a062..c9c957c85bac1a5bdd4a69af5136ea4600a19250 100644 (file)
@@ -406,7 +406,7 @@ In earlier implementations, the task requesting the expedited grace
 period also drove it to completion. This straightforward approach had
 the disadvantage of needing to account for POSIX signals sent to user
 tasks, so more recent implementations use the Linux kernel's
-`workqueues <https://www.kernel.org/doc/Documentation/core-api/workqueue.rst>`__.
+workqueues (see Documentation/core-api/workqueue.rst).
 
 The requesting task still does counter snapshotting and funnel-lock
 processing, but the task reaching the top of the funnel lock does a
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index 45278e2974c04c13df06c63fe3f6deaaf7ac1ccf..04ed8bf27a0eae4086ad7219046074c8e6455e3b 100644 (file)
@@ -370,8 +370,8 @@ pointer fetched by rcu_dereference() may not be used outside of the
 outermost RCU read-side critical section containing that
 rcu_dereference(), unless protection of the corresponding data
 element has been passed from RCU to some other synchronization
-mechanism, most commonly locking or `reference
-counting <https://www.kernel.org/doc/Documentation/RCU/rcuref.txt>`__.
+mechanism, most commonly locking or reference counting
+(see ../../rcuref.rst).
 
 .. |high-quality implementation of C11 memory_order_consume [PDF]| replace:: high-quality implementation of C11 ``memory_order_consume`` [PDF]
 .. _high-quality implementation of C11 memory_order_consume [PDF]: http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf
@@ -2654,6 +2654,38 @@ synchronize_rcu(), and rcu_barrier(), respectively. In
 three APIs are therefore implemented by separate functions that check
 for voluntary context switches.
 
+Tasks Rude RCU
+~~~~~~~~~~~~~~
+
+Some forms of tracing need to wait for all preemption-disabled regions
+of code running on any online CPU, including those executed when RCU is
+not watching.  This means that synchronize_rcu() is insufficient, and
+Tasks Rude RCU must be used instead.  This flavor of RCU does its work by
+forcing a workqueue to be scheduled on each online CPU, hence the "Rude"
+moniker.  And this operation is considered to be quite rude by real-time
+workloads that don't want their ``nohz_full`` CPUs receiving IPIs and
+by battery-powered systems that don't want their idle CPUs to be awakened.
+
+The tasks-rude-RCU API is also reader-marking-free and thus quite compact,
+consisting of call_rcu_tasks_rude(), synchronize_rcu_tasks_rude(),
+and rcu_barrier_tasks_rude().
+
+Tasks Trace RCU
+~~~~~~~~~~~~~~~
+
+Some forms of tracing need to sleep in readers, but cannot tolerate
+SRCU's read-side overhead, which includes a full memory barrier in both
+srcu_read_lock() and srcu_read_unlock().  This need is handled by a
+Tasks Trace RCU that uses scheduler locking and IPIs to synchronize with
+readers.  Real-time systems that cannot tolerate IPIs may build their
+kernels with ``CONFIG_TASKS_TRACE_RCU_READ_MB=y``, which avoids the IPIs at
+the expense of adding full memory barriers to the read-side primitives.
+
+The tasks-trace-RCU API is also reasonably compact,
+consisting of rcu_read_lock_trace(), rcu_read_unlock_trace(),
+rcu_read_lock_trace_held(), call_rcu_tasks_trace(),
+synchronize_rcu_tasks_trace(), and rcu_barrier_tasks_trace().
+
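A minimal pairing of these primitives might look as follows; this is an editorial sketch only, in which the pointer ``gp``, its update-side lock, and the reader body are hypothetical rather than part of this patch::

	struct foo *p;

	/* Reader: may sleep, yet avoids SRCU's read-side memory barriers. */
	rcu_read_lock_trace();
	p = READ_ONCE(gp);
	if (p)
		do_something_with(p);	/* hypothetical reader-side work */
	rcu_read_unlock_trace();

	/* Updater, holding its own update-side lock. */
	old_p = gp;
	rcu_assign_pointer(gp, new_p);
	synchronize_rcu_tasks_trace();	/* wait for pre-existing readers */
	kfree(old_p);

The rude flavor is invoked the same way, with synchronize_rcu_tasks_rude() instead waiting for all preemption-disabled regions of code on online CPUs.
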
 Possible Future Changes
 -----------------------
 
diff --git a/Documentation/RCU/arrayRCU.rst b/Documentation/RCU/arrayRCU.rst
index 4051ea3871eff0075843d9eee1725b178e4090fd..a5f2ff8fc54c2ad70c603d250d7c23256331e920 100644 (file)
@@ -33,8 +33,8 @@ Situation 1: Hash Tables
 
 Hash tables are often implemented as an array, where each array entry
 has a linked-list hash chain.  Each hash chain can be protected by RCU
-as described in the listRCU.txt document.  This approach also applies
-to other array-of-list situations, such as radix trees.
+as described in listRCU.rst.  This approach also applies to other
+array-of-list situations, such as radix trees.
 
 .. _static_arrays:
 
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index f4545b7c9a63d29c7b2353802e11d2e58659343f..42cc5d891bd26e4d3841c04e1bad52d01fcb62b7 100644 (file)
@@ -140,8 +140,7 @@ over a rather long period of time, but improvements are always welcome!
                prevents destructive compiler optimizations.  However,
                with a bit of devious creativity, it is possible to
                mishandle the return value from rcu_dereference().
-               Please see rcu_dereference.txt in this directory for
-               more information.
+               Please see rcu_dereference.rst for more information.
 
                The rcu_dereference() primitive is used by the
                various "_rcu()" list-traversal primitives, such
@@ -151,7 +150,7 @@ over a rather long period of time, but improvements are always welcome!
                primitives.  This is particularly useful in code that
                is common to readers and updaters.  However, lockdep
                will complain if you access rcu_dereference() outside
-               of an RCU read-side critical section.  See lockdep.txt
+               of an RCU read-side critical section.  See lockdep.rst
                to learn what to do about this.
 
                Of course, neither rcu_dereference() nor the "_rcu()"
@@ -323,7 +322,7 @@ over a rather long period of time, but improvements are always welcome!
        primitives when the update-side lock is held is that doing so
        can be quite helpful in reducing code bloat when common code is
        shared between readers and updaters.  Additional primitives
-       are provided for this case, as discussed in lockdep.txt.
+       are provided for this case, as discussed in lockdep.rst.
 
        One exception to this rule is when data is only ever added to
        the linked data structure, and is never removed during any
@@ -480,4 +479,4 @@ over a rather long period of time, but improvements are always welcome!
        both rcu_barrier() and synchronize_rcu(), if necessary, using
        something like workqueues to execute them concurrently.
 
-       See rcubarrier.txt for more information.
+       See rcubarrier.rst for more information.
diff --git a/Documentation/RCU/rcu.rst b/Documentation/RCU/rcu.rst
index 0e03c6ef3147a3ea06cef16fd3fd931ce7c4081a..3cfe01ba9a4944b20b89399ffeee9cdfd7dfb4d9 100644 (file)
@@ -10,9 +10,8 @@ A "grace period" must elapse between the two parts, and this grace period
 must be long enough that any readers accessing the item being deleted have
 since dropped their references.  For example, an RCU-protected deletion
 from a linked list would first remove the item from the list, wait for
-a grace period to elapse, then free the element.  See the
-:ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` for more information on
-using RCU with linked lists.
+a grace period to elapse, then free the element.  See listRCU.rst for more
+information on using RCU with linked lists.
 
 Frequently Asked Questions
 --------------------------
@@ -50,7 +49,7 @@ Frequently Asked Questions
 - If I am running on a uniprocessor kernel, which can only do one
   thing at a time, why should I wait for a grace period?
 
-  See :ref:`Documentation/RCU/UP.rst <up_doc>` for more information.
+  See UP.rst for more information.
 
 - How can I see where RCU is currently used in the Linux kernel?
 
@@ -64,13 +63,13 @@ Frequently Asked Questions
 
 - What guidelines should I follow when writing code that uses RCU?
 
-  See the checklist.txt file in this directory.
+  See checklist.rst.
 
 - Why the name "RCU"?
 
   "RCU" stands for "read-copy update".
-  :ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` has more information on where
-  this name came from, search for "read-copy update" to find it.
+  listRCU.rst has more information on where this name came from, search
+  for "read-copy update" to find it.
 
 - I hear that RCU is patented?  What is with that?
 
diff --git a/Documentation/RCU/rculist_nulls.rst b/Documentation/RCU/rculist_nulls.rst
index a9fc774bc400080ba78bd1fd334f8e6eff12b3f1..ca4692775ad41a68fb35175cc510a18ffcaaf492 100644 (file)
@@ -8,7 +8,7 @@ This section describes how to use hlist_nulls to
 protect read-mostly linked lists and
 objects using SLAB_TYPESAFE_BY_RCU allocations.
 
-Please read the basics in Documentation/RCU/listRCU.rst
+Please read the basics in listRCU.rst.
 
 Using 'nulls'
 =============
diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst
index 78404625bad26bbc7bfadf6374be7136bb5bbc25..794837eb519b94949dff617259a72ab2c1f2d2b6 100644 (file)
@@ -162,6 +162,26 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
        Stall-warning messages may be enabled and disabled completely via
        /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
 
+CONFIG_RCU_EXP_CPU_STALL_TIMEOUT
+--------------------------------
+
+       Same as the CONFIG_RCU_CPU_STALL_TIMEOUT parameter but only for
+       the expedited grace period. This parameter defines the period
+       of time that RCU will wait from the beginning of an expedited
+       grace period until it issues an RCU CPU stall warning. This time
+       period is normally 20 milliseconds on Android devices.  A zero
+       value causes the CONFIG_RCU_CPU_STALL_TIMEOUT value to be used,
+       after conversion to milliseconds.
+
+       This configuration parameter may be changed at runtime via the
+       /sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout, however
+       this parameter is checked only at the beginning of a cycle. If you
+       are in a current stall cycle, setting it to a new value will change
+       the timeout for the -next- stall.
+
+       Stall-warning messages may be enabled and disabled completely via
+       /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
+
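As a quick illustration, the runtime knob named above can be read and written like any other module parameter (the value 500 is an arbitrary example)::

	# cat /sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout
	# echo 500 > /sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout
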
 RCU_STALL_DELAY_DELTA
 ---------------------
 
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
index c34d2212eaca23fca6bd8025be82a6534682d470..77ea260efd1207e797d84bf895c6c7d26063467a 100644 (file)
@@ -224,7 +224,7 @@ synchronize_rcu()
       be delayed.  This property results in system resilience in the
       face of denial-of-service attacks.  Code using call_rcu() should limit
        update rate in order to gain this same sort of resilience.  See
-       checklist.txt for some approaches to limiting the update rate.
+       checklist.rst for some approaches to limiting the update rate.
 
 rcu_assign_pointer()
 ^^^^^^^^^^^^^^^^^^^^
@@ -318,7 +318,7 @@ rcu_dereference()
        must prohibit.  The rcu_dereference_protected() variant takes
        a lockdep expression to indicate which locks must be acquired
        by the caller. If the indicated protection is not provided,
-       a lockdep splat is emitted.  See Documentation/RCU/Design/Requirements/Requirements.rst
+       a lockdep splat is emitted.  See Design/Requirements/Requirements.rst
        and the API's code comments for more details and example usage.
 
 ..     [2] If the list_for_each_entry_rcu() instance might be used by
@@ -399,8 +399,7 @@ for specialized uses, but are relatively uncommon.
 
 This section shows a simple use of the core RCU API to protect a
 global pointer to a dynamically allocated structure.  More-typical
-uses of RCU may be found in :ref:`listRCU.rst <list_rcu_doc>`,
-:ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst <NMI_rcu_doc>`.
+uses of RCU may be found in listRCU.rst, arrayRCU.rst, and NMI-RCU.rst.
 ::
 
        struct foo {
@@ -482,10 +481,9 @@ So, to sum up:
        RCU read-side critical sections that might be referencing that
        data item.
 
-See checklist.txt for additional rules to follow when using RCU.
-And again, more-typical uses of RCU may be found in :ref:`listRCU.rst
-<list_rcu_doc>`, :ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst
-<NMI_rcu_doc>`.
+See checklist.rst for additional rules to follow when using RCU.
+And again, more-typical uses of RCU may be found in listRCU.rst,
+arrayRCU.rst, and NMI-RCU.rst.
 
 .. _4_whatisRCU:
 
@@ -579,7 +577,7 @@ to avoid having to write your own callback::
 
        kfree_rcu(old_fp, rcu);
 
-Again, see checklist.txt for additional rules governing the use of RCU.
+Again, see checklist.rst for additional rules governing the use of RCU.
 
 .. _5_whatisRCU:
 
@@ -663,7 +661,7 @@ been able to write-acquire the lock otherwise.  The smp_mb__after_spinlock()
 promotes synchronize_rcu() to a full memory barrier in compliance with
 the "Memory-Barrier Guarantees" listed in:
 
-       Documentation/RCU/Design/Requirements/Requirements.rst
+       Design/Requirements/Requirements.rst
 
 It is possible to nest rcu_read_lock(), since reader-writer locks may
 be recursively acquired.  Note also that rcu_read_lock() is immune
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 3f1cc5e317ed4a5ad001082c9c589b6008f68db9..c63384ede9588b0f9f572ad02def0a319057e037 100644 (file)
                        Defaults to zero when built as a module and to
                        10 seconds when built into the kernel.
 
-       clearcpuid=BITNUM[,BITNUM...] [X86]
+       clearcpuid=X[,X...] [X86]
                        Disable CPUID feature X for the kernel. See
                        arch/x86/include/asm/cpufeatures.h for the valid bit
-                       numbers. Note the Linux specific bits are not necessarily
-                       stable over kernel options, but the vendor specific
+                       numbers X. Note the Linux-specific bits are not necessarily
+                       stable over kernel options, but the vendor-specific
                        ones should be.
+                       X can also be a string as it appears in the "flags:"
+                       line of /proc/cpuinfo; this form does not have the
+                       above instability issue. However, not all features
+                       have names in /proc/cpuinfo.
+                       Note that using this option will taint your kernel.
                        Also note that user programs calling CPUID directly
                        or using the feature without checking anything
                        will still see it. This just prevents it from
                        when set.
                        Format: <int>
 
-       libata.force=   [LIBATA] Force configurations.  The format is comma-
-                       separated list of "[ID:]VAL" where ID is
-                       PORT[.DEVICE].  PORT and DEVICE are decimal numbers
-                       matching port, link or device.  Basically, it matches
-                       the ATA ID string printed on console by libata.  If
-                       the whole ID part is omitted, the last PORT and DEVICE
-                       values are used.  If ID hasn't been specified yet, the
-                       configuration applies to all ports, links and devices.
+       libata.force=   [LIBATA] Force configurations.  The format is comma-
+                       separated list of "[ID:]VAL" where ID is PORT[.DEVICE].
+                       PORT and DEVICE are decimal numbers matching port, link
+                       or device.  Basically, it matches the ATA ID string
+                       printed on console by libata.  If the whole ID part is
+                       omitted, the last PORT and DEVICE values are used.  If
+                       ID hasn't been specified yet, the configuration applies
+                       to all ports, links and devices.
 
                        If only DEVICE is omitted, the parameter applies to
                        the port and all links and devices behind it.  DEVICE
                        host link and device attached to it.
 
                        The VAL specifies the configuration to force.  As long
-                       as there's no ambiguity shortcut notation is allowed.
+                       as there is no ambiguity, shortcut notation is allowed.
                        For example, both 1.5 and 1.5G would work for 1.5Gbps.
                        The following configurations can be forced.
 
                          udma[/][16,25,33,44,66,100,133] notation is also
                          allowed.
 
+                       * nohrst, nosrst, norst: suppress hard, soft and both
+                         resets.
+
+                       * rstonce: only attempt one reset during hot-unplug
+                         link recovery.
+
+                       * [no]dbdelay: Enable or disable the extra 200ms delay
+                         before debouncing a link PHY and device presence
+                         detection.
+
                        * [no]ncq: Turn on or off NCQ.
 
-                       * [no]ncqtrim: Turn off queued DSM TRIM.
+                       * [no]ncqtrim: Enable or disable queued DSM TRIM.
+
+                       * [no]ncqati: Enable or disable NCQ trim on ATI chipset.
+
+                       * [no]trim: Enable or disable (unqueued) TRIM.
+
+                       * trim_zero: Indicate that TRIM command zeroes data.
+
+                       * max_trim_128m: Set 128M maximum trim size limit.
+
+                       * [no]dma: Turn on or off DMA transfers.
+
+                       * atapi_dmadir: Enable ATAPI DMADIR bridge support.
+
+                       * atapi_mod16_dma: Enable the use of ATAPI DMA for
+                         commands that are not a multiple of 16 bytes.
+
+                       * [no]dmalog: Enable or disable the use of the
+                         READ LOG DMA EXT command to access logs.
+
+                       * [no]iddevlog: Enable or disable access to the
+                         identify device data log.
 
-                       * nohrst, nosrst, norst: suppress hard, soft
-                         and both resets.
+                       * [no]logdir: Enable or disable access to the general
+                         purpose log directory.
 
-                       * rstonce: only attempt one reset during
-                         hot-unplug link recovery
+                       * max_sec_128: Set transfer size limit to 128 sectors.
 
-                       * dump_id: dump IDENTIFY data.
+                       * max_sec_1024: Set or clear transfer size limit to
+                         1024 sectors.
 
-                       * atapi_dmadir: Enable ATAPI DMADIR bridge support
+                       * max_sec_lba48: Set or clear transfer size limit to
+                         65535 sectors.
+
+                       * [no]lpm: Enable or disable link power management.
+
+                       * [no]setxfer: Indicate if transfer speed mode setting
+                         should be skipped.
+
+                       * dump_id: Dump IDENTIFY data.
 
                        * disable: Disable this device.
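
To illustrate the syntax with several settings combined (the port and device IDs here are hypothetical)::

	libata.force=1:noncq,2.00:disable,3:1.5G

This would turn off NCQ on port 1, disable device 0 behind port 2, and limit port 3 to 1.5Gbps.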
 
                                               mds=off [X86]
                                               tsx_async_abort=off [X86]
                                               kvm.nx_huge_pages=off [X86]
+                                              srbds=off [X86,INTEL]
                                               no_entry_flush [PPC]
                                               no_uaccess_flush [PPC]
 
 
        nocache         [ARM]
 
-       noclflush       [BUGS=X86] Don't use the CLFLUSH instruction
-
        delayacct       [KNL] Enable per-task delay accounting
 
        nodsp           [SH] Disable hardware DSP at boot time.
 
        noexec          [IA-64]
 
-       noexec          [X86]
-                       On X86-32 available only on PAE configured kernels.
-                       noexec=on: enable non-executable mappings (default)
-                       noexec=off: disable non-executable mappings
-
-       nosmap          [X86,PPC]
+       nosmap          [PPC]
                        Disable SMAP (Supervisor Mode Access Prevention)
                        even if it is supported by processor.
 
-       nosmep          [X86,PPC64s]
+       nosmep          [PPC64s]
                        Disable SMEP (Supervisor Mode Execution Prevention)
                        even if it is supported by processor.
 
 
        nosbagart       [IA-64]
 
-       nosep           [BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support.
-
        nosgx           [X86-64,SGX] Disables Intel SGX kernel support.
 
        nosmp           [SMP] Tells an SMP kernel to act as a UP kernel,
 
        rcupdate.rcu_cpu_stall_timeout= [KNL]
                        Set timeout for RCU CPU stall warning messages.
+                       The value is in seconds and the maximum allowed
+                       value is 300 seconds.
+
+       rcupdate.rcu_exp_cpu_stall_timeout= [KNL]
+                       Set timeout for expedited RCU CPU stall warning
+                       messages.  The value is in milliseconds
+                       and the maximum allowed value is 21000
+                       milliseconds. Please note that this value is
+                       adjusted to an arch timer tick resolution.
+                       Setting this to zero causes the value from
+                       rcupdate.rcu_cpu_stall_timeout to be used (after
+                       conversion from seconds to milliseconds).
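
For example, both timeouts could be set on the kernel command line (arbitrary illustrative values; note the differing units)::

	rcupdate.rcu_cpu_stall_timeout=60 rcupdate.rcu_exp_cpu_stall_timeout=500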
 
        rcupdate.rcu_expedited= [KNL]
                        Use expedited grace-period primitives, for
                        number avoids disturbing real-time workloads,
                        but lengthens grace periods.
 
+       rcupdate.rcu_task_stall_info= [KNL]
+                       Set initial timeout in jiffies for RCU task stall
+                       informational messages, which give some indication
+                       of the problem for those not patient enough to
+                       wait for ten minutes.  Informational messages are
+                       only printed prior to the stall-warning message
+                       for a given grace period. Disable with a value
+                       less than or equal to zero.  Defaults to ten
+                       seconds.  A change in value does not take effect
+                       until the beginning of the next grace period.
+
+       rcupdate.rcu_task_stall_info_mult= [KNL]
+                       Multiplier for time interval between successive
+                       RCU task stall informational messages for a given
+                       RCU tasks grace period.  This value is clamped
+                       to one through ten, inclusive.  It defaults to
+                       the value three, so that the first informational
+                       message is printed 10 seconds into the grace
+                       period, the second at 40 seconds, the third at
+                       160 seconds, and then the stall warning at 600
+                       seconds would prevent a fourth at 640 seconds.
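The schedule above follows a simple recurrence: each informational-message
deadline is the previous deadline plus mult times that deadline. A minimal
userspace C sketch (not kernel code; derived only from the numbers quoted
above) reproduces it:

	#include <stdio.h>

	int main(void)
	{
		/* Defaults: rcu_task_stall_info=10s, rcu_task_stall_info_mult=3. */
		int t = 10, mult = 3;

		for (int i = 1; i <= 4; i++) {
			printf("informational message %d due at %d seconds\n", i, t);
			t += t * mult;	/* 10, 40, 160, 640; the 600s stall
					 * warning fires before the 640s one. */
		}
		return 0;
	}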
+
        rcupdate.rcu_task_stall_timeout= [KNL]
-                       Set timeout in jiffies for RCU task stall warning
-                       messages.  Disable with a value less than or equal
-                       to zero.
+                       Set timeout in jiffies for RCU task stall
+                       warning messages.  Disable with a value less
+                       than or equal to zero.  Defaults to ten minutes.
+                       A change in value does not take effect until
+                       the beginning of the next grace period.
 
        rcupdate.rcu_self_test= [KNL]
                        Run the RCU early boot self tests
 
        serialnumber    [BUGS=X86-32]
 
+       sev=option[,option...] [X86-64] See Documentation/x86/x86_64/boot-options.rst
+
        shapers=        [NET]
                        Maximal number of shapers.
 
        smart2=         [HW]
                        Format: <io1>[,<io2>[,...,<io8>]]
 
+       smp.csd_lock_timeout= [KNL]
+                       Specify the period of time in milliseconds
+                       that smp_call_function() and friends will wait
+                       for a CPU to release the CSD lock.  This is
+                       useful when diagnosing bugs involving CPUs
+                       disabling interrupts for extended periods
+                       of time.  Defaults to 5,000 milliseconds, and
+                       setting a value of zero disables this feature.
+                       This feature may be more efficiently disabled
+                       using the csdlock_debug kernel parameter.
+
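For example (value purely illustrative), the following would extend the wait
to ten seconds before a CSD-lock complaint is printed:

	smp.csd_lock_timeout=10000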
        smsc-ircc2.nopnp        [HW] Don't use PNP to discover SMC devices
        smsc-ircc2.ircc_cfg=    [HW] Device configuration I/O port
        smsc-ircc2.ircc_sir=    [HW] SIR base I/O port
                        off:    Disable mitigation and remove
                                performance impact to RDRAND and RDSEED
 
+       srcutree.big_cpu_lim [KNL]
+                       Specifies the number of CPUs constituting a
+                       large system, such that srcu_struct structures
+                       should immediately allocate an srcu_node array.
+                       This kernel-boot parameter defaults to 128,
+                       but takes effect only when the low-order four
+                       bits of srcutree.convert_to_big are equal to 3
+                       (decide at boot).
+
+       srcutree.convert_to_big [KNL]
+                       Specifies under what conditions an SRCU tree
+                       srcu_struct structure will be converted to big
+                       form, that is, with an srcu_node tree:
+
+                                  0:  Never.
+                                  1:  At init_srcu_struct() time.
+                                  2:  When rcutorture decides to.
+                                  3:  Decide at boot time (default).
+                               0x1X:  Above plus if high contention.
+
+                       Either way, the srcu_node tree will be sized based
+                       on the actual runtime number of CPUs (nr_cpu_ids)
+                       instead of the compile-time CONFIG_NR_CPUS.
+
        srcutree.counter_wrap_check [KNL]
                        Specifies how frequently to check for
                        grace-period sequence counter wrap for the
                        expediting.  Set to zero to disable automatic
                        expediting.
 
+       srcutree.small_contention_lim [KNL]
+                       Specifies the number of update-side contention
+                       events per jiffy that will be tolerated before
+                       initiating a conversion of an srcu_struct
+                       structure to big form.  Note that the value of
+                       srcutree.convert_to_big must have the 0x10 bit
+                       set for contention-based conversions to occur.
+
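Taken together, a boot line such as the following (numbers purely
illustrative) keeps the default boot-time decision while also enabling
contention-based conversion once update-side contention exceeds 100 events
per jiffy:

	srcutree.convert_to_big=0x13 srcutree.small_contention_lim=100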
        ssbd=           [ARM64,HW]
                        Speculative Store Bypass Disable control
 
index dd27f78d7608f99fa8e1b8de30127a12ac61a0e0..dbae47bba25ec7f5b84b11bc62f7ff6ef2122d75 100644 (file)
@@ -228,10 +228,10 @@ Core dump support
 -----------------
 
 The allocation tags for user memory mapped with ``PROT_MTE`` are dumped
-in the core file as additional ``PT_ARM_MEMTAG_MTE`` segments. The
+in the core file as additional ``PT_AARCH64_MEMTAG_MTE`` segments. The
 program header for such a segment is defined as:
 
-:``p_type``: ``PT_ARM_MEMTAG_MTE``
+:``p_type``: ``PT_AARCH64_MEMTAG_MTE``
 :``p_flags``: 0
 :``p_offset``: segment file offset
 :``p_vaddr``: segment virtual address, same as the corresponding
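A core-file consumer might locate these tag segments along the following
lines (a minimal userspace sketch; it assumes the conventional ELF encoding
of PT_AARCH64_MEMTAG_MTE as PT_LOPROC + 0x2 and a core image already mapped
into memory):

	#include <elf.h>
	#include <stdio.h>

	#ifndef PT_AARCH64_MEMTAG_MTE
	#define PT_AARCH64_MEMTAG_MTE 0x70000002	/* PT_LOPROC + 0x2 (assumed) */
	#endif

	/* Walk the program headers of a mapped AArch64 core file and report
	 * every MTE tag segment found. */
	static void list_mte_segments(const Elf64_Ehdr *ehdr)
	{
		const Elf64_Phdr *phdr =
			(const Elf64_Phdr *)((const char *)ehdr + ehdr->e_phoff);
		int i;

		for (i = 0; i < ehdr->e_phnum; i++) {
			if (phdr[i].p_type != PT_AARCH64_MEMTAG_MTE)
				continue;
			printf("tags for va %#llx: %llu bytes at offset %#llx\n",
			       (unsigned long long)phdr[i].p_vaddr,
			       (unsigned long long)phdr[i].p_filesz,
			       (unsigned long long)phdr[i].p_offset);
		}
	}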
index 466cb9e89047fb51e5d2308d44cf4b16fd1e1113..d27db84d585ed223e7a4cbe220ebcbd1ff6ddaa6 100644 (file)
@@ -189,6 +189,9 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Qualcomm Tech. | Kryo4xx Silver  | N/A             | ARM64_ERRATUM_1024718       |
 +----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo4xx Gold    | N/A             | ARM64_ERRATUM_1286807       |
++----------------+-----------------+-----------------+-----------------------------+
+
 +----------------+-----------------+-----------------+-----------------------------+
 | Fujitsu        | A64FX           | E#010001        | FUJITSU_ERRATUM_010001      |
 +----------------+-----------------+-----------------+-----------------------------+
index 52ea7b6b2fe8eb668c6c7fbd4389124de19af0a4..7964fe134277b8b8337d620e524de7d24a15354e 100644 (file)
@@ -218,7 +218,6 @@ current *struct* is::
                int (*tray_move)(struct cdrom_device_info *, int);
                int (*lock_door)(struct cdrom_device_info *, int);
                int (*select_speed)(struct cdrom_device_info *, int);
-               int (*select_disc)(struct cdrom_device_info *, int);
                int (*get_last_session) (struct cdrom_device_info *,
                                         struct cdrom_multisession *);
                int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
@@ -419,15 +418,6 @@ this `auto-selection` capability, the decision should be made on the
 current disc loaded and the return value should be positive. A negative
 return value indicates an error.
 
-::
-
-       int select_disc(struct cdrom_device_info *cdi, int number)
-
-If the drive can store multiple discs (a juke-box) this function
-will perform disc selection. It should return the number of the
-selected disc on success, a negative value on error. Currently, only
-the ide-cd driver supports this functionality.
-
 ::
 
        int get_last_session(struct cdrom_device_info *cdi,
index 729e24864fe738a3f5bd79a62d56d9e7c4542c85..22ec68f244210681d89ab36445ecf3f67f7a597d 100644 (file)
@@ -132,6 +132,7 @@ Some additional variants exist for more specialized cases:
 .. c:function:: u64 ktime_get_mono_fast_ns( void )
                u64 ktime_get_raw_fast_ns( void )
                u64 ktime_get_boot_fast_ns( void )
+               u64 ktime_get_tai_fast_ns( void )
                u64 ktime_get_real_fast_ns( void )
 
        These variants are safe to call from any context, including from
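For instance, code running in a restricted context (including NMI) could
sample several clock bases back to back; a sketch using only the functions
listed above:

	#include <linux/timekeeping.h>

	/* NMI-safe sampling of the monotonic, boot-time and TAI clock bases. */
	static void sample_fast_clocks(u64 *mono, u64 *boot, u64 *tai)
	{
		*mono = ktime_get_mono_fast_ns();
		*boot = ktime_get_boot_fast_ns();
		*tai  = ktime_get_tai_fast_ns();
	}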
index ad168d16968f5244c23798fc8467fe898cef8cee..867a4bba6bf69c7ccd4999129e595d3cb51f10a0 100644 (file)
@@ -41,13 +41,18 @@ or ``VFAT_FS``. To run ``FAT_KUNIT_TEST``, the ``.kunitconfig`` has:
        CONFIG_MSDOS_FS=y
        CONFIG_FAT_KUNIT_TEST=y
 
-1. A good starting point for the ``.kunitconfig``, is the KUnit default
-   config. Run the command:
+1. A good starting point for the ``.kunitconfig`` is the KUnit default config.
+   You can generate it by running:
 
 .. code-block:: bash
 
        cd $PATH_TO_LINUX_REPO
-       cp tools/testing/kunit/configs/default.config .kunitconfig
+       tools/testing/kunit/kunit.py config
+       cat .kunit/.kunitconfig
+
+.. note ::
+   ``.kunitconfig`` lives in the ``--build_dir`` used by kunit.py, which is
+   ``.kunit`` by default.
 
 .. note ::
    You may want to remove CONFIG_KUNIT_ALL_TESTS from the ``.kunitconfig`` as
index 0afec83cc72327c799106c444f03b72a358fd8b1..564ae6aaccf762a6d6f9bab099eed7347769583a 100644 (file)
@@ -13,7 +13,6 @@ maintainers:
 properties:
   compatible:
     enum:
-      - nvidia,tegra20-pmc
       - nvidia,tegra20-pmc
       - nvidia,tegra30-pmc
       - nvidia,tegra114-pmc
index c060c7914cae6573c68af4f4f0cd9eaf17c0ad7b..c4e4a9eab658056d96b9708a4f8b9146adc360fc 100644 (file)
@@ -26,6 +26,7 @@ properties:
       - items:
           - enum:
               - renesas,sata-r8a774b1     # RZ/G2N
+              - renesas,sata-r8a774e1     # RZ/G2H
               - renesas,sata-r8a7795      # R-Car H3
               - renesas,sata-r8a77965     # R-Car M3-N
           - const: renesas,rcar-gen3-sata # generic R-Car Gen3 or RZ/G2
index bd40213302dadeffde7648e9b43c79681b6e3fbc..fced4082b047ba5b54fdf459dbac4cfe0940b731 100644 (file)
@@ -34,7 +34,6 @@ properties:
     oneOf:
       - items:
           - enum:
-              - ti,sysc-omap2
               - ti,sysc-omap2
               - ti,sysc-omap4
               - ti,sysc-omap4-simple
index 625f573a7b90e152b684a471989a750b171969d3..458c7645ee6838fc879f68f41332bd38ff0569b4 100644 (file)
@@ -55,8 +55,6 @@ allOf:
     then:
       properties:
         clocks:
-          minItems: 7
-          maxItems: 7
           items:
             - description: 32k osc
             - description: 25m osc
@@ -66,8 +64,6 @@ allOf:
             - description: ext3 clock input
             - description: ext4 clock input
         clock-names:
-          minItems: 7
-          maxItems: 7
           items:
             - const: ckil
             - const: osc_25m
index 0c15afa2214c3b5e78b357210e49d1b5dd5c3537..016a4f378b9b67ffefedc871257c44b86990b88a 100644 (file)
@@ -22,7 +22,16 @@ properties:
     const: microchip,mpfs-clkcfg
 
   reg:
-    maxItems: 1
+    items:
+      - description: |
+          clock config registers:
+          These registers contain enable, reset & divider tables for the cpu,
+          axi, ahb and rtc/mtimer reference clocks as well as enable and reset
+          for the peripheral clocks.
+      - description: |
+          mss pll dri registers:
+          Block of registers responsible for dynamic reconfiguration of the mss
+          pll.
 
   clocks:
     maxItems: 1
@@ -51,7 +60,7 @@ examples:
             #size-cells = <2>;
             clkcfg: clock-controller@20002000 {
                 compatible = "microchip,mpfs-clkcfg";
-                reg = <0x0 0x20002000 0x0 0x1000>;
+                reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>;
                 clocks = <&ref>;
                 #clock-cells = <1>;
         };
index f14f1d39da3629d81e95ac2377bea8233afdd8ab..d819dfaafff9b3baa5385313ab7bda403067a62e 100644 (file)
@@ -8,7 +8,7 @@ title: Samsung Exynos SoC Audio SubSystem clock controller
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index 4e8062860986aa5726b585bd3ce097cbab4cdffc..0589a63e273a126935eaffb54b0fffd6605380a6 100644 (file)
@@ -8,7 +8,7 @@ title: Samsung Exynos SoC clock controller
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index 64d027dbe3b250585c1a27e942993fabdcf628fb..c98eff64f2b58de0051f32e67f2a02d82c70a32a 100644 (file)
@@ -8,7 +8,7 @@ title: Samsung SoC external/osc/XXTI/XusbXTI clock
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index 1ed64add4355d34819275eafea95440c03868b07..b644bbd0df3841bbc0309f1914ee276ceb567fb8 100644 (file)
@@ -8,7 +8,7 @@ title: Samsung Exynos4412 SoC ISP clock controller
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index a3fac5c6809d20eeaeff5f4d0d2a97afeaa0d70f..b05f83533e3deb4d0f396123eebd1a4150379a99 100644 (file)
@@ -8,7 +8,7 @@ title: Samsung Exynos5260 SoC clock controller
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index 032862e9f55b71ec8c29a62d8aba935f01531ada..b737c9d35a1c2af70979bf1896a90d9c14894cff 100644 (file)
@@ -8,7 +8,7 @@ title: Samsung Exynos5410 SoC clock controller
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index edd1b4ac433476a80bfbc23657bc43619e975b37..3f9326e09f79ef55af1178a7bdad64693a1b8202 100644 (file)
@@ -8,7 +8,7 @@ title: Samsung Exynos5433 SoC clock controller
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index 599baf0b7231888b568c063461d5fb27ae272560..c137c6744ef91fdf4e5658d1a1cc677ad68bab0f 100644 (file)
@@ -8,7 +8,7 @@ title: Samsung Exynos7 SoC clock controller
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index 7e5a9cac2fd2823decfd8328cac329464e53f3af..5073e569a47fd54d65081bfb62299c749eab14a2 100644 (file)
@@ -9,7 +9,7 @@ title: Samsung Exynos7885 SoC clock controller
 maintainers:
   - Dávid Virág <virag.david003@gmail.com>
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index 80ba60838f2badc08c75edda51540955d0a7bd2d..aa11815ad3a308fbbba4686177f1e4acdc9909c9 100644 (file)
@@ -9,7 +9,7 @@ title: Samsung Exynos850 SoC clock controller
 maintainers:
   - Sam Protsenko <semen.protsenko@linaro.org>
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index 1410c51e0e7df84e422fd54a75bcb3b05ace7b69..9248bfc16d484a12b2a8fb37ab87f11a15833a42 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S2M and S5M family clock generator block
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for S2M and S5M family of Power
index ae8f8fc932338194b5119d9a9b9606bc715460b4..2659854ea1c0aeeb915e3ab295f3dca95f935d00 100644 (file)
@@ -8,7 +8,7 @@ title: Samsung S5Pv210 SoC Audio SubSystem clock controller
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index dcb29a2d11599b1b87c231e3adf105cd0eaf28e3..67a33665cf00bfe88d965f067a5348a262612fd8 100644 (file)
@@ -8,7 +8,7 @@ title: Samsung S5P6442/S5PC110/S5PV210 SoC clock controller
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index d318fccf78f109631507c130c649eb4593ebaacb..2bdd05af6079bcc3343fd71f374ae663ed0b8552 100644 (file)
@@ -8,7 +8,7 @@ title: Samsung Exynos NoC (Network on Chip) Probe
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
  The Samsung Exynos542x SoC has a NoC (Network on Chip) Probe for the NoC bus.
index c9a8cb5fd55582fbd0c35d5ddf5ff9960b21eb24..e300df4b47f3df4da7123b8a399b5f2a2db2e789 100644 (file)
@@ -8,7 +8,7 @@ title: Samsung Exynos SoC PPMU (Platform Performance Monitoring Unit)
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   The Samsung Exynos SoC has PPMU (Platform Performance Monitoring Unit) for
index 62c3bd4cb28d89f36d994fce9dd58292c68ec801..7257fd0ae4da87151f909d60d99f12cc93381b28 100644 (file)
@@ -51,7 +51,6 @@ properties:
           Video port for MIPI DPI output (panel or connector).
 
     required:
-      - port@0
       - port@1
 
 required:
index a51baf8a4c7684b87127f0b7b88679727a84b037..bb9dbfb9beaf508d1f3cebc19bf93efd104e2c89 100644 (file)
@@ -95,7 +95,6 @@ then:
   properties:
     clocks:
       minItems: 1
-      maxItems: 4
       items:
         - description: Functional clock
         - description: EXTAL input clock
@@ -104,7 +103,6 @@ then:
 
     clock-names:
       minItems: 1
-      maxItems: 4
       items:
         - const: fck
         # The LVDS encoder can use the EXTAL or DU_DOTCLKINx clocks.
@@ -128,12 +126,10 @@ then:
 else:
   properties:
     clocks:
-      maxItems: 1
       items:
         - description: Functional clock
 
     clock-names:
-      maxItems: 1
       items:
         - const: fck
 
index 5216c27fc0ada58881235761037162c5873ed0c6..a412a1da950fb9e5aeb33da06ea1733d0324ec58 100644 (file)
@@ -39,7 +39,6 @@ properties:
           Video port for MIPI DPI output (panel or connector).
 
     required:
-      - port@0
       - port@1
 
 required:
index d31483a78eab0ff7f7c888420c61333d0d7c0b37..6fb7e321f011873e2ce4c48b688d653c4b7a8cea 100644 (file)
@@ -160,7 +160,7 @@ examples:
     mdss: mdss@5e00000 {
         #address-cells = <1>;
         #size-cells = <1>;
-        compatible = "qcom,qcm2290-mdss", "qcom,mdss";
+        compatible = "qcom,qcm2290-mdss";
         reg = <0x05e00000 0x1000>;
         reg-names = "mdss";
         power-domains = <&dispcc MDSS_GDSC>;
@@ -180,7 +180,7 @@ examples:
                  <&apps_smmu 0x421 0x0>;
         ranges;
 
-        mdss_mdp: mdp@5e01000 {
+        mdss_mdp: display-controller@5e01000 {
                 compatible = "qcom,qcm2290-dpu";
                 reg = <0x05e01000 0x8f000>,
                       <0x05eb0000 0x2008>;
index f29789994b1804f46e25f888e2b845778eb6e475..c2df8d28aaf5f594b6e1fac2e277ecd161b6f581 100644 (file)
@@ -83,6 +83,8 @@ properties:
 required:
   - compatible
   - reg
+  - width-mm
+  - height-mm
   - panel-timing
 
 unevaluatedProperties: false
index 9bf592dc3033aa3efc90e9ddb721ebca5200c846..7749de95ee405f7d35af13b6c6034b8db95ceb0e 100644 (file)
@@ -71,78 +71,72 @@ properties:
 
   hfront-porch:
     description: Horizontal front porch panel timing
+    $ref: /schemas/types.yaml#/definitions/uint32-array
     oneOf:
-      - $ref: /schemas/types.yaml#/definitions/uint32
-        maxItems: 1
+      - maxItems: 1
         items:
           description: typical number of pixels
-      - $ref: /schemas/types.yaml#/definitions/uint32-array
-        minItems: 3
+      - minItems: 3
         maxItems: 3
         items:
           description: min, typ, max number of pixels
 
   hback-porch:
     description: Horizontal back porch timing
+    $ref: /schemas/types.yaml#/definitions/uint32-array
     oneOf:
-      - $ref: /schemas/types.yaml#/definitions/uint32
-        maxItems: 1
+      - maxItems: 1
         items:
           description: typical number of pixels
-      - $ref: /schemas/types.yaml#/definitions/uint32-array
-        minItems: 3
+      - minItems: 3
         maxItems: 3
         items:
           description: min, typ, max number of pixels
 
   hsync-len:
     description: Horizontal sync length panel timing
+    $ref: /schemas/types.yaml#/definitions/uint32-array
     oneOf:
-      - $ref: /schemas/types.yaml#/definitions/uint32
-        maxItems: 1
+      - maxItems: 1
         items:
           description: typical number of pixels
-      - $ref: /schemas/types.yaml#/definitions/uint32-array
-        minItems: 3
+      - minItems: 3
         maxItems: 3
         items:
           description: min, typ, max number of pixels
 
   vfront-porch:
     description: Vertical front porch panel timing
+    $ref: /schemas/types.yaml#/definitions/uint32-array
     oneOf:
-      - $ref: /schemas/types.yaml#/definitions/uint32
-        maxItems: 1
+      - maxItems: 1
         items:
           description: typical number of lines
-      - $ref: /schemas/types.yaml#/definitions/uint32-array
-        minItems: 3
+      - minItems: 3
         maxItems: 3
         items:
           description: min, typ, max number of lines
 
   vback-porch:
     description: Vertical back porch panel timing
+    $ref: /schemas/types.yaml#/definitions/uint32-array
     oneOf:
-      - $ref: /schemas/types.yaml#/definitions/uint32
-        maxItems: 1
+      - maxItems: 1
         items:
           description: typical number of lines
-      - $ref: /schemas/types.yaml#/definitions/uint32-array
-        minItems: 3
+      - minItems: 3
         maxItems: 3
         items:
           description: min, typ, max number of lines
 
   vsync-len:
     description: Vertical sync length panel timing
+    $ref: /schemas/types.yaml#/definitions/uint32-array
     oneOf:
-      - $ref: /schemas/types.yaml#/definitions/uint32
-        maxItems: 1
+      - maxItems: 1
         items:
           description: typical number of lines
-      - $ref: /schemas/types.yaml#/definitions/uint32-array
-        minItems: 3
+      - minItems: 3
         maxItems: 3
         items:
           description: min, typ, max number of lines
index 56cedcd6d5768c4a852b400347d73c3702b05710..b3e588022082d8209ccd01874ffe5f1fa3eb1d04 100644 (file)
@@ -109,7 +109,6 @@ allOf:
       properties:
         clocks:
           minItems: 1
-          maxItems: 3
           items:
             - description: Functional clock
             - description: DU_DOTCLKIN0 input clock
@@ -117,7 +116,6 @@ allOf:
 
         clock-names:
           minItems: 1
-          maxItems: 3
           items:
             - const: du.0
             - pattern: '^dclkin\.[01]$'
@@ -159,7 +157,6 @@ allOf:
       properties:
         clocks:
           minItems: 2
-          maxItems: 4
           items:
             - description: Functional clock for DU0
             - description: Functional clock for DU1
@@ -168,7 +165,6 @@ allOf:
 
         clock-names:
           minItems: 2
-          maxItems: 4
           items:
             - const: du.0
             - const: du.1
@@ -216,7 +212,6 @@ allOf:
       properties:
         clocks:
           minItems: 2
-          maxItems: 4
           items:
             - description: Functional clock for DU0
             - description: Functional clock for DU1
@@ -225,7 +220,6 @@ allOf:
 
         clock-names:
           minItems: 2
-          maxItems: 4
           items:
             - const: du.0
             - const: du.1
@@ -271,7 +265,6 @@ allOf:
       properties:
         clocks:
           minItems: 2
-          maxItems: 4
           items:
             - description: Functional clock for DU0
             - description: Functional clock for DU1
@@ -280,7 +273,6 @@ allOf:
 
         clock-names:
           minItems: 2
-          maxItems: 4
           items:
             - const: du.0
             - const: du.1
@@ -327,7 +319,6 @@ allOf:
       properties:
         clocks:
           minItems: 2
-          maxItems: 4
           items:
             - description: Functional clock for DU0
             - description: Functional clock for DU1
@@ -336,7 +327,6 @@ allOf:
 
         clock-names:
           minItems: 2
-          maxItems: 4
           items:
             - const: du.0
             - const: du.1
@@ -386,7 +376,6 @@ allOf:
       properties:
         clocks:
           minItems: 3
-          maxItems: 6
           items:
             - description: Functional clock for DU0
             - description: Functional clock for DU1
@@ -397,7 +386,6 @@ allOf:
 
         clock-names:
           minItems: 3
-          maxItems: 6
           items:
             - const: du.0
             - const: du.1
@@ -448,7 +436,6 @@ allOf:
       properties:
         clocks:
           minItems: 4
-          maxItems: 8
           items:
             - description: Functional clock for DU0
             - description: Functional clock for DU1
@@ -461,7 +448,6 @@ allOf:
 
         clock-names:
           minItems: 4
-          maxItems: 8
           items:
             - const: du.0
             - const: du.1
@@ -525,7 +511,6 @@ allOf:
       properties:
         clocks:
           minItems: 3
-          maxItems: 6
           items:
             - description: Functional clock for DU0
             - description: Functional clock for DU1
@@ -536,7 +521,6 @@ allOf:
 
         clock-names:
           minItems: 3
-          maxItems: 6
           items:
             - const: du.0
             - const: du.1
@@ -596,7 +580,6 @@ allOf:
       properties:
         clocks:
           minItems: 3
-          maxItems: 6
           items:
             - description: Functional clock for DU0
             - description: Functional clock for DU1
@@ -607,7 +590,6 @@ allOf:
 
         clock-names:
           minItems: 3
-          maxItems: 6
           items:
             - const: du.0
             - const: du.1
@@ -666,14 +648,12 @@ allOf:
       properties:
         clocks:
           minItems: 1
-          maxItems: 2
           items:
             - description: Functional clock for DU0
             - description: DU_DOTCLKIN0 input clock
 
         clock-names:
           minItems: 1
-          maxItems: 2
           items:
             - const: du.0
             - const: dclkin.0
@@ -723,7 +703,6 @@ allOf:
       properties:
         clocks:
           minItems: 2
-          maxItems: 4
           items:
             - description: Functional clock for DU0
             - description: Functional clock for DU1
@@ -732,7 +711,6 @@ allOf:
 
         clock-names:
           minItems: 2
-          maxItems: 4
           items:
             - const: du.0
             - const: du.1
@@ -791,7 +769,6 @@ allOf:
             - description: Functional clock
 
         clock-names:
-          maxItems: 1
           items:
             - const: du.0
 
index f998a3a5b71f63603ecda5cc4de9a471249f2673..919734c05c0b1b92ab6d5d7e6d937b178bce95b2 100644 (file)
@@ -11,7 +11,7 @@ maintainers:
   - Joonyoung Shim <jy0922.shim@samsung.com>
   - Seung-Woo Kim <sw0312.kim@samsung.com>
   - Kyungmin Park <kyungmin.park@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index cb8e735ce3bd2baaebe750a8719ef51526ae26b9..63379fae36366ebae1df9a2e6cb942e3edb9a2b6 100644 (file)
@@ -11,7 +11,7 @@ maintainers:
   - Joonyoung Shim <jy0922.shim@samsung.com>
   - Seung-Woo Kim <sw0312.kim@samsung.com>
   - Kyungmin Park <kyungmin.park@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index ba40284ac66f09cdb3c7491fdf7be9e0180f91be..00e325a19cb1df9171432d15d7d27265fdf808ed 100644 (file)
@@ -11,7 +11,7 @@ maintainers:
   - Joonyoung Shim <jy0922.shim@samsung.com>
   - Seung-Woo Kim <sw0312.kim@samsung.com>
   - Kyungmin Park <kyungmin.park@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description:
   Samsung Exynos SoC Mixer is responsible for mixing and blending multiple data
index 6f796835ea03a8bd9da87f869054da2248518e88..7c37470bd32973520be68536343159f72ab9ab59 100644 (file)
@@ -11,7 +11,7 @@ maintainers:
   - Joonyoung Shim <jy0922.shim@samsung.com>
   - Seung-Woo Kim <sw0312.kim@samsung.com>
   - Kyungmin Park <kyungmin.park@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   DECON (Display and Enhancement Controller) is the Display Controller for the
index 01fccb138ebd98a563e08587c2a77f69cdb4aa44..c5c6239c28d079a0d92742a1c5105069450e1830 100644 (file)
@@ -11,7 +11,7 @@ maintainers:
   - Joonyoung Shim <jy0922.shim@samsung.com>
   - Seung-Woo Kim <sw0312.kim@samsung.com>
   - Kyungmin Park <kyungmin.park@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   MIC (Mobile Image Compressor) resides between DECON and MIPI DSI. MIPI DSI is
index afa137d4792281ff0e425435374db19b6ed23db8..320eedc61a5b5b7dcdbdf29decb0a95467f32865 100644 (file)
@@ -11,7 +11,7 @@ maintainers:
   - Joonyoung Shim <jy0922.shim@samsung.com>
   - Seung-Woo Kim <sw0312.kim@samsung.com>
   - Kyungmin Park <kyungmin.park@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   DECON (Display and Enhancement Controller) is the Display Controller for the
index 9cf5f120d5168d8e8d61302b8df9c6707e2391c4..c62ea9d228432bcc23e635c5553ff4531598b638 100644 (file)
@@ -11,7 +11,7 @@ maintainers:
   - Joonyoung Shim <jy0922.shim@samsung.com>
   - Seung-Woo Kim <sw0312.kim@samsung.com>
   - Kyungmin Park <kyungmin.park@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index e614fe3187bbc0376013ff69b955c4b8b94ceffa..d09d79d7406a3aa8378591c37c763f85683e197f 100644 (file)
@@ -29,6 +29,7 @@ properties:
   interrupts:
     description:
       Interrupt lines for each GPI instance
+    minItems: 1
     maxItems: 13
 
   "#dma-cells":
index f9ffe3d6f9575b85a4b0f44b8ab3c882eaceb5c6..1289605456408167bc9b8f756ad0d4fdbe2ff58b 100644 (file)
@@ -8,7 +8,7 @@ title: Maxim MAX77843 MicroUSB and Companion Power Management IC Extcon
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX77843 MicroUSB
@@ -25,7 +25,7 @@ properties:
     $ref: /schemas/connector/usb-connector.yaml#
 
   ports:
-    $ref: /schemas/graph.yaml#/properties/port
+    $ref: /schemas/graph.yaml#/properties/ports
     description:
       Any connector to the data bus of this controller should be modelled using
       the OF graph bindings specified
index 4d6bfae0653c457a2469adc51f41f61260611be0..85f8d4764740a3d013d4c04f57802d4622ee2302 100644 (file)
@@ -20,6 +20,7 @@ properties:
           - mediatek,mt8183-mali
           - realtek,rtd1619-mali
           - renesas,r9a07g044-mali
+          - renesas,r9a07g054-mali
           - rockchip,px30-mali
           - rockchip,rk3568-mali
       - const: arm,mali-bifrost # Mali Bifrost GPU model/revision is fully discoverable
@@ -109,7 +110,9 @@ allOf:
       properties:
         compatible:
           contains:
-            const: renesas,r9a07g044-mali
+            enum:
+              - renesas,r9a07g044-mali
+              - renesas,r9a07g054-mali
     then:
       properties:
         interrupts:
index 4b5851c326f7a5027fc9746407608d62430ce44a..b1a4c235376ef3ea8e7d31e8265b156276239686 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: LTC4151 High Voltage I2C Current and Voltage Monitor
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index c42051f8a1914f2e91bd3827e286aab48d74529a..028d6e570131fb60e24bf3aea1961d1c2c9fc970 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Microchip MCP3021 A/D converter
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index 4669217d01e16311a943b02003fc60cf1d22c372..80df7182ea28b9de30f912173c48dedcd9739164 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Sensirion SHT15 humidity and temperature sensor
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index d3eff4fac1075e51d04ee1791268e7c057c0480e..c5a889e3e27b9316fbd41a2fe9e022264bd699f8 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: TMP102 temperature sensor
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index eda55bbc172dcb7a3876436dac868c7a87c27741..dcbc6fbc3b48f63bf03039aaa3fef3f1422efb1e 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: TMP108 temperature sensor
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index 36f649938fb7ef14eeea1171a7907ae41469425d..a6f1fa75a67cd56ecede289401cd9f886bbf9b88 100644 (file)
@@ -58,10 +58,9 @@ patternProperties:
         description: |
           The value (two's complement) to be programmed in the channel specific N correction register.
           For remote channels only.
-        $ref: /schemas/types.yaml#/definitions/uint32
-        items:
-          minimum: 0
-          maximum: 255
+        $ref: /schemas/types.yaml#/definitions/int32
+        minimum: -128
+        maximum: 127
 
     required:
       - reg
index 801ca9ba7d344f112449b5689dd6d0fc7a7f32b2..e7493e25a7d2a2d02c6ac98de4875443087636cc 100644 (file)
@@ -58,9 +58,8 @@ patternProperties:
           The value (two's complement) to be programmed in the channel specific N correction register.
           For remote channels only.
         $ref: /schemas/types.yaml#/definitions/int32
-        items:
-          minimum: -128
-          maximum: 127
+        minimum: -128
+        maximum: 127
 
     required:
       - reg
index 19874e8b73b9b1ce907d43ffcbcbca51ceb79072..3e52a0db6c41b16b049dd4cb5d799ff51f196a17 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung's High Speed I2C controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
  Samsung's High Speed I2C controller is used to interface with I2C devices
index 84051b0129c2bc1739531b8b7b65c28601bbbd14..c26230518957e48c4d9d0e9beb3d00f008e14e55 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S3C/S5P/Exynos SoC I2C Controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index cf711082ad7db0fa5d2c2353e14babf5bf480f9b..666414a9c0defd831e4dc561e267790ddeb61f67 100644 (file)
@@ -98,6 +98,7 @@ allOf:
               - ti,adc121s
               - ti,ads7866
               - ti,ads7868
+    then:
       required:
         - vcc-supply
   # Devices with a vref
index 4d6074518b5cd2dd65ddc2db46e5413c9a976755..fa8da42cb1e6baad8dbf2c9efd0843cb876f15de 100644 (file)
@@ -138,7 +138,6 @@ allOf:
             - const: bus
             - const: adc
           minItems: 1
-          maxItems: 2
 
         interrupts:
           items:
@@ -170,7 +169,6 @@ allOf:
             - const: bus
             - const: adc
           minItems: 1
-          maxItems: 2
 
         interrupts:
           items:
index 7c260f209687afbd6b0ba453605f7df239573d6a..92f9472a77ae1e5b46acfd2def91a5ec41bb24c1 100644 (file)
@@ -108,9 +108,7 @@ patternProperties:
           - [1-5]: order 1 to 5.
           For audio purpose it is recommended to use order 3 to 5.
         $ref: /schemas/types.yaml#/definitions/uint32
-        items:
-          minimum: 0
-          maximum: 5
+        maximum: 5
 
       "#io-channel-cells":
         const: 1
@@ -174,7 +172,7 @@ patternProperties:
               contains:
                 const: st,stm32-dfsdm-adc
 
-      - then:
+        then:
           properties:
             st,adc-channels:
               minItems: 1
@@ -206,7 +204,7 @@ patternProperties:
               contains:
                 const: st,stm32-dfsdm-dmic
 
-      - then:
+        then:
           properties:
             st,adc-channels:
               maxItems: 1
@@ -254,7 +252,7 @@ allOf:
           contains:
             const: st,stm32h7-dfsdm
 
-  - then:
+    then:
       patternProperties:
         "^filter@[0-9]+$":
           properties:
@@ -269,7 +267,7 @@ allOf:
           contains:
             const: st,stm32mp1-dfsdm
 
-  - then:
+    then:
       patternProperties:
         "^filter@[0-9]+$":
           properties:
index 0d8fb56f4b093a98187bca02dc74bc6f40f77385..65f86f26947cd5d68d24c16bd2957d08169ea719 100644 (file)
@@ -59,9 +59,9 @@ allOf:
           contains:
             enum:
               - adi,ad5371
-      then:
-        required:
-          - vref2-supply
+    then:
+      required:
+        - vref2-supply
 
 examples:
   - |
index b1770640f94bb378c7650709233db3b2e95db084..03ebd2665d0781e69ab388fc11e08bd63803f07f 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Mediatek's Keypad Controller device tree bindings
 
 maintainers:
-  - Fengping Yu <fengping.yu@mediatek.com>
+  - Mattijs Korpershoek <mkorpershoek@baylibre.com>
 
 allOf:
   - $ref: "/schemas/input/matrix-keymap.yaml#"
index 89853b4825133766810527ca9793f8f253e9ea5c..8a676fef8c1d14cd9fbaf2e006575a2be7cb3258 100644 (file)
@@ -93,48 +93,48 @@ allOf:
               - qcom,sdm660-gnoc
               - qcom,sdm660-snoc
 
-      then:
-        properties:
-          clock-names:
-            items:
-              - const: bus
-              - const: bus_a
-
-          clocks:
-            items:
-              - description: Bus Clock
-              - description: Bus A Clock
-
-        # Child node's properties
-        patternProperties:
-          '^interconnect-[a-z0-9]+$':
-            type: object
-            description:
-              snoc-mm is a child of snoc, sharing snoc's register address space.
-
-            properties:
-              compatible:
-                enum:
-                  - qcom,msm8939-snoc-mm
-
-              '#interconnect-cells':
-                const: 1
-
-              clock-names:
-                items:
-                  - const: bus
-                  - const: bus_a
-
-              clocks:
-                items:
-                  - description: Bus Clock
-                  - description: Bus A Clock
-
-            required:
-              - compatible
-              - '#interconnect-cells'
-              - clock-names
-              - clocks
+    then:
+      properties:
+        clock-names:
+          items:
+            - const: bus
+            - const: bus_a
+
+        clocks:
+          items:
+            - description: Bus Clock
+            - description: Bus A Clock
+
+      # Child node's properties
+      patternProperties:
+        '^interconnect-[a-z0-9]+$':
+          type: object
+          description:
+            snoc-mm is a child of snoc, sharing snoc's register address space.
+
+          properties:
+            compatible:
+              enum:
+                - qcom,msm8939-snoc-mm
+
+            '#interconnect-cells':
+              const: 1
+
+            clock-names:
+              items:
+                - const: bus
+                - const: bus_a
+
+            clocks:
+              items:
+                - description: Bus Clock
+                - description: Bus A Clock
+
+          required:
+            - compatible
+            - '#interconnect-cells'
+            - clock-names
+            - clocks
 
   - if:
       properties:
index b7197f78e1588754bab736f414af8833cc82f283..3912a89162f02b79daf687e9b4927104420251f2 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ARM Generic Interrupt Controller, version 3
 
 maintainers:
-  - Marc Zyngier <marc.zyngier@arm.com>
+  - Marc Zyngier <maz@kernel.org>
 
 description: |
   AArch64 SMP cores are often associated with a GICv3, providing Private
@@ -78,7 +78,11 @@ properties:
       - GIC Hypervisor interface (GICH)
       - GIC Virtual CPU interface (GICV)
 
-      GICC, GICH and GICV are optional.
+      GICC, GICH and GICV are optional, but must be described if the CPUs
+      support them. Examples of such CPUs are ARM's implementations of the
+      ARMv8.0 architecture such as Cortex-A32, A34, A35, A53, A57, A72 and
+      A73 (this list is not exhaustive).
+
     minItems: 2
     maxItems: 4096   # Should be enough?
 
index 372ccbfae7716f590d9a2b57ecbeeec445611a9f..5a583bf3dbc105a010a3ba1f166c961a536919b0 100644 (file)
@@ -7,10 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Marvell MMP/Orion Interrupt controller bindings
 
 maintainers:
-  - Thomas Gleixner <tglx@linutronix.de>
-  - Jason Cooper <jason@lakedaemon.net>
-  - Marc Zyngier <maz@kernel.org>
-  - Rob Herring <robh+dt@kernel.org>
+  - Andrew Lunn <andrew@lunn.ch>
+  - Gregory Clement <gregory.clement@bootlin.com>
 
 allOf:
   - if:
index d631b7589d506403b7cd4441d236026285dd2546..72456a07dac968dc364f47282460c96c0948b980 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung Exynos SoC Interrupt Combiner Controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
  Samsung's Exynos4 architecture includes an interrupt combiner controller which
index b2fe6eb89389a3a3397878f1f00d2b2f60050149..10f95bf1d666a7cd57547f74cd90b0dd598eb0e9 100644 (file)
@@ -43,8 +43,6 @@ patternProperties:
           - 4 # LED output FLASH1
           - 5 # LED output FLASH2
 
-unevaluatedProperties: false
-
 required:
   - compatible
   - "#address-cells"
index 86a0005cf1569982ad37d07187b798d64e84c13c..e27f57bb52ae8bf0a432b6aacd66fec9e7eb6b88 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Maxim MAX77693 MicroUSB and Companion Power Management IC LEDs
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX77693 MicroUSB Integrated
index 36781ee4617f9a8065ed9138166b575b2c33e665..c9d5adbc8c4a2a1da81739519a78dceb14e6ee6a 100644 (file)
@@ -65,7 +65,6 @@ properties:
   iram:
     $ref: /schemas/types.yaml#/definitions/phandle
     description: phandle pointing to the SRAM device node
-    maxItems: 1
 
 required:
   - compatible
index 9b179bb44dfb677ff5a9fa2ecd54913a949c3808..aa55ca65d6ed6d4fa31aa6ccebcc83ad42dfb2f2 100644 (file)
@@ -63,13 +63,11 @@ properties:
 
   mediatek,vpu:
     $ref: /schemas/types.yaml#/definitions/phandle
-    maxItems: 1
     description:
       Phandle pointing to the VPU node.
 
   mediatek,scp:
     $ref: /schemas/types.yaml#/definitions/phandle
-    maxItems: 1
     description:
       Phandle pointing to the SCP node.
 
index e7b65a91c92c26ffbe7c3d8dc6365f5f7ba01fcb..deb5b657a2d5826522571f718625d0d4737a1099 100644 (file)
@@ -55,13 +55,11 @@ properties:
 
   mediatek,vpu:
     $ref: /schemas/types.yaml#/definitions/phandle
-    maxItems: 1
     description:
       Phandle pointing to the VPU node.
 
   mediatek,scp:
     $ref: /schemas/types.yaml#/definitions/phandle
-    maxItems: 1
     description:
       Describes point to scp.
 
@@ -106,7 +104,6 @@ allOf:
           enum:
             - mediatek,mt8173-vcodec-enc
             - mediatek,mt8192-vcodec-enc
-            - mediatek,mt8173-vcodec-enc
 
     then:
       properties:
index 7687be0f50aa54e8709bb3aabc4bea94ff0bf3a3..c73bf2352aca6a9d8ffd019211bfab857fcd6e56 100644 (file)
@@ -61,7 +61,6 @@ properties:
 
   mediatek,scp:
     $ref: /schemas/types.yaml#/definitions/phandle
-    maxItems: 1
     description: |
       The node of the system control processor (SCP), using
       the remoteproc & rpmsg framework.
index 769f13250047434202810cf603503617c2d27330..08cbdcddfead0403cfdddfb9b1695f4c4e63c5a4 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: DDR PHY Front End (DPFE) for Broadcom STB
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Markus Mayer <mmayer@broadcom.com>
 
 properties:
index f3e62ee07126ab3b760d7340844bd7d7711ef14b..1daa66592477e36dc99c6b02a86ef73ca03edb90 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: LPDDR2 SDRAM AC timing parameters for a given speed-bin
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index dd2141cad866442928d34e8eedd9b1b08a4dd177..9d78f140609b6c116e2b692b0c1b4dac8fa65a89 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: LPDDR2 SDRAM compliant to JEDEC JESD209-2
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index 97c3e988af5f4284b4ad44acfc7e987f5bd05c14..5c6512c1e1e37b0ea0a236b0a3790cb36c1c5d18 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: LPDDR3 SDRAM AC timing parameters for a given speed-bin
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index c542f32c39fa98bfc6927a9c6e220e7fa7981b6f..48908a19473c3f349b02d5bb46b97e0773ed4aae 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: LPDDR3 SDRAM compliant to JEDEC JESD209-3
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index af5147f9da7201d1d6a9e4d2a308fd67c226a628..84f778a99546bba898f7221f04296ee35b753f7b 100644 (file)
@@ -25,12 +25,6 @@ properties:
           - const: fsl,qoriq-memory-controller
       - enum:
           - fsl,bsc9132-memory-controller
-          - fsl,8540-memory-controller
-          - fsl,8541-memory-controller
-          - fsl,8544-memory-controller
-          - fsl,8548-memory-controller
-          - fsl,8555-memory-controller
-          - fsl,8568-memory-controller
           - fsl,mpc8536-memory-controller
           - fsl,mpc8540-memory-controller
           - fsl,mpc8541-memory-controller
index 14a6bc8f421fccbeb86dd56c08afb942cb71877d..9249624c4fa009673e326cf97bf11e0764b4253a 100644 (file)
@@ -8,7 +8,7 @@ title: Marvell MVEBU SDRAM controller
 
 maintainers:
   - Jan Luebbe <jlu@pengutronix.de>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index 9566b3421f0394101370e27a93950da166c2231d..0c511ab906bf897b158b09a9d6916a40882852c9 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Qualcomm Atheros AR7xxx/AR9xxx DDR controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   The DDR controller of the AR7xxx and AR9xxx families provides an interface to
index 2b18cef99511f0e4f64cdc78f59ad7d7918caa5f..514b2c5f885869f5f5fc6c0dc1604b1fc5b9812e 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: H8/300 bus controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Yoshinori Sato <ysato@users.sourceforge.jp>
 
 properties:
index f152243f6b180696073548212c50541763752376..098348b2b815eab70a680e8701c37bea1e75f2c8 100644 (file)
@@ -9,7 +9,7 @@ title: |
   Controller device
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Lukasz Luba <lukasz.luba@arm.com>
 
 description: |
index fb7ae38a9c86672d2f7c3afd707af9ba66d975b9..f46e95704f532a62991dfe2f8ef14b483cdced2b 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Synopsys IntelliDDR Multi Protocol memory controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Manish Narani <manish.narani@xilinx.com>
   - Michal Simek <michal.simek@xilinx.com>
 
@@ -24,9 +24,9 @@ description: |
 properties:
   compatible:
     enum:
+      - snps,ddrc-3.80a
       - xlnx,zynq-ddrc-a05
       - xlnx,zynqmp-ddrc-2.40a
-      - snps,ddrc-3.80a
 
   interrupts:
     maxItems: 1
@@ -43,7 +43,9 @@ allOf:
       properties:
         compatible:
           contains:
-            const: xlnx,zynqmp-ddrc-2.40a
+            enum:
+              - snps,ddrc-3.80a
+              - xlnx,zynqmp-ddrc-2.40a
     then:
       required:
         - interrupts
index 9ed51185ff996b304c506db9113f36cd7759aeb8..382ddab60fbda10be248a587782967e4c2ee8b02 100644 (file)
@@ -8,7 +8,7 @@ title: Texas Instruments da8xx DDR2/mDDR memory controller
 
 maintainers:
   - Bartosz Golaszewski <bgolaszewski@baylibre.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   Documentation:
index 692300117c647c2a78abd3da8140e62d1d08c897..9d837535637b467aad496642d8f764b7106e6842 100644 (file)
@@ -54,7 +54,7 @@ flexcom@f8034000 {
                clock-names = "spi_clk";
                atmel,fifo-size = <32>;
 
-               mtd_dataflash@0 {
+               flash@0 {
                        compatible = "atmel,at25f512b";
                        reg = <0>;
                        spi-max-frequency = <20000000>;
index 27870b8760a6d492d46156fe82b694d16733600c..52edd1bf549f6d863f5662e77ac906f23f6f515f 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Maxim MAX14577/MAX77836 MicroUSB and Companion Power Management IC
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX14577/MAX77836 MicroUSB
index 859655a789c3bc1c6bccfe3f28611f9e4b8fb0ae..d027aabe453ba5d3a789244d37119768561e1dfd 100644 (file)
@@ -8,7 +8,7 @@ title: Maxim MAX77686 Power Management IC
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX77686 Power Management
index 906101197e113c4317cb81437ad5ecc17200301d..1b06a77ec79895cc0174b1e17b20625cf1867982 100644 (file)
@@ -8,7 +8,7 @@ title: Maxim MAX77693 MicroUSB and Companion Power Management IC
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX77693 MicroUSB
index baa1346ac5d5a5fc71196e19e602ab3dd42dfebe..ad2013900b0378c71b8d8dff20d9bb32ae752b2a 100644 (file)
@@ -8,7 +8,7 @@ title: Maxim MAX77802 Power Management IC
 
 maintainers:
   - Javier Martinez Canillas <javier@dowhile0.org>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX77802 Power Management
index 61a0f9dcb9837de14afa73b6e704381bb4c03785..f30f96bbff43ab85c4b5bd273ae9dcbb9227d733 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Maxim MAX77843 MicroUSB and Companion Power Management IC
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX77843 MicroUSB
index bae55c98961c52878cdd5afa8029d7a08888fde0..f7bb67d10eff3618fe9542ff096402863a7b67bf 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung Exynos SoC Low Power Audio Subsystem (LPASS)
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
 
 properties:
index 017befdf8adb5a7cd1b26490a68b9b5c983d6600..055dfc337c2f943ab98ec173caec202cf7fbe983 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S2MPA01 Power Management IC
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for S2M and S5M family of Power
index 771b3f16da965caf04175d4ebf5627b356053a03..5ff6546c72b79a54723d00b6de093df2d22fca31 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S2MPS11/13/14/15 and S2MPU02 Power Management IC
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for S2M and S5M family of Power
index 5531718abdf07d49fd7096b88a20415b5aeda8f0..10c7b408f33aa3d815578fcf1134ed5a9a94eec6 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S5M8767 Power Management IC
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for S2M and S5M family of Power
index ce64b34983785a9217335220fc9408e46f997647..fe0270207622dc21d36bcdbe70fea1ea60acd52b 100644 (file)
@@ -197,25 +197,22 @@ allOf:
               - nvidia,tegra30-sdhci
               - nvidia,tegra114-sdhci
               - nvidia,tegra124-sdhci
+    then:
+      properties:
         clocks:
           items:
             - description: module clock
-          minItems: 1
-          maxItems: 1
     else:
       properties:
         clocks:
           items:
             - description: module clock
             - description: timeout clock
-          minItems: 2
-          maxItems: 2
+
         clock-names:
           items:
             - const: sdhci
             - const: tmclk
-          minItems: 2
-          maxItems: 2
       required:
         - clock-names
 
index 9d764e654e1db80df5ed666808d0762e3cd500b2..849aeae319a9213319fbb00da94e90cd9d2250d3 100644 (file)
@@ -147,8 +147,6 @@ allOf:
             - description: SoC gpmi io clock
             - description: SoC gpmi bch apb clock
         clock-names:
-          minItems: 2
-          maxItems: 2
           items:
             - const: gpmi_io
             - const: gpmi_bch_apb
index 8bad328b184df2bda976cf0cdd28d1a0145d4109..51aa89ac7e8507aa6aa6fa68d84fc12365cc3c46 100644 (file)
@@ -80,8 +80,6 @@ if:
 then:
   properties:
     interrupts:
-      minItems: 4
-      maxItems: 4
       items:
         - description: Error and status IRQ
         - description: Message object IRQ
@@ -91,7 +89,6 @@ then:
 else:
   properties:
     interrupts:
-      maxItems: 1
       items:
         - description: Error and status IRQ
 
index 8756060895a8e54a430debcbba1dffafb20f2a0a..99ee4b5b9346c0c710f013e66951d76ea8bef3ba 100644 (file)
@@ -27,32 +27,25 @@ description:
   The realtek-mdio driver is an MDIO driver and it must be inserted inside
   an MDIO node.
 
+  The compatible string is only used to identify which (silicon) family the
+  switch belongs to. Roughly speaking, a family is any set of Realtek switches
+  whose chip identification register(s) have a common location and semantics.
+  The different models in a given family can be automatically disambiguated by
+  parsing the chip identification register(s) according to the given family,
+  avoiding the need for a unique compatible string for each model.
+
 properties:
   compatible:
     enum:
       - realtek,rtl8365mb
-      - realtek,rtl8366
       - realtek,rtl8366rb
-      - realtek,rtl8366s
-      - realtek,rtl8367
-      - realtek,rtl8367b
-      - realtek,rtl8367rb
-      - realtek,rtl8367s
-      - realtek,rtl8368s
-      - realtek,rtl8369
-      - realtek,rtl8370
     description: |
-      realtek,rtl8365mb: 4+1 ports
-      realtek,rtl8366: 5+1 ports
-      realtek,rtl8366rb: 5+1 ports
-      realtek,rtl8366s: 5+1 ports
-      realtek,rtl8367:
-      realtek,rtl8367b:
-      realtek,rtl8367rb: 5+2 ports
-      realtek,rtl8367s: 5+2 ports
-      realtek,rtl8368s: 8 ports
-      realtek,rtl8369: 8+1 ports
-      realtek,rtl8370: 8+2 ports
+      realtek,rtl8365mb:
+        Use with models RTL8363NB, RTL8363NB-VB, RTL8363SC, RTL8363SC-VB,
+        RTL8364NB, RTL8364NB-VB, RTL8365MB, RTL8366SC, RTL8367RB-VB, RTL8367S,
+        RTL8367SB, RTL8370MB, RTL8310SR
+      realtek,rtl8366rb:
+        Use with models RTL8366RB, RTL8366S
 
   mdc-gpios:
     description: GPIO line for the MDC clock line.
@@ -335,7 +328,7 @@ examples:
             #size-cells = <0>;
 
             switch@29 {
-                    compatible = "realtek,rtl8367s";
+                    compatible = "realtek,rtl8365mb";
                     reg = <29>;
 
                     reset-gpios = <&gpio2 20 GPIO_ACTIVE_LOW>;
index 817794e56227429f0c8e517992576294c33780dc..4f15463611f8bbc68f5642d4341b7265548693a3 100644 (file)
@@ -106,6 +106,12 @@ properties:
   phy-mode:
     $ref: "#/properties/phy-connection-type"
 
+  pcs-handle:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Specifies a reference to a node representing a PCS PHY device on an MDIO
+      bus to link with an external PHY (phy-handle) if one exists.
+
   phy-handle:
     $ref: /schemas/types.yaml#/definitions/phandle
     description:
index c5ab62c391335862c4da0bb28c4523fe6f24db5c..8d157f0295a502fd326eab4e2196f17654d8b484 100644 (file)
@@ -45,20 +45,3 @@ Optional properties:
 
        In fiber mode, auto-negotiation is disabled and the PHY can only work in
        100base-fx (full and half duplex) modes.
-
- - lan8814,ignore-ts: If present the PHY will not support timestamping.
-
-       This option acts as check whether Timestamping is supported by
-       hardware or not. LAN8814 phy support hardware tmestamping.
-
- - lan8814,latency_rx_10: Configures Latency value of phy in ingress at 10 Mbps.
-
- - lan8814,latency_tx_10: Configures Latency value of phy in egress at 10 Mbps.
-
- - lan8814,latency_rx_100: Configures Latency value of phy in ingress at 100 Mbps.
-
- - lan8814,latency_tx_100: Configures Latency value of phy in egress at 100 Mbps.
-
- - lan8814,latency_rx_1000: Configures Latency value of phy in ingress at 1000 Mbps.
-
- - lan8814,latency_tx_1000: Configures Latency value of phy in egress at 1000 Mbps.
index 15a45db3899a6eb2a0ff087e0bea0fdbbd55c570..1bcaf6ba822cbbad131a7dc1c2f86281773d2edf 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Marvell International Ltd. NCI NFC controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index 7465aea2e1c040ee156c68ed251886651658e0cd..e381a3c148368b799f167f50a6c2a8c989b22045 100644 (file)
@@ -8,7 +8,7 @@ title: NXP Semiconductors NCI NFC controller
 
 maintainers:
   - Charles Gorand <charles.gorand@effinnov.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index d8ba5a18db98d534d6a20bf8bc8e1231548c1014..0509e0166345a1fe67d9127297612b1afd94a222 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: NXP Semiconductors PN532 NFC controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index d520414de4636a858f9dc51bbdcd7041ee8eeca7..18b3a7d819df5ba1ebc646d12206dd71c5329d96 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: NXP Semiconductors PN544 NFC Controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index a6a1bc788d29a4ff924da506c8f437c908ee538f..ef1155038a2fcdbe529fc9e328e24ffe688a12c6 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics ST NCI NFC controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index 4356eacde8aa8b57bb754f115697da1387c0c6e0..8a7274357b46f19309d333b97cb80fc518c5f2a0 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics SAS ST21NFCA NFC controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index d3bca376039ef34b1b898f8d9dd4f94c6b4874c7..963d9531a856a25b187b92cc42f9f72df2957fe9 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics ST95HF NFC controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index 40da2ac989780cfd87abfc8bab6c0cfaaf9bace1..404c8df993640e1551b3287e8e35a568b2e546c2 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Texas Instruments TRF7970A RFID/NFC/15693 Transceiver
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Mark Greer <mgreer@animalcreek.com>
 
 properties:
index 2d5248f5b91944b8b0382c7cc0c7af0e4c0f0de6..36c85eb3dc0de6f7fb4874f0e35da858f8984977 100644 (file)
@@ -53,20 +53,18 @@ properties:
         - allwinner,sun8i-r40-gmac
         - allwinner,sun8i-v3s-emac
         - allwinner,sun50i-a64-emac
-        - loongson,ls2k-dwmac
-        - loongson,ls7a-dwmac
         - amlogic,meson6-dwmac
         - amlogic,meson8b-dwmac
         - amlogic,meson8m2-dwmac
         - amlogic,meson-gxbb-dwmac
         - amlogic,meson-axg-dwmac
-        - loongson,ls2k-dwmac
-        - loongson,ls7a-dwmac
         - ingenic,jz4775-mac
         - ingenic,x1000-mac
         - ingenic,x1600-mac
         - ingenic,x1830-mac
         - ingenic,x2000-mac
+        - loongson,ls2k-dwmac
+        - loongson,ls7a-dwmac
         - rockchip,px30-gmac
         - rockchip,rk3128-gmac
         - rockchip,rk3228-gmac
index e602761f7b149e7d434151dced31586144650142..b0ebcef6801ce50f728b24e70251125d8e1d373c 100644 (file)
@@ -13,9 +13,6 @@ description: |
   This describes the devicetree bindings for AVE ethernet controller
   implemented on Socionext UniPhier SoCs.
 
-allOf:
-  - $ref: ethernet-controller.yaml#
-
 properties:
   compatible:
     enum:
@@ -44,25 +41,13 @@ properties:
     minItems: 1
     maxItems: 4
 
-  clock-names:
-    oneOf:
-      - items:          # for Pro4
-          - const: gio
-          - const: ether
-          - const: ether-gb
-          - const: ether-phy
-      - const: ether    # for others
+  clock-names: true
 
   resets:
     minItems: 1
     maxItems: 2
 
-  reset-names:
-    oneOf:
-      - items:          # for Pro4
-          - const: gio
-          - const: ether
-      - const: ether    # for others
+  reset-names: true
 
   socionext,syscon-phy-mode:
     $ref: /schemas/types.yaml#/definitions/phandle-array
@@ -78,6 +63,42 @@ properties:
     $ref: mdio.yaml#
     unevaluatedProperties: false
 
+allOf:
+  - $ref: ethernet-controller.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: socionext,uniphier-pro4-ave4
+    then:
+      properties:
+        clocks:
+          minItems: 4
+          maxItems: 4
+        clock-names:
+          items:
+            - const: gio
+            - const: ether
+            - const: ether-gb
+            - const: ether-phy
+        resets:
+          minItems: 2
+          maxItems: 2
+        reset-names:
+          items:
+            - const: gio
+            - const: ether
+    else:
+      properties:
+        clocks:
+          maxItems: 1
+        clock-names:
+          const: ether
+        resets:
+          maxItems: 1
+        reset-names:
+          const: ether
+
 required:
   - compatible
   - reg
@@ -90,7 +111,7 @@ required:
   - reset-names
   - mdio
 
-additionalProperties: false
+unevaluatedProperties: false
 
 examples:
   - |
index dbfca5ee91399466bba2b184c109da4acdf27574..6f44f9516c3647dedcc684ca45c338be366d7f87 100644 (file)
@@ -56,6 +56,7 @@ if:
     compatible:
       contains:
         const: ti,davinci_mdio
+then:
   required:
     - bus_freq
 
index b8e4894bc6340070c0ff0723763664b5d1cf742b..1aa4c6006cd07c50419ef8d2e0d016a3c09436f2 100644 (file)
@@ -26,7 +26,8 @@ Required properties:
                  specified, the TX/RX DMA interrupts should be on that node
                  instead, and only the Ethernet core interrupt is optionally
                  specified here.
-- phy-handle   : Should point to the external phy device.
+- phy-handle   : Should point to the external phy device if one exists. Pointing
+                 this to the PCS/PMA PHY is deprecated and should be avoided.
                  See ethernet.txt file in the same directory.
 - xlnx,rxmem   : Set to allocated memory buffer for Rx/Tx in the hardware
 
@@ -68,6 +69,11 @@ Optional properties:
                  required through the core's MDIO interface (i.e. always,
                  unless the PHY is accessed through a different bus).
 
+ - pcs-handle:           Phandle to the internal PCS/PMA PHY in SGMII or 1000Base-X
+                 modes, where "pcs-handle" should be used to point
+                 to the PCS/PMA PHY, and "phy-handle" should point to an
+                 external PHY if one exists.
+
 Example:
        axi_ethernet_eth: ethernet@40c00000 {
                compatible = "xlnx,axi-ethernet-1.00.a";
index 7f01e15fc81c2ab512652db8bfd6818b682340e2..daf602ac0d0fd0e9bfa4e7f3dfa0f18da1826e6c 100644 (file)
@@ -142,7 +142,6 @@ examples:
           device_type = "pci";
           reg = <0x0 0x0 0x0 0x0 0x0>;
           reset-gpios = <&pinctrl_ap 152 0>;
-          max-link-speed = <2>;
 
           #address-cells = <3>;
           #size-cells = <2>;
@@ -153,7 +152,6 @@ examples:
           device_type = "pci";
           reg = <0x800 0x0 0x0 0x0 0x0>;
           reset-gpios = <&pinctrl_ap 153 0>;
-          max-link-speed = <2>;
 
           #address-cells = <3>;
           #size-cells = <2>;
@@ -164,7 +162,6 @@ examples:
           device_type = "pci";
           reg = <0x1000 0x0 0x0 0x0 0x0>;
           reset-gpios = <&pinctrl_ap 33 0>;
-          max-link-speed = <1>;
 
           #address-cells = <3>;
           #size-cells = <2>;
index cb1aa325336f834cd7e5e1dccbf4e36b8171ffc5..435b971dfd9beb4039eaf0d85e57fcdb90514a4a 100644 (file)
@@ -102,19 +102,17 @@ if:
 then:
   properties:
     reg:
-      maxItems: 2
+      minItems: 2
+
     reg-names:
-      items:
-        - const: "phy"
-        - const: "phy-ctrl"
+      minItems: 2
 else:
   properties:
     reg:
       maxItems: 1
+
     reg-names:
       maxItems: 1
-      items:
-        - const: "phy"
 
 required:
   - compatible
index dfde0eaf66e125590daf4f2b912b936c28baa835..d61585c96e319ef309f087b69111e82d77b7cb5b 100644 (file)
@@ -275,17 +275,17 @@ allOf:
           - nvidia,hssquelch-level
           - nvidia,hsdiscon-level
 
-        else:
-          properties:
-            clocks:
-              maxItems: 4
+      else:
+        properties:
+          clocks:
+            maxItems: 4
 
-            clock-names:
-              items:
-                - const: reg
-                - const: pll_u
-                - const: timer
-                - const: utmi-pads
+          clock-names:
+            items:
+              - const: reg
+              - const: pll_u
+              - const: timer
+              - const: utmi-pads
 
   - if:
       properties:
index e23e5590eaa3d0e60caa59a75063e86ec678b650..0655e485b2604a9bb81f7b5cd228f9caa1ba8a1e 100644 (file)
@@ -14,24 +14,24 @@ if:
     compatible:
       contains:
         const: qcom,usb-hs-phy-apq8064
-  then:
-    properties:
-      resets:
-        maxItems: 1
+then:
+  properties:
+    resets:
+      maxItems: 1
 
-      reset-names:
-        const: por
+    reset-names:
+      const: por
 
-  else:
-    properties:
-      resets:
-        minItems: 2
-        maxItems: 2
+else:
+  properties:
+    resets:
+      minItems: 2
+      maxItems: 2
 
-      reset-names:
-        items:
-          - const: phy
-          - const: por
+    reset-names:
+      items:
+        - const: phy
+        - const: por
 
 properties:
   compatible:
@@ -92,6 +92,8 @@ additionalProperties: false
 examples:
   - |
     otg: usb-controller {
+      #reset-cells = <1>;
+
       ulpi {
         phy {
           compatible = "qcom,usb-hs-phy-msm8974", "qcom,usb-hs-phy";
index 838c6d480ce62a93a8e3215de5b5ef85034c0b90..b03b2f00cc5b7e7aa6753a149dec2680f21bdd72 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung Exynos SoC DisplayPort PHY
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Marek Szyprowski <m.szyprowski@samsung.com>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
 
index c61574e10b2a7ab85bbed9072c0b85a8c0d3bb01..3e5f035de2e9c0b88892f4433ff0c5bd0e2c600a 100644 (file)
@@ -11,7 +11,7 @@ maintainers:
   - Joonyoung Shim <jy0922.shim@samsung.com>
   - Seung-Woo Kim <sw0312.kim@samsung.com>
   - Kyungmin Park <kyungmin.park@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index 62b39bb465858d784cb94a11af557550d5676a21..8751e559484fae38b494588d959e7bb286405738 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung Exynos5250 SoC SATA PHY
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Marek Szyprowski <m.szyprowski@samsung.com>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
 
index 54aa056b224d15e15b9be2554ef55071aa96f8f9..415440aaad89160bff8bd2480e6d17071b5665a8 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S5P/Exynos SoC MIPI CSIS/DSIM DPHY
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Marek Szyprowski <m.szyprowski@samsung.com>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
 
index 056e270a4e88e52b10453a51d1ba3a827d06f65e..d9f22a801cbf7d8ab0293affb3b1cce698563147 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S5P/Exynos SoC USB 2.0 PHY
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Marek Szyprowski <m.szyprowski@samsung.com>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
 
index f83f0f8135b940e420638c0d790a073cd7c194f4..5ba55f9f20cc58cf19434d3d4d04406568d5d291 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung Exynos SoC USB 3.0 DRD PHY USB 2.0 PHY
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Marek Szyprowski <m.szyprowski@samsung.com>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
 
index 57b68d6c7c70d4f1e9583bd77bdbf916c840b4fb..3666ac5b6518dd9d6651c5b3b89c127d658aa0bc 100644 (file)
@@ -33,7 +33,7 @@ patternProperties:
           $ref: "/schemas/types.yaml#/definitions/string"
           enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2,
                   ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMC, ESPI, ESPIALT,
-                  FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3,
+                  FSI1, FSI2, FWQSPI, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3,
                   GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5,
                   GPIU6, GPIU7, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16,
                   I2C2, I2C3, I2C4, I2C5, I2C6, I2C7, I2C8, I2C9, I3C3, I3C4, I3C5,
@@ -58,7 +58,7 @@ patternProperties:
           $ref: "/schemas/types.yaml#/definitions/string"
           enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2,
                   ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1, EMMCG4,
-                  EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWQSPID, FWSPIWP,
+                  EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWQSPI, FWSPIABR, FWSPID, FWSPIWP,
                   GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1,
                   GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1, I2C10,
                   I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5,
index 8a90d82737676c8101259bb7335e5a69f8b4112a..6bd42e43cdabe3bfcd0be176501f4ef4f32ae574 100644 (file)
@@ -48,13 +48,12 @@ properties:
               Name of one pin group to configure.
             enum: [ aif1, aif2, aif3, aif4, mif1, mif2, mif3, pdmspk1,
                     pdmspk2, dmic4, dmic5, dmic6, gpio1, gpio2, gpio3,
-                    gpio4, gpio5, gpio6, gpio7, gpio7, gpio8, gpio9,
+                    gpio4, gpio5, gpio6, gpio7, gpio8, gpio9,
                     gpio10, gpio11, gpio12, gpio13, gpio14, gpio15,
-                    gpio16, gpio17, gpio17, gpio18, gpio19, gpio20,
-                    gpio21, gpio22, gpio23, gpio24, gpio25, gpio26,
-                    gpio27, gpio27, gpio28, gpio29, gpio30, gpio31,
-                    gpio32, gpio33, gpio34, gpio35, gpio36, gpio37,
-                    gpio37, gpio38, gpio39 ]
+                    gpio16, gpio17, gpio18, gpio19, gpio20, gpio21,
+                    gpio22, gpio23, gpio24, gpio25, gpio26, gpio27,
+                    gpio28, gpio29, gpio30, gpio31, gpio32, gpio33,
+                    gpio34, gpio35, gpio36, gpio37, gpio38, gpio39 ]
 
           function:
             description:
index 4b22a9e3a4471c00adcb89fcfb49e9561b7ada93..f5a121311f612e0e2498778231f41c2acc5b9182 100644 (file)
@@ -52,11 +52,19 @@ properties:
       hardware supporting it the pull strength in Ohm.
 
   drive-push-pull:
-    type: boolean
+    oneOf:
+      - type: boolean
+      - $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [ 0, 1 ]
+        deprecated: true
     description: drive actively high and low
 
   drive-open-drain:
-    type: boolean
+    oneOf:
+      - type: boolean
+      - $ref: /schemas/types.yaml#/definitions/uint32
+        const: 1    # No known cases of 0
+        deprecated: true
     description: drive with open drain
 
   drive-open-source:
index f73348c5474838da04afd3e5217616b651ab5485..8cf3c47ab86b2996c71471af740725b0d41ba6f9 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S3C/S5P/Exynos SoC pin controller - gpio bank
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index c71939ac8b636ed851abd8c5b7c6d014a1644c4c..9869d4dceddbbb71e572bdb65161efb0b2866209 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S3C/S5P/Exynos SoC pin controller - pins configuration
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index a822f70f5702e61d69bb6681a7fb89f73db685d3..1de91a51234df4908625264615788d0d316f76fc 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S3C/S5P/Exynos SoC pin controller - wake-up interrupt controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index 989e48c051cff44d4c900cb2830fa480ce339c4f..3a65c66ca71d226691befcb2a7b0f5fbf2e8726c 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S3C/S5P/Exynos SoC pin controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
   - Tomasz Figa <tomasz.figa@gmail.com>
 
index 4d293b2b2f8466da1ceecd049c0b0e3070cdf8cd..d77fc88050c8afb0d4a9fb858f374b7e7e9786fb 100644 (file)
@@ -36,7 +36,8 @@ properties:
   cpus:
     $ref: /schemas/types.yaml#/definitions/phandle-array
     items:
-      maxItems: 1
+      minItems: 1
+      maxItems: 4
     description: |
       Array of phandles pointing to CPU cores, which should match the order of
       CPU cores used by the WUPCR and PSTR registers in the Advanced Power
index f8461f06e6f430a9c8b2f62c0fa7c4408ca3466b..118cf484cc69f8095a65cb408fdd3c0562093d85 100644 (file)
@@ -16,7 +16,6 @@ allOf:
 properties:
   compatible:
     enum:
-      - ti,bq24150
       - ti,bq24150
       - ti,bq24150a
       - ti,bq24151
index 3978b48299de188bb2704d83ee5c0acd7e6ce006..4d3a1d09036f6a359e10bf2aa5c854998b0eae49 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Maxim MAX14577/MAX77836 MicroUSB and Companion Power Management IC Charger
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX14577/MAX77836 MicroUSB
index a21dc1a8890ffb65ac4176a579193447182ef10f..f5fd53debbc8e0161628df1e98f63bae91da6058 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Maxim MAX77693 MicroUSB and Companion Power Management IC Charger
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX77693 MicroUSB Integrated
index 8a70696395a7de942d7ae0e69c86d8ae693e974a..22ad012660e94417cedf0dfb009a21dd8c74dd32 100644 (file)
@@ -6,12 +6,6 @@ The cache bindings explained below are Devicetree Specification compliant
 Required Properties:
 
 - compatible   : Should include one of the following:
-                 "fsl,8540-l2-cache-controller"
-                 "fsl,8541-l2-cache-controller"
-                 "fsl,8544-l2-cache-controller"
-                 "fsl,8548-l2-cache-controller"
-                 "fsl,8555-l2-cache-controller"
-                 "fsl,8568-l2-cache-controller"
                  "fsl,b4420-l2-cache-controller"
                  "fsl,b4860-l2-cache-controller"
                  "fsl,bsc9131-l2-cache-controller"
index 9b131c6facbc0032b4a994b9f0f374397380c179..84eeaef179a5c47bb40f817cc7ac29d6b9a17dea 100644 (file)
@@ -18,23 +18,23 @@ description:
 
 allOf:
   - $ref: "regulator.yaml#"
-
-if:
-  properties:
-    compatible:
-      contains:
-        const: regulator-fixed-clock
-  required:
-    - clocks
-else:
-  if:
-    properties:
-      compatible:
-        contains:
-          const: regulator-fixed-domain
-    required:
-      - power-domains
-      - required-opps
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: regulator-fixed-clock
+    then:
+      required:
+        - clocks
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: regulator-fixed-domain
+    then:
+      required:
+        - power-domains
+        - required-opps
 
 properties:
   compatible:
index 16f01886a60143ae3b5ecd85f5a535d0ccd00b06..285dc7122977e3794b7bcf7106e3d2cc7640995d 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Maxim MAX14577/MAX77836 MicroUSB and Companion Power Management IC regulators
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX14577/MAX77836 MicroUSB
index bb64b679f765a726a9748169629113a897ef961b..0e7cd4b3ace019dd0c7f3c2b68058872c4feba1c 100644 (file)
@@ -8,7 +8,7 @@ title: Maxim MAX77686 Power Management IC regulators
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX77686 Power Management
index 20d8559bdc2b8cda1f13efec578f2bd26b7fa63d..945a539749e8974fdda46c62e304e1c8c7376fcd 100644 (file)
@@ -8,7 +8,7 @@ title: Maxim MAX77693 MicroUSB and Companion Power Management IC regulators
 
 maintainers:
   - Chanwoo Choi <cw00.choi@samsung.com>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX77693 MicroUSB Integrated
index f2b4dd15a0f366892b77c1831f3468e15cf219b6..236348c4710c9d9d3122ee843d3726ae1fef96e6 100644 (file)
@@ -8,7 +8,7 @@ title: Maxim MAX77802 Power Management IC regulators
 
 maintainers:
   - Javier Martinez Canillas <javier@dowhile0.org>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX77802 Power Management
index a963025e96c12ef586ad3e074f5e911d978febe8..9695e72428829f95dd5c0c6187b5aab91e4161fa 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Maxim MAX77843 MicroUSB and Companion Power Management IC regulators
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for Maxim MAX77843 MicroUSB Integrated
index e4e8c58f6046feefcf80f0e9e7b5701c2a63a19f..3ff0d7d980e9775555e3e368e9f9c3bfa37a1ff6 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Maxim MAX8952 voltage regulator
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 allOf:
   - $ref: regulator.yaml#
index 5898dcf10f0672bc574ebee8e1d08318017b5ec6..b92eef68c19f6925b8dda28802aedbc8fa9897c3 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Maxim MAX8973/MAX77621 voltage regulator
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 allOf:
   - $ref: regulator.yaml#
index d5a44ca3df0400535155c7e794af04f12de76b89..4321f061a7f6249c407db73c1091d1a7b13cffc0 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Maxim MAX8997 Power Management IC
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   The Maxim MAX8997 is a Power Management IC which includes voltage and current
index 28725c5467fc8a3c8633363077ce7d6a533e81a7..edb411be039041837bedb2ecf9085b3c23d373b3 100644 (file)
@@ -58,7 +58,7 @@ properties:
         type: object
         $ref: regulator.yaml#
         description: |
-          regulator description for buck1 and buck4.
+          regulator description for buck1 to buck4, and ldo.
 
         properties:
           regulator-allowed-modes:
index 0627dec513da0d1372a1a83e9973b23bf952986c..0f9eb317ba9a5d3258616af8351af8cea64829fa 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S2MPA01 Power Management IC regulators
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for S2M and S5M family of Power
index e3b780715f446d36ffc7a630f1886a0d872cc91b..f1c50dcd0b04915b92a30e80dbe208b1330fa21d 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S2MPS11 Power Management IC regulators
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for S2M and S5M family of Power
index 579d77aefc3f0c0e9fcb56542650718746a2493e..53b105a4ead1a05a604d42b5231b8a986b597358 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S2MPS13 Power Management IC regulators
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for S2M and S5M family of Power
index fdea290b3e949b9df9017472227ba1e65643ddac..01f9d4e236e94901e9eb6e7f2d731d00c06cb77a 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S2MPS14 Power Management IC regulators
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for S2M and S5M family of Power
index b3a883c94628933b7e5c6e453eb47394ef64e29f..9576c2df45a61ba714e64dbc7d29b9cee7eece5b 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S2MPS15 Power Management IC regulators
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for S2M and S5M family of Power
index 0ded6953e3b67b708077c27ef2a9d53700a1a6e7..39b652c3c3c486a4703c1feb183b35b17b03c968 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S2MPU02 Power Management IC regulators
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for S2M and S5M family of Power
index 3c1617b66861ecb7b14fa79db3e2086904532cbd..172631ca3c25cd84fba0d5297f3c03d74061457b 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S5M8767 Power Management IC regulators
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   This is a part of device tree bindings for S2M and S5M family of Power
index 2424de733ee43235ae0c15879d334a7f61f3ccd1..d99a729d27107655e931cfccd5642f0cee2ab2de 100644 (file)
@@ -104,8 +104,7 @@ properties:
   qcom,smem-state-names:
     $ref: /schemas/types.yaml#/definitions/string
     description: The names of the state bits used for SMP2P output
-    items:
-      - const: stop
+    const: stop
 
   glink-edge:
     type: object
@@ -130,7 +129,6 @@ properties:
       qcom,remote-pid:
         $ref: /schemas/types.yaml#/definitions/uint32
         description: ID of the shared memory used by GLINK for communication with WPSS
-        maxItems: 1
 
     required:
       - interrupts
index b0c41ab1a746a60bfeef3060c54c887e28107164..cdfcf32c53fa9371833cd77dbc16e11f2a789d2e 100644 (file)
@@ -24,6 +24,11 @@ properties:
           - const: hisilicon,hi3670-reset
           - const: hisilicon,hi3660-reset
 
+  hisi,rst-syscon:
+    deprecated: true
+    description: phandle of the reset's syscon, use hisilicon,rst-syscon instead
+    $ref: /schemas/types.yaml#/definitions/phandle
+
   hisilicon,rst-syscon:
     description: phandle of the reset's syscon.
     $ref: /schemas/types.yaml#/definitions/phandle
index 377a7d242323d01fd1e1963c067c2d515d331307..6566804ec5674354cd1f5330a8031dcb658b5cc7 100644 (file)
@@ -55,6 +55,9 @@ properties:
   "#reset-cells":
     const: 1
 
+  resets:
+    maxItems: 1
+
 additionalProperties: false
 
 required:
index a50c34d5d199a789af1b295858e2c40c1075a58b..765d9f9edd6ef437925d0ff25d143e3c4f84269c 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung Exynos SoC True Random Number Generator
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Łukasz Stelmach <l.stelmach@samsung.com>
 
 properties:
index 84bf518a55493fe23131296f89ebd7e9cb2e92b2..4754174e9849b78373a68f40057be73ea44c0366 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: TimerIO Random Number Generator
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index 0b767fec39d87f48665fde70f1461afb9911faa7..6b38bd7eb3b45ac0cdd32bebe81ee0d644bb63a8 100644 (file)
@@ -71,7 +71,6 @@ allOf:
     then:
       properties:
         clock-output-names:
-          minItems: 1
           maxItems: 1
 
   - if:
@@ -102,7 +101,6 @@ allOf:
       properties:
         clock-output-names:
           minItems: 3
-          maxItems: 3
 
   - if:
       properties:
@@ -113,16 +111,12 @@ allOf:
     then:
       properties:
         clocks:
-          minItems: 3
-          maxItems: 3
           items:
             - description: Bus clock for register access
             - description: 24 MHz oscillator
             - description: 32 kHz clock from the CCU
 
         clock-names:
-          minItems: 3
-          maxItems: 3
           items:
             - const: bus
             - const: hosc
@@ -142,7 +136,6 @@ allOf:
       properties:
         clocks:
           minItems: 3
-          maxItems: 4
           items:
             - description: Bus clock for register access
             - description: 24 MHz oscillator
@@ -151,7 +144,6 @@ allOf:
 
         clock-names:
           minItems: 3
-          maxItems: 4
           items:
             - const: bus
             - const: hosc
@@ -174,14 +166,12 @@ allOf:
     then:
       properties:
         interrupts:
-          minItems: 1
           maxItems: 1
 
     else:
       properties:
         interrupts:
           minItems: 2
-          maxItems: 2
 
 required:
   - "#clock-cells"
index a2e984ea3553b92fe028bbbb625d6d1e921549f9..500c62becd6bc60215fbb1303fd33fbc7152d362 100644 (file)
@@ -31,11 +31,19 @@ properties:
           to that of the RTC's count register.
 
   clocks:
-    maxItems: 1
+    items:
+      - description: |
+          AHB clock
+      - description: |
+          Reference clock: divided by the prescaler to create a time-based
+          strobe (typically 1 Hz) for the calendar counter. By default, the rtc
+          on the PolarFire SoC shares its reference with MTIMER so this will
+          be a 1 MHz clock.
 
   clock-names:
     items:
       - const: rtc
+      - const: rtcref
 
 required:
   - compatible
@@ -48,11 +56,12 @@ additionalProperties: false
 
 examples:
   - |
+    #include "dt-bindings/clock/microchip,mpfs-clock.h"
     rtc@20124000 {
         compatible = "microchip,mpfs-rtc";
         reg = <0x20124000 0x1000>;
-        clocks = <&clkcfg 21>;
-        clock-names = "rtc";
+        clocks = <&clkcfg CLK_RTC>, <&clkcfg CLK_RTCREF>;
+        clock-names = "rtc", "rtcref";
         interrupts = <80>, <81>;
     };
 ...
index d4688e317fc54891cb6488c5c074f9ab36968dfc..901c1e2cea28cbcbd7b2c94497ecf3c205b5394e 100644 (file)
@@ -100,7 +100,6 @@ allOf:
           maxItems: 3
         clock-names:
           minItems: 2
-          maxItems: 3
           items:
             - const: uart
             - pattern: '^clk_uart_baud[0-1]$'
@@ -118,11 +117,8 @@ allOf:
     then:
       properties:
         clocks:
-          minItems: 2
           maxItems: 2
         clock-names:
-          minItems: 2
-          maxItems: 2
           items:
             - const: uart
             - const: clk_uart_baud0
index a98ed66d092e8960ed1c469c468251b2304a63cf..0cabb773c3976afabec450797e8202fd40c6761e 100644 (file)
@@ -8,7 +8,7 @@ title: Samsung's Exynos USI (Universal Serial Interface) binding
 
 maintainers:
   - Sam Protsenko <semen.protsenko@linaro.org>
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   USI IP-core provides selectable serial protocol (UART, SPI or High-Speed I2C).
index c21c807b667c465810651cbfde51d1a73395f856..34f6ee9de392250598d4f8f6e80677cc59cff395 100644 (file)
@@ -89,7 +89,6 @@ allOf:
       properties:
         dmas:
           minItems: 1
-          maxItems: 2
           items:
             - description: RX DMA Channel
             - description: TX DMA Channel
index cea2bf3544f0ae5fa26dbc2ed5be0424553c8b2c..9bc4585bb6e519329498f9de51c65c8701782a1d 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Insignal Arndale boards audio complex
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
 
 properties:
index cb51af90435e7b350d21664d9d03415029531287..ac151d3c1d779b0bc17f161fd272ea012c7e6993 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung SMDK5250 audio complex with WM8994 codec
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
 
 properties:
index 0c3b3302b842412ecfee100b1b4ea2efdf2697c7..51a83d3c72742e6e57e9474c53411b00d56ee8cb 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Google Snow audio complex with MAX9809x codec
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
 
 properties:
index 74712d6f3ef490624abc1ca5a42644a16a0b9599..491e08019c040c650647c7c38e382ff190623af2 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung Exynos5433 TM2(E) audio complex with WM5110 codec
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
   - Sylwester Nawrocki <s.nawrocki@samsung.com>
 
 properties:
index b3dbcba33e41f67cd111f3ec587a5941a7b81dd2..fe2e15504ebc41119feefe1d57ccc743e7a78f71 100644 (file)
@@ -136,8 +136,7 @@ allOf:
         compatible:
           contains:
             const: st,stm32f4-sai
-
-  - then:
+    then:
       properties:
         clocks:
           items:
@@ -148,8 +147,7 @@ allOf:
           items:
             - const: x8k
             - const: x11k
-
-  - else:
+    else:
       properties:
         clocks:
           items:
index 6806f53a4aed4088aa960d45d205552a88b7d3e3..20ea5883b7ff6befdce054d46f3372c3f7f97f0a 100644 (file)
@@ -80,7 +80,6 @@ allOf:
     then:
       properties:
         clocks:
-          minItems: 6
           items:
             - description: AUXCLK clock for McASP used by CPB audio
             - description: Parent for CPB_McASP auxclk (for 48KHz)
@@ -107,7 +106,6 @@ allOf:
     then:
       properties:
         clocks:
-          maxItems: 4
           items:
             - description: AUXCLK clock for McASP used by CPB audio
             - description: Parent for CPB_McASP auxclk (for 48KHz)
index b104899205f6d2da17fd86325cb0fec4312e22d8..5de710adfa63cb12311aff4209b40a0f49d39d99 100644 (file)
@@ -124,7 +124,6 @@ properties:
     description: |
       Override the default TX fifo size.  Unit is words.  Ignored if 0.
     $ref: /schemas/types.yaml#/definitions/uint32
-    maxItems: 1
     default: 64
 
   renesas,rx-fifo-size:
@@ -132,7 +131,6 @@ properties:
     description: |
       Override the default RX fifo size.  Unit is words.  Ignored if 0.
     $ref: /schemas/types.yaml#/definitions/uint32
-    maxItems: 1
     default: 64
 
 required:
index f0db3fb3d68804f80e850137446af20ba036d919..25b1b6c12d4de7890119744088581d2c53612e88 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Peripheral-specific properties for Samsung S3C/S5P/Exynos SoC SPI controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description:
   See spi-peripheral-props.yaml for more info.
index bf9a76d931d243c6e76f65f0dc1f7c9dba9ad6d9..a50f24f9359de1b94e536b9725a1b47fd38bd836 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung S3C/S5P/Exynos SoC SPI controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description:
   All the SPI controller nodes should be represented in the aliases node using
index 668a9a41a7754c95c280ca9b32fe20e2d4ea0eb7..993430be355b47318105cceb6a274ce05f250d45 100644 (file)
@@ -136,14 +136,14 @@ required:
   - reg
 
 if:
-  properties:
-    compatible:
-      contains:
-        enum:
-          - qcom,rpm-msg-ram
-          - rockchip,rk3288-pmu-sram
-
-else:
+  not:
+    properties:
+      compatible:
+        contains:
+          enum:
+            - qcom,rpm-msg-ram
+            - rockchip,rk3288-pmu-sram
+then:
   required:
     - "#address-cells"
     - "#size-cells"
index f963204e0b162746e39e15ba2a7935ea9e33c1a7..1368d90da0e859ba67c94a14624c906d5a8ae48c 100644 (file)
@@ -67,7 +67,6 @@ then:
   properties:
     reg:
       minItems: 2
-      maxItems: 3
       items:
         - description: TSC1 registers
         - description: TSC2 registers
index 17129f75d9624935b9d3a5899c62ee47671c88f2..1344df708e2d290accd1fd52bf011a01ef03fd62 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung Exynos SoC Thermal Management Unit (TMU)
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 description: |
   For multi-instance tmu each instance should have an alias correctly numbered
index d227dea368be8a17401b5be8392b2595b3c1d5dc..fb45f66d64549490b39cd41e2f76312818946e1b 100644 (file)
@@ -43,6 +43,9 @@ properties:
       - const: phy_clk
       - const: ref_clk
 
+  power-domains:
+    maxItems: 1
+
   reg:
     maxItems: 1
 
index 22b91a27d776215b04ccbf7d48d4fb9a6a074c58..6b9a3bcb3926f5195b34184a8c5515c9ee242d8a 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung Exynos SoC USB 3.0 DWC3 Controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
index fbf07d6e707afd6b788c3de2bd789438ac87ffaa..9c92defbba013ff2bbbf20e09bc00f9c05ab6d93 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Samsung Exynos SoC USB 2.0 EHCI/OHCI Controller
 
 maintainers:
-  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+  - Krzysztof Kozlowski <krzk@kernel.org>
 
 properties:
   compatible:
@@ -62,6 +62,7 @@ required:
   - interrupts
   - phys
   - phy-names
+  - reg
 
 allOf:
   - if:
index 55006678394a2b8333308a3ed69ce6aec5e02a98..36a76cbe90954f0e9c9985e981f564e9eda2de58 100644 (file)
@@ -185,6 +185,12 @@ DMA Fence Chain
 .. kernel-doc:: include/linux/dma-fence-chain.h
    :internal:
 
+DMA Fence unwrap
+~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: include/linux/dma-fence-unwrap.h
+   :internal:
+
 DMA Fence uABI/Sync File
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
index bbc53920d4dd699bcb62f88a3c528b299876cfbc..a1ddefa1f55f45849001fb1794712bc284ad2c1d 100644 (file)
@@ -417,30 +417,66 @@ struct gpio_irq_chip inside struct gpio_chip before adding the gpio_chip.
 If you do this, the additional irq_chip will be set up by gpiolib at the
 same time as setting up the rest of the GPIO functionality. The following
 is a typical example of a chained cascaded interrupt handler using
-the gpio_irq_chip:
+the gpio_irq_chip. Note how the mask/unmask (or disable/enable) functions
+call into the core gpiolib code:
 
 .. code-block:: c
 
-  /* Typical state container with dynamic irqchip */
+  /* Typical state container */
   struct my_gpio {
       struct gpio_chip gc;
-      struct irq_chip irq;
+  };
+
+  static void my_gpio_mask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+      /*
+       * Perform any necessary action to mask the interrupt,
+       * and then call into the core code to synchronise the
+       * state.
+       */
+
+      gpiochip_disable_irq(gc, d->hwirq);
+  }
+
+  static void my_gpio_unmask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+      gpiochip_enable_irq(gc, d->hwirq);
+
+      /*
+       * Perform any necessary action to unmask the interrupt,
+       * after having called into the core code to synchronise
+       * the state.
+       */
+  }
+
+  /*
+   * Statically populate the irqchip. Note that it is made const
+   * (further indicated by the IRQCHIP_IMMUTABLE flag), and that
+   * the GPIOCHIP_IRQ_RESOURCE_HELPERS macro adds some extra
+   * callbacks to the structure.
+   */
+  static const struct irq_chip my_gpio_irq_chip = {
+      .name            = "my_gpio_irq",
+      .irq_ack         = my_gpio_ack_irq,
+      .irq_mask        = my_gpio_mask_irq,
+      .irq_unmask      = my_gpio_unmask_irq,
+      .irq_set_type    = my_gpio_set_irq_type,
+      .flags           = IRQCHIP_IMMUTABLE,
+      /* Provide the gpio resource callbacks */
+      GPIOCHIP_IRQ_RESOURCE_HELPERS,
   };
 
   int irq; /* from platform etc */
   struct my_gpio *g;
   struct gpio_irq_chip *girq;
 
-  /* Set up the irqchip dynamically */
-  g->irq.name = "my_gpio_irq";
-  g->irq.irq_ack = my_gpio_ack_irq;
-  g->irq.irq_mask = my_gpio_mask_irq;
-  g->irq.irq_unmask = my_gpio_unmask_irq;
-  g->irq.irq_set_type = my_gpio_set_irq_type;
-
   /* Get a pointer to the gpio_irq_chip */
   girq = &g->gc.irq;
-  girq->chip = &g->irq;
+  gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
   girq->parent_handler = ftgpio_gpio_irq_handler;
   girq->num_parents = 1;
   girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
@@ -458,23 +494,58 @@ the interrupt separately and go with it:
 
 .. code-block:: c
 
-  /* Typical state container with dynamic irqchip */
+  /* Typical state container */
   struct my_gpio {
       struct gpio_chip gc;
-      struct irq_chip irq;
+  };
+
+  static void my_gpio_mask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+      /*
+       * Perform any necessary action to mask the interrupt,
+       * and then call into the core code to synchronise the
+       * state.
+       */
+
+      gpiochip_disable_irq(gc, d->hwirq);
+  }
+
+  static void my_gpio_unmask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+      gpiochip_enable_irq(gc, d->hwirq);
+
+      /*
+       * Perform any necessary action to unmask the interrupt,
+       * after having called into the core code to synchronise
+       * the state.
+       */
+  }
+
+  /*
+   * Statically populate the irqchip. Note that it is made const
+   * (further indicated by the IRQCHIP_IMMUTABLE flag), and that
+   * the GPIOCHIP_IRQ_RESOURCE_HELPERS macro adds some extra
+   * callbacks to the structure.
+   */
+  static const struct irq_chip my_gpio_irq_chip = {
+      .name            = "my_gpio_irq",
+      .irq_ack         = my_gpio_ack_irq,
+      .irq_mask        = my_gpio_mask_irq,
+      .irq_unmask      = my_gpio_unmask_irq,
+      .irq_set_type    = my_gpio_set_irq_type,
+      .flags           = IRQCHIP_IMMUTABLE,
+      /* Provide the gpio resource callbacks */
+      GPIOCHIP_IRQ_RESOURCE_HELPERS,
   };
 
   int irq; /* from platform etc */
   struct my_gpio *g;
   struct gpio_irq_chip *girq;
 
-  /* Set up the irqchip dynamically */
-  g->irq.name = "my_gpio_irq";
-  g->irq.irq_ack = my_gpio_ack_irq;
-  g->irq.irq_mask = my_gpio_mask_irq;
-  g->irq.irq_unmask = my_gpio_unmask_irq;
-  g->irq.irq_set_type = my_gpio_set_irq_type;
-
   ret = devm_request_threaded_irq(dev, irq, NULL,
                irq_thread_fn, IRQF_ONESHOT, "my-chip", g);
   if (ret < 0)
@@ -482,7 +553,7 @@ the interrupt separately and go with it:
 
   /* Get a pointer to the gpio_irq_chip */
   girq = &g->gc.irq;
-  girq->chip = &g->irq;
+  gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
   /* This will let us handle the parent IRQ in the driver */
   girq->parent_handler = NULL;
   girq->num_parents = 0;
@@ -500,24 +571,61 @@ In this case the typical set-up will look like this:
   /* Typical state container with dynamic irqchip */
   struct my_gpio {
       struct gpio_chip gc;
-      struct irq_chip irq;
       struct fwnode_handle *fwnode;
   };
 
-  int irq; /* from platform etc */
+  static void my_gpio_mask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+      /*
+       * Perform any necessary action to mask the interrupt,
+       * and then call into the core code to synchronise the
+       * state.
+       */
+
+      gpiochip_disable_irq(gc, d->hwirq);
+      irq_chip_mask_parent(d);
+  }
+
+  static void my_gpio_unmask_irq(struct irq_data *d)
+  {
+      struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+      gpiochip_enable_irq(gc, d->hwirq);
+
+      /*
+       * Perform any necessary action to unmask the interrupt,
+       * after having called into the core code to synchronise
+       * the state.
+       */
+
+      irq_chip_unmask_parent(d);
+  }
+
+  /*
+   * Statically populate the irqchip. Note that it is made const
+   * (further indicated by the IRQCHIP_IMMUTABLE flag), and that
+   * the GPIOCHIP_IRQ_RESOURCE_HELPERS macro adds some extra
+   * callbacks to the structure.
+   */
+  static const struct irq_chip my_gpio_irq_chip = {
+      .name            = "my_gpio_irq",
+      .irq_ack         = my_gpio_ack_irq,
+      .irq_mask        = my_gpio_mask_irq,
+      .irq_unmask      = my_gpio_unmask_irq,
+      .irq_set_type    = my_gpio_set_irq_type,
+      .flags           = IRQCHIP_IMMUTABLE,
+      /* Provide the gpio resource callbacks */
+      GPIOCHIP_IRQ_RESOURCE_HELPERS,
+  };
+
   struct my_gpio *g;
   struct gpio_irq_chip *girq;
 
-  /* Set up the irqchip dynamically */
-  g->irq.name = "my_gpio_irq";
-  g->irq.irq_ack = my_gpio_ack_irq;
-  g->irq.irq_mask = my_gpio_mask_irq;
-  g->irq.irq_unmask = my_gpio_unmask_irq;
-  g->irq.irq_set_type = my_gpio_set_irq_type;
-
   /* Get a pointer to the gpio_irq_chip */
   girq = &g->gc.irq;
-  girq->chip = &g->irq;
+  gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip);
   girq->default_type = IRQ_TYPE_NONE;
   girq->handler = handle_bad_irq;
   girq->fwnode = g->fwnode;
@@ -605,8 +713,9 @@ When implementing an irqchip inside a GPIO driver, these two functions should
 typically be called in the .irq_disable() and .irq_enable() callbacks from the
 irqchip.
 
-When using the gpiolib irqchip helpers, these callbacks are automatically
-assigned.
+When IRQCHIP_IMMUTABLE is not advertised by the irqchip, these callbacks
+are automatically assigned. This behaviour is deprecated and on its way
+to being removed from the kernel.
 
 
 Real-Time compliance for GPIO IRQ chips
index d477e296bda5f278dbf071baf7fb677b1772a379..311af516a3fd9c42a1eff43a2db4c392d0f449b9 100644 (file)
@@ -424,12 +424,6 @@ How commands are issued
 -----------------------
 
 Internal commands
-    First, qc is allocated and initialized using :c:func:`ata_qc_new_init`.
-    Although :c:func:`ata_qc_new_init` doesn't implement any wait or retry
-    mechanism when qc is not available, internal commands are currently
-    issued only during initialization and error recovery, so no other
-    command is active and allocation is guaranteed to succeed.
-
     Once allocated, the qc's taskfile is initialized for the command to be
     executed. qc currently has two mechanisms to notify completion. One
     is via ``qc->complete_fn()`` callback and the other is completion
@@ -447,11 +441,6 @@ SCSI commands
     translated. No qc is involved in processing a simulated scmd. The
     result is computed right away and the scmd is completed.
 
-    For a translated scmd, :c:func:`ata_qc_new_init` is invoked to allocate a
-    qc and the scmd is translated into the qc. SCSI midlayer's
-    completion notification function pointer is stored into
-    ``qc->scsidone``.
-
     ``qc->complete_fn()`` callback is used for completion notification. ATA
     commands use :c:func:`ata_scsi_qc_complete` while ATAPI commands use
     :c:func:`atapi_qc_complete`. Both functions end up calling ``qc->scsidone``
index be793c49a772dc221052006a2391c5e858806dae..d7507becf67478da56deb15c60bf306e1a40e8df 100644 (file)
@@ -73,7 +73,7 @@ busy.
 If successful, the cache backend can then start setting up the cache.  In the
 event that the initialisation fails, the cache backend should call::
 
-       void fscache_relinquish_cookie(struct fscache_cache *cache);
+       void fscache_relinquish_cache(struct fscache_cache *cache);
 
 to reset and discard the cookie.
 
@@ -110,9 +110,9 @@ to withdraw them, calling::
 
 on the cookie that each object belongs to.  This schedules the specified cookie
 for withdrawal.  This gets offloaded to a workqueue.  The cache backend can
-test for completion by calling::
+wait for completion by calling::
 
-       bool fscache_are_objects_withdrawn(struct fscache_cookie *cache);
+       void fscache_wait_for_objects(struct fscache_cache *cache);
 
 Once all the cookies are withdrawn, a cache backend can withdraw all the
 volumes, calling::
@@ -125,7 +125,7 @@ outstanding accesses on the volume to complete before returning.
 When the the cache is completely withdrawn, fscache should be notified by
 calling::
 
-       void fscache_cache_relinquish(struct fscache_cache *cache);
+       void fscache_relinquish_cache(struct fscache_cache *cache);
 
 to clear fields in the cookie and discard the caller's ref on it.
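
Taken together, a backend's teardown follows the sequence just described. A
hedged sketch under stated assumptions: the my_cache, my_object and my_volume
types and their lists are hypothetical driver state, and the per-cookie and
per-volume withdrawal calls are assumed to be fscache_withdraw_cookie() and
fscache_withdraw_volume(); only fscache_wait_for_objects() and
fscache_relinquish_cache() appear verbatim above:

.. code-block:: c

  #include <linux/fscache-cache.h>
  #include <linux/list.h>

  /* Hypothetical backend bookkeeping. */
  struct my_object { struct list_head link; struct fscache_cookie *cookie; };
  struct my_volume { struct list_head link; struct fscache_volume *volume; };

  struct my_cache {
      struct fscache_cache *cache;
      struct list_head objects;
      struct list_head volumes;
  };

  static void my_cache_shutdown(struct my_cache *mc)
  {
      struct my_object *obj;
      struct my_volume *vol;

      /* Schedule withdrawal of each object's cookie (offloaded). */
      list_for_each_entry(obj, &mc->objects, link)
          fscache_withdraw_cookie(obj->cookie);

      /* Wait for the offloaded withdrawals to complete. */
      fscache_wait_for_objects(mc->cache);

      /* Withdraw the volumes once all their cookies are gone... */
      list_for_each_entry(vol, &mc->volumes, link)
          fscache_withdraw_volume(vol->volume);

      /* ...and finally give back the cache itself. */
      fscache_relinquish_cache(mc->cache);
  }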
 
index 5066113acad59aa0ba46de39da6e288066e8020a..7308d76a29dc78a5266fec862ff8d5536892120a 100644 (file)
@@ -404,22 +404,21 @@ schedule a write of that region::
 And if an error occurs before that point is reached, the marks can be removed
 by calling::
 
-       void fscache_clear_page_bits(struct fscache_cookie *cookie,
-                                    struct address_space *mapping,
+       void fscache_clear_page_bits(struct address_space *mapping,
                                     loff_t start, size_t len,
                                     bool caching)
 
-In both of these functions, the cookie representing the cache object to be
-written to and a pointer to the mapping to which the source pages are attached
-are passed in; start and len indicate the size of the region that's going to be
-written (it doesn't have to align to page boundaries necessarily, but it does
-have to align to DIO boundaries on the backing filesystem).  The caching
-parameter indicates if caching should be skipped, and if false, the functions
-do nothing.
-
-The write function takes some additional parameters: i_size indicates the size
-of the netfs file and term_func indicates an optional completion function, to
-which term_func_priv will be passed, along with the error or amount written.
+In these functions, a pointer to the mapping to which the source pages are
+attached is passed in, and start and len indicate the region that's going to
+be written (it doesn't necessarily have to align to page boundaries, but it
+does have to align to DIO boundaries on the backing filesystem).  The caching
+parameter indicates whether caching is in use; if it is false, the functions
+do nothing.
+
+The write function takes some additional parameters: the cookie representing
+the cache object to be written to; i_size, which indicates the size of the
+netfs file; and term_func, an optional completion function to which
+term_func_priv will be passed, along with the error or amount written.
 
 Note that the write function will always run asynchronously and will unmark all
 the pages upon completion before calling term_func.
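+
+For example, on an error detected after the pages were marked but before the
+write was submitted, a netfs might undo the marks like this (a sketch using
+the parameters described above)::
+
+	/* remove the cache marks over the region again */
+	fscache_clear_page_bits(mapping, start, len, caching);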
index 54386a010a8d7003a36d8792330715316f7f3129..871d2da7a0a91e73f0f791f5627a57855741f20a 100644 (file)
@@ -76,7 +76,7 @@ The beginning of an extended attribute block is in
      - Checksum of the extended attribute block.
    * - 0x14
      - \_\_u32
-     - h\_reserved[2]
+     - h\_reserved[3]
      - Zero.
 
 The checksum is calculated against the FS UUID, the 64-bit block number
index 4a2426f0485a994482f11244cf1b146966a5fcd0..ad8dc8c040a2766d176728fe089edcf3e0950d8a 100644 (file)
@@ -235,12 +235,6 @@ offgrpjquota                Turn off group journalled quota.
 offprjjquota            Turn off project journalled quota.
 quota                   Enable plain user disk quota accounting.
 noquota                         Disable all plain disk quota option.
-whint_mode=%s           Control which write hints are passed down to block
-                        layer. This supports "off", "user-based", and
-                        "fs-based".  In "off" mode (default), f2fs does not pass
-                        down hints. In "user-based" mode, f2fs tries to pass
-                        down hints given by users. And in "fs-based" mode, f2fs
-                        passes down hints with its policy.
 alloc_mode=%s           Adjust block allocation policy, which supports "reuse"
                         and "default".
 fsync_mode=%s           Control the policy of fsync. Currently supports "posix",
@@ -751,70 +745,6 @@ In order to identify whether the data in the victim segment are valid or not,
 F2FS manages a bitmap. Each bit represents the validity of a block, and the
 bitmap is composed of a bit stream covering whole blocks in main area.
 
-Write-hint Policy
------------------
-
-1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
-
-2) whint_mode=user-based. F2FS tries to pass down hints given by
-users.
-
-===================== ======================== ===================
-User                  F2FS                     Block
-===================== ======================== ===================
-N/A                   META                     WRITE_LIFE_NOT_SET
-N/A                   HOT_NODE                 "
-N/A                   WARM_NODE                "
-N/A                   COLD_NODE                "
-ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
-extension list        "                        "
-
--- buffered io
-WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
-WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
-WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
-WRITE_LIFE_NONE       "                        "
-WRITE_LIFE_MEDIUM     "                        "
-WRITE_LIFE_LONG       "                        "
-
--- direct io
-WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
-WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
-WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
-WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
-WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
-WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
-===================== ======================== ===================
-
-3) whint_mode=fs-based. F2FS passes down hints with its policy.
-
-===================== ======================== ===================
-User                  F2FS                     Block
-===================== ======================== ===================
-N/A                   META                     WRITE_LIFE_MEDIUM;
-N/A                   HOT_NODE                 WRITE_LIFE_NOT_SET
-N/A                   WARM_NODE                "
-N/A                   COLD_NODE                WRITE_LIFE_NONE
-ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
-extension list        "                        "
-
--- buffered io
-WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
-WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
-WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
-WRITE_LIFE_NONE       "                        "
-WRITE_LIFE_MEDIUM     "                        "
-WRITE_LIFE_LONG       "                        "
-
--- direct io
-WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
-WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
-WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
-WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
-WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
-WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
-===================== ======================== ===================
-
 Fallocate(2) Policy
 -------------------
 
index 061744c436d99e0670462b0627ab1a177b2a3dbf..6a0dd99786f99b4fb0a36bfbb3cb46e4cde94864 100644 (file)
@@ -1183,85 +1183,7 @@ Provides counts of softirq handlers serviced since boot time, for each CPU.
     HRTIMER:         0          0          0          0
        RCU:      1678       1769       2178       2250
 
-
-1.3 IDE devices in /proc/ide
-----------------------------
-
-The subdirectory /proc/ide contains information about all IDE devices of which
-the kernel  is  aware.  There is one subdirectory for each IDE controller, the
-file drivers  and a link for each IDE device, pointing to the device directory
-in the controller specific subtree.
-
-The file 'drivers' contains general information about the drivers used for the
-IDE devices::
-
-  > cat /proc/ide/drivers
-  ide-cdrom version 4.53
-  ide-disk version 1.08
-
-More detailed  information  can  be  found  in  the  controller  specific
-subdirectories. These  are  named  ide0,  ide1  and  so  on.  Each  of  these
-directories contains the files shown in table 1-6.
-
-
-.. table:: Table 1-6: IDE controller info in  /proc/ide/ide?
-
- ======= =======================================
- File    Content
- ======= =======================================
- channel IDE channel (0 or 1)
- config  Configuration (only for PCI/IDE bridge)
- mate    Mate name
- model   Type/Chipset of IDE controller
- ======= =======================================
-
-Each device  connected  to  a  controller  has  a separate subdirectory in the
-controllers directory.  The  files  listed in table 1-7 are contained in these
-directories.
-
-
-.. table:: Table 1-7: IDE device information
-
- ================ ==========================================
- File             Content
- ================ ==========================================
- cache            The cache
- capacity         Capacity of the medium (in 512Byte blocks)
- driver           driver and version
- geometry         physical and logical geometry
- identify         device identify block
- media            media type
- model            device identifier
- settings         device setup
- smart_thresholds IDE disk management thresholds
- smart_values     IDE disk management values
- ================ ==========================================
-
-The most  interesting  file is ``settings``. This file contains a nice
-overview of the drive parameters::
-
-  # cat /proc/ide/ide0/hda/settings
-  name                    value           min             max             mode
-  ----                    -----           ---             ---             ----
-  bios_cyl                526             0               65535           rw
-  bios_head               255             0               255             rw
-  bios_sect               63              0               63              rw
-  breada_readahead        4               0               127             rw
-  bswap                   0               0               1               r
-  file_readahead          72              0               2097151         rw
-  io_32bit                0               0               3               rw
-  keepsettings            0               0               1               rw
-  max_kb_per_request      122             1               127             rw
-  multcount               0               0               8               rw
-  nice1                   1               0               1               rw
-  nowerr                  0               0               1               rw
-  pio_mode                write-only      0               255             w
-  slow                    0               0               1               rw
-  unmaskirq               0               0               1               rw
-  using_dma               0               0               1               rw
-
-
-1.4 Networking info in /proc/net
+1.3 Networking info in /proc/net
 --------------------------------
 
 The subdirectory  /proc/net  follows  the  usual  pattern. Table 1-8 shows the
@@ -1340,7 +1262,7 @@ It will contain information that is specific to that bond, such as the
 current slaves of the bond, the link status of the slaves, and how
 many times the slaves link has failed.
 
-1.5 SCSI info
+1.4 SCSI info
 -------------
 
 If you  have  a  SCSI  host adapter in your system, you'll find a subdirectory
@@ -1403,7 +1325,7 @@ AHA-2940 SCSI adapter::
     Total transfers 0 (0 reads and 0 writes)
 
 
-1.6 Parallel port info in /proc/parport
+1.5 Parallel port info in /proc/parport
 ---------------------------------------
 
 The directory  /proc/parport  contains information about the parallel ports of
@@ -1428,7 +1350,7 @@ These directories contain the four files shown in Table 1-10.
            number or none).
  ========= ====================================================================
 
-1.7 TTY info in /proc/tty
+1.6 TTY info in /proc/tty
 -------------------------
 
 Information about  the  available  and actually used tty's can be found in the
@@ -1463,7 +1385,7 @@ To see  which  tty's  are  currently in use, you can simply look into the file
   unknown              /dev/tty        4    1-63 console
 
 
-1.8 Miscellaneous kernel statistics in /proc/stat
+1.7 Miscellaneous kernel statistics in /proc/stat
 -------------------------------------------------
 
 Various pieces   of  information about  kernel activity  are  available in the
@@ -1536,7 +1458,7 @@ softirqs serviced; each subsequent column is the total for that particular
 softirq.
 
 
-1.9 Ext4 file system parameters
+1.8 Ext4 file system parameters
 -------------------------------
 
 Information about mounted ext4 file systems can be found in
@@ -1552,7 +1474,7 @@ in Table 1-12, below.
  mb_groups       details of multiblock allocator buddy cache of free blocks
  ==============  ==========================================================
 
-1.10 /proc/consoles
+1.9 /proc/consoles
 -------------------
 Shows registered system console lines.
 
index 6b213fe9a33e95c6cc5803f9833e1222e852c5ee..394b9f15dce059348673453000fa3c8f5ac5062f 100644 (file)
@@ -306,8 +306,15 @@ Further notes:
 Mount options
 -------------
 
-zonefs define the "errors=<behavior>" mount option to allow the user to specify
-zonefs behavior in response to I/O errors, inode size inconsistencies or zone
+zonefs defines several mount options:
+* errors=<behavior>
+* explicit-open
+
+"errors=<behavior>" option
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The "errors=<behavior>" option mount option allows the user to specify zonefs
+behavior in response to I/O errors, inode size inconsistencies or zone
 condition changes. The defined behaviors are as follow:
 
 * remount-ro (default)
@@ -326,6 +333,9 @@ discover the amount of data that has been written to the zone. In the case of a
 read-only zone discovered at run-time, as indicated in the previous section.
 The size of the zone file is left unchanged from its last updated value.
 
+"explicit-open" option
+~~~~~~~~~~~~~~~~~~~~~~
+
 A zoned block device (e.g. an NVMe Zoned Namespace device) may have limits on
 the number of zones that can be active, that is, zones that are in the
 implicit open, explicit open or closed conditions.  This potential limitation
@@ -341,6 +351,44 @@ guaranteed that write requests can be processed. Conversely, the
 to the device on the last close() of a zone file if the zone is not full nor
 empty.
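+
+A mount call combining both options might look like this (a sketch; the
+device and mount point names are hypothetical)::
+
+	#include <sys/mount.h>
+
+	mount("/dev/nullb0", "/mnt/zonefs", "zonefs", 0,
+	      "errors=remount-ro,explicit-open");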
 
+Runtime sysfs attributes
+------------------------
+
+zonefs defines several sysfs attributes for mounted devices.  All attributes
+are user readable and can be found in the directory /sys/fs/zonefs/<dev>/,
+where <dev> is the name of the mounted zoned block device.
+
+The attributes defined are as follows.
+
+* **max_wro_seq_files**:  This attribute reports the maximum number of
+  sequential zone files that can be open for writing.  This number corresponds
+  to the maximum number of explicitly or implicitly open zones that the device
+  supports.  A value of 0 means that the device has no limit and that any zone
+  (any file) can be open for writing and written at any time, regardless of the
+  state of other zones.  When the *explicit-open* mount option is used, zonefs
+  will fail any open() system call requesting to open a sequential zone file for
+  writing when the number of sequential zone files already open for writing has
+  reached the *max_wro_seq_files* limit.
+* **nr_wro_seq_files**:  This attribute reports the current number of sequential
+  zone files open for writing.  When the "explicit-open" mount option is used,
+  this number can never exceed *max_wro_seq_files*.  If the *explicit-open*
+  mount option is not used, the reported number can be greater than
+*max_wro_seq_files*.  In such a case, it is the responsibility of the
+  application to not write simultaneously more than *max_wro_seq_files*
+  sequential zone files.  Failure to do so can result in write errors.
+* **max_active_seq_files**:  This attribute reports the maximum number of
+  sequential zone files that are in an active state, that is, sequential zone
+  files that are partially written (neither empty nor full) or that have a zone
+  is explicitly open (which happens only if the *explicit-open* mount option is
+  used).  This number is always equal to the maximum number of active zones that
+  the device supports.  A value of 0 means that the mounted device has no limit
+  on the number of sequential zone files that can be active.
+* **nr_active_seq_files**:  This attribute reports the current number of
+  sequential zone files that are active. If *max_active_seq_files* is not 0,
+  then the value of *nr_active_seq_files* can never exceed the value of
+  *max_active_seq_files*, regardless of the use of the *explicit-open* mount
+  option.
+
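+These attributes are ordinary sysfs files; a user-space check of the
+write-open limit might look like this (a sketch; the device name is
+hypothetical)::
+
+	#include <stdio.h>
+
+	int max;
+	FILE *f = fopen("/sys/fs/zonefs/nullb0/max_wro_seq_files", "r");
+
+	if (f && fscanf(f, "%d", &max) == 1)
+		printf("up to %d zone files may be open for writing\n", max);
+	if (f)
+		fclose(f);
+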
 Zonefs User Space Tools
 =======================
 
index 525e6842dd3392a529672fbdf0b396b5cfaa5cb6..43be3782e5dfe857f00bfa448e14df5d11e0289d 100644 (file)
@@ -894,7 +894,7 @@ xmit_hash_policy
                Uses XOR of hardware MAC addresses and packet type ID
                field to generate the hash. The formula is
 
-               hash = source MAC XOR destination MAC XOR packet type ID
+               hash = source MAC[5] XOR destination MAC[5] XOR packet type ID
                slave number = hash modulo slave count
 
                This algorithm will place all traffic to a particular
@@ -910,7 +910,7 @@ xmit_hash_policy
                Uses XOR of hardware MAC addresses and IP addresses to
                generate the hash.  The formula is
 
-               hash = source MAC XOR destination MAC XOR packet type ID
+               hash = source MAC[5] XOR destination MAC[5] XOR packet type ID
                hash = hash XOR source IP XOR destination IP
                hash = hash XOR (hash RSHIFT 16)
                hash = hash XOR (hash RSHIFT 8)
index 89bb4fa4c362a5ae5a79d113fd26718bd590bedd..ddc1dd039337f7a77b1578ed927e3c9786823bc5 100644 (file)
@@ -10,21 +10,21 @@ in joining the effort.
 Design principles
 =================
 
-The Distributed Switch Architecture is a subsystem which was primarily designed
-to support Marvell Ethernet switches (MV88E6xxx, a.k.a Linkstreet product line)
-using Linux, but has since evolved to support other vendors as well.
+The Distributed Switch Architecture subsystem was primarily designed to
+support Marvell Ethernet switches (MV88E6xxx, a.k.a. Link Street product
+line) using Linux, but has since evolved to support other vendors as well.
 
 The original philosophy behind this design was to be able to use unmodified
 Linux tools such as bridge, iproute2, ifconfig to work transparently whether
 they configured/queried a switch port network device or a regular network
 device.
 
-An Ethernet switch is typically comprised of multiple front-panel ports, and one
-or more CPU or management port. The DSA subsystem currently relies on the
+An Ethernet switch typically comprises multiple front-panel ports and one
+or more CPU or management ports. The DSA subsystem currently relies on the
 presence of a management port connected to an Ethernet controller capable of
 receiving Ethernet frames from the switch. This is a very common setup for all
 kinds of Ethernet switches found in Small Home and Office products: routers,
-gateways, or even top-of-the rack switches. This host Ethernet controller will
+gateways, or even top-of-rack switches. This host Ethernet controller will
 be later referred to as "master" and "cpu" in DSA terminology and code.
 
 The D in DSA stands for Distributed, because the subsystem has been designed
@@ -33,14 +33,14 @@ using upstream and downstream Ethernet links between switches. These specific
 ports are referred to as "dsa" ports in DSA terminology and code. A collection
 of multiple switches connected to each other is called a "switch tree".
 
-For each front-panel port, DSA will create specialized network devices which are
+For each front-panel port, DSA creates specialized network devices which are
 used as controlling and data-flowing endpoints for use by the Linux networking
 stack. These specialized network interfaces are referred to as "slave" network
 interfaces in DSA terminology and code.
 
 The ideal case for using DSA is when an Ethernet switch supports a "switch tag"
 which is a hardware feature making the switch insert a specific tag for each
-Ethernet frames it received to/from specific ports to help the management
+Ethernet frame it receives to/from specific ports to help the management
 interface figure out:
 
 - what port is this frame coming from
@@ -125,7 +125,7 @@ other switches from the same fabric, and in this case, the outermost switch
 ports must decapsulate the packet.
 
 Note that in certain cases, it might be the case that the tagging format used
-by a leaf switch (not connected directly to the CPU) to not be the same as what
+by a leaf switch (not connected directly to the CPU) is not the same as what
 the network stack sees. This can be seen with Marvell switch trees, where the
 CPU port can be configured to use either the DSA or the Ethertype DSA (EDSA)
 format, but the DSA links are configured to use the shorter (without Ethertype)
@@ -270,21 +270,21 @@ These interfaces are specialized in order to:
   to/from specific switch ports
 - query the switch for ethtool operations: statistics, link state,
   Wake-on-LAN, register dumps...
-- external/internal PHY management: link, auto-negotiation etc.
+- manage external/internal PHY: link, auto-negotiation, etc.
 
 These slave network devices have custom net_device_ops and ethtool_ops function
 pointers which allow DSA to introduce a level of layering between the networking
-stack/ethtool, and the switch driver implementation.
+stack/ethtool and the switch driver implementation.
 
 Upon frame transmission from these slave network devices, DSA will look up which
-switch tagging protocol is currently registered with these network devices, and
+switch tagging protocol is currently registered with these network devices and
 invoke a specific transmit routine which takes care of adding the relevant
 switch tag in the Ethernet frames.
 
 These frames are then queued for transmission using the master network device
-``ndo_start_xmit()`` function, since they contain the appropriate switch tag, the
+``ndo_start_xmit()`` function. Since they contain the appropriate switch tag, the
 Ethernet switch will be able to process these incoming frames from the
-management interface and delivers these frames to the physical switch port.
+management interface and deliver them to the physical switch port.
 
 Graphical representation
 ------------------------
@@ -330,9 +330,9 @@ MDIO reads/writes towards specific PHY addresses. In most MDIO-connected
 switches, these functions would utilize direct or indirect PHY addressing mode
 to return standard MII registers from the switch builtin PHYs, allowing the PHY
 library and/or to return link status, link partner pages, auto-negotiation
-results etc..
+results, etc.
 
-For Ethernet switches which have both external and internal MDIO busses, the
+For Ethernet switches which have both external and internal MDIO buses, the
 slave MII bus can be utilized to mux/demux MDIO reads and writes towards either
 internal or external MDIO devices this switch might be connected to: internal
 PHYs, external PHYs, or even external switches.
@@ -349,7 +349,7 @@ DSA data structures are defined in ``include/net/dsa.h`` as well as
   table indication (when cascading switches)
 
 - ``dsa_platform_data``: platform device configuration data which can reference
-  a collection of dsa_chip_data structure if multiples switches are cascaded,
+  a collection of dsa_chip_data structures if multiple switches are cascaded,
   the master network device this switch tree is attached to needs to be
   referenced
 
@@ -426,7 +426,7 @@ logic basically looks like this:
   "phy-handle" property, if found, this PHY device is created and registered
   using ``of_phy_connect()``
 
-- if Device Tree is used, and the PHY device is "fixed", that is, conforms to
+- if Device Tree is used and the PHY device is "fixed", that is, conforms to
   the definition of a non-MDIO managed PHY as defined in
   ``Documentation/devicetree/bindings/net/fixed-link.txt``, the PHY is registered
   and connected transparently using the special fixed MDIO bus driver
@@ -481,7 +481,7 @@ Device Tree
 DSA features a standardized binding which is documented in
 ``Documentation/devicetree/bindings/net/dsa/dsa.txt``. PHY/MDIO library helper
 functions such as ``of_get_phy_mode()``, ``of_phy_connect()`` are also used to query
-per-port PHY specific details: interface connection, MDIO bus location etc..
+per-port PHY specific details: interface connection, MDIO bus location, etc.
 
 Driver development
 ==================
@@ -509,7 +509,7 @@ Switch configuration
 
 - ``setup``: setup function for the switch, this function is responsible for setting
   up the ``dsa_switch_ops`` private structure with all it needs: register maps,
-  interrupts, mutexes, locks etc.. This function is also expected to properly
+  interrupts, mutexes, locks, etc. This function is also expected to properly
   configure the switch to separate all network interfaces from each other, that
   is, they should be isolated by the switch hardware itself, typically by creating
   a Port-based VLAN ID for each port and allowing only the CPU port and the
@@ -526,13 +526,13 @@ PHY devices and link management
 - ``get_phy_flags``: Some switches are interfaced to various kinds of Ethernet PHYs,
   if the PHY library PHY driver needs to know about information it cannot obtain
   on its own (e.g.: coming from switch memory mapped registers), this function
-  should return a 32-bits bitmask of "flags", that is private between the switch
+  should return a 32-bit bitmask of "flags" that is private between the switch
   driver and the Ethernet PHY driver in ``drivers/net/phy/\*``.
 
 - ``phy_read``: Function invoked by the DSA slave MDIO bus when attempting to read
   the switch port MDIO registers. If unavailable, return 0xffff for each read.
   For builtin switch Ethernet PHYs, this function should allow reading the link
-  status, auto-negotiation results, link partner pages etc..
+  status, auto-negotiation results, link partner pages, etc.
 
 - ``phy_write``: Function invoked by the DSA slave MDIO bus when attempting to write
   to the switch port MDIO registers. If unavailable return a negative error
@@ -554,7 +554,7 @@ Ethtool operations
 ------------------
 
 - ``get_strings``: ethtool function used to query the driver's strings, will
-  typically return statistics strings, private flags strings etc.
+  typically return statistics strings, private flags strings, etc.
 
 - ``get_ethtool_stats``: ethtool function used to query per-port statistics and
   return their values. DSA overlays slave network devices general statistics:
@@ -564,7 +564,7 @@ Ethtool operations
 - ``get_sset_count``: ethtool function used to query the number of statistics items
 
 - ``get_wol``: ethtool function used to obtain Wake-on-LAN settings per-port, this
-  function may, for certain implementations also query the master network device
+  function may for certain implementations also query the master network device
   Wake-on-LAN settings if this interface needs to participate in Wake-on-LAN
 
 - ``set_wol``: ethtool function used to configure Wake-on-LAN settings per-port,
@@ -607,14 +607,14 @@ Power management
   in a fully active state
 
 - ``port_enable``: function invoked by the DSA slave network device ndo_open
-  function when a port is administratively brought up, this function should be
-  fully enabling a given switch port. DSA takes care of marking the port with
+  function when a port is administratively brought up, this function should
+  fully enable a given switch port. DSA takes care of marking the port with
   ``BR_STATE_BLOCKING`` if the port is a bridge member, or ``BR_STATE_FORWARDING`` if it
   was not, and propagating these changes down to the hardware
 
 - ``port_disable``: function invoked by the DSA slave network device ndo_close
-  function when a port is administratively brought down, this function should be
-  fully disabling a given switch port. DSA takes care of marking the port with
+  function when a port is administratively brought down, this function should
+  fully disable a given switch port. DSA takes care of marking the port with
   ``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is
   disabled while being a bridge member
 
@@ -622,12 +622,12 @@ Bridge layer
 ------------
 
 - ``port_bridge_join``: bridge layer function invoked when a given switch port is
-  added to a bridge, this function should be doing the necessary at the switch
-  level to permit the joining port from being added to the relevant logical
+  added to a bridge, this function should do what's necessary at the switch
+  level to permit the joining port to be added to the relevant logical
   domain for it to ingress/egress traffic with other members of the bridge.
 
 - ``port_bridge_leave``: bridge layer function invoked when a given switch port is
-  removed from a bridge, this function should be doing the necessary at the
+  removed from a bridge, this function should do what's necessary at the
   switch level to deny the leaving port from ingress/egress traffic from the
   remaining bridge members. When the port leaves the bridge, it should be aged
   out at the switch hardware for the switch to (re) learn MAC addresses behind
@@ -663,7 +663,7 @@ Bridge layer
   point for drivers that need to configure the hardware for enabling this
   feature.
 
-- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoken when a driver
+- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoked when a driver
   leaves a bridge port which had the TX forwarding offload feature enabled.
 
 Bridge VLAN filtering
index b0024aa7b0514f7174ebf7512e2c7da256b494d1..66828293d9cb715391459d4dc37868217da55113 100644 (file)
@@ -267,6 +267,13 @@ ipfrag_max_dist - INTEGER
        from different IP datagrams, which could result in data corruption.
        Default: 64
 
+bc_forwarding - INTEGER
+       bc_forwarding enables the feature described in rfc1812#section-5.3.5.2
+       and rfc2644. It allows the router to forward directed broadcasts.
+       To enable this feature, the 'all' entry and the input interface entry
+       should be set to 1.
+       Default: 0
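+
+       A minimal C sketch of turning it on ("eth0" is a hypothetical
+       input interface)::
+
+           #include <stdio.h>
+
+           FILE *f = fopen("/proc/sys/net/ipv4/conf/all/bc_forwarding", "w");
+           if (f) { fputs("1", f); fclose(f); }
+           f = fopen("/proc/sys/net/ipv4/conf/eth0/bc_forwarding", "w");
+           if (f) { fputs("1", f); fclose(f); }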
+
 INET peer storage
 =================
 
index 6f8f36e10e8ba2ee62f2917b295b7a61103e4f4c..95999302d279fe4e96bede18aeaba84bceccc253 100644 (file)
@@ -244,10 +244,11 @@ disclosure of a particular issue, unless requested by a response team or by
 an involved disclosed party. The current ambassadors list:
 
   ============= ========================================================
-  ARM           Grant Likely <grant.likely@arm.com>
   AMD          Tom Lendacky <tom.lendacky@amd.com>
-  IBM Z         Christian Borntraeger <borntraeger@de.ibm.com>
-  IBM Power     Anton Blanchard <anton@linux.ibm.com>
+  Ampere       Darren Hart <darren@os.amperecomputing.com>
+  ARM          Catalin Marinas <catalin.marinas@arm.com>
+  IBM Power    Anton Blanchard <anton@linux.ibm.com>
+  IBM Z                Christian Borntraeger <borntraeger@de.ibm.com>
   Intel                Tony Luck <tony.luck@intel.com>
   Qualcomm     Trilok Soni <tsoni@codeaurora.org>
 
index c74f4a81588b24c55e99a194419c5f393be742de..572a3289c9cbf3b893404b01fe372403ee6010c7 100644 (file)
@@ -437,6 +437,20 @@ in a private repository which allows interested people to easily pull the
 series for testing. The usual way to offer this is a git URL in the cover
 letter of the patch series.
 
+Testing
+^^^^^^^
+
+Code should be tested before submitting to the tip maintainers.  Anything
+other than minor changes should be built, booted and tested with
+comprehensive (and heavyweight) kernel debugging options enabled.
+
+These debugging options can be found in kernel/configs/x86_debug.config
+and can be added to an existing kernel config by running::
+
+       make x86_debug.config
+
+Some of these options are x86-specific and can be left out when testing
+on other architectures.
 
 Coding style notes
 ------------------
index 16335de04e8c6d55e8832a86f6c7b653cdace85e..6ed8d2fa6f9ef6ff647e3f74c4f1bc9a25f07659 100644 (file)
@@ -17,3 +17,4 @@ Security Documentation
    tpm/index
    digsig
    landlock
+   secrets/index
diff --git a/Documentation/security/secrets/coco.rst b/Documentation/security/secrets/coco.rst
new file mode 100644 (file)
index 0000000..262e7ab
--- /dev/null
@@ -0,0 +1,103 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+Confidential Computing secrets
+==============================
+
+This document describes how Confidential Computing secret injection is handled
+from the firmware to the operating system, in the EFI driver and the efi_secret
+kernel module.
+
+
+Introduction
+============
+
+Confidential Computing (coco) hardware such as AMD SEV (Secure Encrypted
+Virtualization) allows guest owners to inject secrets into the VMs
+memory without the host/hypervisor being able to read them.  In SEV,
+secret injection is performed early in the VM launch process, before the
+guest starts running.
+
+The efi_secret kernel module allows userspace applications to access these
+secrets via securityfs.
+
+
+Secret data flow
+================
+
+The guest firmware may reserve a designated memory area for secret injection,
+and publish its location (base GPA and length) in the EFI configuration table
+under a ``LINUX_EFI_COCO_SECRET_AREA_GUID`` entry
+(``adf956ad-e98c-484c-ae11-b51c7d336447``).  This memory area should be marked
+by the firmware as ``EFI_RESERVED_TYPE``, and therefore the kernel should not
+use it for its own purposes.
+
+During the VM's launch, the virtual machine manager may inject a secret to that
+area.  In AMD SEV and SEV-ES this is performed using the
+``KVM_SEV_LAUNCH_SECRET`` command (see [sev]_).  The structure of the injected
+Guest Owner secret data should be a GUIDed table of secret values; the binary
+format is described in ``drivers/virt/coco/efi_secret/efi_secret.c`` under
+"Structure of the EFI secret area".
+
+On kernel start, the kernel's EFI driver saves the location of the secret area
+(taken from the EFI configuration table) in the ``efi.coco_secret`` field.
+Later it checks if the secret area is populated: it maps the area and checks
+whether its content begins with ``EFI_SECRET_TABLE_HEADER_GUID``
+(``1e74f542-71dd-4d66-963e-ef4287ff173b``).  If the secret area is populated,
+the EFI driver will autoload the efi_secret kernel module, which exposes the
+secrets to userspace applications via securityfs.  The details of the
+efi_secret filesystem interface are in [secrets-coco-abi]_.
+
+
+Application usage example
+=========================
+
+Consider a guest performing computations on encrypted files.  The Guest Owner
+provides the decryption key (= secret) using the secret injection mechanism.
+The guest application reads the secret from the efi_secret filesystem and
+proceeds to decrypt the files into memory and then performs the needed
+computations on the content.
+
+In this example, the host can't read the files from the disk image
+because they are encrypted.  The host can't read the decryption key because
+it is passed using the secret injection mechanism (= secure channel).  The
+host can't read the decrypted content from memory because it's a
+confidential (memory-encrypted) guest.
+
+Here is a simple example for usage of the efi_secret module in a guest
+to which an EFI secret area with 4 secrets was injected during launch::
+
+       # ls -la /sys/kernel/security/secrets/coco
+       total 0
+       drwxr-xr-x 2 root root 0 Jun 28 11:54 .
+       drwxr-xr-x 3 root root 0 Jun 28 11:54 ..
+       -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+       -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+       -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+       -r--r----- 1 root root 0 Jun 28 11:54 e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+       # hd /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+       00000000  74 68 65 73 65 2d 61 72  65 2d 74 68 65 2d 6b 61  |these-are-the-ka|
+       00000010  74 61 2d 73 65 63 72 65  74 73 00 01 02 03 04 05  |ta-secrets......|
+       00000020  06 07                                             |..|
+       00000022
+
+       # rm /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+       # ls -la /sys/kernel/security/secrets/coco
+       total 0
+       drwxr-xr-x 2 root root 0 Jun 28 11:55 .
+       drwxr-xr-x 3 root root 0 Jun 28 11:54 ..
+       -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+       -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+       -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+
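+A guest application consumes a secret by simply reading its file (a sketch;
+the GUID file name is taken from the listing above)::
+
+	#include <fcntl.h>
+	#include <unistd.h>
+
+	char secret[4096];
+	int fd = open("/sys/kernel/security/secrets/coco/"
+		      "736870e5-84f0-4973-92ec-06879ce3da0b", O_RDONLY);
+	/* n is the number of secret bytes read */
+	ssize_t n = read(fd, secret, sizeof(secret));
+
+	close(fd);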
+
+References
+==========
+
+See [sev-api-spec]_ for more info regarding SEV ``LAUNCH_SECRET`` operation.
+
+.. [sev] Documentation/virt/kvm/amd-memory-encryption.rst
+.. [secrets-coco-abi] Documentation/ABI/testing/securityfs-secrets-coco
+.. [sev-api-spec] https://www.amd.com/system/files/TechDocs/55766_SEV-KM_API_Specification.pdf
diff --git a/Documentation/security/secrets/index.rst b/Documentation/security/secrets/index.rst
new file mode 100644 (file)
index 0000000..ced34e9
--- /dev/null
@@ -0,0 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Secrets documentation
+=====================
+
+.. toctree::
+
+   coco
index bd9363025fcbc1d843976d59985b22077585013e..a10380cb78e5338eb41656173e7b71a5536e4a84 100644 (file)
@@ -121,26 +121,36 @@ even scarier, uses an easily brute-forcable 64-bit key (with a 32-bit output)
 instead of SipHash's 128-bit key. However, this may appeal to some
 high-performance `jhash` users.
 
-Danger!
-
-Do not ever use HalfSipHash except for as a hashtable key function, and only
-then when you can be absolutely certain that the outputs will never be
-transmitted out of the kernel. This is only remotely useful over `jhash` as a
-means of mitigating hashtable flooding denial of service attacks.
-
-Generating a HalfSipHash key
-============================
+HalfSipHash support is provided through the "hsiphash" family of functions.
+
+.. warning::
+   Do not ever use the hsiphash functions except as a hashtable key
+   function, and only then when you can be absolutely certain that the outputs
+   will never be transmitted out of the kernel. This is only remotely useful
+   over `jhash` as a means of mitigating hashtable flooding denial of service
+   attacks.
+
+On 64-bit kernels, the hsiphash functions actually implement SipHash-1-3, a
+reduced-round variant of SipHash, instead of HalfSipHash-1-3. This is because in
+64-bit code, SipHash-1-3 is no slower than HalfSipHash-1-3, and can be faster.
+Note, this does *not* mean that in 64-bit kernels the hsiphash functions are the
+same as the siphash ones, or that they are secure; the hsiphash functions still
+use a less secure reduced-round algorithm and truncate their outputs to 32
+bits.
+
+Generating a hsiphash key
+=========================
 
 Keys should always be generated from a cryptographically secure source of
-random numbers, either using get_random_bytes or get_random_once:
+random numbers, either using get_random_bytes or get_random_once::
 
-hsiphash_key_t key;
-get_random_bytes(&key, sizeof(key));
+       hsiphash_key_t key;
+       get_random_bytes(&key, sizeof(key));
 
 If you're not deriving your key from here, you're doing it wrong.
 
-Using the HalfSipHash functions
-===============================
+Using the hsiphash functions
+============================
 
 There are two variants of the function, one that takes a list of integers, and
 one that takes a buffer::
@@ -183,7 +193,7 @@ You may then iterate like usual over the returned hash bucket.
 Performance
 ===========
 
-HalfSipHash is roughly 3 times slower than JenkinsHash. For many replacements,
-this will not be a problem, as the hashtable lookup isn't the bottleneck. And
-in general, this is probably a good sacrifice to make for the security and DoS
-resistance of HalfSipHash.
+hsiphash() is roughly 3 times slower than jhash(). For many replacements, this
+will not be a problem, as the hashtable lookup isn't the bottleneck. And in
+general, this is probably a good sacrifice to make for the security and DoS
+resistance of hsiphash().
index 682948fc88a34a42a647bc90dae310c1ee6c5803..2ad91dbebd7cb61e0056421c5ab117bb4e5a00cb 100644 (file)
@@ -718,6 +718,9 @@ CDROMPLAYBLK
 
 
 CDROMGETSPINDOWN
+       Obsolete, was ide-cd only
+
+
        usage::
 
          char spindown;
@@ -736,6 +739,9 @@ CDROMGETSPINDOWN
 
 
 CDROMSETSPINDOWN
+       Obsolete, was ide-cd only
+
+
        usage::
 
          char spindown
diff --git a/Documentation/virt/coco/sev-guest.rst b/Documentation/virt/coco/sev-guest.rst
new file mode 100644 (file)
index 0000000..bf593e8
--- /dev/null
@@ -0,0 +1,155 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================================
+The Definitive SEV Guest API Documentation
+===================================================================
+
+1. General description
+======================
+
+The SEV API is a set of ioctls that are used by the guest or hypervisor
+to get or set a certain aspect of the SEV virtual machine. The ioctls belong
+to the following classes:
+
+ - Hypervisor ioctls: These query and set global attributes which affect the
+   whole SEV firmware.  These ioctls are used by platform provisioning tools.
+
+ - Guest ioctls: These query and set attributes of the SEV virtual machine.
+
+2. API description
+==================
+
+This section describes ioctls that are used for querying the SEV guest report
+from the SEV firmware. For each ioctl, the following information is provided
+along with a description:
+
+  Technology:
+      which SEV technology provides this ioctl. SEV, SEV-ES, SEV-SNP or all.
+
+  Type:
+      hypervisor or guest. The ioctl can be used inside the guest or the
+      hypervisor.
+
+  Parameters:
+      what parameters are accepted by the ioctl.
+
+  Returns:
+      the return value.  General error numbers (-ENOMEM, -EINVAL)
+      are not detailed, but errors with specific meanings are.
+
+The guest ioctl should be issued on a file descriptor of the /dev/sev-guest device.
+The ioctl accepts struct snp_guest_request_ioctl. The input and output
+structures are specified through the req_data and resp_data fields
+respectively. If the ioctl fails to execute due to a firmware error, then
+the fw_err code will be set; otherwise, fw_err will be set to
+0x00000000000000ff.
+
+The firmware checks that the message sequence counter is one greater than
+the guest's message sequence counter. If the guest driver fails to increment the
+message counter (e.g. counter overflow), then -EIO will be returned.
+
+::
+
+        struct snp_guest_request_ioctl {
+                /* Message version number */
+                __u32 msg_version;
+
+                /* Request and response structure address */
+                __u64 req_data;
+                __u64 resp_data;
+
+                /* firmware error code on failure (see psp-sev.h) */
+                __u64 fw_err;
+        };
+
+2.1 SNP_GET_REPORT
+------------------
+
+:Technology: sev-snp
+:Type: guest ioctl
+:Parameters (in): struct snp_report_req
+:Returns (out): struct snp_report_resp on success, -negative on error
+
+The SNP_GET_REPORT ioctl can be used to query the attestation report from the
+SEV-SNP firmware. The ioctl uses the SNP_GUEST_REQUEST (MSG_REPORT_REQ) command
+provided by the SEV-SNP firmware to query the attestation report.
+
+On success, snp_report_resp.data will contain the report. The report is in
+the format described in the SEV-SNP specification. See the SEV-SNP
+specification for further details.
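+
+A minimal user-space sketch of issuing this ioctl (assuming the request,
+response and ioctl definitions from <linux/sev-guest.h>; error handling
+trimmed)::
+
+        #include <fcntl.h>
+        #include <sys/ioctl.h>
+        #include <linux/sev-guest.h>
+
+        struct snp_report_req req = {};
+        struct snp_report_resp resp = {};
+        struct snp_guest_request_ioctl guest_req = {
+                .msg_version = 1,
+                .req_data = (__u64)&req,
+                .resp_data = (__u64)&resp,
+        };
+        int fd = open("/dev/sev-guest", O_RDWR);
+
+        /* req.user_data may carry a nonce that is echoed in the report */
+        if (ioctl(fd, SNP_GET_REPORT, &guest_req) < 0)
+                /* on firmware failure, guest_req.fw_err holds the error */
+                return -1;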
+
+2.2 SNP_GET_DERIVED_KEY
+-----------------------
+:Technology: sev-snp
+:Type: guest ioctl
+:Parameters (in): struct snp_derived_key_req
+:Returns (out): struct snp_derived_key_resp on success, -negative on error
+
+The SNP_GET_DERIVED_KEY ioctl can be used to get a key derived from a root key.
+The derived key can be used by the guest for any purpose, such as sealing keys
+or communicating with external entities.
+
+The ioctl uses the SNP_GUEST_REQUEST (MSG_KEY_REQ) command provided by the
+SEV-SNP firmware to derive the key. See SEV-SNP specification for further details
+on the various fields passed in the key derivation request.
+
+On success, the snp_derived_key_resp.data contains the derived key value. See
+the SEV-SNP specification for further details.
+
+
+2.3 SNP_GET_EXT_REPORT
+----------------------
+:Technology: sev-snp
+:Type: guest ioctl
+:Parameters (in/out): struct snp_ext_report_req
+:Returns (out): struct snp_report_resp on success, -negative on error
+
+The SNP_GET_EXT_REPORT ioctl is similar to the SNP_GET_REPORT. The difference is
+related to the additional certificate data that is returned with the report.
+The certificate data returned is provided by the hypervisor through
+SNP_SET_EXT_CONFIG.
+
+The ioctl uses the SNP_GUEST_REQUEST (MSG_REPORT_REQ) command provided by the SEV-SNP
+firmware to get the attestation report.
+
+On success, the snp_ext_report_resp.data will contain the attestation report
+and snp_ext_report_req.certs_address will contain the certificate blob. If the
+length of the blob is smaller than expected then snp_ext_report_req.certs_len will
+be updated with the expected value.
+
+See the GHCB specification for further details on how to parse the certificate blob.
+
+3. SEV-SNP CPUID Enforcement
+============================
+
+SEV-SNP guests can access a special page that contains a table of CPUID values
+that have been validated by the PSP as part of the SNP_LAUNCH_UPDATE firmware
+command. It provides the following assurances regarding the validity of CPUID
+values:
+
+ - Its address is obtained via bootloader/firmware (via CC blob), and those
+   binaries will be measured as part of the SEV-SNP attestation report.
+ - Its initial state will be encrypted/pvalidated, so attempts to modify
+   it during run-time will result in garbage being written, or #VC exceptions
+   being generated due to changes in validation state if the hypervisor tries
+   to swap the backing page.
+ - Attempts to bypass PSP checks by the hypervisor by using a normal page, or
+   a non-CPUID encrypted page will change the measurement provided by the
+   SEV-SNP attestation report.
+ - The CPUID page contents are *not* measured, but attempts to modify the
+   expected contents of a CPUID page as part of guest initialization will be
+   gated by the PSP CPUID enforcement policy checks performed on the page
+   during SNP_LAUNCH_UPDATE, and noticeable later if the guest owner
+   implements their own checks of the CPUID values.
+
+It is important to note that this last assurance is only useful if the kernel
+has taken care to make use of the SEV-SNP CPUID throughout all stages of boot.
+Otherwise, guest owner attestation provides no assurance that the kernel wasn't
+fed incorrect values at some point during boot.
+
+
+Reference
+---------
+
+SEV-SNP and GHCB specification: developer.amd.com/sev
+
+The driver is based on SEV-SNP firmware spec 0.9 and GHCB spec version 2.0.
index edea7fea95a84f1a2f990bd37cd4d5a176590b5b..492f0920b9885c828a6be62461315b3bdb9ce5cb 100644 (file)
@@ -13,6 +13,7 @@ Linux Virtualization Support
    guest-halt-polling
    ne_overview
    acrn/index
+   coco/sev-guest
 
 .. only:: html and subproject
 
index d13fa66004672c9226b91f9b91e5b048af2943c2..4a900cdbc62e96a9f644bde210af299c3ce3f0c6 100644 (file)
@@ -5986,16 +5986,16 @@ should put the acknowledged interrupt vector into the 'epr' field.
   #define KVM_SYSTEM_EVENT_RESET          2
   #define KVM_SYSTEM_EVENT_CRASH          3
                        __u32 type;
-                       __u64 flags;
+                        __u32 ndata;
+                        __u64 data[16];
                } system_event;
 
 If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
 a system-level event using some architecture specific mechanism (hypercall
 or some special instruction). In case of ARM64, this is triggered using
-HVC instruction based PSCI call from the vcpu. The 'type' field describes
-the system-level event type. The 'flags' field describes architecture
-specific flags for the system-level event.
+HVC instruction based PSCI call from the vcpu.
 
+The 'type' field describes the system-level event type.
 Valid values for 'type' are:
 
  - KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
@@ -6010,10 +6010,20 @@ Valid values for 'type' are:
    to ignore the request, or to gather VM memory core dump and/or
    reset/shutdown of the VM.
 
-Valid flags are:
+If KVM_CAP_SYSTEM_EVENT_DATA is present, the 'data' field can contain
+architecture specific information for the system-level event.  Only
+the first `ndata` items (possibly zero) of the data array are valid.
 
- - KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (arm64 only) -- the guest issued
-   a SYSTEM_RESET2 call according to v1.1 of the PSCI specification.
+ - for arm64, data[0] is set to KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 if
+   the guest issued a SYSTEM_RESET2 call according to v1.1 of the PSCI
+   specification.
+
+ - for RISC-V, data[0] is set to the value of the second argument of the
+   ``sbi_system_reset`` call.
+
+Previous versions of Linux defined a `flags` member in this struct.  The
+field is now aliased to `data[0]`.  Userspace can assume that it is only
+written if ndata is greater than 0.
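+
+A VMM might consume this as follows (a sketch; `run` is the mapped kvm_run
+structure and reset_vm() is a hypothetical helper)::
+
+	if (run->exit_reason == KVM_EXIT_SYSTEM_EVENT &&
+	    run->system_event.type == KVM_SYSTEM_EVENT_RESET) {
+		bool psci_reset2 = run->system_event.ndata > 0 &&
+			(run->system_event.data[0] &
+			 KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2);
+
+		/* on arm64, data[0] may carry the SYSTEM_RESET2 flag */
+		reset_vm(psci_reset2);
+	}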
 
 ::
 
@@ -6190,6 +6200,7 @@ Valid values for 'type' are:
                        unsigned long args[6];
                        unsigned long ret[2];
                } riscv_sbi;
+
 If exit reason is KVM_EXIT_RISCV_SBI then it indicates that the VCPU has
 done a SBI call which is not handled by KVM RISC-V kernel module. The details
 of the SBI call are available in 'riscv_sbi' member of kvm_run structure. The
index db43ee571f5aa36763d4cfe60c05284f7be08b2c..31f62b64e07b9f75c2be45ad28c133e53bf9ad04 100644 (file)
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 =================
 KVM VCPU Requests
 =================
index 1c6847fff304975a1be800d2e0c335a7cc819c59..2d307811978c45ea57067163d364ee86680c5971 100644 (file)
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ======================================
 Secure Encrypted Virtualization (SEV)
 ======================================
index 806f049b69755fcdb375f284347d5decf182d1c1..410e0aa634939ef4c1b8e819caffc03ac2ad55bd 100644 (file)
@@ -1,3 +1,4 @@
+.. SPDX-License-Identifier: GPL-2.0
 
 =======================================
 Known limitations of CPU virtualization
@@ -36,4 +37,3 @@ Nested virtualization features
 ------------------------------
 
 TBD
-
index bd70c69468aebb236937232d41adce56654e3149..a27e6768d9008f7f01029b3dd4862943de9d9f13 100644 (file)
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
 ==============================
 Running nested guests with KVM
 ==============================
index 65204d7f004f238f37b0654cd91bcc2033fdf7a1..7e0c3f574e78206a3aea7f466284c2b9760586ba 100644 (file)
@@ -110,7 +110,7 @@ Usage
    If you want to sort by the page nums of buf, use the ``-m`` parameter.
    The detailed parameters are:
 
-   fundamental function:
+   fundamental function::
 
        Sort:
                -a              Sort by memory allocation time.
@@ -122,7 +122,7 @@ Usage
                -s              Sort by stack trace.
                -t              Sort by times (default).
 
-   additional function:
+   additional function::
 
        Cull:
                --cull <rules>
@@ -153,6 +153,7 @@ Usage
 
 STANDARD FORMAT SPECIFIERS
 ==========================
+::
 
        KEY             LONG            DESCRIPTION
        p               pid             process ID
index 5d54c39a063ff2c637d311357291ea0d68db2b27..08246e8ac83542064456ea14493aeeaf32023ce2 100644 (file)
@@ -140,9 +140,8 @@ from #define X86_FEATURE_UMIP (16*32 + 2).
 
 In addition, there exists a variety of custom command-line parameters that
 disable specific features. The list of parameters includes, but is not limited
-to, nofsgsbase, nosmap, and nosmep. 5-level paging can also be disabled using
-"no5lvl". SMAP and SMEP are disabled with the aforementioned parameters,
-respectively.
+to, nofsgsbase, nosgx, noxsave, etc. 5-level paging can also be disabled using
+"no5lvl".
 
 e: The feature was known to be non-functional.
 ----------------------------------------------
index 91b2fa4566184c00f7620a4394497be0c0bee1b3..51982dee6c2a17dc8cfef7cae2114299f0174fc8 100644 (file)
@@ -26,6 +26,7 @@ x86-specific Documentation
    intel_txt
    amd-memory-encryption
    amd_hsmp
+   tdx
    pti
    mds
    microcode
diff --git a/Documentation/x86/tdx.rst b/Documentation/x86/tdx.rst
new file mode 100644 (file)
index 0000000..b8fa432
--- /dev/null
@@ -0,0 +1,218 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Intel Trust Domain Extensions (TDX)
+=====================================
+
+Intel's Trust Domain Extensions (TDX) protect confidential guest VMs from
+the host and physical attacks by isolating the guest register state and by
+encrypting the guest memory. In TDX, a special module running in a special
+mode sits between the host and the guest and manages the guest/host
+separation.
+
+Since the host cannot directly access guest registers or memory, much
+normal functionality of a hypervisor must be moved into the guest. This is
+implemented using a Virtualization Exception (#VE) that is handled by the
+guest kernel. A #VE is handled entirely inside the guest kernel, but some
+cases require the hypervisor to be consulted.
+
+TDX includes new hypercall-like mechanisms for communicating from the
+guest to the hypervisor or the TDX module.
+
+New TDX Exceptions
+==================
+
+TDX guests behave differently from bare-metal and traditional VMX guests.
+In TDX guests, otherwise normal instructions or memory accesses can cause
+#VE or #GP exceptions.
+
+Instructions marked with an '*' conditionally cause exceptions.  The
+details for these instructions are discussed below.
+
+Instruction-based #VE
+---------------------
+
+- Port I/O (INS, OUTS, IN, OUT)
+- HLT
+- MONITOR, MWAIT
+- WBINVD, INVD
+- VMCALL
+- RDMSR*,WRMSR*
+- CPUID*
+
+Instruction-based #GP
+---------------------
+
+- All VMX instructions: INVEPT, INVVPID, VMCLEAR, VMFUNC, VMLAUNCH,
+  VMPTRLD, VMPTRST, VMREAD, VMRESUME, VMWRITE, VMXOFF, VMXON
+- ENCLS, ENCLU
+- GETSEC
+- RSM
+- ENQCMD
+- RDMSR*,WRMSR*
+
+RDMSR/WRMSR Behavior
+--------------------
+
+MSR access behavior falls into three categories:
+
+- #GP generated
+- #VE generated
+- "Just works"
+
+In general, the #GP MSRs should not be used in guests.  Their use likely
+indicates a bug in the guest.  The guest may try to handle the #GP with a
+hypercall but it is unlikely to succeed.
+
+The #VE MSRs can typically be handled by the hypervisor.  Guests
+can make a hypercall to the hypervisor to handle the #VE.
+
+The "just works" MSRs do not need any special guest handling.  They might
+be implemented by directly passing through the MSR to the hardware or by
+trapping and handling in the TDX module.  Other than possibly being slow,
+these MSRs appear to function just as they would on bare metal.
+
+CPUID Behavior
+--------------
+
+For some CPUID leaves and sub-leaves, the virtualized bit fields of CPUID
+return values (in guest EAX/EBX/ECX/EDX) are configurable by the
+hypervisor. For such cases, the Intel TDX module architecture defines two
+virtualization types:
+
+- Bit fields for which the hypervisor controls the value seen by the guest
+  TD.
+
+- Bit fields for which the hypervisor configures the value such that the
+  guest TD either sees their native value or a value of 0.  For these bit
+  fields, the hypervisor can mask off the native values, but it cannot
+  turn *on* values.
+
+A #VE is generated for CPUID leaves and sub-leaves that the TDX module does
+not know how to handle. The guest kernel may ask the hypervisor for the
+value with a hypercall.
+
+#VE on Memory Accesses
+======================
+
+There are essentially two classes of TDX memory: private and shared.
+Private memory receives full TDX protections.  Its content is protected
+against access from the hypervisor.  Shared memory is expected to be
+shared between guest and hypervisor and does not receive full TDX
+protections.
+
+A TD guest is in control of whether its memory accesses are treated as
+private or shared.  It selects the behavior with a bit in its page table
+entries.  This helps ensure that a guest does not place sensitive
+information in shared memory, exposing it to the untrusted hypervisor.
+
+#VE on Shared Memory
+--------------------
+
+Access to shared mappings can cause a #VE.  The hypervisor ultimately
+controls whether a shared memory access causes a #VE, so the guest must be
+careful to only reference shared pages for which it can safely handle a #VE.  For
+instance, the guest should be careful not to access shared memory in the
+#VE handler before it reads the #VE info structure (TDG.VP.VEINFO.GET).
+
+Shared mapping content is entirely controlled by the hypervisor. The guest
+should only use shared mappings for communicating with the hypervisor.
+Shared mappings must never be used for sensitive memory content like kernel
+stacks.  A good rule of thumb is that hypervisor-shared memory should be
+treated the same as memory mapped to userspace.  Both the hypervisor and
+userspace are completely untrusted.
+
+MMIO for virtual devices is implemented as shared memory.  The guest must
+be careful not to access device MMIO regions unless it is also prepared to
+handle a #VE.
+
+#VE on Private Pages
+--------------------
+
+An access to private mappings can also cause a #VE.  Since all kernel
+memory is also private memory, the kernel might theoretically need to
+handle a #VE on arbitrary kernel memory accesses.  This is not feasible, so
+TDX guests ensure that all guest memory has been "accepted" before memory
+is used by the kernel.
+
+A modest amount of memory (typically 512M) is pre-accepted by the firmware
+before the kernel runs to ensure that the kernel can start up without
+being subjected to a #VE.
+
+The hypervisor is permitted to unilaterally move accepted pages to a
+"blocked" state. However, if it does this, page access will not generate a
+#VE.  It will, instead, cause a "TD Exit" where the hypervisor is required
+to handle the exception.
+
+Linux #VE handler
+=================
+
+Just like page faults or #GPs, #VE exceptions can either be handled or be
+fatal.  Typically, an unhandled userspace #VE results in a SIGSEGV, while
+an unhandled kernel #VE results in an oops.
+
+Handling nested exceptions on x86 is typically nasty business.  A #VE
+could be interrupted by an NMI which triggers another #VE and hilarity
+ensues.  The TDX #VE architecture anticipated this scenario and includes a
+feature to make it slightly less nasty.
+
+During #VE handling, the TDX module ensures that all interrupts (including
+NMIs) are blocked.  The block remains in place until the guest makes a
+TDG.VP.VEINFO.GET TDCALL.  This allows the guest to control when interrupts
+or a new #VE can be delivered.
+
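+The #VE information itself is fetched with that TDCALL, which also lifts
+the interrupt block.  A sketch of the in-kernel helper (the output
+register layout follows the TDX module specification)::
+
+   void tdx_get_ve_info(struct ve_info *ve)
+   {
+           struct tdx_module_output out;
+
+           /* TDCALL leaf TDG.VP.VEINFO.GET; also re-enables #VE delivery. */
+           tdx_module_call(TDX_GET_VEINFO, 0, 0, 0, 0, &out);
+
+           ve->exit_reason = out.rcx;
+           ve->exit_qual   = out.rdx;
+           ve->gla         = out.r8;   /* guest linear address */
+           ve->gpa         = out.r9;   /* guest physical address */
+           ve->instr_len   = lower_32_bits(out.r10);
+           ve->instr_info  = upper_32_bits(out.r10);
+   }
+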
+However, the guest kernel must still be careful to avoid potential
+#VE-triggering actions (discussed above) while this block is in place:
+any #VE raised during the block is elevated to a double fault (#DF),
+which is not recoverable.
+
+MMIO handling
+=============
+
+In non-TDX VMs, MMIO is usually implemented by giving a guest access to a
+mapping which causes a VMEXIT on access, after which the hypervisor
+emulates the access.  That is not possible in TDX guests because a VMEXIT
+would expose the guest's register state to the host.  TDX guests don't
+trust the host and can't have their state exposed to it.
+
+In TDX, MMIO regions typically trigger a #VE exception in the guest.  The
+guest #VE handler then emulates the MMIO instruction inside the guest and
+converts it into a controlled TDCALL to the host, rather than exposing
+guest state to the host.
+
+MMIO addresses on x86 are just special physical addresses.  They can
+theoretically be accessed with any instruction that accesses memory.
+However, the kernel's instruction decoder is limited: it is only designed
+to decode instructions like those generated by the io.h accessors
+(readb()/readw()/readl()/writel() and friends).
+
+MMIO access via other means (like structure overlays) may result in an
+oops.
+
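+For example, accesses made through the io.h accessors can be decoded and
+emulated, while a raw structure-overlay dereference may not be.  A sketch,
+where the register offsets and struct layout are hypothetical::
+
+   void __iomem *base = ioremap(MMIO_BASE, MMIO_SIZE);
+
+   u32 status = readl(base + REG_STATUS);    /* decodable: io.h accessor */
+   writel(status | BIT(0), base + REG_CTRL); /* decodable: io.h accessor */
+
+   struct hw_regs {
+           u32 ctrl;
+           u32 status;
+   };
+   struct hw_regs __iomem *hw = base;
+
+   status = hw->status;  /* structure overlay: may not decode -> oops */
+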
+Shared Memory Conversions
+=========================
+
+All TDX guest memory starts out as private at boot.  This memory can not
+be accessed by the hypervisor.  However, some kernel users like device
+drivers might have a need to share data with the hypervisor.  To do this,
+memory must be converted between shared and private.  This can be
+accomplished using some existing memory encryption helpers:
+
+ * set_memory_decrypted() converts a range of pages to shared.
+ * set_memory_encrypted() converts memory back to private.
+
+Device drivers are the primary users of shared memory, but there's no need
+to touch every driver: DMA buffers and ioremap() do the conversions
+automatically.
+
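+A driver that needs an explicitly shared buffer can use the helpers
+directly.  A minimal sketch, with error paths trimmed::
+
+   unsigned long vaddr = __get_free_page(GFP_KERNEL);
+
+   if (!vaddr)
+           return -ENOMEM;
+
+   /* Convert the page to shared before handing it to the hypervisor. */
+   if (set_memory_decrypted(vaddr, 1)) {
+           free_page(vaddr);
+           return -EIO;
+   }
+
+   /* ... exchange data with the hypervisor through the page ... */
+
+   /* Convert back to private before returning it to the allocator. */
+   set_memory_encrypted(vaddr, 1);
+   free_page(vaddr);
+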
+TDX uses SWIOTLB for most DMA allocations. The SWIOTLB buffer is
+converted to shared on boot.
+
+For coherent DMA allocations, the DMA buffer is converted to shared at
+allocation time.  See force_dma_unencrypted() for details.
+
+References
+==========
+
+TDX reference material is collected here:
+
+https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html
index 07aa0007f346e86085def5a0a70311de051c3e02..03ec9cf011812a4c31c08ae5dd327a600bf6d633 100644 (file)
@@ -157,15 +157,6 @@ Rebooting
      newer BIOS, or newer board) using this option will ignore the built-in
      quirk table, and use the generic default reboot actions.
 
-Non Executable Mappings
-=======================
-
-  noexec=on|off
-    on
-      Enable(default)
-    off
-      Disable
-
 NUMA
 ====
 
@@ -310,3 +301,17 @@ Miscellaneous
     Do not use GB pages for kernel direct mappings.
   gbpages
     Use GB pages for kernel direct mappings.
+
+
+AMD SEV (Secure Encrypted Virtualization)
+=========================================
+Options relating to AMD SEV, specified via the following format:
+
+::
+
+   sev=option1[,option2]
+
+The available options are:
+
+   debug
+     Enable debug messages.
index f088f58816668fa455a0317b068e3200b471f68f..45aa9cceb4f1940eb9cda3a7176805986008c8ce 100644 (file)
@@ -19,6 +19,7 @@ Offset/Size   Proto   Name                    Meaning
 058/008                ALL     tboot_addr              Physical address of tboot shared page
 060/010                ALL     ist_info                Intel SpeedStep (IST) BIOS support information
                                                (struct ist_info)
+070/008                ALL     acpi_rsdp_addr          Physical address of ACPI RSDP table
 080/010                ALL     hd0_info                hd0 disk parameter, OBSOLETE!!
 090/010                ALL     hd1_info                hd1 disk parameter, OBSOLETE!!
 0A0/010                ALL     sys_desc_table          System description table (struct sys_desc_table),
@@ -27,6 +28,7 @@ Offset/Size   Proto   Name                    Meaning
 0C0/004                ALL     ext_ramdisk_image       ramdisk_image high 32bits
 0C4/004                ALL     ext_ramdisk_size        ramdisk_size high 32bits
 0C8/004                ALL     ext_cmd_line_ptr        cmd_line_ptr high 32bits
+13C/004                ALL     cc_blob_address         Physical address of Confidential Computing blob
 140/080                ALL     edid_info               Video mode setup (struct edid_info)
 1C0/020                ALL     efi_info                EFI 32 information (struct efi_info)
 1E0/004                ALL     alt_mem_k               Alternative mem check, in KB
index fd768d43e048299a0cb36fefc86e78fe0388c98f..496f5e281776e6bba57caf8ae1a4c07cd84ed3a8 100644 (file)
@@ -201,6 +201,7 @@ F:  include/net/ieee80211_radiotap.h
 F:     include/net/iw_handler.h
 F:     include/net/wext.h
 F:     include/uapi/linux/nl80211.h
+F:     include/uapi/linux/wireless.h
 F:     net/wireless/
 
 8169 10/100/1000 GIGABIT ETHERNET DRIVER
@@ -2636,13 +2637,14 @@ F:      sound/soc/rockchip/
 N:     rockchip
 
 ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 R:     Alim Akhtar <alim.akhtar@samsung.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-samsung-soc@vger.kernel.org
 S:     Maintained
 C:     irc://irc.libera.chat/linux-exynos
 Q:     https://patchwork.kernel.org/project/linux-samsung-soc/list/
+B:     mailto:linux-samsung-soc@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git
 F:     Documentation/arm/samsung/
 F:     Documentation/devicetree/bindings/arm/samsung/
@@ -3569,8 +3571,9 @@ M:        Andy Gospodarek <andy@greyhouse.net>
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     http://sourceforge.net/projects/bonding/
+F:     Documentation/networking/bonding.rst
 F:     drivers/net/bonding/
-F:     include/net/bonding.h
+F:     include/net/bond*
 F:     include/uapi/linux/if_bonding.h
 
 BOSCH SENSORTEC BMA400 ACCELEROMETER IIO DRIVER
@@ -3742,7 +3745,7 @@ F:        include/linux/platform_data/b53.h
 
 BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE
 M:     Nicolas Saenz Julienne <nsaenz@kernel.org>
-L:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
@@ -3757,7 +3760,7 @@ BROADCOM BCM281XX/BCM11XXX/BCM216XX ARM ARCHITECTURE
 M:     Florian Fainelli <f.fainelli@gmail.com>
 M:     Ray Jui <rjui@broadcom.com>
 M:     Scott Branden <sbranden@broadcom.com>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 S:     Maintained
 T:     git git://github.com/broadcom/mach-bcm
 F:     arch/arm/mach-bcm/
@@ -3777,7 +3780,7 @@ F:        arch/mips/include/asm/mach-bcm47xx/*
 
 BROADCOM BCM4908 ETHERNET DRIVER
 M:     Rafał Miłecki <rafal@milecki.pl>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml
@@ -3786,7 +3789,7 @@ F:        drivers/net/ethernet/broadcom/unimac.h
 
 BROADCOM BCM4908 PINMUX DRIVER
 M:     Rafał Miłecki <rafal@milecki.pl>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-gpio@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/pinctrl/brcm,bcm4908-pinctrl.yaml
@@ -3796,7 +3799,7 @@ BROADCOM BCM5301X ARM ARCHITECTURE
 M:     Florian Fainelli <f.fainelli@gmail.com>
 M:     Hauke Mehrtens <hauke@hauke-m.de>
 M:     Rafał Miłecki <zajec5@gmail.com>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/boot/dts/bcm470*
@@ -3807,7 +3810,7 @@ F:        arch/arm/mach-bcm/bcm_5301x.c
 BROADCOM BCM53573 ARM ARCHITECTURE
 M:     Florian Fainelli <f.fainelli@gmail.com>
 M:     Rafał Miłecki <rafal@milecki.pl>
-L:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/boot/dts/bcm47189*
@@ -3815,7 +3818,7 @@ F:        arch/arm/boot/dts/bcm53573*
 
 BROADCOM BCM63XX ARM ARCHITECTURE
 M:     Florian Fainelli <f.fainelli@gmail.com>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 T:     git git://github.com/broadcom/stblinux.git
@@ -3829,7 +3832,7 @@ F:        drivers/usb/gadget/udc/bcm63xx_udc.*
 
 BROADCOM BCM7XXX ARM ARCHITECTURE
 M:     Florian Fainelli <f.fainelli@gmail.com>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 T:     git git://github.com/broadcom/stblinux.git
@@ -3847,21 +3850,21 @@ N:      bcm7120
 BROADCOM BDC DRIVER
 M:     Al Cooper <alcooperx@gmail.com>
 L:     linux-usb@vger.kernel.org
-L:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 S:     Maintained
 F:     Documentation/devicetree/bindings/usb/brcm,bdc.yaml
 F:     drivers/usb/gadget/udc/bdc/
 
 BROADCOM BMIPS CPUFREQ DRIVER
 M:     Markus Mayer <mmayer@broadcom.com>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-pm@vger.kernel.org
 S:     Maintained
 F:     drivers/cpufreq/bmips-cpufreq.c
 
 BROADCOM BMIPS MIPS ARCHITECTURE
 M:     Florian Fainelli <f.fainelli@gmail.com>
-L:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-mips@vger.kernel.org
 S:     Maintained
 T:     git git://github.com/broadcom/stblinux.git
@@ -3912,7 +3915,9 @@ BROADCOM BNXT_EN 50 GIGABIT ETHERNET DRIVER
 M:     Michael Chan <michael.chan@broadcom.com>
 L:     netdev@vger.kernel.org
 S:     Supported
+F:     drivers/firmware/broadcom/tee_bnxt_fw.c
 F:     drivers/net/ethernet/broadcom/bnxt/
+F:     include/linux/firmware/broadcom/tee_bnxt_fw.h
 
 BROADCOM BRCM80211 IEEE802.11n WIRELESS DRIVER
 M:     Arend van Spriel <aspriel@gmail.com>
@@ -3927,53 +3932,53 @@ F:      drivers/net/wireless/broadcom/brcm80211/
 BROADCOM BRCMSTB GPIO DRIVER
 M:     Doug Berger <opendmb@gmail.com>
 M:     Florian Fainelli <f.fainelli@gmail.com>
-L:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 S:     Supported
 F:     Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.yaml
 F:     drivers/gpio/gpio-brcmstb.c
 
 BROADCOM BRCMSTB I2C DRIVER
 M:     Kamal Dasu <kdasu.kdev@gmail.com>
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-i2c@vger.kernel.org
-L:     bcm-kernel-feedback-list@broadcom.com
 S:     Supported
 F:     Documentation/devicetree/bindings/i2c/brcm,brcmstb-i2c.yaml
 F:     drivers/i2c/busses/i2c-brcmstb.c
 
 BROADCOM BRCMSTB UART DRIVER
 M:     Al Cooper <alcooperx@gmail.com>
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-serial@vger.kernel.org
-L:     bcm-kernel-feedback-list@broadcom.com
 S:     Maintained
 F:     Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml
 F:     drivers/tty/serial/8250/8250_bcm7271.c
 
 BROADCOM BRCMSTB USB EHCI DRIVER
 M:     Al Cooper <alcooperx@gmail.com>
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-usb@vger.kernel.org
-L:     bcm-kernel-feedback-list@broadcom.com
 S:     Maintained
 F:     Documentation/devicetree/bindings/usb/brcm,bcm7445-ehci.yaml
 F:     drivers/usb/host/ehci-brcm.*
 
 BROADCOM BRCMSTB USB PIN MAP DRIVER
 M:     Al Cooper <alcooperx@gmail.com>
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-usb@vger.kernel.org
-L:     bcm-kernel-feedback-list@broadcom.com
 S:     Maintained
 F:     Documentation/devicetree/bindings/usb/brcm,usb-pinmap.yaml
 F:     drivers/usb/misc/brcmstb-usb-pinmap.c
 
 BROADCOM BRCMSTB USB2 and USB3 PHY DRIVER
 M:     Al Cooper <alcooperx@gmail.com>
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-kernel@vger.kernel.org
-L:     bcm-kernel-feedback-list@broadcom.com
 S:     Maintained
 F:     drivers/phy/broadcom/phy-brcm-usb*
 
 BROADCOM ETHERNET PHY DRIVERS
 M:     Florian Fainelli <f.fainelli@gmail.com>
-L:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     Documentation/devicetree/bindings/net/broadcom-bcm87xx.txt
@@ -3984,7 +3989,7 @@ F:        include/linux/brcmphy.h
 BROADCOM GENET ETHERNET DRIVER
 M:     Doug Berger <opendmb@gmail.com>
 M:     Florian Fainelli <f.fainelli@gmail.com>
-L:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml
@@ -3998,7 +4003,7 @@ F:        include/linux/platform_data/mdio-bcm-unimac.h
 BROADCOM IPROC ARM ARCHITECTURE
 M:     Ray Jui <rjui@broadcom.com>
 M:     Scott Branden <sbranden@broadcom.com>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 T:     git git://github.com/broadcom/stblinux.git
@@ -4026,7 +4031,7 @@ N:        stingray
 
 BROADCOM IPROC GBIT ETHERNET DRIVER
 M:     Rafał Miłecki <rafal@milecki.pl>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/net/brcm,amac.yaml
@@ -4035,7 +4040,7 @@ F:        drivers/net/ethernet/broadcom/unimac.h
 
 BROADCOM KONA GPIO DRIVER
 M:     Ray Jui <rjui@broadcom.com>
-L:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 S:     Supported
 F:     Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
 F:     drivers/gpio/gpio-bcm-kona.c
@@ -4068,7 +4073,7 @@ F:        drivers/firmware/broadcom/*
 BROADCOM PMB (POWER MANAGEMENT BUS) DRIVER
 M:     Rafał Miłecki <rafal@milecki.pl>
 M:     Florian Fainelli <f.fainelli@gmail.com>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-pm@vger.kernel.org
 S:     Maintained
 T:     git git://github.com/broadcom/stblinux.git
@@ -4084,7 +4089,7 @@ F:        include/linux/bcma/
 
 BROADCOM SPI DRIVER
 M:     Kamal Dasu <kdasu.kdev@gmail.com>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 S:     Maintained
 F:     Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.yaml
 F:     drivers/spi/spi-bcm-qspi.*
@@ -4093,7 +4098,7 @@ F:        drivers/spi/spi-iproc-qspi.c
 
 BROADCOM STB AVS CPUFREQ DRIVER
 M:     Markus Mayer <mmayer@broadcom.com>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-pm@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt
@@ -4101,7 +4106,7 @@ F:        drivers/cpufreq/brcmstb*
 
 BROADCOM STB AVS TMON DRIVER
 M:     Markus Mayer <mmayer@broadcom.com>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-pm@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/thermal/brcm,avs-tmon.yaml
@@ -4109,7 +4114,7 @@ F:        drivers/thermal/broadcom/brcmstb*
 
 BROADCOM STB DPFE DRIVER
 M:     Markus Mayer <mmayer@broadcom.com>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/memory-controllers/brcm,dpfe-cpu.yaml
@@ -4118,8 +4123,8 @@ F:        drivers/memory/brcmstb_dpfe.c
 BROADCOM STB NAND FLASH DRIVER
 M:     Brian Norris <computersforpeace@gmail.com>
 M:     Kamal Dasu <kdasu.kdev@gmail.com>
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-mtd@lists.infradead.org
-L:     bcm-kernel-feedback-list@broadcom.com
 S:     Maintained
 F:     drivers/mtd/nand/raw/brcmnand/
 F:     include/linux/platform_data/brcmnand.h
@@ -4128,7 +4133,7 @@ BROADCOM STB PCIE DRIVER
 M:     Jim Quinlan <jim2101024@gmail.com>
 M:     Nicolas Saenz Julienne <nsaenz@kernel.org>
 M:     Florian Fainelli <f.fainelli@gmail.com>
-M:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-pci@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml
@@ -4136,7 +4141,7 @@ F:        drivers/pci/controller/pcie-brcmstb.c
 
 BROADCOM SYSTEMPORT ETHERNET DRIVER
 M:     Florian Fainelli <f.fainelli@gmail.com>
-L:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/broadcom/bcmsysport.*
@@ -4153,7 +4158,7 @@ F:        drivers/net/ethernet/broadcom/tg3.*
 
 BROADCOM VK DRIVER
 M:     Scott Branden <scott.branden@broadcom.com>
-L:     bcm-kernel-feedback-list@broadcom.com
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 S:     Supported
 F:     drivers/misc/bcm-vk/
 F:     include/uapi/linux/misc/bcm_vk.h
@@ -4791,6 +4796,7 @@ F:        .clang-format
 CLANG/LLVM BUILD SUPPORT
 M:     Nathan Chancellor <nathan@kernel.org>
 M:     Nick Desaulniers <ndesaulniers@google.com>
+R:     Tom Rix <trix@redhat.com>
 L:     llvm@lists.linux.dev
 S:     Supported
 W:     https://clangbuiltlinux.github.io/
@@ -5434,6 +5440,7 @@ F:        net/ax25/sysctl_net_ax25.c
 
 DATA ACCESS MONITOR
 M:     SeongJae Park <sj@kernel.org>
+L:     damon@lists.linux.dev
 L:     linux-mm@kvack.org
 S:     Maintained
 F:     Documentation/ABI/testing/sysfs-kernel-mm-damon
@@ -5715,7 +5722,7 @@ W:        http://lanana.org/docs/device-list/index.html
 
 DEVICE RESOURCE MANAGEMENT HELPERS
 M:     Hans de Goede <hdegoede@redhat.com>
-R:     Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
+R:     Matti Vaittinen <mazziesaccount@gmail.com>
 S:     Maintained
 F:     include/linux/devm-helpers.h
 
@@ -5912,7 +5919,7 @@ R:        Benjamin Gaignard <benjamin.gaignard@collabora.com>
 R:     Liam Mark <lmark@codeaurora.org>
 R:     Laura Abbott <labbott@redhat.com>
 R:     Brian Starkey <Brian.Starkey@arm.com>
-R:     John Stultz <john.stultz@linaro.org>
+R:     John Stultz <jstultz@google.com>
 L:     linux-media@vger.kernel.org
 L:     dri-devel@lists.freedesktop.org
 L:     linaro-mm-sig@lists.linaro.org (moderated for non-subscribers)
@@ -6582,7 +6589,7 @@ F:        drivers/gpu/drm/gma500/
 DRM DRIVERS FOR HISILICON
 M:     Xinliang Liu <xinliang.liu@linaro.org>
 M:     Tian Tao  <tiantao6@hisilicon.com>
-R:     John Stultz <john.stultz@linaro.org>
+R:     John Stultz <jstultz@google.com>
 R:     Xinwei Kong <kong.kongxinwei@hisilicon.com>
 R:     Chen Feng <puck.chen@hisilicon.com>
 L:     dri-devel@lists.freedesktop.org
@@ -7378,7 +7385,6 @@ L:        linux-mm@kvack.org
 S:     Supported
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
 F:     arch/alpha/kernel/binfmt_loader.c
-F:     arch/x86/ia32/ia32_aout.c
 F:     fs/*binfmt_*.c
 F:     fs/exec.c
 F:     include/linux/binfmts.h
@@ -7494,7 +7500,7 @@ F:        Documentation/hwmon/f71805f.rst
 F:     drivers/hwmon/f71805f.c
 
 FADDR2LINE
-M:     Josh Poimboeuf <jpoimboe@redhat.com>
+M:     Josh Poimboeuf <jpoimboe@kernel.org>
 S:     Maintained
 F:     scripts/faddr2line
 
@@ -8107,7 +8113,7 @@ M:        Ingo Molnar <mingo@redhat.com>
 R:     Peter Zijlstra <peterz@infradead.org>
 R:     Darren Hart <dvhart@infradead.org>
 R:     Davidlohr Bueso <dave@stgolabs.net>
-R:     André Almeida <andrealmeid@collabora.com>
+R:     André Almeida <andrealmeid@igalia.com>
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
@@ -8380,7 +8386,7 @@ M:        Linus Walleij <linus.walleij@linaro.org>
 M:     Bartosz Golaszewski <brgl@bgdev.pl>
 L:     linux-gpio@vger.kernel.org
 S:     Maintained
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
 F:     Documentation/ABI/obsolete/sysfs-gpio
 F:     Documentation/ABI/testing/gpio-cdev
 F:     Documentation/admin-guide/gpio/
@@ -8675,7 +8681,6 @@ F:        include/linux/cciss*.h
 F:     include/uapi/linux/cciss*.h
 
 HFI1 DRIVER
-M:     Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
 M:     Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
 L:     linux-rdma@vger.kernel.org
 S:     Supported
@@ -8844,7 +8849,7 @@ F:        Documentation/devicetree/bindings/net/hisilicon*.txt
 F:     drivers/net/ethernet/hisilicon/
 
 HIKEY960 ONBOARD USB GPIO HUB DRIVER
-M:     John Stultz <john.stultz@linaro.org>
+M:     John Stultz <jstultz@google.com>
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
 F:     drivers/misc/hisi_hikey_usb.c
@@ -9336,14 +9341,12 @@ F:      drivers/pci/hotplug/rpaphp*
 
 IBM Power SRIOV Virtual NIC Device Driver
 M:     Dany Madden <drt@linux.ibm.com>
-M:     Sukadev Bhattiprolu <sukadev@linux.ibm.com>
 R:     Thomas Falcon <tlfalcon@linux.ibm.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/ibm/ibmvnic.*
 
 IBM Power Virtual Accelerator Switchboard
-M:     Sukadev Bhattiprolu <sukadev@linux.ibm.com>
 L:     linuxppc-dev@lists.ozlabs.org
 S:     Supported
 F:     arch/powerpc/include/asm/vas.h
@@ -9598,6 +9601,7 @@ F:        drivers/iio/pressure/dps310.c
 
 INFINIBAND SUBSYSTEM
 M:     Jason Gunthorpe <jgg@nvidia.com>
+M:     Leon Romanovsky <leonro@nvidia.com>
 L:     linux-rdma@vger.kernel.org
 S:     Supported
 W:     https://github.com/linux-rdma/rdma-core
@@ -10128,7 +10132,7 @@ S:      Supported
 F:     drivers/net/wireless/intel/iwlegacy/
 
 INTEL WIRELESS WIFI LINK (iwlwifi)
-M:     Luca Coelho <luciano.coelho@intel.com>
+M:     Gregory Greenman <gregory.greenman@intel.com>
 L:     linux-wireless@vger.kernel.org
 S:     Supported
 W:     https://wireless.wiki.kernel.org/en/users/drivers/iwlwifi
@@ -10237,8 +10241,6 @@ F:      drivers/net/ethernet/sgi/ioc3-eth.c
 IOMAP FILESYSTEM LIBRARY
 M:     Christoph Hellwig <hch@infradead.org>
 M:     Darrick J. Wong <djwong@kernel.org>
-M:     linux-xfs@vger.kernel.org
-M:     linux-fsdevel@vger.kernel.org
 L:     linux-xfs@vger.kernel.org
 L:     linux-fsdevel@vger.kernel.org
 S:     Supported
@@ -10369,6 +10371,7 @@ F:      include/linux/isapnp.h
 ISCSI
 M:     Lee Duncan <lduncan@suse.com>
 M:     Chris Leech <cleech@redhat.com>
+M:     Mike Christie <michael.christie@oracle.com>
 L:     open-iscsi@googlegroups.com
 L:     linux-scsi@vger.kernel.org
 S:     Maintained
@@ -10546,6 +10549,7 @@ M:      Andrey Ryabinin <ryabinin.a.a@gmail.com>
 R:     Alexander Potapenko <glider@google.com>
 R:     Andrey Konovalov <andreyknvl@gmail.com>
 R:     Dmitry Vyukov <dvyukov@google.com>
+R:     Vincenzo Frascino <vincenzo.frascino@arm.com>
 L:     kasan-dev@googlegroups.com
 S:     Maintained
 F:     Documentation/dev-tools/kasan.rst
@@ -11208,7 +11212,7 @@ F:      scripts/spdxcheck.py
 
 LINEAR RANGES HELPERS
 M:     Mark Brown <broonie@kernel.org>
-R:     Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
+R:     Matti Vaittinen <mazziesaccount@gmail.com>
 F:     lib/linear_ranges.c
 F:     lib/test_linear_ranges.c
 F:     include/linux/linear_range.h
@@ -11345,7 +11349,7 @@ F:      drivers/mmc/host/litex_mmc.c
 N:     litex
 
 LIVE PATCHING
-M:     Josh Poimboeuf <jpoimboe@redhat.com>
+M:     Josh Poimboeuf <jpoimboe@kernel.org>
 M:     Jiri Kosina <jikos@kernel.org>
 M:     Miroslav Benes <mbenes@suse.cz>
 M:     Petr Mladek <pmladek@suse.com>
@@ -11905,7 +11909,7 @@ F:      drivers/iio/proximity/mb1232.c
 
 MAXIM MAX17040 FAMILY FUEL GAUGE DRIVERS
 R:     Iskren Chernev <iskren.chernev@gmail.com>
-R:     Krzysztof Kozlowski <krzk@kernel.org>
+R:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 R:     Marek Szyprowski <m.szyprowski@samsung.com>
 R:     Matheus Castello <matheus@castello.eng.br>
 L:     linux-pm@vger.kernel.org
@@ -11915,7 +11919,7 @@ F:      drivers/power/supply/max17040_battery.c
 
 MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS
 R:     Hans de Goede <hdegoede@redhat.com>
-R:     Krzysztof Kozlowski <krzk@kernel.org>
+R:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 R:     Marek Szyprowski <m.szyprowski@samsung.com>
 R:     Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
 R:     Purism Kernel Team <kernel@puri.sm>
@@ -11967,10 +11971,11 @@ F:    Documentation/devicetree/bindings/power/supply/maxim,max77976.yaml
 F:     drivers/power/supply/max77976_charger.c
 
 MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 M:     Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:     linux-pm@vger.kernel.org
 S:     Supported
+B:     mailto:linux-samsung-soc@vger.kernel.org
 F:     Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml
 F:     Documentation/devicetree/bindings/power/supply/maxim,max77693.yaml
 F:     drivers/power/supply/max14577_charger.c
@@ -11978,10 +11983,11 @@ F:    drivers/power/supply/max77693_charger.c
 
 MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
 M:     Chanwoo Choi <cw00.choi@samsung.com>
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 M:     Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:     linux-kernel@vger.kernel.org
 S:     Supported
+B:     mailto:linux-samsung-soc@vger.kernel.org
 F:     Documentation/devicetree/bindings/*/maxim,max14577.yaml
 F:     Documentation/devicetree/bindings/*/maxim,max77686.yaml
 F:     Documentation/devicetree/bindings/*/maxim,max77693.yaml
@@ -12401,7 +12407,7 @@ F:      drivers/mmc/host/mtk-sd.c
 
 MEDIATEK MT76 WIRELESS LAN DRIVER
 M:     Felix Fietkau <nbd@nbd.name>
-M:     Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
+M:     Lorenzo Bianconi <lorenzo@kernel.org>
 M:     Ryder Lee <ryder.lee@mediatek.com>
 R:     Shayne Chen <shayne.chen@mediatek.com>
 R:     Sean Wang <sean.wang@mediatek.com>
@@ -12672,9 +12678,10 @@ F:     mm/memblock.c
 F:     tools/testing/memblock/
 
 MEMORY CONTROLLER DRIVERS
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
+B:     mailto:krzysztof.kozlowski@linaro.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git
 F:     Documentation/devicetree/bindings/memory-controllers/
 F:     drivers/memory/
@@ -13623,6 +13630,7 @@ F:      net/core/drop_monitor.c
 
 NETWORKING DRIVERS
 M:     "David S. Miller" <davem@davemloft.net>
+M:     Eric Dumazet <edumazet@google.com>
 M:     Jakub Kicinski <kuba@kernel.org>
 M:     Paolo Abeni <pabeni@redhat.com>
 L:     netdev@vger.kernel.org
@@ -13670,6 +13678,7 @@ F:      tools/testing/selftests/drivers/net/dsa/
 
 NETWORKING [GENERAL]
 M:     "David S. Miller" <davem@davemloft.net>
+M:     Eric Dumazet <edumazet@google.com>
 M:     Jakub Kicinski <kuba@kernel.org>
 M:     Paolo Abeni <pabeni@redhat.com>
 L:     netdev@vger.kernel.org
@@ -13816,10 +13825,11 @@ F:    include/uapi/linux/nexthop.h
 F:     net/ipv4/nexthop.c
 
 NFC SUBSYSTEM
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 L:     linux-nfc@lists.01.org (subscribers-only)
 L:     netdev@vger.kernel.org
 S:     Maintained
+B:     mailto:linux-nfc@lists.01.org
 F:     Documentation/devicetree/bindings/net/nfc/
 F:     drivers/nfc/
 F:     include/linux/platform_data/nfcmrvl.h
@@ -14133,7 +14143,7 @@ F:      Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml
 F:     drivers/regulator/pf8x00-regulator.c
 
 NXP PTN5150A CC LOGIC AND EXTCON DRIVER
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml
@@ -14215,7 +14225,7 @@ F:      lib/objagg.c
 F:     lib/test_objagg.c
 
 OBJTOOL
-M:     Josh Poimboeuf <jpoimboe@redhat.com>
+M:     Josh Poimboeuf <jpoimboe@kernel.org>
 M:     Peter Zijlstra <peterz@infradead.org>
 S:     Supported
 F:     tools/objtool/
@@ -14363,7 +14373,6 @@ F:      arch/arm/*omap*/*pm*
 F:     drivers/cpufreq/omap-cpufreq.c
 
 OMAP POWERDOMAIN SOC ADAPTATION LAYER SUPPORT
-M:     Rajendra Nayak <rnayak@codeaurora.org>
 M:     Paul Walmsley <paul@pwsan.com>
 L:     linux-omap@vger.kernel.org
 S:     Maintained
@@ -14656,7 +14665,6 @@ F:      drivers/rtc/rtc-optee.c
 
 OPA-VNIC DRIVER
 M:     Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
-M:     Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
 L:     linux-rdma@vger.kernel.org
 S:     Supported
 F:     drivers/infiniband/ulp/opa_vnic
@@ -14687,7 +14695,7 @@ F:      scripts/dtc/
 
 OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
 M:     Rob Herring <robh+dt@kernel.org>
-M:     Krzysztof Kozlowski <krzk+dt@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski+dt@linaro.org>
 L:     devicetree@vger.kernel.org
 S:     Maintained
 C:     irc://irc.libera.chat/devicetree
@@ -15467,7 +15475,8 @@ F:      tools/perf/
 PERFORMANCE EVENTS TOOLING ARM64
 R:     John Garry <john.garry@huawei.com>
 R:     Will Deacon <will@kernel.org>
-R:     Mathieu Poirier <mathieu.poirier@linaro.org>
+R:     James Clark <james.clark@arm.com>
+R:     Mike Leach <mike.leach@linaro.org>
 R:     Leo Yan <leo.yan@linaro.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
@@ -15599,7 +15608,7 @@ F:      drivers/pinctrl/renesas/
 
 PIN CONTROLLER - SAMSUNG
 M:     Tomasz Figa <tomasz.figa@gmail.com>
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 M:     Sylwester Nawrocki <s.nawrocki@samsung.com>
 R:     Alim Akhtar <alim.akhtar@samsung.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -15607,6 +15616,7 @@ L:      linux-samsung-soc@vger.kernel.org
 S:     Maintained
 C:     irc://irc.libera.chat/linux-exynos
 Q:     https://patchwork.kernel.org/project/linux-samsung-soc/list/
+B:     mailto:linux-samsung-soc@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/samsung.git
 F:     Documentation/devicetree/bindings/pinctrl/samsung,pinctrl*yaml
 F:     drivers/pinctrl/samsung/
@@ -16098,7 +16108,6 @@ F:      include/uapi/linux/qemu_fw_cfg.h
 
 QIB DRIVER
 M:     Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
-M:     Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
 L:     linux-rdma@vger.kernel.org
 S:     Supported
 F:     drivers/infiniband/hw/qib/
@@ -16616,7 +16625,6 @@ F:      drivers/net/ethernet/rdc/r6040.c
 
 RDMAVT - RDMA verbs software
 M:     Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
-M:     Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
 L:     linux-rdma@vger.kernel.org
 S:     Supported
 F:     drivers/infiniband/sw/rdmavt
@@ -17011,8 +17019,7 @@ S:      Odd Fixes
 F:     drivers/tty/serial/rp2.*
 
 ROHM BD99954 CHARGER IC
-R:     Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
-L:     linux-power@fi.rohmeurope.com
+R:     Matti Vaittinen <mazziesaccount@gmail.com>
 S:     Supported
 F:     drivers/power/supply/bd99954-charger.c
 F:     drivers/power/supply/bd99954-charger.h
@@ -17035,8 +17042,7 @@ F:      drivers/regulator/bd9571mwv-regulator.c
 F:     include/linux/mfd/bd9571mwv.h
 
 ROHM POWER MANAGEMENT IC DEVICE DRIVERS
-R:     Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
-L:     linux-power@fi.rohmeurope.com
+R:     Matti Vaittinen <mazziesaccount@gmail.com>
 S:     Supported
 F:     drivers/clk/clk-bd718x7.c
 F:     drivers/gpio/gpio-bd71815.c
@@ -17278,7 +17284,7 @@ W:      http://www.ibm.com/developerworks/linux/linux390/
 F:     drivers/s390/scsi/zfcp_*
 
 S3C ADC BATTERY DRIVER
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 L:     linux-samsung-soc@vger.kernel.org
 S:     Odd Fixes
 F:     drivers/power/supply/s3c_adc_battery.c
@@ -17323,15 +17329,16 @@ F:    Documentation/admin-guide/LSM/SafeSetID.rst
 F:     security/safesetid/
 
 SAMSUNG AUDIO (ASoC) DRIVERS
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 M:     Sylwester Nawrocki <s.nawrocki@samsung.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Supported
+B:     mailto:linux-samsung-soc@vger.kernel.org
 F:     Documentation/devicetree/bindings/sound/samsung*
 F:     sound/soc/samsung/
 
 SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 L:     linux-crypto@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
 S:     Maintained
@@ -17366,11 +17373,12 @@ S:    Maintained
 F:     drivers/platform/x86/samsung-laptop.c
 
 SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 M:     Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:     linux-kernel@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
 S:     Supported
+B:     mailto:linux-samsung-soc@vger.kernel.org
 F:     Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml
 F:     Documentation/devicetree/bindings/mfd/samsung,s2m*.yaml
 F:     Documentation/devicetree/bindings/mfd/samsung,s5m*.yaml
@@ -17392,7 +17400,7 @@ F:      drivers/media/platform/samsung/s3c-camif/
 F:     include/media/drv-intf/s3c_camif.h
 
 SAMSUNG S3FWRN5 NFC DRIVER
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 M:     Krzysztof Opasiak <k.opasiak@samsung.com>
 L:     linux-nfc@lists.01.org (subscribers-only)
 S:     Maintained
@@ -17414,7 +17422,7 @@ S:      Supported
 F:     drivers/media/i2c/s5k5baf.c
 
 SAMSUNG S5P Security SubSystem (SSS) DRIVER
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 M:     Vladimir Zapolskiy <vz@mleia.com>
 L:     linux-crypto@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
@@ -17449,7 +17457,7 @@ F:      include/linux/clk/samsung.h
 F:     include/linux/platform_data/clk-s3c2410.h
 
 SAMSUNG SPI DRIVERS
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 M:     Andi Shyti <andi@etezian.org>
 L:     linux-spi@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
@@ -17467,7 +17475,7 @@ F:      drivers/net/ethernet/samsung/sxgbe/
 
 SAMSUNG THERMAL DRIVER
 M:     Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
-M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
 L:     linux-pm@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
 S:     Maintained
@@ -17651,8 +17659,8 @@ K:      \bTIF_SECCOMP\b
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) Broadcom BRCMSTB DRIVER
 M:     Al Cooper <alcooperx@gmail.com>
+R:     Broadcom Kernel Team <bcm-kernel-feedback-list@broadcom.com>
 L:     linux-mmc@vger.kernel.org
-L:     bcm-kernel-feedback-list@broadcom.com
 S:     Maintained
 F:     drivers/mmc/host/sdhci-brcmstb*
 
@@ -18785,7 +18793,7 @@ F:      include/dt-bindings/reset/starfive-jh7100.h
 
 STATIC BRANCH/CALL
 M:     Peter Zijlstra <peterz@infradead.org>
-M:     Josh Poimboeuf <jpoimboe@redhat.com>
+M:     Josh Poimboeuf <jpoimboe@kernel.org>
 M:     Jason Baron <jbaron@akamai.com>
 R:     Steven Rostedt <rostedt@goodmis.org>
 R:     Ard Biesheuvel <ardb@kernel.org>
@@ -19786,7 +19794,7 @@ F:      drivers/net/wireless/ti/
 F:     include/linux/wl12xx.h
 
 TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
-M:     John Stultz <john.stultz@linaro.org>
+M:     John Stultz <jstultz@google.com>
 M:     Thomas Gleixner <tglx@linutronix.de>
 R:     Stephen Boyd <sboyd@kernel.org>
 L:     linux-kernel@vger.kernel.org
@@ -21118,7 +21126,7 @@ F:      include/linux/regulator/
 K:     regulator_get_optional
 
 VOLTAGE AND CURRENT REGULATOR IRQ HELPERS
-R:     Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
+R:     Matti Vaittinen <mazziesaccount@gmail.com>
 F:     drivers/regulator/irq_helpers.c
 
 VRF
@@ -21231,10 +21239,8 @@ S:     Maintained
 F:     drivers/hid/hid-wiimote*
 
 WILOCITY WIL6210 WIRELESS DRIVER
-M:     Maya Erez <merez@codeaurora.org>
 L:     linux-wireless@vger.kernel.org
-L:     wil6210@qti.qualcomm.com
-S:     Supported
+S:     Orphan
 W:     https://wireless.wiki.kernel.org/en/users/Drivers/wil6210
 F:     drivers/net/wireless/ath/wil6210/
 
@@ -21438,6 +21444,15 @@ F:     arch/x86/include/asm/uv/
 F:     arch/x86/kernel/apic/x2apic_uv_x.c
 F:     arch/x86/platform/uv/
 
+X86 STACK UNWINDING
+M:     Josh Poimboeuf <jpoimboe@kernel.org>
+M:     Peter Zijlstra <peterz@infradead.org>
+S:     Supported
+F:     arch/x86/include/asm/unwind*.h
+F:     arch/x86/kernel/dumpstack.c
+F:     arch/x86/kernel/stacktrace.c
+F:     arch/x86/kernel/unwind_*.c
+
 X86 VDSO
 M:     Andy Lutomirski <luto@kernel.org>
 L:     linux-kernel@vger.kernel.org
@@ -21600,7 +21615,6 @@ F:      drivers/xen/*swiotlb*
 XFS FILESYSTEM
 C:     irc://irc.oftc.net/xfs
 M:     Darrick J. Wong <djwong@kernel.org>
-M:     linux-xfs@vger.kernel.org
 L:     linux-xfs@vger.kernel.org
 S:     Supported
 W:     http://xfs.org/
index 8c7de9a72ea26f88316c70279164056897631541..7d5b0bfe79602de49777f68a7e0e9d38da485767 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 18
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION =
 NAME = Superb Owl
 
 # *DOCUMENTATION*
index 29b0167c088b8af08b7fd53f08b32f4d15296e76..513a29c62a9b37b25c163b220924776e85f8de59 100644 (file)
@@ -35,6 +35,7 @@ config KPROBES
        depends on MODULES
        depends on HAVE_KPROBES
        select KALLSYMS
+       select TASKS_RCU if PREEMPTION
        help
          Kprobes allows you to trap at almost any kernel address and
          execute a callback function.  register_kprobe() establishes
@@ -854,10 +855,8 @@ config HAVE_ARCH_HUGE_VMAP
 
 #
 #  Archs that select this would be capable of PMD-sized vmaps (i.e.,
-#  arch_vmap_pmd_supported() returns true), and they must make no assumptions
-#  that vmalloc memory is mapped with PAGE_SIZE ptes. The VM_NO_HUGE_VMAP flag
-#  can be used to prohibit arch-specific allocations from using hugepages to
-#  help with this (e.g., modules may require it).
+#  arch_vmap_pmd_supported() returns true). The VM_ALLOW_HUGE_VMAP flag
+#  must be used to enable allocations to use hugepages.
 #
 config HAVE_ARCH_HUGE_VMALLOC
        depends on HAVE_ARCH_HUGE_VMAP
index dcaa44e408ace2adc3bdad5c8545ccaf046d55f9..f48ba03e9b5e7dbf440571ebed1f15a11974abac 100644 (file)
                        cs-gpios = <&creg_gpio 0 GPIO_ACTIVE_LOW>,
                                   <&creg_gpio 1 GPIO_ACTIVE_LOW>;
 
-                       spi-flash@0 {
+                       flash@0 {
                                compatible = "sst26wf016b", "jedec,spi-nor";
                                reg = <0>;
                                #address-cells = <1>;
index 088d348781c1c62847ccc389377bbe940f3548db..1b0ffaeee16d0e2efb720a91b8a7b3d0eb7d815a 100644 (file)
@@ -5,7 +5,7 @@
 
 #define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
-#define ATOMIC_OP(op, c_op, asm_op)                                    \
+#define ATOMIC_OP(op, asm_op)                                  \
 static inline void arch_atomic_##op(int i, atomic_t *v)                        \
 {                                                                      \
        unsigned int val;                                               \
@@ -21,7 +21,7 @@ static inline void arch_atomic_##op(int i, atomic_t *v)                       \
        : "cc");                                                        \
 }                                                                      \
 
-#define ATOMIC_OP_RETURN(op, c_op, asm_op)                             \
+#define ATOMIC_OP_RETURN(op, asm_op)                           \
 static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)        \
 {                                                                      \
        unsigned int val;                                               \
@@ -42,7 +42,7 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)       \
 #define arch_atomic_add_return_relaxed         arch_atomic_add_return_relaxed
 #define arch_atomic_sub_return_relaxed         arch_atomic_sub_return_relaxed
 
-#define ATOMIC_FETCH_OP(op, c_op, asm_op)                              \
+#define ATOMIC_FETCH_OP(op, asm_op)                            \
 static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
 {                                                                      \
        unsigned int val, orig;                                         \
@@ -69,23 +69,23 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)      \
 #define arch_atomic_fetch_or_relaxed           arch_atomic_fetch_or_relaxed
 #define arch_atomic_fetch_xor_relaxed          arch_atomic_fetch_xor_relaxed
 
-#define ATOMIC_OPS(op, c_op, asm_op)                                   \
-       ATOMIC_OP(op, c_op, asm_op)                                     \
-       ATOMIC_OP_RETURN(op, c_op, asm_op)                              \
-       ATOMIC_FETCH_OP(op, c_op, asm_op)
+#define ATOMIC_OPS(op, asm_op)                                 \
+       ATOMIC_OP(op, asm_op)                                   \
+       ATOMIC_OP_RETURN(op, asm_op)                            \
+       ATOMIC_FETCH_OP(op, asm_op)
 
-ATOMIC_OPS(add, +=, add)
-ATOMIC_OPS(sub, -=, sub)
+ATOMIC_OPS(add, add)
+ATOMIC_OPS(sub, sub)
 
 #undef ATOMIC_OPS
-#define ATOMIC_OPS(op, c_op, asm_op)                                   \
-       ATOMIC_OP(op, c_op, asm_op)                                     \
-       ATOMIC_FETCH_OP(op, c_op, asm_op)
+#define ATOMIC_OPS(op, asm_op)                                 \
+       ATOMIC_OP(op, asm_op)                                   \
+       ATOMIC_FETCH_OP(op, asm_op)
 
-ATOMIC_OPS(and, &=, and)
-ATOMIC_OPS(andnot, &= ~, bic)
-ATOMIC_OPS(or, |=, or)
-ATOMIC_OPS(xor, ^=, xor)
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(andnot, bic)
+ATOMIC_OPS(or, or)
+ATOMIC_OPS(xor, xor)
 
 #define arch_atomic_andnot             arch_atomic_andnot
 
index 7848348719b26fbbc90b61cff8cf3b1a99ebe6cf..64ca25d199beaa28b91be8ab17f74b953ac7004f 100644 (file)
@@ -98,9 +98,6 @@
 /*
  * 1st level paging: pgd
  */
-#define pgd_index(addr)                ((addr) >> PGDIR_SHIFT)
-#define pgd_offset(mm, addr)   (((mm)->pgd) + pgd_index(addr))
-#define pgd_offset_k(addr)     pgd_offset(&init_mm, addr)
 #define pgd_ERROR(e) \
        pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
index 03f8b1be0c3a82ba27b54e0ba96243a4766f8b66..1e1db51b69414362f67e62813df484a39f1690ec 100644 (file)
@@ -366,7 +366,7 @@ void __kprobes disasm_instr(unsigned long addr, struct disasm_state *state,
        case op_SP:     /* LD_S|LDB_S b,[sp,u7], ST_S|STB_S b,[sp,u7] */
                /* note: we are ignoring possibility of:
                 * ADD_S, SUB_S, PUSH_S, POP_S as these should not
-                * cause unaliged exception anyway */
+                * cause unaligned exception anyway */
                state->write = BITS(state->words[0], 6, 6);
                state->zz = BITS(state->words[0], 5, 5);
                if (state->zz)
@@ -503,7 +503,6 @@ int __kprobes disasm_next_pc(unsigned long pc, struct pt_regs *regs,
 {
        struct disasm_state instr;
 
-       memset(&instr, 0, sizeof(struct disasm_state));
        disasm_instr(pc, &instr, 0, regs, cregs);
 
        *next_pc = pc + instr.instr_len;
index dd77a0c8f740b6c8be4c688a909b825467e3ff5d..66ba549b520fc02a5518be15c795ae466b6539ee 100644 (file)
@@ -196,6 +196,7 @@ tracesys_exit:
        st  r0, [sp, PT_r0]     ; sys call return value in pt_regs
 
        ;POST Sys Call Ptrace Hook
+       mov r0, sp              ; pt_regs needed
        bl  @syscall_trace_exit
        b   ret_from_exception ; NOT ret_from_system_call at is saves r0 which
        ; we'd done before calling post hook above
index f748483628f2c22ec98408c056d417f0fe580179..3c1590c27fae3b3e72d37d01a25c5ca691c29bf3 100644 (file)
@@ -319,7 +319,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
        regs->ret = (unsigned long)ksig->ka.sa.sa_handler;
 
        /*
-        * handler returns using sigreturn stub provided already by userpsace
+        * handler returns using sigreturn stub provided already by userspace
         * If not, nuke the process right away
         */
        if(!(ksig->ka.sa.sa_flags & SA_RESTORER))
index 78e6d069b1c1279eb556b143fe6413ac5857208e..d947473f1e6da5f906ba25c7841c99bb981055ad 100644 (file)
@@ -35,7 +35,7 @@ EXPORT_SYMBOL_GPL(smp_atomic_ops_lock);
 
 struct plat_smp_ops  __weak plat_smp_ops;
 
-/* XXX: per cpu ? Only needed once in early seconday boot */
+/* XXX: per cpu ? Only needed once in early secondary boot */
 struct task_struct *secondary_idle_tsk;
 
 /* Called from start_kernel */
@@ -274,7 +274,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg)
         * and read back old value
         */
        do {
-               new = old = READ_ONCE(*ipi_data_ptr);
+               new = old = *ipi_data_ptr;
                new |= 1U << msg;
        } while (cmpxchg(ipi_data_ptr, old, new) != old);
 
index d63ebd81f1c6d65bd98350aa4b3579c208a56093..99a9b92ed98d629e75783f71eef34f72d84800fd 100644 (file)
@@ -237,7 +237,7 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs,
        if (state.fault)
                goto fault;
 
-       /* clear any remanants of delay slot */
+       /* clear any remnants of delay slot */
        if (delay_mode(regs)) {
                regs->ret = regs->bta & ~1U;
                regs->status32 &= ~STATUS_DE_MASK;
index 8aa1231865d15a39b52c11e63eac1b86686d0d11..5446967ea98d3c1715cf5cbb6acaba7ecf2df108 100644 (file)
@@ -401,7 +401,7 @@ static inline void __before_dc_op(const int op)
 {
        if (op == OP_FLUSH_N_INV) {
                /* Dcache provides 2 cmd: FLUSH or INV
-                * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
+                * INV in turn has sub-modes: DISCARD or FLUSH-BEFORE
                 * flush-n-inv is achieved by INV cmd but with IM=1
                 * So toggle INV sub-mode depending on op request and default
                 */
index c9629cb5ccd1eeba1729040b8d5e8bd420db313a..7da42a5b959cf5ceee61b0a560a4ac137fc3d4db 100644 (file)
                                compatible = "ti,am3359-tscadc";
                                reg = <0x0 0x1000>;
                                interrupts = <16>;
+                               clocks = <&adc_tsc_fck>;
+                               clock-names = "fck";
                                status = "disabled";
                                dmas = <&edma 53 0>, <&edma 57 0>;
                                dma-names = "fifo0", "fifo1";
index 0d2fac98ce7d2355a4195d43b964cf1e7cebd45c..c8b80f156ec981bac2de6c8440deb878179ec7aa 100644 (file)
 
        /* HS USB Host PHY on PORT 1 */
        hsusb1_phy: hsusb1_phy {
+               pinctrl-names = "default";
+               pinctrl-0 = <&hsusb1_rst_pins>;
                compatible = "usb-nop-xceiv";
                reset-gpios = <&gpio2 25 GPIO_ACTIVE_LOW>; /* gpio_57 */
                #phy-cells = <0>;
 };
 
 &davinci_emac {
-            status = "okay";
+       pinctrl-names = "default";
+       pinctrl-0 = <&ethernet_pins>;
+       status = "okay";
 };
 
 &davinci_mdio {
 };
 
 &i2c2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&i2c2_pins>;
        clock-frequency = <400000>;
        /* User DIP swithes [1:8] / User LEDS [1:2] */
        tca6416: gpio@21 {
 };
 
 &i2c3 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&i2c3_pins>;
        clock-frequency = <400000>;
 };
 
 };
 
 &usbhshost {
+       pinctrl-names = "default";
+       pinctrl-0 = <&hsusb1_pins>;
        port1-mode = "ehci-phy";
 };
 
 };
 
 &omap3_pmx_core {
-       pinctrl-names = "default";
-       pinctrl-0 = <&hsusb1_rst_pins>;
+
+       ethernet_pins: pinmux_ethernet_pins {
+               pinctrl-single,pins = <
+                       OMAP3_CORE1_IOPAD(0x21fe, PIN_INPUT | MUX_MODE0) /* rmii_mdio_data */
+                       OMAP3_CORE1_IOPAD(0x2200, MUX_MODE0) /* rmii_mdio_clk */
+                       OMAP3_CORE1_IOPAD(0x2202, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_rxd0 */
+                       OMAP3_CORE1_IOPAD(0x2204, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_rxd1 */
+                       OMAP3_CORE1_IOPAD(0x2206, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_crs_dv */
+                       OMAP3_CORE1_IOPAD(0x2208, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_rxer */
+                       OMAP3_CORE1_IOPAD(0x220a, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_txd0 */
+                       OMAP3_CORE1_IOPAD(0x220c, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_txd1 */
+                       OMAP3_CORE1_IOPAD(0x220e, PIN_OUTPUT_PULLDOWN |MUX_MODE0) /* rmii_txen */
+                       OMAP3_CORE1_IOPAD(0x2210, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_50mhz_clk */
+               >;
+       };
+
+       i2c2_pins: pinmux_i2c2_pins {
+               pinctrl-single,pins = <
+                       OMAP3_CORE1_IOPAD(0x21be, PIN_INPUT_PULLUP | MUX_MODE0)  /* i2c2_scl */
+                       OMAP3_CORE1_IOPAD(0x21c0, PIN_INPUT_PULLUP | MUX_MODE0)  /* i2c2_sda */
+               >;
+       };
+
+       i2c3_pins: pinmux_i2c3_pins {
+               pinctrl-single,pins = <
+                       OMAP3_CORE1_IOPAD(0x21c2, PIN_INPUT_PULLUP | MUX_MODE0)  /* i2c3_scl */
+                       OMAP3_CORE1_IOPAD(0x21c4, PIN_INPUT_PULLUP | MUX_MODE0)  /* i2c3_sda */
+               >;
+       };
 
        leds_pins: pinmux_leds_pins {
                pinctrl-single,pins = <
 };
 
 &omap3_pmx_core2 {
-       pinctrl-names = "default";
-       pinctrl-0 = <&hsusb1_pins>;
 
        hsusb1_pins: pinmux_hsusb1_pins {
                pinctrl-single,pins = <
index 8b669e2eafec4813fa8294c7de5a8d079b30189c..f7b680f6c48adba4d8e6210d4be3b460e7426998 100644 (file)
@@ -69,6 +69,8 @@
 };
 
 &i2c1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&i2c1_pins>;
        clock-frequency = <400000>;
 
        s35390a: s35390a@30 {
 
 &omap3_pmx_core {
 
+       i2c1_pins: pinmux_i2c1_pins {
+               pinctrl-single,pins = <
+                       OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT_PULLUP | MUX_MODE0)  /* i2c1_scl */
+                       OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT_PULLUP | MUX_MODE0)  /* i2c1_sda */
+               >;
+       };
+
        wl12xx_buffer_pins: pinmux_wl12xx_buffer_pins {
                pinctrl-single,pins = <
                        OMAP3_CORE1_IOPAD(0x2156, PIN_OUTPUT | MUX_MODE4)  /* mmc1_dat7.gpio_129 */
index e71ccfd1df631209c17fe9c42da3dbbae09466d8..ff4c07c69af1cd291de0aff45ee2fd3ecaa6b1ac 100644 (file)
        lm25066@40 {
                compatible = "lm25066";
                reg = <0x40>;
+               shunt-resistor-micro-ohms = <1000>;
        };
 
        /* 12VSB PMIC */
        lm25066@41 {
                compatible = "lm25066";
                reg = <0x41>;
+               shunt-resistor-micro-ohms = <10000>;
        };
 };
 
        gpio-line-names =
                /*  A */ "LOCATORLED_STATUS_N", "BMC_MAC2_INTB", "NMI_BTN_N", "BMC_NMI",
                        "", "", "", "",
-               /*  B */ "DDR_MEM_TEMP", "", "", "", "", "", "", "",
+               /*  B */ "POST_COMPLETE_N", "", "", "", "", "", "", "",
                /*  C */ "", "", "", "", "PCIE_HP_SEL_N", "PCIE_SATA_SEL_N", "LOCATORBTN", "",
                /*  D */ "BMC_PSIN", "BMC_PSOUT", "BMC_RESETCON", "RESETCON",
                        "", "", "", "PSU_FAN_FAIL_N",
index e4775bbceecc6143927b3954e3c99d44018a55c4..7cd4f075e32501158f373e2057c76e1126f69d05 100644 (file)
                groups = "FWSPID";
        };
 
-       pinctrl_fwqspid_default: fwqspid_default {
-               function = "FWSPID";
-               groups = "FWQSPID";
+       pinctrl_fwqspi_default: fwqspi_default {
+               function = "FWQSPI";
+               groups = "FWQSPI";
        };
 
        pinctrl_fwspiwp_default: fwspiwp_default {
        };
 
        pinctrl_qspi1_default: qspi1_default {
-               function = "QSPI1";
+               function = "SPI1";
                groups = "QSPI1";
        };
 
        pinctrl_qspi2_default: qspi2_default {
-               function = "QSPI2";
+               function = "SPI2";
                groups = "QSPI2";
        };
 
index 3d5ce9da42c3c2bc282f1df808ae6161e086cba0..9d2a0ce4ca061d7ffd47e501ba11623a43101f71 100644 (file)
                                reg = <0x1e6f2000 0x1000>;
                        };
 
+                       video: video@1e700000 {
+                               compatible = "aspeed,ast2600-video-engine";
+                               reg = <0x1e700000 0x1000>;
+                               clocks = <&syscon ASPEED_CLK_GATE_VCLK>,
+                                        <&syscon ASPEED_CLK_GATE_ECLK>;
+                               clock-names = "vclk", "eclk";
+                               interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+                               status = "disabled";
+                       };
+
                        gpio0: gpio@1e780000 {
                                #gpio-cells = <2>;
                                gpio-controller;
index c1c8650dafce3d18e4e9fda013f13c952aaf667a..3542ad8a243ea9daad77ff2bdd805e99f6982b12 100644 (file)
@@ -44,7 +44,7 @@
        status = "okay";
 
        /* spi0.0: 4M Flash Macronix MX25R4035FM1IL0 */
-       spi-flash@0 {
+       flash@0 {
                compatible = "mxicy,mx25u4035", "jedec,spi-nor";
                spi-max-frequency = <33000000>;
                reg = <0>;
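
This hunk and the many like it below rename SPI flash nodes to the generic flash@... form preferred by the devicetree specification. The renames are safe because the kernel binds drivers by compatible string, never by node name, as a minimal match table shows (the table name here is illustrative):

#include <linux/mod_devicetable.h>

/* the key is the compatible string; the node may be called anything */
static const struct of_device_id spi_nor_of_ids[] = {
	{ .compatible = "jedec,spi-nor" },
	{ /* sentinel */ }
};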
index 47a00062f01f1ed13faf804e37c9099aacb0016e..9cf60b6f695c461835dcff0ce92c362b2b52204a 100644 (file)
        cs-gpios = <&pioA 3 GPIO_ACTIVE_HIGH>, <&pioC 11 GPIO_ACTIVE_LOW>, <0>, <0>;
        status = "okay";
 
-       m25p80@0 {
+       flash@0 {
                compatible = "jedec,spi-nor";
                spi-max-frequency = <20000000>;
                reg = <0>;
index 1e2a28c2f365d39a685f4524bcf0feced1dd431c..2fb51b9aca2ae3f78a7c21a5c962421895e78db8 100644 (file)
                nand0: nand@40000000 {
                        nand-bus-width = <8>;
                        nand-ecc-mode = "soft";
-                       nand-on-flash-bbt = <1>;
+                       nand-on-flash-bbt;
                        status = "okay";
                };
 
index 21c86171e4626143da9839e1d55e4a36ff39ac6f..ba621783acdbcc3daa32e4b14d602e6ced226c1a 100644 (file)
        pinctrl-0 = <&pinctrl_qspi1_default>;
        status = "disabled";
 
-       qspi1_flash: spi_flash@0 {
+       qspi1_flash: flash@0 {
                #address-cells = <1>;
                #size-cells = <1>;
                compatible = "jedec,spi-nor";
index c145c4e5ef582e3c59beced917d3986dfcb35089..5e8755f22784f3ef133ac87dd1e085f82e8d41d5 100644 (file)
 &qspi1 {
        status = "okay";
 
-       qspi1_flash: spi_flash@0 {
+       qspi1_flash: flash@0 {
                status = "okay";
        };
 };
index 9bf2ec0ba3e2fcaede51572dd328e9ce607c4cdb..cdfe891f9a9e65be58b660b3461724cfafedc5ad 100644 (file)
                                pinctrl-0 = <&pinctrl_spi0_default>;
                                status = "okay";
 
-                               m25p80@0 {
+                               flash@0 {
                                        compatible = "atmel,at25df321a";
                                        reg = <0>;
                                        spi-max-frequency = <50000000>;
index d72c042f2850790f1c23d958933fb8803be5946e..a49c2966b41e259d4d3c5353dadc01291b84813b 100644 (file)
@@ -57,8 +57,8 @@
                        };
 
                        spi0: spi@f0004000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_spi0_cs>;
+                               pinctrl-names = "default", "cs";
+                               pinctrl-1 = <&pinctrl_spi0_cs>;
                                cs-gpios = <&pioD 13 0>, <0>, <0>, <&pioD 16 0>;
                                status = "okay";
                        };
                        };
 
                        spi1: spi@f8008000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_spi1_cs>;
+                               pinctrl-names = "default", "cs";
+                               pinctrl-1 = <&pinctrl_spi1_cs>;
                                cs-gpios = <&pioC 25 0>;
                                status = "okay";
                        };
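
Moving the chip-select group out of "default" into a separate named "cs" state means it is no longer muxed unconditionally at probe; a driver can claim it explicitly instead. A sketch of resolving a named pinctrl state, assuming the standard consumer API:

#include <linux/err.h>
#include <linux/pinctrl/consumer.h>

static int select_cs_state(struct device *dev)
{
	struct pinctrl *p = devm_pinctrl_get(dev);
	struct pinctrl_state *cs;

	if (IS_ERR(p))
		return PTR_ERR(p);

	/* look up the state named in pinctrl-names, then apply it */
	cs = pinctrl_lookup_state(p, "cs");
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	return pinctrl_select_state(p, cs);
}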
index 710cb72bda5aeab56b8d711beda8ec1d3f38c3d6..fd1086f52b404bd35f37db3ffebf4274a50923fb 100644 (file)
@@ -49,7 +49,7 @@
                                cs-gpios = <&pioC 3 0>, <0>, <0>, <0>;
                                status = "okay";
 
-                               m25p80@0 {
+                               flash@0 {
                                        compatible = "atmel,at25df321a";
                                        spi-max-frequency = <50000000>;
                                        reg = <0>;
index d241c24f0d836a1ae61f4f8f8922985fd3966b36..e519d27479362ba81ef672a4616d8f8fb0d193da 100644 (file)
@@ -81,8 +81,8 @@
                        };
 
                        spi1: spi@fc018000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_spi0_cs>;
+                               pinctrl-names = "default", "cs";
+                               pinctrl-1 = <&pinctrl_spi1_cs>;
                                cs-gpios = <&pioB 21 0>;
                                status = "okay";
                        };
                                                atmel,pins =
                                                        <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
                                        };
-                                       pinctrl_spi0_cs: spi0_cs_default {
+                                       pinctrl_spi1_cs: spi1_cs_default {
                                                atmel,pins =
                                                        <AT91_PIOB 21 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
                                        };
index fe432b6b7e951d388066898cef9e544d90feef4e..7017f626f362d7b645dae64d8736ea8e42df5633 100644 (file)
@@ -65,7 +65,7 @@
                        spi0: spi@f8010000 {
                                cs-gpios = <&pioC 3 0>, <0>, <0>, <0>;
                                status = "okay";
-                               m25p80@0 {
+                               flash@0 {
                                        compatible = "atmel,at25df321a";
                                        spi-max-frequency = <50000000>;
                                        reg = <0>;
index 08685a10eda1d67ea61e3dd2f03aa770d39c4716..d83f76a6cd6adf5ca6f36cf771f333b40dca9894 100644 (file)
        pinctrl_flx3_default: flx3_default {
                pinmux = <PIN_PD16__FLEXCOM3_IO0>,
                         <PIN_PD17__FLEXCOM3_IO1>;
-               bias-disable;
+               bias-pull-up;
        };
 
        pinctrl_flx4_default: flx4_default {
                         <PIN_PB21__QSPI0_INT>;
                bias-disable;
                slew-rate = <0>;
-               atmel,drive-strength = <ATMEL_PIO_DRVSTR_HI>;
+               atmel,drive-strength = <ATMEL_PIO_DRVSTR_ME>;
        };
 
        pinctrl_sdmmc0_default: sdmmc0_default {
index a51a3372afa12be4a29c89b3a7f320032642931d..ebeaa6ab500e4e22960a370d8931eecf3a4780d4 100644 (file)
@@ -59,7 +59,7 @@
                        spi0: spi@f8010000 {
                                cs-gpios = <&pioC 3 0>, <0>, <0>, <0>;
                                status = "okay";
-                               m25p80@0 {
+                               flash@0 {
                                        compatible = "n25q32b", "jedec,spi-nor";
                                        spi-max-frequency = <50000000>;
                                        reg = <0>;
index e1ef4e44e663d2c7c794dc54e48138ba60922017..4624a6f076f8155f329bb9eb0e3cbe59ee3e3d6a 100644 (file)
@@ -73,7 +73,7 @@
                        spi0: spi@fffe0000 {
                                status = "okay";
                                cs-gpios = <&pioA 3 0>, <0>, <0>, <0>;
-                               mtd_dataflash@0 {
+                               flash@0 {
                                        compatible = "atmel,at45", "atmel,dataflash";
                                        spi-max-frequency = <15000000>;
                                        reg = <0>;
@@ -94,7 +94,7 @@
                        status = "okay";
                };
 
-               nor_flash@10000000 {
+               flash@10000000 {
                        compatible = "cfi-flash";
                        reg = <0x10000000 0x800000>;
                        linux,mtd-name = "physmap-flash.0";
index ce96345d28a396e6fb2a76ed1a39e6b5d1e03387..6381088ba24f3ef440264c58f8d17741872ca9f7 100644 (file)
@@ -92,7 +92,7 @@
 
                        spi0: spi@fffc8000 {
                                cs-gpios = <0>, <&pioC 11 0>, <0>, <0>;
-                               mtd_dataflash@1 {
+                               flash@1 {
                                        compatible = "atmel,at45", "atmel,dataflash";
                                        spi-max-frequency = <50000000>;
                                        reg = <1>;
index beed819609e8d365add7df4cbf1af61f39354ed2..8f11c0b7d76d7bca38f274d3743fbc0ed7819b4f 100644 (file)
                                cs-gpios = <&pioA 3 0>, <0>, <&pioA 28 0>, <0>;
                                status = "okay";
 
-                               mtd_dataflash@0 {
+                               flash@0 {
                                        compatible = "atmel,at45", "atmel,dataflash";
                                        reg = <0>;
                                        spi-max-frequency = <15000000>;
index 71f60576761a0cc35519582a7c6f18e8074d48cb..42e7340202355caca30666fd1202433ec816bd68 100644 (file)
@@ -95,7 +95,7 @@
                        spi0: spi@fffa4000 {
                                status = "okay";
                                cs-gpios = <&pioA 5 0>, <0>, <0>, <0>;
-                               mtd_dataflash@0 {
+                               flash@0 {
                                        compatible = "atmel,at45", "atmel,dataflash";
                                        spi-max-frequency = <50000000>;
                                        reg = <0>;
index 87bb39060e8becd02c140e5d5effde8fef8295f0..85c17dd1c8d5caa3f271e17a20b94b7e80a14098 100644 (file)
 
                        spi0: spi@fffc8000 {
                                cs-gpios = <0>, <&pioC 11 0>, <0>, <0>;
-                               mtd_dataflash@1 {
+                               flash@1 {
                                        compatible = "atmel,at45", "atmel,dataflash";
                                        spi-max-frequency = <50000000>;
                                        reg = <1>;
                24c512@50 {
                        compatible = "atmel,24c512";
                        reg = <0x50>;
+                       vcc-supply = <&reg_3v3>;
                };
 
                wm8731: wm8731@1b {
                        compatible = "wm8731";
                        reg = <0x1b>;
+
+                       /* PCK0 at 12MHz */
+                       clocks = <&pmc PMC_TYPE_SYSTEM 8>;
+                       clock-names = "mclk";
+                       assigned-clocks = <&pmc PMC_TYPE_SYSTEM 8>;
+                       assigned-clock-rates = <12000000>;
+
+                       HPVDD-supply = <&vcc_dac>;
+                       AVDD-supply = <&vcc_dac>;
+                       DCVDD-supply = <&reg_3v3>;
+                       DBVDD-supply = <&reg_3v3>;
                };
        };
 
                atmel,ssc-controller = <&ssc0>;
                atmel,audio-codec = <&wm8731>;
        };
+
+       reg_5v: fixedregulator0 {
+               compatible = "regulator-fixed";
+               regulator-name = "5V";
+               regulator-min-microvolt = <5000000>;
+               regulator-max-microvolt = <5000000>;
+       };
+
+       reg_3v3: fixedregulator1 {
+               compatible = "regulator-fixed";
+               regulator-name = "3V3";
+               vin-supply = <&reg_5v>;
+               regulator-min-microvolt = <3300000>;
+               regulator-max-microvolt = <3300000>;
+       };
+
+       reg_1v: fixedregulator2 {
+               compatible = "regulator-fixed";
+               regulator-name = "1V";
+               vin-supply = <&reg_5v>;
+               regulator-min-microvolt = <1000000>;
+               regulator-max-microvolt = <1000000>;
+       };
+
+       vcc_dac: fixedregulator3 {
+               compatible = "regulator-fixed";
+               regulator-name = "VCC_DAC";
+               vin-supply = <&reg_3v3>;
+               regulator-min-microvolt = <3300000>;
+               regulator-max-microvolt = <3300000>;
+       };
 };
index b6256a20fbc78421b132fb719969dc40a7f5980c..e5db198a87a85e816f00bf460c2655f23c7f1912 100644 (file)
                        spi0: spi@fffa4000{
                                status = "okay";
                                cs-gpios = <&pioB 3 0>, <0>, <0>, <0>;
-                               mtd_dataflash@0 {
+                               flash@0 {
                                        compatible = "atmel,at45", "atmel,dataflash";
                                        spi-max-frequency = <13000000>;
                                        reg = <0>;
index 2bc4e6e0a923bfb1e2d076facbb99dbd06886a37..c905d7bfc771f1c9ceb89f2a4f33401b79e62fe2 100644 (file)
                        spi0: spi@f0000000 {
                                status = "okay";
                                cs-gpios = <&pioA 14 0>, <0>, <0>, <0>;
-                               m25p80@0 {
+                               flash@0 {
                                        compatible = "atmel,at25df321a";
                                        spi-max-frequency = <50000000>;
                                        reg = <0>;
index 62981b39c8159255400a8de1292857bf60da994a..d74b8d9d84aa41473889e8c3bd3f4c6c6a68559a 100644 (file)
                        spi0: spi@fffcc000 {
                                status = "okay";
                                cs-gpios = <&pioA 28 0>, <0>, <0>, <0>;
-                               mtd_dataflash@0 {
+                               flash@0 {
                                        compatible = "atmel,at45", "atmel,dataflash";
                                        spi-max-frequency = <15000000>;
                                        reg = <0>;
index 6d1264de606069c41116d1c330caab15e472ccde..5f4eaa618ab47c7ee5b5cb3a43170544afbbc730 100644 (file)
        cs-gpios = <&pioA 14 0>, <0>, <0>, <0>;
        status = "disabled"; /* conflicts with mmc1 */
 
-       m25p80@0 {
+       flash@0 {
                compatible = "atmel,at25df321a";
                spi-max-frequency = <50000000>;
                reg = <0>;
index 87c517d65f62fee6b439c8d32fcd9361e52a3f3e..e9aecac4f5b5b462d2c077d34d377bf1fa1437b1 100644 (file)
        status = "okay";
        pinctrl-names = "default";
        pinctrl-0 = <&spi1_pins &spi1_cs0_pin>;
-       flash: m25p80@0 {
+       flash: flash@0 {
                #address-cells = <1>;
                #size-cells = <1>;
                compatible = "jedec,spi-nor";
index 5126e2d72ed7893bb76ac9eb807aba84b21c6443..778796c10af86d2e9a5bd841426db0e68fb6396a 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&mcspi1_pins>;
 
-       m25p80@0 {
+       flash@0 {
                compatible = "w25x32";
                spi-max-frequency = <48000000>;
                reg = <0>;
index 0a11bacffc1f17a76f9469a8af69da3a7deeadef..5733e3a4ea8e71e54a5c0bc3750ccb57f9470271 100644 (file)
                        reg = <0x1d0010 0x4>;
                        reg-names = "sysc";
                        ti,sysc-midle = <SYSC_IDLE_FORCE>,
-                                       <SYSC_IDLE_NO>,
-                                       <SYSC_IDLE_SMART>;
+                                       <SYSC_IDLE_NO>;
                        ti,sysc-sidle = <SYSC_IDLE_FORCE>,
                                        <SYSC_IDLE_NO>,
                                        <SYSC_IDLE_SMART>;
+                       power-domains = <&prm_vpe>;
                        clocks = <&vpe_clkctrl DRA7_VPE_VPE_CLKCTRL 0>;
                        clock-names = "fck";
                        #address-cells = <1>;
index 097ec35c62d808c74298ecedebc82ee54da79c2e..0d58da1c0cc51ae417f443d05473c08c77520d2a 100644 (file)
@@ -26,7 +26,7 @@
                                pinctrl-0 = <&mmc0_4bit_pins_a
                                             &mmc0_sck_cfg
                                             &en_sd_pwr>;
-                               broken-cd = <1>;
+                               broken-cd;
                                bus-width = <4>;
                                vmmc-supply = <&reg_vddio_sd0>;
                                status = "okay";
index ed2739e390856b68fd1bff0608b11f09f5e5876e..bd763bae596b0222bdd749c77fd3a1587fb18533 100644 (file)
        codec: sgtl5000@a {
                compatible = "fsl,sgtl5000";
                reg = <0x0a>;
+               pinctrl-names = "default";
+               pinctrl-0 = <&pinctrl_sgtl5000>;
                clocks = <&clks IMX6QDL_CLK_CKO>;
                VDDA-supply = <&reg_module_3v3_audio>;
                VDDIO-supply = <&reg_module_3v3>;
                        MX6QDL_PAD_DISP0_DAT21__AUD4_TXD        0x130b0
                        MX6QDL_PAD_DISP0_DAT22__AUD4_TXFS       0x130b0
                        MX6QDL_PAD_DISP0_DAT23__AUD4_RXD        0x130b0
-                       /* SGTL5000 sys_mclk */
-                       MX6QDL_PAD_GPIO_5__CCM_CLKO1            0x130b0
                >;
        };
 
                >;
        };
 
+       pinctrl_sgtl5000: sgtl5000grp {
+               fsl,pins = <
+                       MX6QDL_PAD_GPIO_5__CCM_CLKO1    0x130b0
+               >;
+       };
+
        pinctrl_spdif: spdifgrp {
                fsl,pins = <
                        MX6QDL_PAD_GPIO_16__SPDIF_IN  0x1b0b0
index 563bf9d44fe0d0fc6c1c18d133fe75feb79263fe..0b90c3f59f8987a0521a7a97246b25cecc2cd12a 100644 (file)
                regulators {
                        bcore1 {
                                regulator-name = "bcore1";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        bcore2 {
                                regulator-name = "bcore2";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        bpro {
                                regulator-name = "bpro";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        bperi {
                                regulator-name = "bperi";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        bmem {
                                regulator-name = "bmem";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        ldo2 {
                                regulator-name = "ldo2";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <1800000>;
                        };
 
                        ldo3 {
                                regulator-name = "ldo3";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        ldo4 {
                                regulator-name = "ldo4";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        ldo5 {
                                regulator-name = "ldo5";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        ldo6 {
                                regulator-name = "ldo6";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        ldo7 {
                                regulator-name = "ldo7";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        ldo8 {
                                regulator-name = "ldo8";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        ldo9 {
                                regulator-name = "ldo9";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        ldo10 {
                                regulator-name = "ldo10";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        ldo11 {
                                regulator-name = "ldo11";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <300000>;
                                regulator-max-microvolt = <3300000>;
                        };
 
                        bio {
                                regulator-name = "bio";
-                               regulator-always-on = <1>;
+                               regulator-always-on;
                                regulator-min-microvolt = <1800000>;
                                regulator-max-microvolt = <1800000>;
                        };
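
Devicetree boolean properties are presence-based: regulator-always-on = <1> and the bare regulator-always-on; read identically, so the hunks above only drop a meaningless value. The consuming side, schematically:

#include <linux/of.h>

static bool regulator_is_always_on(const struct device_node *np)
{
	/* presence alone makes this true; the "= <1>" added nothing */
	return of_property_read_bool(np, "regulator-always-on");
}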
index 7cda6944501d94db8900d62a2413b6b610902a5f..205e4d4627028c3ec5e71851799f740fb2b36c8e 100644 (file)
@@ -72,8 +72,8 @@
                        st,settling = <2>;
                        st,fraction-z = <7>;
                        st,i-drive = <1>;
-                       touchscreen-inverted-x = <1>;
-                       touchscreen-inverted-y = <1>;
+                       touchscreen-inverted-x;
+                       touchscreen-inverted-y;
                };
        };
 };
index 7f35a06dff95b5430cf8a1a6e3c9c0f809b1e91f..951a2a6c5a65783db36e66124ea1f7dd0d71c48a 100644 (file)
@@ -37,7 +37,7 @@
 
        reg_sd1_vmmc: regulator-sd1-vmmc {
                compatible = "regulator-gpio";
-               gpio = <&gpio5 9 GPIO_ACTIVE_HIGH>;
+               gpios = <&gpio5 9 GPIO_ACTIVE_HIGH>;
                pinctrl-names = "default";
                pinctrl-0 = <&pinctrl_snvs_reg_sd>;
                regulator-always-on;
index 2a0a98fe67f06ea46f19a3bc3f6f7bab0ff80f19..3240c67e0c3920c802425b3a947a5bc109d81973 100644 (file)
        model = "LogicPD Zoom OMAP35xx SOM-LV Development Kit";
        compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3430", "ti,omap3";
 };
+
+&omap3_pmx_core2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&hsusb2_2_pins>;
+       hsusb2_2_pins: pinmux_hsusb2_2_pins {
+               pinctrl-single,pins = <
+                       OMAP3430_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3)            /* etk_d10.hsusb2_clk */
+                       OMAP3430_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3)            /* etk_d11.hsusb2_stp */
+                       OMAP3430_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3)    /* etk_d12.hsusb2_dir */
+                       OMAP3430_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3)    /* etk_d13.hsusb2_nxt */
+                       OMAP3430_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3)    /* etk_d14.hsusb2_data0 */
+                       OMAP3430_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3)    /* etk_d15.hsusb2_data1 */
+               >;
+       };
+};
index a604d92221a4f3817e56e2ddf70df5d0d6097882..c757f0d7781c1b093717cb538c701d009131a6c2 100644 (file)
        model = "LogicPD Zoom DM3730 SOM-LV Development Kit";
        compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3630", "ti,omap3";
 };
+
+&omap3_pmx_core2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&hsusb2_2_pins>;
+       hsusb2_2_pins: pinmux_hsusb2_2_pins {
+               pinctrl-single,pins = <
+                       OMAP3630_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3)            /* etk_d10.hsusb2_clk */
+                       OMAP3630_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3)            /* etk_d11.hsusb2_stp */
+                       OMAP3630_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3)    /* etk_d12.hsusb2_dir */
+                       OMAP3630_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3)    /* etk_d13.hsusb2_nxt */
+                       OMAP3630_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3)    /* etk_d14.hsusb2_data0 */
+                       OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3)    /* etk_d15.hsusb2_data1 */
+               >;
+       };
+};
index b56524cc7fe27a2302676630607701de95713987..55b619c99e24d67ce8fa11ff53b003a6019a206b 100644 (file)
        };
 };
 
-&omap3_pmx_core2 {
-       pinctrl-names = "default";
-       pinctrl-0 = <&hsusb2_2_pins>;
-       hsusb2_2_pins: pinmux_hsusb2_2_pins {
-               pinctrl-single,pins = <
-                       OMAP3630_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3)            /* etk_d10.hsusb2_clk */
-                       OMAP3630_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3)            /* etk_d11.hsusb2_stp */
-                       OMAP3630_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3)    /* etk_d12.hsusb2_dir */
-                       OMAP3630_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3)    /* etk_d13.hsusb2_nxt */
-                       OMAP3630_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3)    /* etk_d14.hsusb2_data0 */
-                       OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3)    /* etk_d15.hsusb2_data1 */
-               >;
-       };
-};
-
 &uart2 {
        interrupts-extended = <&intc 73 &omap3_pmx_core OMAP3_UART2_RX>;
        pinctrl-names = "default";
index b4664ab002566b5f1173cdeeebc1de78ecfc98ba..d3da8b1b473b8ee6aa9cac0882eef809bc8c2b26 100644 (file)
                gpmc,device-width = <2>;
                gpmc,wait-pin = <0>;
                gpmc,burst-length = <4>;
-               gpmc,cycle2cycle-samecsen = <1>;
-               gpmc,cycle2cycle-diffcsen = <1>;
+               gpmc,cycle2cycle-samecsen;
+               gpmc,cycle2cycle-diffcsen;
                gpmc,cs-on-ns = <0>;
                gpmc,cs-rd-off-ns = <45>;
                gpmc,cs-wr-off-ns = <45>;
index 7e3d8147e2c1cb6ee764337e3257a0beb13a4ea1..0365f06165e90f5ad7f40b48c30fd23647d73e80 100644 (file)
@@ -31,6 +31,8 @@
        aliases {
                display0 = &lcd;
                display1 = &tv0;
+               /delete-property/ mmc2;
+               /delete-property/ mmc3;
        };
 
        ldo_3v3: fixedregulator {
index cbe42c4153a0fd51f55e3df254c1e932249713bb..b4d286a6fab1c59c2f5c7a8909cee00c5ee427ab 100644 (file)
@@ -76,7 +76,7 @@
                pinconf {
                        pins = "gpio20", "gpio21";
                        drive-strength = <2>;
-                       bias-disable = <0>;
+                       bias-disable;
                };
        };
 
                pinconf {
                        pins = "gpio24", "gpio25";
                        drive-strength = <2>;
-                       bias-disable = <0>;
+                       bias-disable;
                };
        };
 
                pinconf {
                        pins = "gpio8", "gpio9";
                        drive-strength = <2>;
-                       bias-disable = <0>;
+                       bias-disable;
                };
        };
 
                pinconf {
                        pins = "gpio12", "gpio13";
                        drive-strength = <2>;
-                       bias-disable = <0>;
+                       bias-disable;
                };
        };
 
                pinconf {
                        pins = "gpio16", "gpio17";
                        drive-strength = <2>;
-                       bias-disable = <0>;
+                       bias-disable;
                };
        };
 
                pinconf {
                        pins = "gpio84", "gpio85";
                        drive-strength = <2>;
-                       bias-disable = <0>;
+                       bias-disable;
                };
        };
 
index 996f4458d9fc531a6d36b712ad969322bceaf7a6..8cb04aa8ed2fe9ab0f2cc122cdc72f6c3f484169 100644 (file)
 
                        snps,axi-config = <&stmmac_axi_setup>;
                        snps,pbl = <32>;
-                       snps,aal = <1>;
+                       snps,aal;
 
                        qcom,nss-common = <&nss_common>;
                        qcom,qsgmii-csr = <&qsgmii_csr>;
 
                        snps,axi-config = <&stmmac_axi_setup>;
                        snps,pbl = <32>;
-                       snps,aal = <1>;
+                       snps,aal;
 
                        qcom,nss-common = <&nss_common>;
                        qcom,qsgmii-csr = <&qsgmii_csr>;
 
                        snps,axi-config = <&stmmac_axi_setup>;
                        snps,pbl = <32>;
-                       snps,aal = <1>;
+                       snps,aal;
 
                        qcom,nss-common = <&nss_common>;
                        qcom,qsgmii-csr = <&qsgmii_csr>;
 
                        snps,axi-config = <&stmmac_axi_setup>;
                        snps,pbl = <32>;
-                       snps,aal = <1>;
+                       snps,aal;
 
                        qcom,nss-common = <&nss_common>;
                        qcom,qsgmii-csr = <&qsgmii_csr>;
index a499de8a7a6455df763d004c2fc60fc78e42704f..3652c9e2412442850bbc7e1d5b1097ec7a414a40 100644 (file)
@@ -26,7 +26,7 @@
                        spi0: spi@f0004000 {
                                dmas = <0>, <0>;        /*  Do not use DMA for spi0 */
 
-                               m25p80@0 {
+                               flash@0 {
                                        compatible = "atmel,at25df321a";
                                        spi-max-frequency = <50000000>;
                                        reg = <0>;
index fa9e5e2a745d39d8cebcb4ebd7b68c61b732b13a..5d9e97fecf834fbbee149ca8b830e0d7b568c782 100644 (file)
@@ -25,7 +25,7 @@
                        spi0: spi@f0004000 {
                                dmas = <0>, <0>;        /*  Do not use DMA for spi0 */
 
-                               m25p80@0 {
+                               flash@0 {
                                        compatible = "atmel,at25df321a";
                                        spi-max-frequency = <50000000>;
                                        reg = <0>;
index 4decd3a91a764774da31f3098c7b98bef637bca4..f691c8f08d0477830ca225670347db8c883ce74e 100644 (file)
                                #size-cells = <0>;
                                clocks = <&pmc PMC_TYPE_PERIPHERAL 39>;
                                atmel,fifo-size = <32>;
-                               dmas = <&dma0 AT91_XDMAC_DT_PERID(7)>,
-                                       <&dma0 AT91_XDMAC_DT_PERID(8)>;
-                               dma-names = "rx", "tx";
+                               dmas = <&dma0 AT91_XDMAC_DT_PERID(8)>,
+                                       <&dma0 AT91_XDMAC_DT_PERID(7)>;
+                               dma-names = "tx", "rx";
                                status = "disabled";
                        };
                };
                                #size-cells = <0>;
                                clocks = <&pmc PMC_TYPE_PERIPHERAL 46>;
                                atmel,fifo-size = <32>;
-                               dmas = <&dma0 AT91_XDMAC_DT_PERID(21)>,
-                                       <&dma0 AT91_XDMAC_DT_PERID(22)>;
-                               dma-names = "rx", "tx";
+                               dmas = <&dma0 AT91_XDMAC_DT_PERID(22)>,
+                                       <&dma0 AT91_XDMAC_DT_PERID(21)>;
+                               dma-names = "tx", "rx";
                                status = "disabled";
                        };
                };
                                #size-cells = <0>;
                                clocks = <&pmc PMC_TYPE_PERIPHERAL 47>;
                                atmel,fifo-size = <32>;
-                               dmas = <&dma0 AT91_XDMAC_DT_PERID(23)>,
-                                       <&dma0 AT91_XDMAC_DT_PERID(24)>;
-                               dma-names = "rx", "tx";
+                               dmas = <&dma0 AT91_XDMAC_DT_PERID(24)>,
+                                       <&dma0 AT91_XDMAC_DT_PERID(23)>;
+                               dma-names = "tx", "rx";
                                status = "disabled";
                        };
                };
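
The swaps above bring the dmas phandle list back in line with dma-names: consumers request channels by name, and the name's position selects the specifier at the same index, so a mismatched order silently hands the TX descriptor to the RX side. Schematically (cleanup on partial failure elided):

#include <linux/dmaengine.h>
#include <linux/err.h>

static int request_flexcom_dma(struct device *dev)
{
	/* each name picks the dmas entry at the matching dma-names index */
	struct dma_chan *tx = dma_request_chan(dev, "tx");
	struct dma_chan *rx = dma_request_chan(dev, "rx");

	if (IS_ERR(tx) || IS_ERR(rx))
		return -ENODEV;
	return 0;
}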
index 4cbadcb410841d1bce8ebfd4558d08bbe73e83db..ddd1cf4d05543e67e61aa91db1a07fba5dcac4d9 100644 (file)
                                        };
                                };
 
-                               m25p80@1 {
+                               flash@1 {
                                        compatible = "st,m25p80";
                                        reg = <1>;
                                        spi-max-frequency = <12000000>;
index fd194ebeedc92c02ce558836ad51f62415d5e5bc..3a51a41eb5e4d4ffebfae79f604817dfb0b65fb3 100644 (file)
                                cs-gpios = <&gpiopinctrl 80 0>, <&gpiopinctrl 24 0>,
                                           <&gpiopinctrl 85 0>;
 
-                               m25p80@0 {
+                               flash@0 {
                                        compatible = "m25p80";
                                        reg = <0>;
                                        spi-max-frequency = <12000000>;
index 33ae5e0590df6196e75f25cfd10f9d62c31f39b8..ac53ee3c496b283d8373187ba5fb31a61620cff8 100644 (file)
        #size-cells = <0>;
        status = "okay";
 
-       flash0: is25lp016d@0 {
+       flash0: flash@0 {
                compatible = "jedec,spi-nor";
                reg = <0>;
                spi-max-frequency = <133000000>;
index e222d2d2cb4496df351d862281583da3db74517d..d142dd30e16b32cfe50f2c4e76096de9703ea5a2 100644 (file)
        #size-cells = <0>;
        status = "okay";
 
-       flash0: mx66l51235l@0 {
+       flash0: flash@0 {
                compatible = "jedec,spi-nor";
                reg = <0>;
                spi-rx-bus-width = <4>;
                #size-cells = <1>;
        };
 
-       flash1: mx66l51235l@1 {
+       flash1: flash@1 {
                compatible = "jedec,spi-nor";
                reg = <1>;
                spi-rx-bus-width = <4>;
index 8a0cfbfd0c452bc5e37b0c8dc9b61d7bf0633cb1..b6cb9cdf8197336112c8b160d22d97c6fde8b30b 100644 (file)
@@ -60,7 +60,7 @@
                        spi0: spi@fffa4000 {
                                cs-gpios = <&pioB 15 GPIO_ACTIVE_HIGH>;
                                status = "okay";
-                               mtd_dataflash@0 {
+                               flash@0 {
                                        compatible = "atmel,at45", "atmel,dataflash";
                                        reg = <0>;
                                        spi-max-frequency = <15000000>;
index a7acfee11ffcb63f0df4f80ed4a0b175946ee778..a80bc8a43091db668b8743b726ff024f1a07b96c 100644 (file)
@@ -49,11 +49,13 @@ CONFIG_ATA=y
 CONFIG_PATA_FTIDE010=y
 CONFIG_NETDEVICES=y
 CONFIG_TUN=y
+CONFIG_NET_DSA_REALTEK=y
 CONFIG_NET_DSA_REALTEK_SMI=y
+CONFIG_NET_DSA_REALTEK_RTL8366RB=y
 CONFIG_GEMINI_ETHERNET=y
+CONFIG_MARVELL_PHY=y
 CONFIG_MDIO_BITBANG=y
 CONFIG_MDIO_GPIO=y
-CONFIG_MARVELL_PHY=y
 CONFIG_INPUT_EVDEV=y
 CONFIG_KEYBOARD_GPIO=y
 # CONFIG_INPUT_MOUSE is not set
@@ -66,6 +68,7 @@ CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_I2C_GPIO=y
 CONFIG_SPI=y
 CONFIG_SPI_GPIO=y
+CONFIG_SENSORS_DRIVETEMP=y
 CONFIG_SENSORS_GPIO_FAN=y
 CONFIG_SENSORS_LM75=y
 CONFIG_THERMAL=y
diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig
deleted file mode 100644 (file)
index 015b7ef..0000000
+++ /dev/null
@@ -1,365 +0,0 @@
-# CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSFS_DEPRECATED_V2=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_RD_BZIP2=y
-CONFIG_RD_LZMA=y
-CONFIG_EXPERT=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_ARCH_PXA=y
-CONFIG_MACH_INTELMOTE2=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_PREEMPT=y
-CONFIG_AEABI=y
-CONFIG_ZBOOT_ROM_TEXT=0x0
-CONFIG_ZBOOT_ROM_BSS=0x0
-CONFIG_CMDLINE="root=/dev/mtdblock2 rootfstype=jffs2 console=ttyS2,115200 mem=32M"
-CONFIG_KEXEC=y
-CONFIG_FPE_NWFPE=y
-CONFIG_BINFMT_AOUT=m
-CONFIG_BINFMT_MISC=m
-CONFIG_PM=y
-CONFIG_APM_EMULATION=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_DIAG is not set
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_MIP6=m
-CONFIG_IPV6_TUNNEL=m
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_IPV6_SUBTREES=y
-CONFIG_NETFILTER=y
-CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CT_PROTO_SCTP=y
-CONFIG_NF_CT_PROTO_UDPLITE=y
-CONFIG_NF_CONNTRACK_AMANDA=m
-CONFIG_NF_CONNTRACK_FTP=m
-CONFIG_NF_CONNTRACK_H323=m
-CONFIG_NF_CONNTRACK_IRC=m
-CONFIG_NF_CONNTRACK_NETBIOS_NS=m
-CONFIG_NF_CONNTRACK_PPTP=m
-CONFIG_NF_CONNTRACK_SANE=m
-CONFIG_NF_CONNTRACK_SIP=m
-CONFIG_NF_CONNTRACK_TFTP=m
-CONFIG_NF_CT_NETLINK=m
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
-CONFIG_NETFILTER_XT_TARGET_LED=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NETFILTER_XT_TARGET_NFLOG=m
-CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
-CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_COMMENT=m
-CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
-CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
-CONFIG_NETFILTER_XT_MATCH_DCCP=m
-CONFIG_NETFILTER_XT_MATCH_DSCP=m
-CONFIG_NETFILTER_XT_MATCH_ESP=m
-CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_HELPER=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_LIMIT=m
-CONFIG_NETFILTER_XT_MATCH_MAC=m
-CONFIG_NETFILTER_XT_MATCH_MARK=m
-CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
-CONFIG_NETFILTER_XT_MATCH_QUOTA=m
-CONFIG_NETFILTER_XT_MATCH_REALM=m
-CONFIG_NETFILTER_XT_MATCH_SCTP=m
-CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
-CONFIG_NETFILTER_XT_MATCH_STRING=m
-CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_TIME=m
-CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_ADDRTYPE=m
-CONFIG_IP_NF_MATCH_AH=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_NF_NAT=m
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_TARGET_NETMAP=m
-CONFIG_IP_NF_TARGET_REDIRECT=m
-CONFIG_NF_NAT_SNMP_BASIC=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_TTL=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_MATCH_AH=m
-CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_FRAG=m
-CONFIG_IP6_NF_MATCH_OPTS=m
-CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_MH=m
-CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_FILTER=m
-CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_BRIDGE=m
-# CONFIG_BRIDGE_IGMP_SNOOPING is not set
-CONFIG_IEEE802154=y
-# CONFIG_WIRELESS is not set
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_FW_LOADER=m
-CONFIG_CONNECTOR=m
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_AFS_PARTS=y
-CONFIG_MTD_AR7_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_ADV_OPTIONS=y
-CONFIG_MTD_CFI_GEOMETRY=y
-# CONFIG_MTD_MAP_BANK_WIDTH_1 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_4 is not set
-# CONFIG_MTD_CFI_I2 is not set
-CONFIG_MTD_OTP=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_PXA2XX=y
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=y
-# CONFIG_WLAN is not set
-CONFIG_PPP=m
-CONFIG_PPP_MULTILINK=y
-CONFIG_PPP_FILTER=y
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_BSDCOMP=m
-# CONFIG_INPUT_MOUSEDEV is not set
-CONFIG_INPUT_EVDEV=y
-# CONFIG_KEYBOARD_ATKBD is not set
-CONFIG_KEYBOARD_GPIO=y
-CONFIG_KEYBOARD_PXA27x=y
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_INPUT_TOUCHSCREEN=y
-CONFIG_INPUT_MISC=y
-CONFIG_INPUT_UINPUT=y
-# CONFIG_SERIO is not set
-CONFIG_SERIAL_PXA=y
-CONFIG_SERIAL_PXA_CONSOLE=y
-CONFIG_LEGACY_PTY_COUNT=8
-# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_PXA=y
-CONFIG_SPI=y
-CONFIG_SPI_PXA2XX=y
-CONFIG_GPIO_SYSFS=y
-CONFIG_POWER_SUPPLY=y
-# CONFIG_HWMON is not set
-CONFIG_PMIC_DA903X=y
-CONFIG_REGULATOR=y
-CONFIG_REGULATOR_DEBUG=y
-CONFIG_REGULATOR_DA903X=y
-CONFIG_MEDIA_SUPPORT=y
-CONFIG_VIDEO_DEV=y
-CONFIG_MEDIA_TUNER_CUSTOMISE=y
-# CONFIG_MEDIA_TUNER_SIMPLE is not set
-# CONFIG_MEDIA_TUNER_TDA8290 is not set
-# CONFIG_MEDIA_TUNER_TDA827X is not set
-# CONFIG_MEDIA_TUNER_TDA18271 is not set
-# CONFIG_MEDIA_TUNER_TDA9887 is not set
-# CONFIG_MEDIA_TUNER_TEA5761 is not set
-# CONFIG_MEDIA_TUNER_TEA5767 is not set
-# CONFIG_MEDIA_TUNER_MT20XX is not set
-# CONFIG_MEDIA_TUNER_MT2060 is not set
-# CONFIG_MEDIA_TUNER_MT2266 is not set
-# CONFIG_MEDIA_TUNER_MT2131 is not set
-# CONFIG_MEDIA_TUNER_QT1010 is not set
-# CONFIG_MEDIA_TUNER_XC2028 is not set
-# CONFIG_MEDIA_TUNER_XC5000 is not set
-# CONFIG_MEDIA_TUNER_MXL5005S is not set
-# CONFIG_MEDIA_TUNER_MXL5007T is not set
-# CONFIG_MEDIA_TUNER_MC44S803 is not set
-# CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set
-CONFIG_VIDEO_PXA27x=y
-# CONFIG_V4L_USB_DRIVERS is not set
-# CONFIG_RADIO_ADAPTERS is not set
-CONFIG_FB=y
-CONFIG_FB_PXA=y
-CONFIG_FB_PXA_OVERLAY=y
-CONFIG_FB_PXA_PARAMETERS=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-CONFIG_BACKLIGHT_CLASS_DEVICE=y
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FONTS=y
-CONFIG_FONT_MINI_4x6=y
-CONFIG_SOUND=y
-CONFIG_SND=y
-CONFIG_SND_MIXER_OSS=y
-CONFIG_SND_PCM_OSS=y
-# CONFIG_SND_DRIVERS is not set
-# CONFIG_SND_ARM is not set
-# CONFIG_SND_SPI is not set
-# CONFIG_SND_USB is not set
-CONFIG_SND_SOC=y
-CONFIG_SND_PXA2XX_SOC=y
-# CONFIG_USB_HID is not set
-CONFIG_USB=y
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_GADGET=y
-CONFIG_USB_PXA27X=y
-CONFIG_USB_ETH=m
-# CONFIG_USB_ETH_RNDIS is not set
-CONFIG_MMC=y
-CONFIG_SDIO_UART=m
-CONFIG_MMC_PXA=y
-CONFIG_MMC_SPI=y
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=y
-CONFIG_LEDS_LP3944=y
-CONFIG_LEDS_TRIGGERS=y
-CONFIG_LEDS_TRIGGER_TIMER=y
-CONFIG_LEDS_TRIGGER_HEARTBEAT=y
-CONFIG_LEDS_TRIGGER_BACKLIGHT=y
-CONFIG_LEDS_TRIGGER_GPIO=y
-CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_PXA=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=m
-CONFIG_AUTOFS4_FS=y
-CONFIG_FUSE_FS=m
-CONFIG_CUSE=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
-CONFIG_JFFS2_FS_WBUF_VERIFY=y
-CONFIG_JFFS2_SUMMARY=y
-CONFIG_JFFS2_FS_XATTR=y
-CONFIG_JFFS2_COMPRESSION_OPTIONS=y
-CONFIG_JFFS2_LZO=y
-CONFIG_JFFS2_RUBIN=y
-CONFIG_CRAMFS=m
-CONFIG_SQUASHFS=m
-CONFIG_ROMFS_FS=m
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3_ACL=y
-CONFIG_SMB_FS=m
-CONFIG_CIFS=m
-CONFIG_CIFS_STATS=y
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_CODEPAGE_737=m
-CONFIG_NLS_CODEPAGE_775=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_852=m
-CONFIG_NLS_CODEPAGE_855=m
-CONFIG_NLS_CODEPAGE_857=m
-CONFIG_NLS_CODEPAGE_860=m
-CONFIG_NLS_CODEPAGE_861=m
-CONFIG_NLS_CODEPAGE_862=m
-CONFIG_NLS_CODEPAGE_863=m
-CONFIG_NLS_CODEPAGE_864=m
-CONFIG_NLS_CODEPAGE_865=m
-CONFIG_NLS_CODEPAGE_866=m
-CONFIG_NLS_CODEPAGE_869=m
-CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_CODEPAGE_950=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_CODEPAGE_949=m
-CONFIG_NLS_CODEPAGE_874=m
-CONFIG_NLS_ISO8859_8=m
-CONFIG_NLS_CODEPAGE_1250=m
-CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_KOI8_R=m
-CONFIG_NLS_KOI8_U=m
-CONFIG_NLS_UTF8=m
-CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
-# CONFIG_SCHED_DEBUG is not set
-CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_PROVE_LOCKING=y
-# CONFIG_FTRACE is not set
-CONFIG_DEBUG_USER=y
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_XTS=m
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_GHASH=m
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SHA256=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_AES=m
-CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST5=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_FCRYPT=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SEED=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC16=y
index 6e0c8c19b35cd6c3df30fb4f9b4d53965964bef5..d6a6811f0539b0a7877e8b37535921696d66e975 100644 (file)
@@ -673,6 +673,7 @@ CONFIG_VIDEO_STI_DELTA=m
 CONFIG_VIDEO_RENESAS_FDP1=m
 CONFIG_VIDEO_RENESAS_JPU=m
 CONFIG_VIDEO_RENESAS_VSP1=m
+CONFIG_VIDEO_TEGRA_VDE=m
 CONFIG_V4L_TEST_DRIVERS=y
 CONFIG_VIDEO_VIVID=m
 CONFIG_VIDEO_ADV7180=m
index 289d022acc4ba2831517d9e02d7ee51677797468..c209722399d7510bb9e139244c2731e334337098 100644 (file)
@@ -286,7 +286,8 @@ CONFIG_SERIO_NVEC_PS2=y
 CONFIG_NVEC_POWER=y
 CONFIG_NVEC_PAZ00=y
 CONFIG_STAGING_MEDIA=y
-CONFIG_TEGRA_VDE=y
+CONFIG_V4L_MEM2MEM_DRIVERS=y
+CONFIG_VIDEO_TEGRA_VDE=y
 CONFIG_CHROME_PLATFORMS=y
 CONFIG_CROS_EC=y
 CONFIG_CROS_EC_I2C=m
index 3b30913d7d8d33ccfc3abe49ce7d20ce29eb76f8..a352207a64d74d054fffe201c7a17c8c520f4029 100644 (file)
@@ -20,7 +20,6 @@ CONFIG_VFP=y
 CONFIG_NEON=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_CMA=y
 CONFIG_NET=y
@@ -41,6 +40,8 @@ CONFIG_MAC80211_LEDS=y
 CONFIG_CAIF=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_GNSS=y
+CONFIG_GNSS_SIRF_SERIAL=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=65536
 CONFIG_NETDEVICES=y
@@ -83,6 +84,8 @@ CONFIG_SPI_GPIO=y
 CONFIG_SPI_PL022=y
 CONFIG_GPIO_STMPE=y
 CONFIG_GPIO_TC3589X=y
+CONFIG_BATTERY_SAMSUNG_SDI=y
+CONFIG_AB8500_BM=y
 CONFIG_SENSORS_IIO_HWMON=y
 CONFIG_SENSORS_NTC_THERMISTOR=y
 CONFIG_THERMAL=y
@@ -98,10 +101,13 @@ CONFIG_VIDEO_V4L2_SUBDEV_API=y
 CONFIG_V4L2_FLASH_LED_CLASS=y
 CONFIG_DRM=y
 CONFIG_DRM_PANEL_NOVATEK_NT35510=y
+CONFIG_DRM_PANEL_NOVATEK_NT35560=y
+CONFIG_DRM_PANEL_SAMSUNG_DB7430=y
 CONFIG_DRM_PANEL_SAMSUNG_S6D16D0=y
+CONFIG_DRM_PANEL_SAMSUNG_S6D27A1=y
 CONFIG_DRM_PANEL_SAMSUNG_S6E63M0=y
 CONFIG_DRM_PANEL_SAMSUNG_S6E63M0_DSI=y
-CONFIG_DRM_PANEL_SONY_ACX424AKP=y
+CONFIG_DRM_PANEL_WIDECHIPS_WS2401=y
 CONFIG_DRM_LIMA=y
 CONFIG_DRM_MCDE=y
 CONFIG_FB=y
@@ -129,6 +135,7 @@ CONFIG_LEDS_LM3530=y
 CONFIG_LEDS_GPIO=y
 CONFIG_LEDS_LP55XX_COMMON=y
 CONFIG_LEDS_LP5521=y
+CONFIG_LEDS_REGULATOR=y
 CONFIG_LEDS_RT8515=y
 CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_RTC_CLASS=y
@@ -144,17 +151,22 @@ CONFIG_IIO_SW_TRIGGER=y
 CONFIG_BMA180=y
 CONFIG_BMC150_ACCEL=y
 CONFIG_IIO_ST_ACCEL_3AXIS=y
+# CONFIG_IIO_ST_ACCEL_SPI_3AXIS is not set
 CONFIG_IIO_RESCALE=y
 CONFIG_MPU3050_I2C=y
 CONFIG_IIO_ST_GYRO_3AXIS=y
+# CONFIG_IIO_ST_GYRO_SPI_3AXIS is not set
 CONFIG_INV_MPU6050_I2C=y
 CONFIG_BH1780=y
 CONFIG_GP2AP002=y
+CONFIG_TSL2772=y
 CONFIG_AK8974=y
 CONFIG_IIO_ST_MAGN_3AXIS=y
+# CONFIG_IIO_ST_MAGN_SPI_3AXIS is not set
 CONFIG_YAMAHA_YAS530=y
 CONFIG_IIO_HRTIMER_TRIGGER=y
 CONFIG_IIO_ST_PRESS=y
+# CONFIG_IIO_ST_PRESS_SPI is not set
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
@@ -173,10 +185,9 @@ CONFIG_CRYPTO_DEV_UX500_CRYP=y
 CONFIG_CRYPTO_DEV_UX500_HASH=y
 CONFIG_CRYPTO_DEV_UX500_DEBUG=y
 CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_KERNEL=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_FTRACE is not set
 CONFIG_DEBUG_USER=y
index 413abfb42989e9f3edc846aefc1ecef53a02daa1..f82a819eb0dbb49b54e64bd4b3cb3d506316212b 100644 (file)
@@ -48,6 +48,7 @@ static inline u32 read_ ## a64(void)          \
        return read_sysreg(a32);                \
 }                                              \
 
+CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1)
 CPUIF_MAP(ICC_PMR, ICC_PMR_EL1)
 CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1)
 CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1)
@@ -63,12 +64,6 @@ CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1)
 
 /* Low-level accessors */
 
-static inline void gic_write_eoir(u32 irq)
-{
-       write_sysreg(irq, ICC_EOIR1);
-       isb();
-}
-
 static inline void gic_write_dir(u32 val)
 {
        write_sysreg(val, ICC_DIR);
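
The open-coded gic_write_eoir() disappears in favour of a CPUIF_MAP() entry, which generates matching read/write accessors for the AArch32 alias of an AArch64 system register from a single line. Judging by the read_ half visible above, the macro expands roughly like this (a sketch, not the verbatim header):

#define CPUIF_MAP(a32, a64)			\
static inline void write_ ## a64(u32 val)	\
{						\
	write_sysreg(val, a32);			\
}						\
static inline u32 read_ ## a64(void)		\
{						\
	return read_sysreg(a32);		\
}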
index 0c70eb688a00cb82484e2f474fcfa0e1ca796ce6..2a0739a2350bef8e2590f7b2d7464e9d60717568 100644 (file)
@@ -440,6 +440,9 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
 #define ARCH_HAS_VALID_PHYS_ADDR_RANGE
 extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
 extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
+extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+                                       unsigned long flags);
+#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
 #endif
 
 /*
index 06508698abb853fdf05ca391e6fcd798edd52fb3..7a8682468a847b2313816f5798244b9ac2da5b03 100644 (file)
@@ -1145,7 +1145,7 @@ vector_bhb_loop8_\name:
 
        @ bhb workaround
        mov     r0, #8
-3:     b       . + 4
+3:     W(b)    . + 4
        subs    r0, r0, #1
        bne     3b
        dsb
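
The Spectre-BHB loop relies on each branch being exactly one instruction wide so that ". + 4" lands on the following instruction; on Thumb-2 kernels a bare b can assemble to a 16-bit encoding and break that arithmetic. W() forces the wide form, conventionally defined along these lines (a sketch of arch/arm/include/asm/unified.h, assumed from its usage here):

#ifdef CONFIG_THUMB2_KERNEL
#define W(instr)	instr.w		/* force the 32-bit encoding */
#else
#define W(instr)	instr		/* ARM mode is always 32-bit anyway */
#endif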
index 428012687a802a9f2cbeeb9e24f950e5bd5f960f..7f7f6bae21c2d7dd0ffb8f17e58b9fb5ea25cb47 100644 (file)
@@ -1101,11 +1101,13 @@ static int __init da850_evm_config_emac(void)
        int ret;
        u32 val;
        struct davinci_soc_info *soc_info = &davinci_soc_info;
-       u8 rmii_en = soc_info->emac_pdata->rmii_en;
+       u8 rmii_en;
 
        if (!machine_is_davinci_da850_evm())
                return 0;
 
+       rmii_en = soc_info->emac_pdata->rmii_en;
+
        cfg_chip3_base = DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP3_REG);
 
        val = __raw_readl(cfg_chip3_base);
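
The initializer moves below the machine check because C runs initializers before any statement: on boards other than the DA850 EVM the emac_pdata pointer may not be valid, and chasing it before the guard is a potential NULL dereference. The shape of the hazard, schematically (function body only):

	u8 rmii_en = soc_info->emac_pdata->rmii_en;	/* too early: runs on every board */

	if (!machine_is_davinci_da850_evm())
		return 0;

	rmii_en = soc_info->emac_pdata->rmii_en;	/* safe: the guard has run */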
index cc75087134d38c75ee8c08e30d637221303e6c14..4fa6ea5461b79b33d4ef79f9d2d42daddfb2a499 100644 (file)
@@ -148,8 +148,10 @@ static struct clk_hw *ep93xx_clk_register_gate(const char *name,
        psc->lock = &clk_lock;
 
        clk = clk_register(NULL, &psc->hw);
-       if (IS_ERR(clk))
+       if (IS_ERR(clk)) {
                kfree(psc);
+               return ERR_CAST(clk);
+       }
 
        return &psc->hw;
 }
@@ -207,7 +209,7 @@ static int ep93xx_mux_determine_rate(struct clk_hw *hw,
                                struct clk_rate_request *req)
 {
        unsigned long rate = req->rate;
-       struct clk *best_parent = 0;
+       struct clk *best_parent = NULL;
        unsigned long __parent_rate;
        unsigned long best_rate = 0, actual_rate, mclk_rate;
        unsigned long best_parent_rate;
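
Two fixes in one driver: the gate path used to free psc on failure yet still return &psc->hw, a pointer into freed memory, and a pointer was initialized with 0 instead of NULL. The fix leans on the kernel's error-pointer idiom: clk_register() encodes an errno inside the returned pointer, IS_ERR() detects it, and ERR_CAST() re-types the same encoded error for a function that returns a different pointer type. A sketch with a hypothetical wrapper type:

#include <linux/clk-provider.h>
#include <linux/err.h>
#include <linux/slab.h>

struct my_psc {			/* invented stand-in for the driver struct */
	struct clk_hw hw;
};

static struct clk_hw *register_gate(struct my_psc *psc)
{
	struct clk *clk = clk_register(NULL, &psc->hw);

	if (IS_ERR(clk)) {
		kfree(psc);		/* undo the allocation... */
		return ERR_CAST(clk);	/* ...and pass the errno up, re-typed */
	}
	return &psc->hw;
}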
index f7d993628cb700f87b62108141da7658d03c17bf..a9c1efcf7c9cfd8aa323d759f5c13f1eb309d138 100644 (file)
@@ -17,7 +17,6 @@ menuconfig ARCH_EXYNOS
        select EXYNOS_PMU
        select EXYNOS_SROM
        select EXYNOS_PM_DOMAINS if PM_GENERIC_DOMAINS
-       select GPIOLIB
        select HAVE_ARM_ARCH_TIMER if ARCH_EXYNOS5
        select HAVE_ARM_SCU if SMP
        select PINCTRL
index 2882674a1c39914090eeb829e92e3a960097a132..7135a0ac994956be091a373922c929bac4ca4305 100644 (file)
@@ -7,6 +7,8 @@
 #include <asm/traps.h>
 #include <asm/ptrace.h>
 
+#include "iop3xx.h"
+
 void iop_enable_cp6(void)
 {
        u32 temp;
index 5c3845730dbf547d8c34040d069bab4b51006be5..0b80f8bcd3047762da5f7c6388464edc828e30ea 100644 (file)
@@ -314,10 +314,12 @@ void __init omap_gic_of_init(void)
 
        np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-gic");
        gic_dist_base_addr = of_iomap(np, 0);
+       of_node_put(np);
        WARN_ON(!gic_dist_base_addr);
 
        np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-twd-timer");
        twd_base = of_iomap(np, 0);
+       of_node_put(np);
        WARN_ON(!twd_base);
 
 skip_errata_init:
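
of_find_compatible_node() returns its result with the node's reference count raised, so every successful lookup must eventually be balanced with of_node_put(); the two added calls plug exactly such refcount leaks. The general shape:

#include <linux/of.h>
#include <linux/of_address.h>

/* look up, map, then drop the reference the lookup handed back */
static void __iomem *map_compatible(const char *compat)
{
	struct device_node *np;
	void __iomem *base;

	np = of_find_compatible_node(NULL, NULL, compat);
	if (!np)
		return NULL;

	base = of_iomap(np, 0);
	of_node_put(np);	/* the mapping survives; the node reference does not */
	return base;
}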
index e5c2fce281cd66f0c6cb13bde87a6ac415b262dd..abdb99fe1e972c44adb427000c9ad2860e9d72fd 100644 (file)
@@ -4,10 +4,7 @@ menuconfig ARCH_SUNXI
        depends on ARCH_MULTI_V5 || ARCH_MULTI_V7
        select ARCH_HAS_RESET_CONTROLLER
        select CLKSRC_MMIO
-       select GENERIC_IRQ_CHIP
        select GPIOLIB
-       select IRQ_DOMAIN_HIERARCHY
-       select IRQ_FASTEOI_HIERARCHY_HANDLERS
        select PINCTRL
        select PM_OPP
        select SUN4I_TIMER
@@ -22,10 +19,12 @@ if ARCH_MULTI_V7
 config MACH_SUN4I
        bool "Allwinner A10 (sun4i) SoCs support"
        default ARCH_SUNXI
+       select SUN4I_INTC
 
 config MACH_SUN5I
        bool "Allwinner A10s / A13 (sun5i) SoCs support"
        default ARCH_SUNXI
+       select SUN4I_INTC
        select SUN5I_HSTIMER
 
 config MACH_SUN6I
@@ -34,6 +33,8 @@ config MACH_SUN6I
        select ARM_GIC
        select MFD_SUN6I_PRCM
        select SUN5I_HSTIMER
+       select SUN6I_R_INTC
+       select SUNXI_NMI_INTC
 
 config MACH_SUN7I
        bool "Allwinner A20 (sun7i) SoCs support"
@@ -43,17 +44,21 @@ config MACH_SUN7I
        select ARCH_SUPPORTS_BIG_ENDIAN
        select HAVE_ARM_ARCH_TIMER
        select SUN5I_HSTIMER
+       select SUNXI_NMI_INTC
 
 config MACH_SUN8I
        bool "Allwinner sun8i Family SoCs support"
        default ARCH_SUNXI
        select ARM_GIC
        select MFD_SUN6I_PRCM
+       select SUN6I_R_INTC
+       select SUNXI_NMI_INTC
 
 config MACH_SUN9I
        bool "Allwinner (sun9i) SoCs support"
        default ARCH_SUNXI
        select ARM_GIC
+       select SUNXI_NMI_INTC
 
 config ARCH_SUNXI_MC_SMP
        bool
@@ -69,6 +74,7 @@ if ARCH_MULTI_V5
 config MACH_SUNIV
        bool "Allwinner ARMv5 F-series (suniv) SoCs support"
        default ARCH_SUNXI
+       select SUN4I_INTC
        help
          Support for Allwinner suniv ARMv5 SoCs.
          (F1C100A, F1C100s, F1C200s, F1C500, F1C600)
index 1da11bdb1dfbd6f1ce906b83ba2d84ecd396f316..6e6985e756afb7eeda5f0b0b0f5cce94fa3c197b 100644 (file)
@@ -122,13 +122,13 @@ static inline bool cluster_is_a15(u32 cluster)
 }
 
 /**
- * ve_spc_global_wakeup_irq()
+ * ve_spc_global_wakeup_irq() - sets/clears global wakeup IRQs
+ *
+ * @set: if true, global wake-up IRQs are set, if false they are cleared
  *
  * Function to set/clear global wakeup IRQs. Not protected by locking since
  * it might be used in code paths where normal cacheable locks are not
  * working. Locking must be provided by the caller to ensure atomicity.
- *
- * @set: if true, global wake-up IRQs are set, if false they are cleared
  */
 void ve_spc_global_wakeup_irq(bool set)
 {
@@ -145,15 +145,15 @@ void ve_spc_global_wakeup_irq(bool set)
 }
 
 /**
- * ve_spc_cpu_wakeup_irq()
- *
- * Function to set/clear per-CPU wake-up IRQs. Not protected by locking since
- * it might be used in code paths where normal cacheable locks are not
- * working. Locking must be provided by the caller to ensure atomicity.
+ * ve_spc_cpu_wakeup_irq() - sets/clears per-CPU wake-up IRQs
  *
  * @cluster: mpidr[15:8] bitfield describing cluster affinity level
  * @cpu: mpidr[7:0] bitfield describing cpu affinity level
  * @set: if true, wake-up IRQs are set, if false they are cleared
+ *
+ * Function to set/clear per-CPU wake-up IRQs. Not protected by locking since
+ * it might be used in code paths where normal cacheable locks are not
+ * working. Locking must be provided by the caller to ensure atomicity.
  */
 void ve_spc_cpu_wakeup_irq(u32 cluster, u32 cpu, bool set)
 {
@@ -200,14 +200,14 @@ void ve_spc_set_resume_addr(u32 cluster, u32 cpu, u32 addr)
 }
 
 /**
- * ve_spc_powerdown()
+ * ve_spc_powerdown() - enables/disables cluster powerdown
+ *
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @enable: if true enables powerdown, if false disables it
  *
  * Function to enable/disable cluster powerdown. Not protected by locking
  * since it might be used in code paths where normal cacheable locks are not
  * working. Locking must be provided by the caller to ensure atomicity.
- *
- * @cluster: mpidr[15:8] bitfield describing cluster affinity level
- * @enable: if true enables powerdown, if false disables it
  */
 void ve_spc_powerdown(u32 cluster, bool enable)
 {
@@ -228,7 +228,7 @@ static u32 standbywfi_cpu_mask(u32 cpu, u32 cluster)
 }
 
 /**
- * ve_spc_cpu_in_wfi(u32 cpu, u32 cluster)
+ * ve_spc_cpu_in_wfi() - Checks if the specified CPU is in WFI or not
  *
  * @cpu: mpidr[7:0] bitfield describing CPU affinity level within cluster
  * @cluster: mpidr[15:8] bitfield describing cluster affinity level
@@ -580,7 +580,7 @@ static int __init ve_spc_clk_init(void)
                }
 
                cluster = topology_physical_package_id(cpu_dev->id);
-               if (init_opp_table[cluster])
+               if (cluster < 0 || init_opp_table[cluster])
                        continue;
 
                if (ve_init_opp_table(cpu_dev))
index aa08bcb72db935f6e0b8012366936472ab08b10e..290702328a33704f1f1ee552b599231a2c58f122 100644 (file)
@@ -493,3 +493,11 @@ void __init early_ioremap_init(void)
 {
        early_ioremap_setup();
 }
+
+bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+                                unsigned long flags)
+{
+       unsigned long pfn = PHYS_PFN(offset);
+
+       return memblock_is_map_memory(pfn);
+}
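
[Editor's note] This new hook appears to be the arch side of memremap(): for MEMREMAP_WB requests on System RAM, the core code in kernel/iomem.c can hand back the existing linear-map alias, but only if arch_memremap_can_ram_remap() agrees; returning false forces a fresh mapping instead. A rough caller's-eye sketch (function name, phys and size are placeholders):

	#include <linux/io.h>

	static void *map_shared_ram(phys_addr_t phys, size_t size)
	{
		/* May reuse the linear map only when the arch hook allows it. */
		return memremap(phys, size, MEMREMAP_WB);
	}

With the arm64 implementation above, RAM that memblock marked NOMAP never gets aliased through the linear map.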
index 06dbfb968182de7c585e41308da09eec15360600..fb9f3eb6bf483d22041a06fc9eb17bdf87866558 100644 (file)
@@ -288,6 +288,7 @@ void cpu_v7_ca15_ibe(void)
 {
        if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0)))
                cpu_v7_spectre_v2_init();
+       cpu_v7_spectre_bhb_init();
 }
 
 void cpu_v7_bugs_init(void)
index ec5b082f3de6e35914ab6a73fe8e292fa6c08f7e..07eb69f9e7df3d2cbf77719f0c3df62f232ceac7 100644 (file)
@@ -337,12 +337,15 @@ int __init arch_xen_unpopulated_init(struct resource **res)
 
        if (!nr_reg) {
                pr_err("No extended regions are found\n");
+               of_node_put(np);
                return -EINVAL;
        }
 
        regs = kcalloc(nr_reg, sizeof(*regs), GFP_KERNEL);
-       if (!regs)
+       if (!regs) {
+               of_node_put(np);
                return -ENOMEM;
+       }
 
        /*
         * Create resource from extended regions provided by the hypervisor to be
@@ -403,8 +406,8 @@ int __init arch_xen_unpopulated_init(struct resource **res)
        *res = &xen_resource;
 
 err:
+       of_node_put(np);
        kfree(regs);
-
        return rc;
 }
 #endif
@@ -424,8 +427,10 @@ static void __init xen_dt_guest_init(void)
 
        if (of_address_to_resource(xen_node, GRANT_TABLE_INDEX, &res)) {
                pr_err("Xen grant table region is not found\n");
+               of_node_put(xen_node);
                return;
        }
+       of_node_put(xen_node);
        xen_grant_frames = res.start;
 }
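
[Editor's note] The Xen hunks above plug device_node leaks on the early-return paths. When a function accumulates several such exits, the kernel's usual idiom is a single unwind label so the put cannot be missed; a sketch under that convention (function name and sizes hypothetical):

	static int parse_extended_regions(void)
	{
		struct device_node *np;
		struct resource *regs;
		int rc = 0;

		np = of_find_compatible_node(NULL, NULL, "xen,xen");
		if (!np)
			return -ENODEV;

		regs = kcalloc(4, sizeof(*regs), GFP_KERNEL);
		if (!regs) {
			rc = -ENOMEM;
			goto out_put;
		}

		/* ... build resources from the regions ... */

		kfree(regs);
	out_put:
		of_node_put(np);	/* balanced on every exit */
		return rc;
	}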
 
index 57c4c995965f8291bf3c89303bc8c00dc0276dd4..20ea89d9ac2fa7cc1564f43ed518a6b18e52270b 100644 (file)
@@ -175,8 +175,6 @@ config ARM64
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DMA_CONTIGUOUS
        select HAVE_DYNAMIC_FTRACE
-       select HAVE_DYNAMIC_FTRACE_WITH_REGS \
-               if $(cc-option,-fpatchable-function-entry=2)
        select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
                if DYNAMIC_FTRACE_WITH_REGS
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -228,6 +226,17 @@ config ARM64
        help
          ARM 64-bit (AArch64) Linux support.
 
+config CLANG_SUPPORTS_DYNAMIC_FTRACE_WITH_REGS
+       def_bool CC_IS_CLANG
+       # https://github.com/ClangBuiltLinux/linux/issues/1507
+       depends on AS_IS_GNU || (AS_IS_LLVM && (LD_IS_LLD || LD_VERSION >= 23600))
+       select HAVE_DYNAMIC_FTRACE_WITH_REGS
+
+config GCC_SUPPORTS_DYNAMIC_FTRACE_WITH_REGS
+       def_bool CC_IS_GCC
+       depends on $(cc-option,-fpatchable-function-entry=2)
+       select HAVE_DYNAMIC_FTRACE_WITH_REGS
+
 config 64BIT
        def_bool y
 
@@ -678,7 +687,7 @@ config ARM64_ERRATUM_2051678
        default y
        help
          This option adds the workaround for ARM Cortex-A510 erratum ARM64_ERRATUM_2051678.
-         Affected Coretex-A510 might not respect the ordering rules for
+         Affected Cortex-A510 might not respect the ordering rules for
          hardware update of the page table's dirty bit. The workaround
          is to not enable the feature on affected CPUs.
 
index 30b123cde02c506f5fe8a4be98a7191293292fd5..6a6457fed7b20ae9081f887945c3b4958a9fe247 100644 (file)
@@ -11,12 +11,11 @@ config ARCH_ACTIONS
 config ARCH_SUNXI
        bool "Allwinner sunxi 64-bit SoC Family"
        select ARCH_HAS_RESET_CONTROLLER
-       select GENERIC_IRQ_CHIP
-       select IRQ_DOMAIN_HIERARCHY
-       select IRQ_FASTEOI_HIERARCHY_HANDLERS
        select PINCTRL
        select RESET_CONTROLLER
        select SUN4I_TIMER
+       select SUN6I_R_INTC
+       select SUNXI_NMI_INTC
        help
          This enables support for Allwinner sunxi based SoCs like the A64.
 
@@ -253,6 +252,7 @@ config ARCH_INTEL_SOCFPGA
 
 config ARCH_SYNQUACER
        bool "Socionext SynQuacer SoC Family"
+       select IRQ_FASTEOI_HIERARCHY_HANDLERS
 
 config ARCH_TEGRA
        bool "NVIDIA Tegra SoC Family"
index d61f43052a344597a131a2d582e8443b2e5e77f5..8e9ad1e51d665e33f949c1237809af6771020b00 100644 (file)
                compatible = "operating-points-v2";
                opp-shared;
 
-               opp-100000000 {
-                       opp-hz = /bits/ 64 <100000000>;
-                       opp-microvolt = <731000>;
-               };
-
-               opp-250000000 {
-                       opp-hz = /bits/ 64 <250000000>;
-                       opp-microvolt = <731000>;
-               };
-
-               opp-500000000 {
-                       opp-hz = /bits/ 64 <500000000>;
-                       opp-microvolt = <731000>;
-               };
-
-               opp-667000000 {
-                       opp-hz = /bits/ 64 <667000000>;
-                       opp-microvolt = <731000>;
-               };
-
                opp-1000000000 {
                        opp-hz = /bits/ 64 <1000000000>;
                        opp-microvolt = <761000>;
                compatible = "operating-points-v2";
                opp-shared;
 
-               opp-100000000 {
-                       opp-hz = /bits/ 64 <100000000>;
-                       opp-microvolt = <731000>;
-               };
-
-               opp-250000000 {
-                       opp-hz = /bits/ 64 <250000000>;
-                       opp-microvolt = <731000>;
-               };
-
-               opp-500000000 {
-                       opp-hz = /bits/ 64 <500000000>;
-                       opp-microvolt = <731000>;
-               };
-
-               opp-667000000 {
-                       opp-hz = /bits/ 64 <667000000>;
-                       opp-microvolt = <731000>;
-               };
-
                opp-1000000000 {
                        opp-hz = /bits/ 64 <1000000000>;
                        opp-microvolt = <731000>;
index 1e5d0ee5d541b88059ae8b3096a15e341b11a246..44c23c984034cc4799583e92282ae7799e11e710 100644 (file)
                compatible = "operating-points-v2";
                opp-shared;
 
-               opp-100000000 {
-                       opp-hz = /bits/ 64 <100000000>;
-                       opp-microvolt = <731000>;
-               };
-
-               opp-250000000 {
-                       opp-hz = /bits/ 64 <250000000>;
-                       opp-microvolt = <731000>;
-               };
-
-               opp-500000000 {
-                       opp-hz = /bits/ 64 <500000000>;
-                       opp-microvolt = <731000>;
-               };
-
-               opp-667000000 {
-                       opp-hz = /bits/ 64 <667000000>;
-                       opp-microvolt = <731000>;
-               };
-
                opp-1000000000 {
                        opp-hz = /bits/ 64 <1000000000>;
                        opp-microvolt = <731000>;
                compatible = "operating-points-v2";
                opp-shared;
 
-               opp-100000000 {
-                       opp-hz = /bits/ 64 <100000000>;
-                       opp-microvolt = <751000>;
-               };
-
-               opp-250000000 {
-                       opp-hz = /bits/ 64 <250000000>;
-                       opp-microvolt = <751000>;
-               };
-
-               opp-500000000 {
-                       opp-hz = /bits/ 64 <500000000>;
-                       opp-microvolt = <751000>;
-               };
-
-               opp-667000000 {
-                       opp-hz = /bits/ 64 <667000000>;
-                       opp-microvolt = <751000>;
-               };
-
                opp-1000000000 {
                        opp-hz = /bits/ 64 <1000000000>;
                        opp-microvolt = <771000>;
index bf9ae1e1016bfe73584e2745ba7fe962365c0eca..480afa2cc61f5b3d656f725c2d4c274328e4d991 100644 (file)
 
                cpu0: cpu@0 {
                        device_type = "cpu";
-                       compatible = "arm,cortex-a35","arm,armv8";
+                       compatible = "arm,cortex-a35";
                        reg = <0x0 0x0>;
                        enable-method = "psci";
                };
 
                cpu1: cpu@1 {
                        device_type = "cpu";
-                       compatible = "arm,cortex-a35","arm,armv8";
+                       compatible = "arm,cortex-a35";
                        reg = <0x0 0x1>;
                        enable-method = "psci";
                };
 
                cpu2: cpu@2 {
                        device_type = "cpu";
-                       compatible = "arm,cortex-a35","arm,armv8";
+                       compatible = "arm,cortex-a35";
                        reg = <0x0 0x2>;
                        enable-method = "psci";
                };
 
                cpu3: cpu@3 {
                        device_type = "cpu";
-                       compatible = "arm,cortex-a35","arm,armv8";
+                       compatible = "arm,cortex-a35";
                        reg = <0x0 0x3>;
                        enable-method = "psci";
                };
index 5751c48620edf426b17e70fb4f16100a692321e1..cadba194b149b5ae343fc78f02d738b3f0ced762 100644 (file)
                "",
                "eMMC_RST#", /* BOOT_12 */
                "eMMC_DS", /* BOOT_13 */
+               "", "",
                /* GPIOC */
                "SD_D0_B", /* GPIOC_0 */
                "SD_D1_B", /* GPIOC_1 */
index 3c07a89bfd27d42c5be06a950b0caec3e91562ac..80737731af3fe28f6d06641dfa75b48136f032bb 100644 (file)
                compatible = "operating-points-v2";
                opp-shared;
 
-               opp-100000000 {
-                       opp-hz = /bits/ 64 <100000000>;
-                       opp-microvolt = <730000>;
-               };
-
-               opp-250000000 {
-                       opp-hz = /bits/ 64 <250000000>;
-                       opp-microvolt = <730000>;
-               };
-
-               opp-500000000 {
-                       opp-hz = /bits/ 64 <500000000>;
-                       opp-microvolt = <730000>;
-               };
-
-               opp-667000000 {
-                       opp-hz = /bits/ 64 <666666666>;
-                       opp-microvolt = <750000>;
-               };
-
                opp-1000000000 {
                        opp-hz = /bits/ 64 <1000000000>;
                        opp-microvolt = <770000>;
index 1dc9d187601c54d4dd5dbe21521e79a14fe3e8ff..a0bd540f27d3d43e0e4819e99671ab348c72eced 100644 (file)
                pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
 
                ti,x-min = /bits/ 16 <125>;
-               touchscreen-size-x = /bits/ 16 <4008>;
+               touchscreen-size-x = <4008>;
                ti,y-min = /bits/ 16 <282>;
-               touchscreen-size-y = /bits/ 16 <3864>;
+               touchscreen-size-y = <3864>;
                ti,x-plate-ohms = /bits/ 16 <180>;
-               touchscreen-max-pressure = /bits/ 16 <255>;
-               touchscreen-average-samples = /bits/ 16 <10>;
+               touchscreen-max-pressure = <255>;
+               touchscreen-average-samples = <10>;
                ti,debounce-tol = /bits/ 16 <3>;
                ti,debounce-rep = /bits/ 16 <1>;
                ti,settle-delay-usec = /bits/ 16 <150>;
index 73addc0b8e57a5a23d3ff48ba02ae3478601e605..cce55c3c5df01295b9a417e7b8a327edf3124e89 100644 (file)
 
 &usbotg1 {
        dr_mode = "otg";
+       over-current-active-low;
        vbus-supply = <&reg_usb_otg1_vbus>;
        status = "okay";
 };
 
 &usbotg2 {
        dr_mode = "host";
+       disable-over-current;
        status = "okay";
 };
 
                fsl,pins = <
                        MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK    0xd6
                        MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI    0xd6
-                       MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK    0xd6
+                       MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO    0xd6
                        MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13      0xd6
                >;
        };
index 1e7badb2a82ed40222cefc0ba369af781bb9f9fc..f61e4847fa49e399d144c8ac1feaf6350dea6d57 100644 (file)
 
 &usbotg1 {
        dr_mode = "otg";
+       over-current-active-low;
        vbus-supply = <&reg_usb_otg1_vbus>;
        status = "okay";
 };
 
 &usbotg2 {
        dr_mode = "host";
+       disable-over-current;
        vbus-supply = <&reg_usb_otg2_vbus>;
        status = "okay";
 };
                fsl,pins = <
                        MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK    0xd6
                        MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI    0xd6
-                       MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK    0xd6
+                       MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO    0xd6
                        MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13      0xd6
                >;
        };
index 426483ec1f88bdf28d25da5992467d38a008f4d1..02361964896615ac4ce7315db171890c1e522896 100644 (file)
 
 &usbotg1 {
        dr_mode = "otg";
+       over-current-active-low;
        vbus-supply = <&reg_usb_otg1_vbus>;
        status = "okay";
 };
 
 &usbotg2 {
        dr_mode = "host";
+       disable-over-current;
        vbus-supply = <&reg_usb_otg2_vbus>;
        status = "okay";
 };
                fsl,pins = <
                        MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK    0xd6
                        MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI    0xd6
-                       MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK    0xd6
+                       MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO    0xd6
                        MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13      0xd6
                >;
        };
index 7dfee715a2c4db6f0c182ff754a730a6e44e0928..d8ce217c60166283bfaa3a6f9ed28f18032b3408 100644 (file)
                interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
                rohm,reset-snvs-powered;
 
+               #clock-cells = <0>;
+               clocks = <&osc_32k 0>;
+               clock-output-names = "clk-32k-out";
+
                regulators {
                        buck1_reg: BUCK1 {
                                regulator-name = "buck1";
index b16c7caf34c1163fa97aeac515983c21140d88de..87b5e23c766f7361cdb6b613603e2d80c23c60b8 100644 (file)
                pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
 
                ti,x-min = /bits/ 16 <125>;
-               touchscreen-size-x = /bits/ 16 <4008>;
+               touchscreen-size-x = <4008>;
                ti,y-min = /bits/ 16 <282>;
-               touchscreen-size-y = /bits/ 16 <3864>;
+               touchscreen-size-y = <3864>;
                ti,x-plate-ohms = /bits/ 16 <180>;
-               touchscreen-max-pressure = /bits/ 16 <255>;
-               touchscreen-average-samples = /bits/ 16 <10>;
+               touchscreen-max-pressure = <255>;
+               touchscreen-average-samples = <10>;
                ti,debounce-tol = /bits/ 16 <3>;
                ti,debounce-rep = /bits/ 16 <1>;
                ti,settle-delay-usec = /bits/ 16 <150>;
index 99f0f50266743198112dd91005814f74da76dc5d..5c0ca249056159c95e0156a7d0560b8f6e658198 100644 (file)
                                ranges;
 
                                sai2: sai@30020000 {
-                                       compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
+                                       compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
                                        reg = <0x30020000 0x10000>;
                                        interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
                                        clocks = <&clk IMX8MN_CLK_SAI2_IPG>,
                                };
 
                                sai3: sai@30030000 {
-                                       compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
+                                       compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
                                        reg = <0x30030000 0x10000>;
                                        interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
                                        clocks = <&clk IMX8MN_CLK_SAI3_IPG>,
                                };
 
                                sai5: sai@30050000 {
-                                       compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
+                                       compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
                                        reg = <0x30050000 0x10000>;
                                        interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
                                        clocks = <&clk IMX8MN_CLK_SAI5_IPG>,
                                };
 
                                sai6: sai@30060000 {
-                                       compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
+                                       compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
                                        reg = <0x30060000  0x10000>;
                                        interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
                                        clocks = <&clk IMX8MN_CLK_SAI6_IPG>,
                                };
 
                                sai7: sai@300b0000 {
-                                       compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
+                                       compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
                                        reg = <0x300b0000 0x10000>;
                                        interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>;
                                        clocks = <&clk IMX8MN_CLK_SAI7_IPG>,
index 38ffcd145b33a97126465efc00c02e0b7a348e74..899e8e7dbc24f20ac2097245f71f3cb43345dc1f 100644 (file)
                #address-cells = <1>;
                #size-cells = <1>;
                spi-max-frequency = <84000000>;
-               spi-tx-bus-width = <4>;
+               spi-tx-bus-width = <1>;
                spi-rx-bus-width = <4>;
        };
 };
index be8c76a0554c6661f68597e7460782fa6dcc1c17..4f767012f1f508836f7f6a2d8b9d3e7274169991 100644 (file)
                };
 
                clk: clock-controller {
-                       compatible = "fsl,imx8qxp-clk", "fsl,scu-clk";
+                       compatible = "fsl,imx8qm-clk", "fsl,scu-clk";
                        #clock-cells = <2>;
                };
 
index f0f81c23c16f2e787a5b2584d18a5400d1858ceb..b9a48cfd760faf2382f6498777563762732a8c55 100644 (file)
                                pins = "gpio47", "gpio48";
                                function = "blsp_i2c3";
                                drive-strength = <16>;
-                               bias-disable = <0>;
+                               bias-disable;
                        };
 
                        blsp1_i2c3_sleep: blsp1-i2c2-sleep {
                                pins = "gpio47", "gpio48";
                                function = "gpio";
                                drive-strength = <2>;
-                               bias-disable = <0>;
+                               bias-disable;
                        };
 
                        blsp2_uart3_4pins_default: blsp2-uart2-4pins {
index e90f99ef532346929a22c7bcc506f56f9cf1a5e0..e47c74e513afddec831849f8dd6cddf099631c39 100644 (file)
@@ -33,7 +33,7 @@ ap_h1_spi: &spi0 {};
 };
 
 &alc5682 {
-       realtek,dmic-clk-driving-high = "true";
+       realtek,dmic-clk-driving-high;
 };
 
 &cpu6_alert0 {
index 1084d5ce9ac7a9cbd0ec6e7702c2122f0ec2aa5e..07b729f9fec5e68958b99c00034eaec9f8e30549 100644 (file)
                        pins = "gpio6", "gpio25", "gpio26";
                        function = "gpio";
                        drive-strength = <8>;
-                       bias-disable = <0>;
+                       bias-disable;
                };
        };
 
index 8553c8bf79bd40015052c915bbea2dfb5b5c6da0..103cc40816fd3779027c334deb854f1dcd39cf88 100644 (file)
                config {
                        pins = "gpio6", "gpio11";
                        drive-strength = <8>;
-                       bias-disable = <0>;
+                       bias-disable;
                };
        };
 
index fb99cc2827c76133fdfd71b97177f518b9386916..7ab3627cc347d6a15c6fd9b8efad50a0f62468ca 100644 (file)
        status = "okay";
 };
 
+&rxmacro {
+       status = "okay";
+};
+
 &slpi {
        status = "okay";
        firmware-name = "qcom/sm8250/slpi.mbn";
 };
 
 &swr1 {
+       status = "okay";
+
        wcd_rx: wcd9380-rx@0,4 {
                compatible = "sdw20217010d00";
                reg = <0 4>;
 };
 
 &swr2 {
+       status = "okay";
+
        wcd_tx: wcd9380-tx@0,3 {
                compatible = "sdw20217010d00";
                reg = <0 3>;
        };
 };
 
+&txmacro {
+       status = "okay";
+};
+
 &uart12 {
        status = "okay";
 };
index af8f226364361bcd4e0f00b2f073a389ad3f64ea..1304b86af1a00772ac0478d607f42950b5e318e6 100644 (file)
                        pinctrl-0 = <&rx_swr_active>;
                        compatible = "qcom,sm8250-lpass-rx-macro";
                        reg = <0 0x3200000 0 0x1000>;
+                       status = "disabled";
 
                        clocks = <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
                                <&q6afecc LPASS_CLK_ID_TX_CORE_NPL_MCLK  LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
                swr1: soundwire-controller@3210000 {
                        reg = <0 0x3210000 0 0x2000>;
                        compatible = "qcom,soundwire-v1.5.1";
+                       status = "disabled";
                        interrupts = <GIC_SPI 298 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&rxmacro>;
                        clock-names = "iface";
                        pinctrl-0 = <&tx_swr_active>;
                        compatible = "qcom,sm8250-lpass-tx-macro";
                        reg = <0 0x3220000 0 0x1000>;
+                       status = "disabled";
 
                        clocks = <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
                                 <&q6afecc LPASS_CLK_ID_TX_CORE_NPL_MCLK  LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
                        compatible = "qcom,soundwire-v1.5.1";
                        interrupts-extended = <&intc GIC_SPI 297 IRQ_TYPE_LEVEL_HIGH>;
                        interrupt-names = "core";
+                       status = "disabled";
 
                        clocks = <&txmacro>;
                        clock-names = "iface";
index a01886b467edab74cbfcde84a633424deb8a3b95..067fe4a6b178c30e0895721f00fef271bf7ca924 100644 (file)
@@ -16,6 +16,7 @@
 
        aliases {
                ethernet0 = &gmac0;
+               ethernet1 = &gmac1;
                mmc0 = &sdmmc0;
                mmc1 = &sdhci;
        };
@@ -78,7 +79,6 @@
        assigned-clocks = <&cru SCLK_GMAC0_RX_TX>, <&cru SCLK_GMAC0>;
        assigned-clock-parents = <&cru SCLK_GMAC0_RGMII_SPEED>, <&cru CLK_MAC0_2TOP>;
        clock_in_out = "input";
-       phy-handle = <&rgmii_phy0>;
        phy-mode = "rgmii";
        pinctrl-names = "default";
        pinctrl-0 = <&gmac0_miim
        snps,reset-active-low;
        /* Reset time is 20ms, 100ms for rtl8211f */
        snps,reset-delays-us = <0 20000 100000>;
+       tx_delay = <0x4f>;
+       rx_delay = <0x0f>;
+       status = "okay";
+
+       fixed-link {
+               speed = <1000>;
+               full-duplex;
+               pause;
+       };
+};
+
+&gmac1 {
+       assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1>;
+       assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru CLK_MAC1_2TOP>;
+       clock_in_out = "output";
+       phy-handle = <&rgmii_phy1>;
+       phy-mode = "rgmii";
+       pinctrl-names = "default";
+       pinctrl-0 = <&gmac1m1_miim
+                    &gmac1m1_tx_bus2
+                    &gmac1m1_rx_bus2
+                    &gmac1m1_rgmii_clk
+                    &gmac1m1_rgmii_bus>;
+
+       snps,reset-gpio = <&gpio3 RK_PB0 GPIO_ACTIVE_LOW>;
+       snps,reset-active-low;
+       /* Reset time is 20ms, 100ms for rtl8211f */
+       snps,reset-delays-us = <0 20000 100000>;
+
        tx_delay = <0x3c>;
        rx_delay = <0x2f>;
+
        status = "okay";
 };
 
        status = "disabled";
 };
 
-&mdio0 {
-       rgmii_phy0: ethernet-phy@0 {
+&mdio1 {
+       rgmii_phy1: ethernet-phy@0 {
                compatible = "ethernet-phy-ieee802.3-c22";
                reg = <0x0>;
        };
        pmuio2-supply = <&vcc3v3_pmu>;
        vccio1-supply = <&vccio_acodec>;
        vccio3-supply = <&vccio_sd>;
-       vccio4-supply = <&vcc_1v8>;
+       vccio4-supply = <&vcc_3v3>;
        vccio5-supply = <&vcc_3v3>;
-       vccio6-supply = <&vcc_3v3>;
+       vccio6-supply = <&vcc_1v8>;
        vccio7-supply = <&vcc_3v3>;
        status = "okay";
 };
index 8bd5afc7b692ea16df704f6d51cf7cca2f977d6c..48d4473e8eee2cbddd32b999315f0fed267378b9 100644 (file)
  * sets the GP register's most significant bits to 0 with an explicit cast.
  */
 
-static inline void gic_write_eoir(u32 irq)
-{
-       write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
-       isb();
-}
-
 static __always_inline void gic_write_dir(u32 irq)
 {
        write_sysreg_s(irq, SYS_ICC_DIR_EL1);
index 03f52f84a4f3f8a23d8e825ee1b64b321fdf59f7..c762038ba40093c9690d1eae8e3d6ab38d71d5d7 100644 (file)
@@ -14,7 +14,7 @@
        14472:  .string file;                                   \
                .popsection;                                    \
                                                                \
-               .long 14472b - 14470b;                          \
+               .long 14472b - .;                               \
                .short line;
 #else
 #define _BUGVERBOSE_LOCATION(file, line)
@@ -25,7 +25,7 @@
 #define __BUG_ENTRY(flags)                             \
                .pushsection __bug_table,"aw";          \
                .align 2;                               \
-       14470:  .long 14471f - 14470b;                  \
+       14470:  .long 14471f - .;                       \
 _BUGVERBOSE_LOCATION(__FILE__, __LINE__)               \
                .short flags;                           \
                .popsection;                            \
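
[Editor's note] Both hunks switch the 32-bit bug-table fields from label arithmetic against the entry start (14470b) to a displacement relative to the field's own location (.). That is the form the generic decoder expects with CONFIG_GENERIC_BUG_RELATIVE_POINTERS, which recovers each address by adding the field's address back in; roughly (field name as in include/asm-generic/bug.h):

	/* Each field stores "target - &field", so: */
	static inline unsigned long bug_addr(const struct bug_entry *bug)
	{
		return (unsigned long)&bug->bug_addr_disp + bug->bug_addr_disp;
	}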
index 232b439cbaf3d8511199fa69399ce27d47c336c6..ff8f4511df71f73d671bbb57449e57ed94f3e7d9 100644 (file)
@@ -75,6 +75,7 @@
 #define ARM_CPU_PART_CORTEX_A77                0xD0D
 #define ARM_CPU_PART_NEOVERSE_V1       0xD40
 #define ARM_CPU_PART_CORTEX_A78                0xD41
+#define ARM_CPU_PART_CORTEX_A78AE      0xD42
 #define ARM_CPU_PART_CORTEX_X1         0xD44
 #define ARM_CPU_PART_CORTEX_A510       0xD46
 #define ARM_CPU_PART_CORTEX_A710       0xD47
 #define MIDR_CORTEX_A77        MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
 #define MIDR_NEOVERSE_V1       MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
 #define MIDR_CORTEX_A78        MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
+#define MIDR_CORTEX_A78AE      MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
 #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
 #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
 #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
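
[Editor's note] The new part number is consumed by the Spectre-BHB affected list further down in this merge. MIDR_CPU_MODEL() packs the implementer and part-number fields of MIDR_EL1; for reference, a sketch of the relevant bit layout (per the ARM ARM):

	/*
	 * MIDR_EL1: implementer [31:24], variant [23:20],
	 * architecture [19:16], part number [15:4], revision [3:0].
	 */
	static inline u32 midr_partnum(u32 midr)
	{
		return (midr >> 4) & 0xfff;	/* 0xD42 => Cortex-A78AE */
	}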
index 7f3c87f7a0cec758928c9d46dcce97058b0e413e..c31be7eda9df413aaef23c8a59a883efccf0047a 100644 (file)
        isb                                     // Make sure SRE is now set
        mrs_s   x0, SYS_ICC_SRE_EL2             // Read SRE back,
        tbz     x0, #0, .Lskip_gicv3_\@         // and check that it sticks
-       msr_s   SYS_ICH_HCR_EL2, xzr            // Reset ICC_HCR_EL2 to defaults
+       msr_s   SYS_ICH_HCR_EL2, xzr            // Reset ICH_HCR_EL2 to defaults
 .Lskip_gicv3_\@:
 .endm
 
index 7fd836bea7eb42853883fae3ea5015867e3dd0b9..3995652daf81a0af14bba4da469fcfa4a633887d 100644 (file)
@@ -192,4 +192,8 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
 extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
 extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
 
+extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+                                       unsigned long flags);
+#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
+
 #endif /* __ASM_IO_H */
index d62405ce3e6de53c37a94b2d10613f81b7d991e6..f71358271b71c1387db4874afae7d7504a90c879 100644 (file)
@@ -40,13 +40,26 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_vabt(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
 
 void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
 
+#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
 static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
 {
        return !(vcpu->arch.hcr_el2 & HCR_RW);
 }
+#else
+static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
+{
+       struct kvm *kvm = vcpu->kvm;
+
+       WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED,
+                              &kvm->arch.flags));
+
+       return test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
+}
+#endif
 
 static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 {
@@ -72,15 +85,14 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
                vcpu->arch.hcr_el2 |= HCR_TVM;
        }
 
-       if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
+       if (vcpu_el1_is_32bit(vcpu))
                vcpu->arch.hcr_el2 &= ~HCR_RW;
-
-       /*
-        * TID3: trap feature register accesses that we virtualise.
-        * For now this is conditional, since no AArch32 feature regs
-        * are currently virtualised.
-        */
-       if (!vcpu_el1_is_32bit(vcpu))
+       else
+               /*
+                * TID3: trap feature register accesses that we virtualise.
+                * For now this is conditional, since no AArch32 feature regs
+                * are currently virtualised.
+                */
                vcpu->arch.hcr_el2 |= HCR_TID3;
 
        if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
index e3b25dc6c367ae8302ea4bd4d1dc3eb673e77b19..94a27a7520f4740e64e202599c11fa75441b4e44 100644 (file)
@@ -127,6 +127,16 @@ struct kvm_arch {
 #define KVM_ARCH_FLAG_MTE_ENABLED                      1
         /* At least one vCPU has run in the VM */
 #define KVM_ARCH_FLAG_HAS_RAN_ONCE                     2
+       /*
+        * The following two bits are used to indicate the guest's EL1
+        * register width configuration. The KVM_ARCH_FLAG_EL1_32BIT bit
+        * is valid only when KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED is set.
+        * Otherwise, the guest's EL1 register width has not yet been
+        * determined.
+        */
+#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED             3
+#define KVM_ARCH_FLAG_EL1_32BIT                                4
+
        unsigned long flags;
 
        /*
index 94e147e5456ca9998ede2aa20d580c2f4431b9fa..dff2b483ea50927249b3392152c0afdf0c190f3d 100644 (file)
@@ -535,7 +535,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
                                 PMD_TYPE_TABLE)
 #define pmd_sect(pmd)          ((pmd_val(pmd) & PMD_TYPE_MASK) == \
                                 PMD_TYPE_SECT)
-#define pmd_leaf(pmd)          pmd_sect(pmd)
+#define pmd_leaf(pmd)          (pmd_present(pmd) && !pmd_table(pmd))
 #define pmd_bad(pmd)           (!pmd_table(pmd))
 
 #define pmd_leaf_size(pmd)     (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
@@ -625,7 +625,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 #define pud_none(pud)          (!pud_val(pud))
 #define pud_bad(pud)           (!pud_table(pud))
 #define pud_present(pud)       pte_present(pud_pte(pud))
-#define pud_leaf(pud)          pud_sect(pud)
+#define pud_leaf(pud)          (pud_present(pud) && !pud_table(pud))
 #define pud_valid(pud)         pte_valid(pud_pte(pud))
 
 static inline void set_pud(pud_t *pudp, pud_t pud)
index 986837d7ec82dc1863f06b0906575674d32a628e..fa7981d0d9170057fda37d96f49e6ad0e6f33c2c 100644 (file)
@@ -75,6 +75,10 @@ obj-$(CONFIG_ARM64_MTE)                      += mte.o
 obj-y                                  += vdso-wrap.o
 obj-$(CONFIG_COMPAT_VDSO)              += vdso32-wrap.o
 
+# Force dependency (vdso*-wrap.S includes vdso.so through incbin)
+$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
+$(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so
+
 obj-y                                  += probes/
 head-y                                 := head.o
 extra-y                                        += $(head-y) vmlinux.lds
index 3fb79b76e9d96adae5de2f08f3974a4a40ff898e..7bbf5104b7b7bd9985849a774051abb591d07b0d 100644 (file)
@@ -42,7 +42,7 @@ bool alternative_is_applied(u16 cpufeature)
 /*
  * Check if the target PC is within an alternative block.
  */
-static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
+static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
 {
        unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt);
        return !(pc >= replptr && pc <= (replptr + alt->alt_len));
@@ -50,7 +50,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
 
 #define align_down(x, a)       ((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
 
-static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
+static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
 {
        u32 insn;
 
@@ -95,7 +95,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
        return insn;
 }
 
-static void patch_alternative(struct alt_instr *alt,
+static noinstr void patch_alternative(struct alt_instr *alt,
                              __le32 *origptr, __le32 *updptr, int nr_inst)
 {
        __le32 *replptr;
index 4c9b5b4b7a0bc036f3d8a92e4378b1d622e9dbef..a0f3d0aaa3c53e4e519d8aadb03c938ae93a23c6 100644 (file)
@@ -208,6 +208,8 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
 #ifdef CONFIG_ARM64_ERRATUM_1286807
        {
                ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0),
+               /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */
+               ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe),
        },
 #endif
        {},
index d72c4b4d389c4130741e5a56df1159a9049d063d..2cb9cc9e0eff1b16e68074acfb139ccd00598f71 100644 (file)
@@ -654,7 +654,6 @@ static const struct __ftr_reg_entry {
        ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
        ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1,
                               &id_aa64isar1_override),
-       ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2),
        ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2,
                               &id_aa64isar2_override),
 
@@ -810,7 +809,7 @@ static void __init sort_ftr_regs(void)
                 * to sys_id for subsequent binary search in get_arm64_ftr_reg()
                 * to work correctly.
                 */
-               BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id);
+               BUG_ON(arm64_ftr_regs[i].sys_id <= arm64_ftr_regs[i - 1].sys_id);
        }
 }
 
index 3ed39c61a510c7c164073bc1c9431298378e1b1b..98d67444a5b615246a9bd0f01c59a06660015b5f 100644 (file)
@@ -8,16 +8,9 @@
 #include <asm/cpufeature.h>
 #include <asm/mte.h>
 
-#ifndef VMA_ITERATOR
-#define VMA_ITERATOR(name, mm, addr)   \
-       struct mm_struct *name = mm
-#define for_each_vma(vmi, vma)         \
-       for (vma = vmi->mmap; vma; vma = vma->vm_next)
-#endif
-
-#define for_each_mte_vma(vmi, vma)                                     \
+#define for_each_mte_vma(tsk, vma)                                     \
        if (system_supports_mte())                                      \
-               for_each_vma(vmi, vma)                                  \
+               for (vma = tsk->mm->mmap; vma; vma = vma->vm_next)      \
                        if (vma->vm_flags & VM_MTE)
 
 static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma)
@@ -32,10 +25,11 @@ static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma)
 static int mte_dump_tag_range(struct coredump_params *cprm,
                              unsigned long start, unsigned long end)
 {
+       int ret = 1;
        unsigned long addr;
+       void *tags = NULL;
 
        for (addr = start; addr < end; addr += PAGE_SIZE) {
-               char tags[MTE_PAGE_TAG_STORAGE];
                struct page *page = get_dump_page(addr);
 
                /*
@@ -59,22 +53,36 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
                        continue;
                }
 
+               if (!tags) {
+                       tags = mte_allocate_tag_storage();
+                       if (!tags) {
+                               put_page(page);
+                               ret = 0;
+                               break;
+                       }
+               }
+
                mte_save_page_tags(page_address(page), tags);
                put_page(page);
-               if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE))
-                       return 0;
+               if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) {
+                       mte_free_tag_storage(tags);
+                       ret = 0;
+                       break;
+               }
        }
 
-       return 1;
+       if (tags)
+               mte_free_tag_storage(tags);
+
+       return ret;
 }
 
 Elf_Half elf_core_extra_phdrs(void)
 {
        struct vm_area_struct *vma;
        int vma_count = 0;
-       VMA_ITERATOR(vmi, current->mm, 0);
 
-       for_each_mte_vma(vmi, vma)
+       for_each_mte_vma(current, vma)
                vma_count++;
 
        return vma_count;
@@ -83,12 +91,11 @@ Elf_Half elf_core_extra_phdrs(void)
 int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 {
        struct vm_area_struct *vma;
-       VMA_ITERATOR(vmi, current->mm, 0);
 
-       for_each_mte_vma(vmi, vma) {
+       for_each_mte_vma(current, vma) {
                struct elf_phdr phdr;
 
-               phdr.p_type = PT_ARM_MEMTAG_MTE;
+               phdr.p_type = PT_AARCH64_MEMTAG_MTE;
                phdr.p_offset = offset;
                phdr.p_vaddr = vma->vm_start;
                phdr.p_paddr = 0;
@@ -109,9 +116,8 @@ size_t elf_core_extra_data_size(void)
 {
        struct vm_area_struct *vma;
        size_t data_size = 0;
-       VMA_ITERATOR(vmi, current->mm, 0);
 
-       for_each_mte_vma(vmi, vma)
+       for_each_mte_vma(current, vma)
                data_size += mte_vma_tag_dump_size(vma);
 
        return data_size;
@@ -120,9 +126,8 @@ size_t elf_core_extra_data_size(void)
 int elf_core_write_extra_data(struct coredump_params *cprm)
 {
        struct vm_area_struct *vma;
-       VMA_ITERATOR(vmi, current->mm, 0);
 
-       for_each_mte_vma(vmi, vma) {
+       for_each_mte_vma(current, vma) {
                if (vma->vm_flags & VM_DONTDUMP)
                        continue;
 
index 712e97c03e54c287e4f6e9ee0e5d02ee3665a3cf..cd868084e724244d8964c95e6cbf7ba34a0b36f2 100644 (file)
@@ -701,7 +701,7 @@ NOKPROBE_SYMBOL(breakpoint_handler);
  * addresses. There is no straight-forward way, short of disassembling the
  * offending instruction, to map that address back to the watchpoint. This
  * function computes the distance of the memory access from the watchpoint as a
- * heuristic for the likelyhood that a given access triggered the watchpoint.
+ * heuristic for the likelihood that a given access triggered the watchpoint.
  *
  * See Section D2.10.5 "Determining the memory location that caused a Watchpoint
  * exception" of ARMv8 Architecture Reference Manual for details.
index e53493d8b208bb9cc3377ef07c89238f55aef97f..a3d0494f25a91de87c8e15a04fe404d516e6ef2e 100644 (file)
@@ -220,7 +220,7 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
                         * increasing the section's alignment so that the
                         * resulting address of this instruction is guaranteed
                         * to equal the offset in that particular bit (as well
-                        * as all less signficant bits). This ensures that the
+                        * as all less significant bits). This ensures that the
                         * address modulo 4 KB != 0xfff8 or 0xfffc (which would
                         * have all ones in bits [11:3])
                         */
index 78b3e0f8e997cab99b70bffe181949d8d29610dc..d502703e8373a7bdaf9238149d91090818622ab3 100644 (file)
@@ -76,6 +76,9 @@ void mte_sync_tags(pte_t old_pte, pte_t pte)
                        mte_sync_page_tags(page, old_pte, check_swap,
                                           pte_is_tagged);
        }
+
+       /* ensure the tags are visible before the PTE is set */
+       smp_wmb();
 }
 
 int memcmp_pages(struct page *page1, struct page *page2)
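
[Editor's note] The smp_wmb() added above makes the just-written tags visible before the caller publishes the PTE, so any observer that sees the new PTE also sees the tags. The generic shape of that pairing, with hypothetical names:

	/* Producer: write the payload, then publish. */
	p->tags = compute_tags(p);		/* payload first */
	smp_wmb();				/* order payload before publish */
	WRITE_ONCE(shared, p);

	/* Consumer: observe the publish, then read the payload. */
	p = READ_ONCE(shared);
	if (p) {
		smp_rmb();			/* pairs with smp_wmb() above */
		use(p->tags);			/* guaranteed to see the tags */
	}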
index 75fed4460407dee05914177a86c234423a6efe54..57c7c211f8c71d2447e3cde932b1a3a655b309b8 100644 (file)
@@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu)
 DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
 struct pv_time_stolen_time_region {
-       struct pvclock_vcpu_stolen_time *kaddr;
+       struct pvclock_vcpu_stolen_time __rcu *kaddr;
 };
 
 static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
@@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
 /* return stolen time in ns by asking the hypervisor */
 static u64 para_steal_clock(int cpu)
 {
+       struct pvclock_vcpu_stolen_time *kaddr = NULL;
        struct pv_time_stolen_time_region *reg;
+       u64 ret = 0;
 
        reg = per_cpu_ptr(&stolen_time_region, cpu);
 
@@ -61,28 +63,37 @@ static u64 para_steal_clock(int cpu)
         * online notification callback runs. Until the callback
         * has run we just return zero.
         */
-       if (!reg->kaddr)
+       rcu_read_lock();
+       kaddr = rcu_dereference(reg->kaddr);
+       if (!kaddr) {
+               rcu_read_unlock();
                return 0;
+       }
 
-       return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+       ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
+       rcu_read_unlock();
+       return ret;
 }
 
 static int stolen_time_cpu_down_prepare(unsigned int cpu)
 {
+       struct pvclock_vcpu_stolen_time *kaddr = NULL;
        struct pv_time_stolen_time_region *reg;
 
        reg = this_cpu_ptr(&stolen_time_region);
        if (!reg->kaddr)
                return 0;
 
-       memunmap(reg->kaddr);
-       memset(reg, 0, sizeof(*reg));
+       kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
+       synchronize_rcu();
+       memunmap(kaddr);
 
        return 0;
 }
 
 static int stolen_time_cpu_online(unsigned int cpu)
 {
+       struct pvclock_vcpu_stolen_time *kaddr = NULL;
        struct pv_time_stolen_time_region *reg;
        struct arm_smccc_res res;
 
@@ -93,17 +104,19 @@ static int stolen_time_cpu_online(unsigned int cpu)
        if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
                return -EINVAL;
 
-       reg->kaddr = memremap(res.a0,
+       kaddr = memremap(res.a0,
                              sizeof(struct pvclock_vcpu_stolen_time),
                              MEMREMAP_WB);
 
+       rcu_assign_pointer(reg->kaddr, kaddr);
+
        if (!reg->kaddr) {
                pr_warn("Failed to map stolen time data structure\n");
                return -ENOMEM;
        }
 
-       if (le32_to_cpu(reg->kaddr->revision) != 0 ||
-           le32_to_cpu(reg->kaddr->attributes) != 0) {
+       if (le32_to_cpu(kaddr->revision) != 0 ||
+           le32_to_cpu(kaddr->attributes) != 0) {
                pr_warn_once("Unexpected revision or attributes in stolen time data\n");
                return -ENXIO;
        }
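
[Editor's note] The stolen-time rework above is the standard RCU pointer lifecycle: rcu_assign_pointer() publishes the mapping, readers sample it under rcu_read_lock() with rcu_dereference(), and teardown unpublishes with rcu_replace_pointer() then waits out readers with synchronize_rcu() before memunmap(), so no CPU can dereference a freed mapping. Condensed to its skeleton (struct and field names hypothetical):

	static struct foo __rcu *gp;

	static u64 reader(void)
	{
		struct foo *p;
		u64 val = 0;

		rcu_read_lock();
		p = rcu_dereference(gp);	/* valid only inside the lock */
		if (p)
			val = READ_ONCE(p->field);
		rcu_read_unlock();
		return val;
	}

	static void teardown(void)
	{
		struct foo *p = rcu_replace_pointer(gp, NULL, true);

		synchronize_rcu();	/* readers of the old pointer drain */
		kfree(p);
	}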
index 771f543464e060729740949d5edf7f63762e43a9..33e0fabc0b79b7ba7794c9b3522d3cb194f65876 100644 (file)
@@ -117,8 +117,8 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg)
        int i, ret = 0;
        struct aarch64_insn_patch *pp = arg;
 
-       /* The first CPU becomes master */
-       if (atomic_inc_return(&pp->cpu_count) == 1) {
+       /* The last CPU becomes master */
+       if (atomic_inc_return(&pp->cpu_count) == num_online_cpus()) {
                for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
                        ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
                                                             pp->new_insns[i]);
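
[Editor's note] This callback runs on every online CPU under stop_machine(). With "== 1" the first CPU to arrive started patching while later CPUs could still be executing the old text; with "== num_online_cpus()" the last arrival patches, so all other CPUs are already parked inside the callback before any instruction changes. The rendezvous, reduced to its shape (simplified from the surrounding function; names hypothetical):

	static int patch_cb(void *arg)
	{
		struct patch_work *pw = arg;	/* hypothetical wrapper */

		if (atomic_inc_return(&pw->count) == num_online_cpus()) {
			do_patch(pw);			/* everyone else is parked */
			atomic_inc(&pw->count);		/* release the waiters */
		} else {
			while (atomic_read(&pw->count) <= num_online_cpus())
				cpu_relax();
		}
		return 0;
	}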
index 5777929d35bf47664ec9f8efd6598fb9bb17cf7e..40be3a7c2c53154a17be367d56badf57b9e1c48a 100644 (file)
@@ -853,6 +853,7 @@ u8 spectre_bhb_loop_affected(int scope)
        if (scope == SCOPE_LOCAL_CPU) {
                static const struct midr_range spectre_bhb_k32_list[] = {
                        MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+                       MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
                        MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
                        MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
                        MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
index f0a3df9e18a32158fed3f3a49ec7e67733cd7407..413f899e4ac639414dcfaf2e5f0a7152fc55b938 100644 (file)
  * safe memory that has been set up to be preserved during the copy operation.
  */
 SYM_CODE_START(arm64_relocate_new_kernel)
+       /*
+        * The kimage structure isn't allocated specially and may be clobbered
+        * during relocation. We must load any values we need from it prior to
+        * any relocation occurring.
+        */
+       ldr     x28, [x0, #KIMAGE_START]
+       ldr     x27, [x0, #KIMAGE_ARCH_EL2_VECTORS]
+       ldr     x26, [x0, #KIMAGE_ARCH_DTB_MEM]
+
        /* Setup the list loop variables. */
        ldr     x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */
        ldr     x17, [x0, #KIMAGE_ARCH_TTBR1]   /* x17 = linear map copy */
@@ -72,21 +81,20 @@ SYM_CODE_START(arm64_relocate_new_kernel)
        ic      iallu
        dsb     nsh
        isb
-       ldr     x4, [x0, #KIMAGE_START]                 /* relocation start */
-       ldr     x1, [x0, #KIMAGE_ARCH_EL2_VECTORS]      /* relocation start */
-       ldr     x0, [x0, #KIMAGE_ARCH_DTB_MEM]          /* dtb address */
        turn_off_mmu x12, x13
 
        /* Start new image. */
-       cbz     x1, .Lel1
-       mov     x1, x4                          /* relocation start */
-       mov     x2, x0                          /* dtb address */
+       cbz     x27, .Lel1
+       mov     x1, x28                         /* kernel entry point */
+       mov     x2, x26                         /* dtb address */
        mov     x3, xzr
        mov     x4, xzr
        mov     x0, #HVC_SOFT_RESTART
        hvc     #0                              /* Jumps from el2 */
 .Lel1:
+       mov     x0, x26                         /* dtb address */
+       mov     x1, xzr
        mov     x2, xzr
        mov     x3, xzr
-       br      x4                              /* Jumps from el1 */
+       br      x28                             /* Jumps from el1 */
 SYM_CODE_END(arm64_relocate_new_kernel)
index 27df5c1e6baad53098e8cf1740cc054c2dea5d89..3b46041f2b978893eae596fdf84bb6aec6074080 100644 (file)
@@ -234,6 +234,7 @@ asmlinkage notrace void secondary_start_kernel(void)
         * Log the CPU info before it is marked online and might get read.
         */
        cpuinfo_store_cpu();
+       store_cpu_topology(cpu);
 
        /*
         * Enable GIC and timers.
@@ -242,7 +243,6 @@ asmlinkage notrace void secondary_start_kernel(void)
 
        ipi_setup(cpu);
 
-       store_cpu_topology(cpu);
        numa_add_cpu(cpu);
 
        /*
index 19ee7c33769d3f5820762cf0b6b497963bdb6bca..2b0887e58a7c4df64cf6cedf7968fd305cf99283 100644 (file)
@@ -140,7 +140,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
        /*
         * Restore pstate flags. OS lock and mdscr have been already
         * restored, so from this point onwards, debugging is fully
-        * renabled if it was enabled when core started shutdown.
+        * reenabled if it was enabled when core started shutdown.
         */
        local_daif_restore(flags);
 
index 172452f79e462ed6ec6b462a19a80f0db4c1e857..ac1964ebed1ef416f0f2c8b6e607105e0f17eb45 100644 (file)
@@ -52,9 +52,6 @@ GCOV_PROFILE := n
 targets += vdso.lds
 CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
 
-# Force dependency (incbin is bad)
-$(obj)/vdso.o : $(obj)/vdso.so
-
 # Link rule for the .so file, .lds has to be first
 $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
        $(call if_changed,vdsold_and_vdso_check)
index ed181bedbffc5b16d4a654f34c6c62d22db4c21a..05ba1aae1b6f24017fb5e5c732f260d6accd7958 100644 (file)
@@ -131,9 +131,6 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
 targets += vdso.lds
 CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
 
-# Force dependency (vdso.s includes vdso.so through incbin)
-$(obj)/vdso.o: $(obj)/vdso.so
-
 include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
        $(call if_changed,vdsosym)
 
index 523bc934fe2f66687b2bb605776f4b239b6114d3..a66d83540c15acb15b48ac39d7dbe77b394e4368 100644 (file)
@@ -1436,7 +1436,8 @@ static int kvm_init_vector_slots(void)
        base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
        kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);
 
-       if (kvm_system_needs_idmapped_vectors() && !has_vhe()) {
+       if (kvm_system_needs_idmapped_vectors() &&
+           !is_protected_kvm_enabled()) {
                err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
                                               __BP_HARDEN_HYP_VECS_SZ, &base);
                if (err)
index 3d613e721a75d00b8e253574e632277b01c3d4b8..727c979b2b69b4607677067e5703ea7ce7f14401 100644 (file)
@@ -198,15 +198,15 @@ SYM_CODE_START(__kvm_hyp_host_vector)
        invalid_host_el2_vect                   // FIQ EL2h
        invalid_host_el2_vect                   // Error EL2h
 
-       host_el1_sync_vect                      // Synchronous 64-bit EL1
-       invalid_host_el1_vect                   // IRQ 64-bit EL1
-       invalid_host_el1_vect                   // FIQ 64-bit EL1
-       invalid_host_el1_vect                   // Error 64-bit EL1
-
-       invalid_host_el1_vect                   // Synchronous 32-bit EL1
-       invalid_host_el1_vect                   // IRQ 32-bit EL1
-       invalid_host_el1_vect                   // FIQ 32-bit EL1
-       invalid_host_el1_vect                   // Error 32-bit EL1
+       host_el1_sync_vect                      // Synchronous 64-bit EL1/EL0
+       invalid_host_el1_vect                   // IRQ 64-bit EL1/EL0
+       invalid_host_el1_vect                   // FIQ 64-bit EL1/EL0
+       invalid_host_el1_vect                   // Error 64-bit EL1/EL0
+
+       host_el1_sync_vect                      // Synchronous 32-bit EL1/EL0
+       invalid_host_el1_vect                   // IRQ 32-bit EL1/EL0
+       invalid_host_el1_vect                   // FIQ 32-bit EL1/EL0
+       invalid_host_el1_vect                   // Error 32-bit EL1/EL0
 SYM_CODE_END(__kvm_hyp_host_vector)
 
 /*
index b47df73e98d782a55192051c9f3e537ddfd10941..ba20405d2dc2fdfd312f580b12cdda297d092f85 100644 (file)
@@ -145,6 +145,34 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
                inject_abt64(vcpu, true, addr);
 }
 
+void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
+{
+       unsigned long addr, esr;
+
+       addr  = kvm_vcpu_get_fault_ipa(vcpu);
+       addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+
+       if (kvm_vcpu_trap_is_iabt(vcpu))
+               kvm_inject_pabt(vcpu, addr);
+       else
+               kvm_inject_dabt(vcpu, addr);
+
+       /*
+        * If AArch64 or LPAE, set FSC to 0 to indicate an Address
+        * Size Fault at level 0, as if exceeding PARange.
+        *
+        * Non-LPAE guests will only get the external abort, as there
+        * is no way to describe the ASF.
+        */
+       if (vcpu_el1_is_32bit(vcpu) &&
+           !(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE))
+               return;
+
+       esr = vcpu_read_sys_reg(vcpu, ESR_EL1);
+       esr &= ~GENMASK_ULL(5, 0);
+       vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
+}
+
 /**
  * kvm_inject_undefined - inject an undefined instruction into the guest
  * @vcpu: The vCPU in which to inject the exception
index 0d19259454d8c583d184ec6ab21cda3f43a19cf0..5400fc020164e827c89b68f3b972fc55940fd519 100644 (file)
@@ -1079,7 +1079,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        gfn_t gfn;
        kvm_pfn_t pfn;
        bool logging_active = memslot_is_logging(memslot);
-       bool logging_perm_fault = false;
+       bool use_read_lock = false;
        unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
        unsigned long vma_pagesize, fault_granule;
        enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
@@ -1114,7 +1114,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        if (logging_active) {
                force_pte = true;
                vma_shift = PAGE_SHIFT;
-               logging_perm_fault = (fault_status == FSC_PERM && write_fault);
+               use_read_lock = (fault_status == FSC_PERM && write_fault &&
+                                fault_granule == PAGE_SIZE);
        } else {
                vma_shift = get_vma_page_shift(vma, hva);
        }
@@ -1218,7 +1219,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         * logging, only acquire read lock for permission
         * relaxation.
         */
-       if (logging_perm_fault)
+       if (use_read_lock)
                read_lock(&kvm->mmu_lock);
        else
                write_lock(&kvm->mmu_lock);
@@ -1268,6 +1269,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
                ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
        } else {
+               WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n");
+
                ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
                                             __pfn_to_phys(pfn), prot,
                                             memcache);
@@ -1280,7 +1283,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        }
 
 out_unlock:
-       if (logging_perm_fault)
+       if (use_read_lock)
                read_unlock(&kvm->mmu_lock);
        else
                write_unlock(&kvm->mmu_lock);
@@ -1334,6 +1337,25 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
        fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
        is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
 
+       if (fault_status == FSC_FAULT) {
+               /* Beyond sanitised PARange (which is the IPA limit) */
+               if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
+                       kvm_inject_size_fault(vcpu);
+                       return 1;
+               }
+
+               /* Falls between the IPA range and the PARange? */
+               if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) {
+                       fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+
+                       if (is_iabt)
+                               kvm_inject_pabt(vcpu, fault_ipa);
+                       else
+                               kvm_inject_dabt(vcpu, fault_ipa);
+                       return 1;
+               }
+       }
+
        /* Synchronous External Abort? */
        if (kvm_vcpu_abt_issea(vcpu)) {
                /*
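[Note: the two checks added above split the faulting IPA space into three bands. A compact restatement, with ipa_limit standing in for get_kvm_ipa_limit() and ia_bits for the stage-2 input size; names are illustrative, this is not the kernel code:

#include <stdint.h>

#define BIT_ULL(n)	(1ULL << (n))

enum abort_action {
	INJECT_SIZE_FAULT,	/* beyond sanitised PARange */
	INJECT_XLAT_FAULT,	/* between stage-2 IPA size and PARange */
	HANDLE_NORMALLY,	/* a regular stage-2 fault */
};

static enum abort_action classify(uint64_t fault_ipa,
				  unsigned int ipa_limit, unsigned int ia_bits)
{
	if (fault_ipa >= BIT_ULL(ipa_limit))
		return INJECT_SIZE_FAULT;
	if (fault_ipa >= BIT_ULL(ia_bits))
		return INJECT_XLAT_FAULT;
	return HANDLE_NORMALLY;
}
]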
index 78fdc443adc7ddbb56fd8a0b312f9f39ca58e5ca..3dc990ac4f4499ed1cce825d6d5e58a1af5a434a 100644 (file)
@@ -177,6 +177,9 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc = &pmu->pmc[select_idx];
 
+       if (!kvm_vcpu_has_pmu(vcpu))
+               return 0;
+
        counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
 
        if (kvm_pmu_pmc_is_chained(pmc) &&
@@ -198,6 +201,9 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
 {
        u64 reg;
 
+       if (!kvm_vcpu_has_pmu(vcpu))
+               return;
+
        reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
              ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
        __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
@@ -322,6 +328,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;
 
+       if (!kvm_vcpu_has_pmu(vcpu))
+               return;
+
        if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
                return;
 
@@ -357,7 +366,7 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_pmc *pmc;
 
-       if (!val)
+       if (!kvm_vcpu_has_pmu(vcpu) || !val)
                return;
 
        for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
@@ -527,6 +536,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        int i;
 
+       if (!kvm_vcpu_has_pmu(vcpu))
+               return;
+
        if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
                return;
 
@@ -576,6 +588,9 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
 {
        int i;
 
+       if (!kvm_vcpu_has_pmu(vcpu))
+               return;
+
        if (val & ARMV8_PMU_PMCR_E) {
                kvm_pmu_enable_counter_mask(vcpu,
                       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
@@ -739,6 +754,9 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
 {
        u64 reg, mask;
 
+       if (!kvm_vcpu_has_pmu(vcpu))
+               return;
+
        mask  =  ARMV8_PMU_EVTYPE_MASK;
        mask &= ~ARMV8_PMU_EVTYPE_EVENT;
        mask |= kvm_pmu_event_mask(vcpu->kvm);
@@ -827,6 +845,9 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
        u64 val, mask = 0;
        int base, i, nr_events;
 
+       if (!kvm_vcpu_has_pmu(vcpu))
+               return 0;
+
        if (!pmceid1) {
                val = read_sysreg(pmceid0_el0);
                base = 0;
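[Note: all of the pmu.c hunks add the same guard, so each emulation entry point degrades to a no-op on vCPUs without a PMU rather than trusting every caller to check first. The pattern in the abstract, with illustrative names:

#include <stdbool.h>

struct vcpu { bool has_pmu; /* ... */ };

static void pmu_op(struct vcpu *v)
{
	if (!v->has_pmu)
		return;		/* feature absent: silently do nothing */
	/* ... actual PMU emulation ... */
}
]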
index 372da09a2fab62f824ff016c7ac69787a33d359e..708d80e8e60dde36bb1d36a562ce7859ca74b14d 100644 (file)
@@ -181,7 +181,8 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags)
 
        memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
        vcpu->run->system_event.type = type;
-       vcpu->run->system_event.flags = flags;
+       vcpu->run->system_event.ndata = 1;
+       vcpu->run->system_event.data[0] = flags;
        vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
 }
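[Note: this hunk tracks the 5.19 reshaping of the system_event exit ABI, where the scalar flags word became the first element of a counted array. The layout is roughly as below; see include/uapi/linux/kvm.h for the authoritative definition, which keeps 'flags' as a union alias of data[0] for userspace source compatibility:

/* Sketch of the layout, not the exact UAPI. */
struct kvm_run_system_event {
	__u32 type;		/* KVM_SYSTEM_EVENT_SHUTDOWN, _RESET, ... */
	__u32 ndata;		/* number of valid data[] words */
	__u64 data[16];		/* data[0] carries what 'flags' used to */
};
]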
 
@@ -215,15 +216,11 @@ static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu)
 
 static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn)
 {
-       switch(fn) {
-       case PSCI_0_2_FN64_CPU_SUSPEND:
-       case PSCI_0_2_FN64_CPU_ON:
-       case PSCI_0_2_FN64_AFFINITY_INFO:
-               /* Disallow these functions for 32bit guests */
-               if (vcpu_mode_is_32bit(vcpu))
-                       return PSCI_RET_NOT_SUPPORTED;
-               break;
-       }
+       /*
+        * Prevent 32 bit guests from calling 64 bit PSCI functions.
+        */
+       if ((fn & PSCI_0_2_64BIT) && vcpu_mode_is_32bit(vcpu))
+               return PSCI_RET_NOT_SUPPORTED;
 
        return 0;
 }
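[Note: the rewrite works because SMC64 PSCI function IDs are the SMC32 ones with bit 30 set (PSCI_0_2_64BIT == 0x40000000): CPU_ON, for instance, is 0x84000003 in its 32-bit form and 0xc4000003 in its 64-bit form, so one mask test covers every current and future 64-bit call:

#include <stdbool.h>
#include <stdint.h>

#define PSCI_0_2_64BIT	0x40000000U	/* as in uapi/linux/psci.h */

static bool psci_fn_is_64bit(uint32_t fn)
{
	return fn & PSCI_0_2_64BIT;	/* bit 30 selects the SMC64 range */
}
]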
@@ -235,10 +232,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
        unsigned long val;
        int ret = 1;
 
-       val = kvm_psci_check_allowed_function(vcpu, psci_fn);
-       if (val)
-               goto out;
-
        switch (psci_fn) {
        case PSCI_0_2_FN_PSCI_VERSION:
                /*
@@ -306,7 +299,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
                break;
        }
 
-out:
        smccc_set_retval(vcpu, val, 0, 0, 0);
        return ret;
 }
@@ -318,9 +310,6 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
        unsigned long val;
        int ret = 1;
 
-       if (minor > 1)
-               return -EINVAL;
-
        switch(psci_fn) {
        case PSCI_0_2_FN_PSCI_VERSION:
                val = minor == 0 ? KVM_ARM_PSCI_1_0 : KVM_ARM_PSCI_1_1;
@@ -426,6 +415,15 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
  */
 int kvm_psci_call(struct kvm_vcpu *vcpu)
 {
+       u32 psci_fn = smccc_get_function(vcpu);
+       unsigned long val;
+
+       val = kvm_psci_check_allowed_function(vcpu, psci_fn);
+       if (val) {
+               smccc_set_retval(vcpu, val, 0, 0, 0);
+               return 1;
+       }
+
        switch (kvm_psci_version(vcpu)) {
        case KVM_ARM_PSCI_1_1:
                return kvm_psci_1_x_call(vcpu, 1);
index ecc40c8cd6f643301c4c3a76f9eb6933ca8350e7..6c70c6f61c7038fb180a6c4e5b8c3c943c8b9a55 100644 (file)
@@ -181,27 +181,51 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
+/**
+ * kvm_set_vm_width() - set the register width for the guest
+ * @vcpu: Pointer to the vcpu being configured
+ *
+ * Set both KVM_ARCH_FLAG_EL1_32BIT and KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED
+ * in the VM flags based on the vcpu's requested register width, the HW
+ * capabilities and other options (such as MTE).
+ * When REG_WIDTH_CONFIGURED is already set, the vcpu settings must be
+ * consistent with the value of the FLAG_EL1_32BIT bit in the flags.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int kvm_set_vm_width(struct kvm_vcpu *vcpu)
 {
-       struct kvm_vcpu *tmp;
+       struct kvm *kvm = vcpu->kvm;
        bool is32bit;
-       unsigned long i;
 
        is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
+
+       lockdep_assert_held(&kvm->lock);
+
+       if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) {
+               /*
+                * The guest's register width is already configured.
+                * Make sure that the vcpu is consistent with it.
+                */
+               if (is32bit == test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags))
+                       return 0;
+
+               return -EINVAL;
+       }
+
        if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
-               return false;
+               return -EINVAL;
 
        /* MTE is incompatible with AArch32 */
-       if (kvm_has_mte(vcpu->kvm) && is32bit)
-               return false;
+       if (kvm_has_mte(kvm) && is32bit)
+               return -EINVAL;
 
-       /* Check that the vcpus are either all 32bit or all 64bit */
-       kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
-               if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
-                       return false;
-       }
+       if (is32bit)
+               set_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
 
-       return true;
+       set_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags);
+
+       return 0;
 }
 
 /**
@@ -230,10 +254,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
        u32 pstate;
 
        mutex_lock(&vcpu->kvm->lock);
-       reset_state = vcpu->arch.reset_state;
-       WRITE_ONCE(vcpu->arch.reset_state.reset, false);
+       ret = kvm_set_vm_width(vcpu);
+       if (!ret) {
+               reset_state = vcpu->arch.reset_state;
+               WRITE_ONCE(vcpu->arch.reset_state.reset, false);
+       }
        mutex_unlock(&vcpu->kvm->lock);
 
+       if (ret)
+               return ret;
+
        /* Reset PMU outside of the non-preemptible section */
        kvm_pmu_vcpu_reset(vcpu);
 
@@ -260,14 +290,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
                }
        }
 
-       if (!vcpu_allowed_register_width(vcpu)) {
-               ret = -EINVAL;
-               goto out;
-       }
-
        switch (vcpu->arch.target) {
        default:
-               if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
+               if (vcpu_el1_is_32bit(vcpu)) {
                        pstate = VCPU_RESET_PSTATE_SVC;
                } else {
                        pstate = VCPU_RESET_PSTATE_EL1;
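[Note: kvm_set_vm_width() above is a configure-once idiom: the first vCPU to be reset records the VM-wide register width under the VM lock, and later vCPUs may only confirm it. Stripped to its essentials, with illustrative flag names and plain bit operations in place of the kernel helpers:

#include <errno.h>
#include <stdbool.h>

enum { EL1_32BIT = 1UL << 0, WIDTH_CONFIGURED = 1UL << 1 };

struct vm { unsigned long flags; /* protected by the VM lock */ };

static int set_width_once(struct vm *vm, bool want_32bit)	/* lock held */
{
	if (vm->flags & WIDTH_CONFIGURED)
		return want_32bit == !!(vm->flags & EL1_32BIT) ? 0 : -EINVAL;

	if (want_32bit)
		vm->flags |= EL1_32BIT;
	vm->flags |= WIDTH_CONFIGURED;
	return 0;
}
]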
index 7b45c040cc27f37ab5c21050f5f75c35a52fffae..adf408c09cdb88007b4c123b132dbf039cdd5538 100644 (file)
@@ -1123,8 +1123,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
                val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
                val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3);
                val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
-               if (irqchip_in_kernel(vcpu->kvm) &&
-                   vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+               if (kvm_vgic_global_state.type == VGIC_V3) {
                        val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC);
                        val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1);
                }
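[Note: FIELD_PREP() from linux/bitfield.h shifts a value into position at a mask's lowest set bit; the hunk uses it to force the GIC field of ID_AA64PFR0_EL1 (bits [27:24]) to 1, advertising the system-register CPU interface whenever the host GIC is a v3. A standalone approximation:

#include <stdint.h>

#define GIC_MASK	(0xfULL << 24)	/* ID_AA64PFR0_EL1.GIC, bits [27:24] */
#define FIELD_PREP(mask, val) \
	(((uint64_t)(val) << __builtin_ctzll(mask)) & (mask))

static uint64_t advertise_sysreg_gic(uint64_t pfr0)
{
	pfr0 &= ~GIC_MASK;			/* clear the field */
	pfr0 |= FIELD_PREP(GIC_MASK, 1);	/* 1 = sysreg interface present */
	return pfr0;
}
]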
index f38c40a762519f2d10b6fb8b9f26b3294b78f39d..78cde687383ca8cd1c6396701deb0dfc552bddf5 100644 (file)
@@ -82,7 +82,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter)
 
 static void *vgic_debug_start(struct seq_file *s, loff_t *pos)
 {
-       struct kvm *kvm = (struct kvm *)s->private;
+       struct kvm *kvm = s->private;
        struct vgic_state_iter *iter;
 
        mutex_lock(&kvm->lock);
@@ -110,7 +110,7 @@ out:
 
 static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos)
 {
-       struct kvm *kvm = (struct kvm *)s->private;
+       struct kvm *kvm = s->private;
        struct vgic_state_iter *iter = kvm->arch.vgic.iter;
 
        ++*pos;
@@ -122,7 +122,7 @@ static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos)
 
 static void vgic_debug_stop(struct seq_file *s, void *v)
 {
-       struct kvm *kvm = (struct kvm *)s->private;
+       struct kvm *kvm = s->private;
        struct vgic_state_iter *iter;
 
        /*
@@ -229,8 +229,8 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
 
 static int vgic_debug_show(struct seq_file *s, void *v)
 {
-       struct kvm *kvm = (struct kvm *)s->private;
-       struct vgic_state_iter *iter = (struct vgic_state_iter *)v;
+       struct kvm *kvm = s->private;
+       struct vgic_state_iter *iter = v;
        struct vgic_irq *irq;
        struct kvm_vcpu *vcpu = NULL;
        unsigned long flags;
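[Note: the casts removed across vgic-debug.c are purely cosmetic. In C (unlike C++) a void * converts implicitly to any object pointer type, so plain assignment is already well-formed:

struct kvm;

static struct kvm *get_kvm(void *private)
{
	return private;		/* implicit void * conversion; no cast needed */
}
]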
index 089fc2ffcb43d1c6c1269036762edf669068ad24..2e13402be3bd24654962c3eccb0c0ab56d5cd420 100644 (file)
@@ -2143,7 +2143,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev,
 static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
                                void *ptr, void *opaque)
 {
-       struct its_device *dev = (struct its_device *)opaque;
+       struct its_device *dev = opaque;
        struct its_collection *collection;
        struct kvm *kvm = its->dev->kvm;
        struct kvm_vcpu *vcpu = NULL;
index 8ac25f19084e89891d9629cfcf8f7d82c0464584..1e7b1550e2fcebbea89f1443cb8daa5f009b4498 100644 (file)
@@ -73,7 +73,7 @@ EXPORT_SYMBOL(memstart_addr);
  * In this scheme a comparatively quicker boot is observed.
  *
  * If ZONE_DMA configs are defined, crash kernel memory reservation
- * is delayed until DMA zone memory range size initilazation performed in
+ * is delayed until DMA zone memory range size initialization performed in
  * zone_sizes_init().  The defer is necessary to steer clear of DMA zone
  * memory range to avoid overlap allocation.  So crash kernel memory boundaries
  * are not known when mapping all bank memory ranges, which otherwise means
@@ -81,7 +81,7 @@ EXPORT_SYMBOL(memstart_addr);
  * so page-granularity mappings are created for the entire memory range.
  * Hence a slightly slower boot is observed.
  *
- * Note: Page-granularity mapppings are necessary for crash kernel memory
+ * Note: Page-granularity mappings are necessary for crash kernel memory
  * range for shrinking its size via /sys/kernel/kexec_crash_size interface.
  */
 #if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
index b7c81dacabf079f50d6d6225a43b15a2b4e621ab..b21f91cd830db4fdbcf4f52f2477c3c96a875e0f 100644 (file)
@@ -99,3 +99,11 @@ void __init early_ioremap_init(void)
 {
        early_ioremap_setup();
 }
+
+bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+                                unsigned long flags)
+{
+       unsigned long pfn = PHYS_PFN(offset);
+
+       return pfn_is_map_memory(pfn);
+}
index b05bb70a2e46f3d1af2d60421e6b81644e1f29ce..8026baf46e729262065571c785635bd6ed876795 100644 (file)
@@ -40,9 +40,9 @@
 typedef unsigned int cycles_t;
 
 /*
- * On R4000/R4400 before version 5.0 an erratum exists such that if the
- * cycle counter is read in the exact moment that it is matching the
- * compare register, no interrupt will be generated.
+ * On R4000/R4400 an erratum exists such that if the cycle counter is
+ * read in the exact moment that it is matching the compare register,
+ * no interrupt will be generated.
  *
  * There is a suggested workaround and also the erratum can't strike if
  * the compare interrupt isn't being used as the clock source device.
@@ -63,7 +63,7 @@ static inline int can_use_mips_counter(unsigned int prid)
        if (!__builtin_constant_p(cpu_has_counter))
                asm volatile("" : "=m" (cpu_data[0].options));
        if (likely(cpu_has_counter &&
-                  prid >= (PRID_IMP_R4000 | PRID_REV_ENCODE_44(5, 0))))
+                  prid > (PRID_IMP_R4000 | PRID_REV_ENCODE_44(15, 15))))
                return 1;
        else
                return 0;
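[Note: assuming the usual asm/cpu.h encodings (PRID_IMP_R4000 == 0x0400 and PRID_REV_ENCODE_44(ver, pat) == ((ver) << 4 | (pat))), the new comparison rejects every possible R4000/R4400 revision, since (15, 15) is the largest value the two 4-bit fields can encode, instead of rejecting only parts before rev 5.0:

#include <stdbool.h>
#include <stdint.h>

#define PRID_IMP_R4000			0x0400	/* assumed, per asm/cpu.h */
#define PRID_REV_ENCODE_44(ver, pat)	(((ver) << 4) | (pat))

static bool counter_usable(uint32_t prid)
{
	/* true only for parts newer than any R4000/R4400 */
	return prid > (PRID_IMP_R4000 | PRID_REV_ENCODE_44(15, 15));
}
]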
index caa01457dce609b5079c89d6613e6ab15bd1b5e7..ed339d7979f3f5a336c8f2d22753bc0fa349e597 100644 (file)
@@ -141,15 +141,10 @@ static __init int cpu_has_mfc0_count_bug(void)
        case CPU_R4400MC:
                /*
                 * The published errata for the R4400 up to 3.0 say the CPU
-                * has the mfc0 from count bug.
+                * has the mfc0 from count bug.  This seems to be the
+                * last version produced.
                 */
-               if ((current_cpu_data.processor_id & 0xff) <= 0x30)
-                       return 1;
-
-               /*
-                * we assume newer revisions are ok
-                */
-               return 0;
+               return 1;
        }
 
        return 0;
index 52e550b45692402c4aca3ec88bc571e1002da99b..bd22578859d0024c72ba913bc88614c4d0fa7b72 100644 (file)
@@ -38,6 +38,7 @@ config PARISC
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
        select GENERIC_SMP_IDLE_THREAD
        select GENERIC_ARCH_TOPOLOGY if SMP
+       select GENERIC_CPU_DEVICES if !SMP
        select GENERIC_LIB_DEVMEM_IS_ALLOWED
        select SYSCTL_ARCH_UNALIGN_ALLOW
        select SYSCTL_EXCEPTION_TRACE
index a5fee10d76ee6880ce675f7bdb627b7b5dfc412e..8ce0ae3706804defdc1cc6428befd709038a262a 100644 (file)
@@ -6,6 +6,9 @@ CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=16
+CONFIG_CGROUPS=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 CONFIG_PERF_EVENTS=y
@@ -47,7 +50,6 @@ CONFIG_PARPORT=y
 CONFIG_PARPORT_PC=m
 CONFIG_PARPORT_1284=y
 CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_CRYPTOLOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=6144
 CONFIG_BLK_DEV_SD=y
index 1b8fd80cbe7f89ad43a89beef21bdadb813b56dd..57501b0aed92ef0a7eddf1e4331c57618c8d6467 100644 (file)
@@ -16,6 +16,7 @@ CONFIG_CGROUPS=y
 CONFIG_MEMCG=y
 CONFIG_CGROUP_PIDS=y
 CONFIG_CPUSETS=y
+CONFIG_USER_NS=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -267,9 +268,9 @@ CONFIG_CRYPTO_DEFLATE=m
 CONFIG_CRC_CCITT=m
 CONFIG_LIBCRC32C=y
 CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_KERNEL=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_SCHED_DEBUG is not set
index e8b4a03343d393f6baa6f44e1a1b05b4d0cdca66..8d03b3b26229e7d057076999fc02e77145494b1e 100644 (file)
@@ -59,20 +59,12 @@ void flush_dcache_page(struct page *page);
        flush_kernel_icache_range_asm(s,e);             \
 } while (0)
 
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-do { \
-       flush_cache_page(vma, vaddr, page_to_pfn(page)); \
-       memcpy(dst, src, len); \
-       flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); \
-} while (0)
-
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
-do { \
-       flush_cache_page(vma, vaddr, page_to_pfn(page)); \
-       memcpy(dst, src, len); \
-} while (0)
-
-void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn);
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+               unsigned long user_vaddr, void *dst, void *src, int len);
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+               unsigned long user_vaddr, void *dst, void *src, int len);
+void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
+               unsigned long pfn);
 void flush_cache_range(struct vm_area_struct *vma,
                unsigned long start, unsigned long end);
 
@@ -80,16 +72,7 @@ void flush_cache_range(struct vm_area_struct *vma,
 void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
 
 #define ARCH_HAS_FLUSH_ANON_PAGE
-static inline void
-flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
-{
-       if (PageAnon(page)) {
-               flush_tlb_page(vma, vmaddr);
-               preempt_disable();
-               flush_dcache_page_asm(page_to_phys(page), vmaddr);
-               preempt_enable();
-       }
-}
+void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr);
 
 #define ARCH_HAS_FLUSH_ON_KUNMAP
 static inline void kunmap_flush_on_unmap(void *addr)
index 0561568f7b4898d4959b97576f25ca08cde37ebe..6faaaa3ebe9b8f84fab3f34471a6d049b34291fa 100644 (file)
 #define copy_page(to, from)    copy_page_asm((void *)(to), (void *)(from))
 
 struct page;
+struct vm_area_struct;
 
 void clear_page_asm(void *page);
 void copy_page_asm(void *to, void *from);
 #define clear_user_page(vto, vaddr, page) clear_page_asm(vto)
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-                       struct page *pg);
+void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr,
+               struct vm_area_struct *vma);
+#define __HAVE_ARCH_COPY_USER_HIGHPAGE
 
 /*
  * These are used to make use of C type-checking..
index 939db6fe620bdb9af6c58c75d700147589bd844e..69765a6dbe89d7bccd53907969ed89c502a86c28 100644 (file)
@@ -160,7 +160,7 @@ extern void __update_cache(pte_t pte);
 #define SPACEID_SHIFT  (MAX_ADDRBITS - 32)
 #else
 #define MAX_ADDRBITS   (BITS_PER_LONG)
-#define MAX_ADDRESS    (1UL << MAX_ADDRBITS)
+#define MAX_ADDRESS    (1ULL << MAX_ADDRBITS)
 #define SPACEID_SHIFT  0
 #endif
 
index 23348199f3f8060e130b17819b2703b635928a52..0fd04073d4b685e5fea2b2ef8389f722db5c6793 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/shmparam.h>
+#include <asm/mmu_context.h>
 
 int split_tlb __ro_after_init;
 int dcache_stride __ro_after_init;
@@ -91,7 +92,7 @@ static inline void flush_data_cache(void)
 }
 
 
-/* Virtual address of pfn.  */
+/* Kernel virtual address of pfn.  */
 #define pfn_va(pfn)    __va(PFN_PHYS(pfn))
 
 void
@@ -124,11 +125,13 @@ show_cache_info(struct seq_file *m)
                cache_info.ic_size/1024 );
        if (cache_info.dc_loop != 1)
                snprintf(buf, 32, "%lu-way associative", cache_info.dc_loop);
-       seq_printf(m, "D-cache\t\t: %ld KB (%s%s, %s)\n",
+       seq_printf(m, "D-cache\t\t: %ld KB (%s%s, %s, alias=%d)\n",
                cache_info.dc_size/1024,
                (cache_info.dc_conf.cc_wt ? "WT":"WB"),
                (cache_info.dc_conf.cc_sh ? ", shared I/D":""),
-               ((cache_info.dc_loop == 1) ? "direct mapped" : buf));
+               ((cache_info.dc_loop == 1) ? "direct mapped" : buf),
+               cache_info.dc_conf.cc_alias
+       );
        seq_printf(m, "ITLB entries\t: %ld\n" "DTLB entries\t: %ld%s\n",
                cache_info.it_size,
                cache_info.dt_size,
@@ -324,25 +327,81 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
        preempt_enable();
 }
 
-static inline void
-__purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
-                  unsigned long physaddr)
+static void flush_user_cache_page(struct vm_area_struct *vma, unsigned long vmaddr)
 {
-       if (!static_branch_likely(&parisc_has_cache))
-               return;
+       unsigned long flags, space, pgd, prot;
+#ifdef CONFIG_TLB_PTLOCK
+       unsigned long pgd_lock;
+#endif
+
+       vmaddr &= PAGE_MASK;
+
        preempt_disable();
-       purge_dcache_page_asm(physaddr, vmaddr);
+
+       /* Set context for flush */
+       local_irq_save(flags);
+       prot = mfctl(8);
+       space = mfsp(SR_USER);
+       pgd = mfctl(25);
+#ifdef CONFIG_TLB_PTLOCK
+       pgd_lock = mfctl(28);
+#endif
+       switch_mm_irqs_off(NULL, vma->vm_mm, NULL);
+       local_irq_restore(flags);
+
+       flush_user_dcache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
        if (vma->vm_flags & VM_EXEC)
-               flush_icache_page_asm(physaddr, vmaddr);
+               flush_user_icache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
+       flush_tlb_page(vma, vmaddr);
+
+       /* Restore previous context */
+       local_irq_save(flags);
+#ifdef CONFIG_TLB_PTLOCK
+       mtctl(pgd_lock, 28);
+#endif
+       mtctl(pgd, 25);
+       mtsp(space, SR_USER);
+       mtctl(prot, 8);
+       local_irq_restore(flags);
+
        preempt_enable();
 }
 
+static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr)
+{
+       pte_t *ptep = NULL;
+       pgd_t *pgd = mm->pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       if (!pgd_none(*pgd)) {
+               p4d = p4d_offset(pgd, addr);
+               if (!p4d_none(*p4d)) {
+                       pud = pud_offset(p4d, addr);
+                       if (!pud_none(*pud)) {
+                               pmd = pmd_offset(pud, addr);
+                               if (!pmd_none(*pmd))
+                                       ptep = pte_offset_map(pmd, addr);
+                       }
+               }
+       }
+       return ptep;
+}
+
+static inline bool pte_needs_flush(pte_t pte)
+{
+       return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_NO_CACHE))
+               == (_PAGE_PRESENT | _PAGE_ACCESSED);
+}
+
 void flush_dcache_page(struct page *page)
 {
        struct address_space *mapping = page_mapping_file(page);
        struct vm_area_struct *mpnt;
        unsigned long offset;
        unsigned long addr, old_addr = 0;
+       unsigned long count = 0;
        pgoff_t pgoff;
 
        if (mapping && !mapping_mapped(mapping)) {
@@ -357,33 +416,52 @@ void flush_dcache_page(struct page *page)
 
        pgoff = page->index;
 
-       /* We have carefully arranged in arch_get_unmapped_area() that
+       /*
+        * We have carefully arranged in arch_get_unmapped_area() that
         * *any* mappings of a file are always congruently mapped (whether
         * declared as MAP_PRIVATE or MAP_SHARED), so we only need
-        * to flush one address here for them all to become coherent */
-
+        * to flush one address here for them all to become coherent
+        * on machines that support equivalent aliasing
+        */
        flush_dcache_mmap_lock(mapping);
        vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
                offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
                addr = mpnt->vm_start + offset;
+               if (parisc_requires_coherency()) {
+                       pte_t *ptep;
 
-               /* The TLB is the engine of coherence on parisc: The
-                * CPU is entitled to speculate any page with a TLB
-                * mapping, so here we kill the mapping then flush the
-                * page along a special flush only alias mapping.
-                * This guarantees that the page is no-longer in the
-                * cache for any process and nor may it be
-                * speculatively read in (until the user or kernel
-                * specifically accesses it, of course) */
-
-               flush_tlb_page(mpnt, addr);
-               if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
-                                     != (addr & (SHM_COLOUR - 1))) {
-                       __flush_cache_page(mpnt, addr, page_to_phys(page));
-                       if (parisc_requires_coherency() && old_addr)
-                               printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", old_addr, addr, mpnt->vm_file);
-                       old_addr = addr;
+                       ptep = get_ptep(mpnt->vm_mm, addr);
+                       if (ptep && pte_needs_flush(*ptep))
+                               flush_user_cache_page(mpnt, addr);
+               } else {
+                       /*
+                        * The TLB is the engine of coherence on parisc:
+                        * The CPU is entitled to speculate any page
+                        * with a TLB mapping, so here we kill the
+                        * mapping then flush the page along a special
+                        * flush only alias mapping. This guarantees that
+                        * the page is no longer in the cache for any
+                        * process and nor may it be speculatively read
+                        * in (until the user or kernel specifically
+                        * accesses it, of course)
+                        */
+                       flush_tlb_page(mpnt, addr);
+                       if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
+                                       != (addr & (SHM_COLOUR - 1))) {
+                               __flush_cache_page(mpnt, addr, page_to_phys(page));
+                               /*
+                                * Software is allowed to have any number
+                                * of private mappings to a page.
+                                */
+                               if (!(mpnt->vm_flags & VM_SHARED))
+                                       continue;
+                               if (old_addr)
+                                       pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n",
+                                               old_addr, addr, mpnt->vm_file);
+                               old_addr = addr;
+                       }
                }
+               WARN_ON(++count == 4096);
        }
        flush_dcache_mmap_unlock(mapping);
 }
@@ -417,23 +495,16 @@ void __init parisc_setup_cache_timing(void)
        printk(KERN_DEBUG "Whole cache flush %lu cycles, flushing %lu bytes %lu cycles\n",
                alltime, size, rangetime);
 
-       threshold = L1_CACHE_ALIGN(size * alltime / rangetime);
+       threshold = L1_CACHE_ALIGN((unsigned long)((uint64_t)size * alltime / rangetime));
+       pr_info("Calculated flush threshold is %lu KiB\n",
+               threshold/1024);
 
        /*
-        * The threshold computed above isn't very reliable since the
-        * flush times depend greatly on the percentage of dirty lines
-        * in the flush range. Further, the whole cache time doesn't
-        * include the time to refill lines that aren't in the mm/vma
-        * being flushed. By timing glibc build and checks on mako cpus,
-        * the following formula seems to work reasonably well. The
-        * value from the timing calculation is too small, and increases
-        * build and check times by almost a factor two.
+        * The threshold computed above isn't very reliable. The following
+        * heuristic works reasonably well on c8000/rp3440.
         */
        threshold2 = cache_info.dc_size * num_online_cpus();
-       if (threshold2 > threshold)
-               threshold = threshold2;
-       if (threshold)
-               parisc_cache_flush_threshold = threshold;
+       parisc_cache_flush_threshold = threshold2;
        printk(KERN_INFO "Cache flush threshold set to %lu KiB\n",
                parisc_cache_flush_threshold/1024);
 
@@ -489,19 +560,47 @@ void flush_kernel_dcache_page_addr(void *addr)
 }
 EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
 
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-       struct page *pg)
+static void flush_cache_page_if_present(struct vm_area_struct *vma,
+       unsigned long vmaddr, unsigned long pfn)
 {
-       /* Copy using kernel mapping.  No coherency is needed (all in
-         kunmap) for the `to' page.  However, the `from' page needs to
-         be flushed through a mapping equivalent to the user mapping
-         before it can be accessed through the kernel mapping. */
-       preempt_disable();
-       flush_dcache_page_asm(__pa(vfrom), vaddr);
-       copy_page_asm(vto, vfrom);
-       preempt_enable();
+       pte_t *ptep = get_ptep(vma->vm_mm, vmaddr);
+
+       /*
+        * The pte check is racy and sometimes the flush will trigger
+        * a non-access TLB miss. Hopefully, the page has already been
+        * flushed.
+        */
+       if (ptep && pte_needs_flush(*ptep))
+               flush_cache_page(vma, vmaddr, pfn);
+}
+
+void copy_user_highpage(struct page *to, struct page *from,
+       unsigned long vaddr, struct vm_area_struct *vma)
+{
+       void *kto, *kfrom;
+
+       kfrom = kmap_local_page(from);
+       kto = kmap_local_page(to);
+       flush_cache_page_if_present(vma, vaddr, page_to_pfn(from));
+       copy_page_asm(kto, kfrom);
+       kunmap_local(kto);
+       kunmap_local(kfrom);
+}
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+               unsigned long user_vaddr, void *dst, void *src, int len)
+{
+       flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+       memcpy(dst, src, len);
+       flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len);
+}
+
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+               unsigned long user_vaddr, void *dst, void *src, int len)
+{
+       flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+       memcpy(dst, src, len);
 }
-EXPORT_SYMBOL(copy_user_page);
 
 /* __flush_tlb_range()
  *
@@ -532,92 +631,105 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
        return 0;
 }
 
-static inline unsigned long mm_total_size(struct mm_struct *mm)
+static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-       struct vm_area_struct *vma;
-       unsigned long usize = 0;
-
-       for (vma = mm->mmap; vma; vma = vma->vm_next)
-               usize += vma->vm_end - vma->vm_start;
-       return usize;
-}
-
-static inline pte_t *get_ptep(pgd_t *pgd, unsigned long addr)
-{
-       pte_t *ptep = NULL;
+       unsigned long addr, pfn;
+       pte_t *ptep;
 
-       if (!pgd_none(*pgd)) {
-               p4d_t *p4d = p4d_offset(pgd, addr);
-               if (!p4d_none(*p4d)) {
-                       pud_t *pud = pud_offset(p4d, addr);
-                       if (!pud_none(*pud)) {
-                               pmd_t *pmd = pmd_offset(pud, addr);
-                               if (!pmd_none(*pmd))
-                                       ptep = pte_offset_map(pmd, addr);
+       for (addr = start; addr < end; addr += PAGE_SIZE) {
+               /*
+                * The vma can contain pages that aren't present. Although
+                * the pte search is expensive, we need the pte to find the
+                * page pfn and to check whether the page should be flushed.
+                */
+               ptep = get_ptep(vma->vm_mm, addr);
+               if (ptep && pte_needs_flush(*ptep)) {
+                       if (parisc_requires_coherency()) {
+                               flush_user_cache_page(vma, addr);
+                       } else {
+                               pfn = pte_pfn(*ptep);
+                               if (WARN_ON(!pfn_valid(pfn)))
+                                       return;
+                               __flush_cache_page(vma, addr, PFN_PHYS(pfn));
                        }
                }
        }
-       return ptep;
 }
 
-static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm,
-                             unsigned long start, unsigned long end)
+static inline unsigned long mm_total_size(struct mm_struct *mm)
 {
-       unsigned long addr, pfn;
-       pte_t *ptep;
+       struct vm_area_struct *vma;
+       unsigned long usize = 0;
 
-       for (addr = start; addr < end; addr += PAGE_SIZE) {
-               ptep = get_ptep(mm->pgd, addr);
-               if (ptep) {
-                       pfn = pte_pfn(*ptep);
-                       flush_cache_page(vma, addr, pfn);
-               }
-       }
+       for (vma = mm->mmap; vma && usize < parisc_cache_flush_threshold; vma = vma->vm_next)
+               usize += vma->vm_end - vma->vm_start;
+       return usize;
 }
 
 void flush_cache_mm(struct mm_struct *mm)
 {
        struct vm_area_struct *vma;
 
-       /* Flushing the whole cache on each cpu takes forever on
-          rp3440, etc.  So, avoid it if the mm isn't too big.  */
-       if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
-           mm_total_size(mm) >= parisc_cache_flush_threshold) {
-               if (mm->context.space_id)
-                       flush_tlb_all();
+       /*
+        * Flushing the whole cache on each cpu takes forever on
+        * rp3440, etc. So, avoid it if the mm isn't too big.
+        *
+        * Note that we must flush the entire cache on machines
+        * with aliasing caches to prevent random segmentation
+        * faults.
+        */
+       if (!parisc_requires_coherency()
+           ||  mm_total_size(mm) >= parisc_cache_flush_threshold) {
+               if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled()))
+                       return;
+               flush_tlb_all();
                flush_cache_all();
                return;
        }
 
+       /* Flush mm */
        for (vma = mm->mmap; vma; vma = vma->vm_next)
-               flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end);
+               flush_cache_pages(vma, vma->vm_start, vma->vm_end);
 }
 
-void flush_cache_range(struct vm_area_struct *vma,
-               unsigned long start, unsigned long end)
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-       if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
-           end - start >= parisc_cache_flush_threshold) {
-               if (vma->vm_mm->context.space_id)
-                       flush_tlb_range(vma, start, end);
+       if (!parisc_requires_coherency()
+           || end - start >= parisc_cache_flush_threshold) {
+               if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled()))
+                       return;
+               flush_tlb_range(vma, start, end);
                flush_cache_all();
                return;
        }
 
-       flush_cache_pages(vma, vma->vm_mm, start, end);
+       flush_cache_pages(vma, start, end);
 }
 
-void
-flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
+void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
 {
-       if (pfn_valid(pfn)) {
-               if (likely(vma->vm_mm->context.space_id)) {
-                       flush_tlb_page(vma, vmaddr);
-                       __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
-               } else {
-                       __purge_cache_page(vma, vmaddr, PFN_PHYS(pfn));
-               }
+       if (WARN_ON(!pfn_valid(pfn)))
+               return;
+       if (parisc_requires_coherency())
+               flush_user_cache_page(vma, vmaddr);
+       else
+               __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
+}
+
+void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
+{
+       if (!PageAnon(page))
+               return;
+
+       if (parisc_requires_coherency()) {
+               flush_user_cache_page(vma, vmaddr);
+               return;
        }
+
+       flush_tlb_page(vma, vmaddr);
+       preempt_disable();
+       flush_dcache_page_asm(page_to_phys(page), vmaddr);
+       preempt_enable();
 }
 
 void flush_kernel_vmap_range(void *vaddr, int size)
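[Note: much of the cache.c rework above turns on cache-colour congruence: on parisc, two virtual mappings of a page can share cache lines only if they agree modulo the colour size. A sketch of the test flush_dcache_page() applies, assuming the 4 MB SHM_COLOUR from asm/shmparam.h:

#include <stdbool.h>

#define SHM_COLOUR	0x00400000UL	/* assumed 4 MB, per asm/shmparam.h */

static bool same_cache_colour(unsigned long a, unsigned long b)
{
	return (a & (SHM_COLOUR - 1)) == (b & (SHM_COLOUR - 1));
}
]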
index 3343d2fb788970241562727e66265a5f6cd3f430..6e0b86652f30d6aed40ac072a921aae7a28c883b 100644 (file)
@@ -152,7 +152,7 @@ int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs)
        /* for absolute branch instructions we can copy iaoq_b. for relative
         * branch instructions we need to calculate the new address based on the
         * difference between iaoq_f and iaoq_b. We cannot use iaoq_b without
-        * modificationt because it's based on our ainsn.insn address.
+        * modifications because it's based on our ainsn.insn address.
         */
 
        if (p->post_handler)
index d98692115221a119579a9eec8782db87a194f85c..26eb568f8b961e41ed4612a65ee8c6bd4ea70864 100644 (file)
@@ -171,6 +171,7 @@ static int __init processor_probe(struct parisc_device *dev)
        p->cpu_num = cpu_info.cpu_num;
        p->cpu_loc = cpu_info.cpu_loc;
 
+       set_cpu_possible(cpuid, true);
        store_cpu_topology(cpuid);
 
 #ifdef CONFIG_SMP
@@ -419,8 +420,7 @@ show_cpuinfo (struct seq_file *m, void *v)
                }
                seq_printf(m, " (0x%02lx)\n", boot_cpu_data.pdc.capabilities);
 
-               seq_printf(m, "model\t\t: %s\n"
-                               "model name\t: %s\n",
+               seq_printf(m, "model\t\t: %s - %s\n",
                                 boot_cpu_data.pdc.sys_model_name,
                                 cpuinfo->dev ?
                                 cpuinfo->dev->name : "Unknown");
@@ -461,6 +461,13 @@ static struct parisc_driver cpu_driver __refdata = {
  */
 void __init processor_init(void)
 {
+       unsigned int cpu;
+
        reset_cpu_topology();
+
+       /* reset possible mask. We will mark those which are possible. */
+       for_each_possible_cpu(cpu)
+               set_cpu_possible(cpu, false);
+
        register_parisc_driver(&cpu_driver);
 }
index b91cb45ffd4e3eaf4afee92c2f60efbc92f2f8e3..f005ddedb50e469653bcec1d5eb10c062a414311 100644 (file)
@@ -161,6 +161,8 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_PA11
        dma_ops_init();
 #endif
+
+       clear_sched_clock_stable();
 }
 
 /*
index bb27dfeeddfcc2f776c70ccf0a84df3c8971da09..9714fbd7c42d65819dea49264c255de9b89032e1 100644 (file)
@@ -251,13 +251,9 @@ void __init time_init(void)
 static int __init init_cr16_clocksource(void)
 {
        /*
-        * The cr16 interval timers are not syncronized across CPUs, even if
-        * they share the same socket.
+        * The cr16 interval timers are not synchronized across CPUs.
         */
        if (num_online_cpus() > 1 && !running_on_qemu) {
-               /* mark sched_clock unstable */
-               clear_sched_clock_stable();
-
                clocksource_cr16.name = "cr16_unstable";
                clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
                clocksource_cr16.rating = 0;
index a6e61cf2cad045b265a64513ed9f8753af5842e5..b78f1b9d45c18b1e382e6c68961db149fa03c5f0 100644 (file)
@@ -469,7 +469,7 @@ void parisc_terminate(char *msg, struct pt_regs *regs, int code, unsigned long o
         * panic notifiers, and we should call panic
         * directly from the location that we wish. 
         * e.g. We should not call panic from
-        * parisc_terminate, but rather the oter way around.
+        * parisc_terminate, but rather the other way around.
         * This hack works, prints the panic message twice,
         * and it enables reboot timers!
         */
index ec487e07f004fbb80c8bb48207c3e0a446bdbe28..00e561d4aa550d41ea77da5764a7f6dbcdb89a88 100644 (file)
@@ -253,7 +253,7 @@ dbl_fadd(
            return(NOEXCEPTION);
            }
        right_exponent = 1;     /* Set exponent to reflect different bias
-                                * with denomalized numbers. */
+                                * with denormalized numbers. */
        }
     else
        {
index c4f30acf2d48d25a68848bd5dcb1135b46cf4ca7..4f03782284bd0a2ce618d85f79f8dbd7b38d2f91 100644 (file)
@@ -256,7 +256,7 @@ dbl_fsub(
            return(NOEXCEPTION);
            }
        right_exponent = 1;     /* Set exponent to reflect different bias
-                                * with denomalized numbers. */
+                                * with denormalized numbers. */
        }
     else
        {
index 838758279d5bd26ce1d7d200ff8cdc5a9ad1faf9..9b98c874dfac7f2f6044f24f14bc08aad9ff3f2d 100644 (file)
@@ -249,7 +249,7 @@ sgl_fadd(
            return(NOEXCEPTION);
            }
        right_exponent = 1;     /* Set exponent to reflect different bias
-                                * with denomalized numbers. */
+                                * with denormalized numbers. */
        }
     else
        {
index 583d3ace46346c084993478ddd7c392a8b5b5c33..29d9eed09d12d7e34eb8fcdab67ce40cb764d220 100644 (file)
@@ -252,7 +252,7 @@ sgl_fsub(
            return(NOEXCEPTION);
            }
        right_exponent = 1;     /* Set exponent to reflect different bias
-                                * with denomalized numbers. */
+                                * with denormalized numbers. */
        }
     else
        {
index f114e102aaf213ab69c5f39665faf9a5737693a9..84bc437be5cd1f4d5efea94d024389650d6bd46a 100644 (file)
@@ -22,6 +22,8 @@
 
 #include <asm/traps.h>
 
+#define DEBUG_NATLB 0
+
 /* Various important other fields */
 #define bit22set(x)            (x & 0x00000200)
 #define bits23_25set(x)                (x & 0x000001c0)
@@ -450,8 +452,8 @@ handle_nadtlb_fault(struct pt_regs *regs)
                fallthrough;
        case 0x380:
                /* PDC and FIC instructions */
-               if (printk_ratelimit()) {
-                       pr_warn("BUG: nullifying cache flush/purge instruction\n");
+               if (DEBUG_NATLB && printk_ratelimit()) {
+                       pr_warn("WARNING: nullifying cache flush/purge instruction\n");
                        show_regs(regs);
                }
                if (insn & 0x20) {
index ecbae1832de31ffd540ccaaf085d288d478c83ab..61a4736355c244448104080e144631a3cb8839b2 100644 (file)
@@ -13,7 +13,8 @@
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 .macro __EMIT_BUG_ENTRY addr,file,line,flags
         .section __bug_table,"aw"
-5001:   .4byte \addr - 5001b, 5002f - 5001b
+5001:   .4byte \addr - .
+        .4byte 5002f - .
         .short \line, \flags
         .org 5001b+BUG_ENTRY_SIZE
         .previous
@@ -24,7 +25,7 @@
 #else
 .macro __EMIT_BUG_ENTRY addr,file,line,flags
         .section __bug_table,"aw"
-5001:   .4byte \addr - 5001b
+5001:   .4byte \addr - .
         .short \flags
         .org 5001b+BUG_ENTRY_SIZE
         .previous
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 #define _EMIT_BUG_ENTRY                                \
        ".section __bug_table,\"aw\"\n"         \
-       "2:\t.4byte 1b - 2b, %0 - 2b\n"         \
-       "\t.short %1, %2\n"                     \
+       "2:     .4byte 1b - .\n"                \
+       "       .4byte %0 - .\n"                \
+       "       .short %1, %2\n"                \
        ".org 2b+%3\n"                          \
        ".previous\n"
 #else
 #define _EMIT_BUG_ENTRY                                \
        ".section __bug_table,\"aw\"\n"         \
-       "2:\t.4byte 1b - 2b\n"                  \
-       "\t.short %2\n"                         \
+       "2:     .4byte 1b - .\n"                \
+       "       .short %2\n"                    \
        ".org 2b+%3\n"                          \
        ".previous\n"
 #endif
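[Note: the change from "5001b"-relative to "."-relative displacements makes every 32-bit word of a bug table entry relative to its own location, so an entry can be decoded without knowing where it starts. A sketch of such decoding, assuming the CONFIG_DEBUG_BUGVERBOSE layout emitted above:

#include <stdint.h>

struct bug_entry {			/* mirrors the asm layout above */
	int32_t bug_addr_disp;		/* .4byte \addr - .  */
	int32_t file_disp;		/* .4byte 5002f - .  */
	uint16_t line;
	uint16_t flags;
};

static uintptr_t bug_addr(const struct bug_entry *e)
{
	/* add the word's own address back to recover the absolute one */
	return (uintptr_t)&e->bug_addr_disp + (intptr_t)e->bug_addr_disp;
}
]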
index 827038a33064b9b5703d617bdb23dc8cf1e6b04e..4def2bd17b9b865fc410866876fb5e43860afb1f 100644 (file)
 #include <asm/ppc-opcode.h>
 #include <asm/pte-walk.h>
 
-#ifdef CONFIG_PPC_PSERIES
-static inline bool kvmhv_on_pseries(void)
-{
-       return !cpu_has_feature(CPU_FTR_HVMODE);
-}
-#else
-static inline bool kvmhv_on_pseries(void)
-{
-       return false;
-}
-#endif
-
 /*
  * Structure for a nested guest, that is, for a guest that is managed by
  * one of our guests.
index c583d0c37f319247471811960d9b6e534ace6b23..838d4cb460b7edf6b9a820a509e763e63298b0fe 100644 (file)
@@ -586,6 +586,18 @@ static inline bool kvm_hv_mode_active(void)                { return false; }
 
 #endif
 
+#ifdef CONFIG_PPC_PSERIES
+static inline bool kvmhv_on_pseries(void)
+{
+       return !cpu_has_feature(CPU_FTR_HVMODE);
+}
+#else
+static inline bool kvmhv_on_pseries(void)
+{
+       return false;
+}
+#endif
+
 #ifdef CONFIG_KVM_XICS
 static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
 {
index 254687258f42b1673d4becc5ff213a32db18bb24..f2c5c26869f1a43888681bdee0633aab3cbf3784 100644 (file)
@@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn)
 #define virt_to_page(kaddr)    pfn_to_page(virt_to_pfn(kaddr))
 #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
 
-#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
+#define virt_addr_valid(vaddr) ({                                      \
+       unsigned long _addr = (unsigned long)vaddr;                     \
+       _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory &&   \
+       pfn_valid(virt_to_pfn(_addr));                                  \
+})
 
 /*
  * On Book-E parts we need __va to parse the device tree and we can't
index 049ca26893e61033f3319b632ae3f4410fd2fe0d..8fa37ef5da4d177cc80e9c425a16aec31edb5a88 100644 (file)
@@ -28,11 +28,13 @@ void setup_panic(void);
 #define ARCH_PANIC_TIMEOUT 180
 
 #ifdef CONFIG_PPC_PSERIES
+extern bool pseries_reloc_on_exception(void);
 extern bool pseries_enable_reloc_on_exc(void);
 extern void pseries_disable_reloc_on_exc(void);
 extern void pseries_big_endian_exceptions(void);
 void __init pseries_little_endian_exceptions(void);
 #else
+static inline bool pseries_reloc_on_exception(void) { return false; }
 static inline bool pseries_enable_reloc_on_exc(void) { return false; }
 static inline void pseries_disable_reloc_on_exc(void) {}
 static inline void pseries_big_endian_exceptions(void) {}
index 0a0bc79bd1fa951337aa6ecbf5ba8ef705507e4d..de1018cc522b376fe958f561f26305c3a98fb2c4 100644 (file)
@@ -24,5 +24,6 @@
 
 #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)      __PPC_SCT(name, "b " #func)
 #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)       __PPC_SCT(name, "blr")
+#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)       __PPC_SCT(name, "b .+20")
 
 #endif /* _ASM_POWERPC_STATIC_CALL_H */
index 55caeee37c087190bee29126242ca79813d92bb0..b66dd6f775a4079f078b97b7d91f02e47b328298 100644 (file)
@@ -809,6 +809,10 @@ __start_interrupts:
  * - MSR_EE|MSR_RI is clear (no reentrant exceptions)
  * - Standard kernel environment is set up (stack, paca, etc)
  *
+ * KVM:
+ * These interrupts do not elevate HV 0->1, so HV is not involved. PR KVM
+ * ensures that FSCR[SCV] is disabled whenever it has to force AIL off.
+ *
  * Call convention:
  *
  * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
index 65562c4a0a690b7672eddacfe81c2ecda083fcd7..4c09c6688ac6685665ddfc82aef3376a52b366d9 100644 (file)
@@ -752,7 +752,7 @@ u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
         * FIXME: How do i get PID? Do I really need it?
         * prstatus.pr_pid = ????
         */
-       elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
+       elf_core_copy_regs(&prstatus.pr_reg, regs);
        buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
                              &prstatus, sizeof(prstatus));
        return buf;
index 40a583e9d3c70b02e630c927c14d5417d7773976..97a76a8619fbd47bac406a3f23830ce04a13723a 100644 (file)
@@ -101,7 +101,7 @@ __module_alloc(unsigned long size, unsigned long start, unsigned long end, bool
         * too.
         */
        return __vmalloc_node_range(size, 1, start, end, gfp, prot,
-                                   VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
+                                   VM_FLUSH_RESET_PERMS,
                                    NUMA_NO_NODE, __builtin_return_address(0));
 }
 
index e547066a06aa6860bf39f5404acdef820af4632f..a96f05063bc9108da0770877ad3b2b86291f8c90 100644 (file)
@@ -196,6 +196,34 @@ static void __init configure_exceptions(void)
 
        /* Under a PAPR hypervisor, we need hypercalls */
        if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+               /*
+                * - PR KVM does not support AIL mode interrupts in the host
+                *   while a PR guest is running.
+                *
+                * - SCV system call interrupt vectors are only implemented for
+                *   AIL mode interrupts.
+                *
+                * - On pseries, AIL mode can only be enabled and disabled
+                *   system-wide so when a PR VM is created on a pseries host,
+                *   all CPUs of the host are set to AIL=0 mode.
+                *
+                * - Therefore host CPUs must not execute scv while a PR VM
+                *   exists.
+                *
+                * - SCV support can not be disabled dynamically because the
+                *   feature is advertised to host userspace. Disabling the
+                *   facility and emulating it would be possible but is not
+                *   implemented.
+                *
+                * - So SCV support is blanket disabled if PR KVM could possibly
+                *   run. That is, PR support compiled in, booting on pseries
+                *   with hash MMU.
+                */
+               if (IS_ENABLED(CONFIG_KVM_BOOK3S_PR_POSSIBLE) && !radix_enabled()) {
+                       init_task.thread.fscr &= ~FSCR_SCV;
+                       cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+               }
+
                /* Enable AIL if possible */
                if (!pseries_enable_reloc_on_exc()) {
                        init_task.thread.fscr &= ~FSCR_SCV;
index f5cbfe5efd25fdfe2236f40194388511c7f320a4..f80cce0e38994538dcef03bd2dec3f0e03ffa716 100644 (file)
@@ -615,23 +615,22 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
                return;
        }
 
-       /* Conditionally hard-enable interrupts. */
-       if (should_hard_irq_enable()) {
-               /*
-                * Ensure a positive value is written to the decrementer, or
-                * else some CPUs will continue to take decrementer exceptions.
-                * When the PPC_WATCHDOG (decrementer based) is configured,
-                * keep this at most 31 bits, which is about 4 seconds on most
-                * systems, which gives the watchdog a chance of catching timer
-                * interrupt hard lockups.
-                */
-               if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
-                       set_dec(0x7fffffff);
-               else
-                       set_dec(decrementer_max);
+       /*
+        * Ensure a positive value is written to the decrementer, or
+        * else some CPUs will continue to take decrementer exceptions.
+        * When the PPC_WATCHDOG (decrementer based) is configured,
+        * keep this at most 31 bits, which is about 4 seconds on most
+        * systems, which gives the watchdog a chance of catching timer
+        * interrupt hard lockups.
+        */
+       if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
+               set_dec(0x7fffffff);
+       else
+               set_dec(decrementer_max);
 
+       /* Conditionally hard-enable interrupts. */
+       if (should_hard_irq_enable())
                do_hard_irq_enable();
-       }
 
 #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
        if (atomic_read(&ppc_n_lost_interrupts) != 0)
index eb9c81e1c218526d1ddc0ac76885894ed0fe74ec..0c4ecc8fec5a64d1f56f5ee4cf9dc80760ac24ce 100644 (file)
 .macro cvdso_call funct call_time=0
   .cfi_startproc
        PPC_STLU        r1, -PPC_MIN_STKFRM(r1)
+  .cfi_adjust_cfa_offset PPC_MIN_STKFRM
        mflr            r0
-  .cfi_register lr, r0
        PPC_STLU        r1, -PPC_MIN_STKFRM(r1)
+  .cfi_adjust_cfa_offset PPC_MIN_STKFRM
        PPC_STL         r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
+  .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF
 #ifdef __powerpc64__
        PPC_STL         r2, PPC_MIN_STKFRM + STK_GOT(r1)
+  .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT
 #endif
        get_datapage    r5
        .ifeq   \call_time
        PPC_LL          r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
 #ifdef __powerpc64__
        PPC_LL          r2, PPC_MIN_STKFRM + STK_GOT(r1)
+  .cfi_restore r2
 #endif
        .ifeq   \call_time
        cmpwi           r3, 0
        .endif
        mtlr            r0
-  .cfi_restore lr
        addi            r1, r1, 2 * PPC_MIN_STKFRM
+  .cfi_restore lr
+  .cfi_def_cfa_offset 0
        crclr           so
        .ifeq   \call_time
        beqlr+
index 18e58085447cb3168ef519839db8868f1f475792..ddd88179110a096f3cb06ec1b92aed6715754001 100644 (file)
@@ -112,12 +112,21 @@ config KVM_BOOK3S_64_PR
          guest in user mode (problem state) and emulating all
          privileged instructions and registers.
 
+         This is only available for hash MMU mode and only supports
+         guests that use hash MMU mode.
+
          This is not as fast as using hypervisor mode, but works on
          machines where hypervisor mode is not available or not usable,
          and can emulate processors that are different from the host
          processor, including emulating 32-bit processors on a 64-bit
          host.
 
+         Selecting this option will cause the SCV facility to be
+         disabled when the kernel is booted on the pseries platform in
+         hash MMU mode (regardless of PR VMs running). When any PR VMs
+         are running, "AIL" mode is disabled which may slow interrupts
+         and system calls on the host.
+
 config KVM_BOOK3S_HV_EXIT_TIMING
        bool "Detailed timing for hypervisor real-mode code"
        depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS
index e3ab9df6cf199c1151464ac08cc62460d08e210f..6cfcd20d466862dd8bdb9c4ee63ccc9496a25f84 100644 (file)
 
        /* 0x0 - 0xb */
 
-       /* 'current->mm' needs to be in r4 */
-       tophys(r4, r2)
-       lwz     r4, MM(r4)
-       tophys(r4, r4)
-       /* This only clobbers r0, r3, r4 and r5 */
+       /* switch_mmu_context() needs paging, let's enable it */
+       mfmsr   r9
+       ori     r11, r9, MSR_DR
+       mtmsr   r11
+       sync
+
+       /* switch_mmu_context() clobbers r12, rescue it */
+       SAVE_GPR(12, r1)
+
+       /* Calling switch_mmu_context(<inv>, current->mm, <inv>); */
+       lwz     r4, MM(r2)
        bl      switch_mmu_context
 
+       /* restore r12 */
+       REST_GPR(12, r1)
+
+       /* Disable paging again */
+       mfmsr   r9
+       li      r6, MSR_DR
+       andc    r9, r9, r6
+       mtmsr   r9
+       sync
+
 .endm
index 05e003eb5d9063855f412b08bc519e9fa679f6c8..e42d1c609e4767f04318f2b586b88045c6e0acaa 100644 (file)
@@ -414,10 +414,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
         */
        ld      r10,HSTATE_SCRATCH0(r13)
        cmpwi   r10,BOOK3S_INTERRUPT_MACHINE_CHECK
-       beq     machine_check_common
+       beq     .Lcall_machine_check_common
 
        cmpwi   r10,BOOK3S_INTERRUPT_SYSTEM_RESET
-       beq     system_reset_common
+       beq     .Lcall_system_reset_common
 
        b       .
+
+.Lcall_machine_check_common:
+       b       machine_check_common
+
+.Lcall_system_reset_common:
+       b       system_reset_common
 #endif
index e4ce2a35483f6fcb32f00b50c633c563375e1a1d..42851c32ff3bee0eb576b3d90c768fa7313f247c 100644 (file)
@@ -168,9 +168,10 @@ int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
                        return -EINVAL;
                /* Read the entry from guest memory */
                addr = base + (index * sizeof(rpte));
-               vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+
+               kvm_vcpu_srcu_read_lock(vcpu);
                ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
-               srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                if (ret) {
                        if (pte_ret_p)
                                *pte_ret_p = addr;
@@ -246,9 +247,9 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
 
        /* Read the table to find the root of the radix tree */
        ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
-       vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
        ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
-       srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        if (ret)
                return ret;
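The kvm_vcpu_srcu_read_lock()/unlock() conversions above (repeated across the KVM files that follow) replace open-coded srcu_read_lock(&kvm->srcu) pairs that stashed the index by hand. A minimal sketch of what such wrappers plausibly look like; the authoritative definitions live in include/linux/kvm_host.h and may carry extra debug checks, and the field name here is an assumption:

	static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
	{
		/* Stash the SRCU index so callers need not track it. */
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
	}

	static inline void kvm_vcpu_srcu_read_unlock(struct kvm_vcpu *vcpu)
	{
		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	}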
 
index d42b4b6d4a791d17fde665688a3d08e70a08c01e..85cfa6328222b326736f2309d31c12e46a80fc6b 100644 (file)
@@ -420,13 +420,19 @@ static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
        tbl[idx % TCES_PER_PAGE] = tce;
 }
 
-static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
-               unsigned long entry)
+static void kvmppc_clear_tce(struct mm_struct *mm, struct kvmppc_spapr_tce_table *stt,
+               struct iommu_table *tbl, unsigned long entry)
 {
-       unsigned long hpa = 0;
-       enum dma_data_direction dir = DMA_NONE;
+       unsigned long i;
+       unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+       unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
+
+       for (i = 0; i < subpages; ++i) {
+               unsigned long hpa = 0;
+               enum dma_data_direction dir = DMA_NONE;
 
-       iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir);
+               iommu_tce_xchg_no_kill(mm, tbl, io_entry + i, &hpa, &dir);
+       }
 }
 
 static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -485,6 +491,8 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
                        break;
        }
 
+       iommu_tce_kill(tbl, io_entry, subpages);
+
        return ret;
 }
 
@@ -544,6 +552,8 @@ static long kvmppc_tce_iommu_map(struct kvm *kvm,
                        break;
        }
 
+       iommu_tce_kill(tbl, io_entry, subpages);
+
        return ret;
 }
 
@@ -590,10 +600,9 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
                        ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
                                        entry, ua, dir);
 
-               iommu_tce_kill(stit->tbl, entry, 1);
 
                if (ret != H_SUCCESS) {
-                       kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
+                       kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry);
                        goto unlock_exit;
                }
        }
@@ -669,13 +678,13 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
                 */
                if (get_user(tce, tces + i)) {
                        ret = H_TOO_HARD;
-                       goto invalidate_exit;
+                       goto unlock_exit;
                }
                tce = be64_to_cpu(tce);
 
                if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
                        ret = H_PARAMETER;
-                       goto invalidate_exit;
+                       goto unlock_exit;
                }
 
                list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -684,19 +693,15 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
                                        iommu_tce_direction(tce));
 
                        if (ret != H_SUCCESS) {
-                               kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
-                                               entry);
-                               goto invalidate_exit;
+                               kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl,
+                                                entry + i);
+                               goto unlock_exit;
                        }
                }
 
                kvmppc_tce_put(stt, entry + i, tce);
        }
 
-invalidate_exit:
-       list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
-               iommu_tce_kill(stit->tbl, entry, npages);
-
 unlock_exit:
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
@@ -735,20 +740,16 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
                                continue;
 
                        if (ret == H_TOO_HARD)
-                               goto invalidate_exit;
+                               return ret;
 
                        WARN_ON_ONCE(1);
-                       kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
+                       kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry + i);
                }
        }
 
        for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
                kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
 
-invalidate_exit:
-       list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
-               iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages);
-
        return ret;
 }
 EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
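The new loop in kvmppc_clear_tce() above derives how many host IOMMU entries back one guest TCE from the difference of the two page shifts. A standalone sketch of that arithmetic, with assumed sizes (64K guest TCE pages over 4K host IOMMU pages):

	#include <stdio.h>

	int main(void)
	{
		unsigned int page_shift = 16;    /* assumed guest TCE page size: 64K */
		unsigned int it_page_shift = 12; /* assumed host IOMMU page size: 4K */
		unsigned long entry = 3;         /* arbitrary guest TCE index */

		unsigned long subpages = 1UL << (page_shift - it_page_shift);
		unsigned long io_entry = entry << (page_shift - it_page_shift);

		/* Prints: entry 3 -> 16 host entries, 48..63 */
		printf("entry %lu -> %lu host entries, %lu..%lu\n",
		       entry, subpages, io_entry, io_entry + subpages - 1);
		return 0;
	}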
index 870b7f0c7ea561fb323439add7e294a3007cdb6f..fdeda6a9cff449b1a13cc45a05ae1629183b13b3 100644 (file)
@@ -247,13 +247,19 @@ static void iommu_tce_kill_rm(struct iommu_table *tbl,
                tbl->it_ops->tce_kill(tbl, entry, pages, true);
 }
 
-static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
-               unsigned long entry)
+static void kvmppc_rm_clear_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt,
+               struct iommu_table *tbl, unsigned long entry)
 {
-       unsigned long hpa = 0;
-       enum dma_data_direction dir = DMA_NONE;
+       unsigned long i;
+       unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+       unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
+
+       for (i = 0; i < subpages; ++i) {
+               unsigned long hpa = 0;
+               enum dma_data_direction dir = DMA_NONE;
 
-       iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
+               iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, io_entry + i, &hpa, &dir);
+       }
 }
 
 static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -316,6 +322,8 @@ static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
                        break;
        }
 
+       iommu_tce_kill_rm(tbl, io_entry, subpages);
+
        return ret;
 }
 
@@ -379,6 +387,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm,
                        break;
        }
 
+       iommu_tce_kill_rm(tbl, io_entry, subpages);
+
        return ret;
 }
 
@@ -420,10 +430,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
                        ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
                                        stit->tbl, entry, ua, dir);
 
-               iommu_tce_kill_rm(stit->tbl, entry, 1);
-
                if (ret != H_SUCCESS) {
-                       kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+                       kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry);
                        return ret;
                }
        }
@@ -561,7 +569,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
                ua = 0;
                if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) {
                        ret = H_PARAMETER;
-                       goto invalidate_exit;
+                       goto unlock_exit;
                }
 
                list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -570,19 +578,15 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
                                        iommu_tce_direction(tce));
 
                        if (ret != H_SUCCESS) {
-                               kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
-                                               entry);
-                               goto invalidate_exit;
+                               kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl,
+                                               entry + i);
+                               goto unlock_exit;
                        }
                }
 
                kvmppc_rm_tce_put(stt, entry + i, tce);
        }
 
-invalidate_exit:
-       list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
-               iommu_tce_kill_rm(stit->tbl, entry, npages);
-
 unlock_exit:
        if (!prereg)
                arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
@@ -620,20 +624,16 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
                                continue;
 
                        if (ret == H_TOO_HARD)
-                               goto invalidate_exit;
+                               return ret;
 
                        WARN_ON_ONCE_RM(1);
-                       kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+                       kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry + i);
                }
        }
 
        for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
                kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
 
-invalidate_exit:
-       list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
-               iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages);
-
        return ret;
 }
 
index c886557638a15b16c53df54de0a9b8a5754ffa7a..6fa518f6501d513076200e994c25ef4cbd594a35 100644 (file)
@@ -225,6 +225,13 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
        int cpu;
        struct rcuwait *waitp;
 
+       /*
+        * rcuwait_wake_up() contains an smp_mb() which orders prior stores
+        * that create pending work vs the below loads of the cpu fields. The
+        * other side is the barrier in vcpu run that orders setting the cpu
+        * fields vs testing for pending work.
+        */
+
        waitp = kvm_arch_vcpu_get_wait(vcpu);
        if (rcuwait_wake_up(waitp))
                ++vcpu->stat.generic.halt_wakeup;
@@ -1089,7 +1096,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
                        break;
                }
                tvcpu->arch.prodded = 1;
-               smp_mb();
+               smp_mb(); /* This orders prodded store vs ceded load */
                if (tvcpu->arch.ceded)
                        kvmppc_fast_vcpu_kick_hv(tvcpu);
                break;
@@ -3766,6 +3773,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
                pvc = core_info.vc[sub];
                pvc->pcpu = pcpu + thr;
                for_each_runnable_thread(i, vcpu, pvc) {
+                       /*
+                        * XXX: is kvmppc_start_thread called too late here?
+                        * It updates vcpu->cpu and vcpu->arch.thread_cpu
+                        * which are used by kvmppc_fast_vcpu_kick_hv(), but
+                        * the kick is sent after new exceptions become available,
+                        * and exceptions are checked earlier than this point, by
+                        * kvmppc_core_prepare_to_enter.
+                        */
                        kvmppc_start_thread(vcpu, pvc);
                        kvmppc_create_dtl_entry(vcpu, pvc);
                        trace_kvm_guest_enter(vcpu);
@@ -4487,6 +4502,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
        if (need_resched() || !kvm->arch.mmu_ready)
                goto out;
 
+       vcpu->cpu = pcpu;
+       vcpu->arch.thread_cpu = pcpu;
+       vc->pcpu = pcpu;
+       local_paca->kvm_hstate.kvm_vcpu = vcpu;
+       local_paca->kvm_hstate.ptid = 0;
+       local_paca->kvm_hstate.fake_suspend = 0;
+
+       /*
+        * Orders setting cpu/thread_cpu vs testing for pending interrupts
+        * and doorbells below. The other side is kvmppc_fast_vcpu_kick_hv,
+        * which orders its stores that create pending work vs its reads of
+        * the cpu/thread_cpu fields used to kick the vCPU.
+        */
+       smp_mb();
+
        if (!nested) {
                kvmppc_core_prepare_to_enter(vcpu);
                if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
@@ -4506,13 +4536,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 
        tb = mftb();
 
-       vcpu->cpu = pcpu;
-       vcpu->arch.thread_cpu = pcpu;
-       vc->pcpu = pcpu;
-       local_paca->kvm_hstate.kvm_vcpu = vcpu;
-       local_paca->kvm_hstate.ptid = 0;
-       local_paca->kvm_hstate.fake_suspend = 0;
-
        __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);
 
        trace_kvm_guest_enter(vcpu);
@@ -4614,6 +4637,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
        run->exit_reason = KVM_EXIT_INTR;
        vcpu->arch.ret = -EINTR;
  out:
+       vcpu->cpu = -1;
+       vcpu->arch.thread_cpu = -1;
        powerpc_local_irq_pmu_restore(flags);
        preempt_enable();
        goto done;
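The smp_mb() added above pairs with the barrier in kvmppc_fast_vcpu_kick_hv(). The shape of the pairing is the classic store/load pattern sketched below, in kernel style; the names and helpers are illustrative, not the kernel's:

	static int pending_work;
	static int vcpu_cpu = -1;

	static void kicker(void)
	{
		WRITE_ONCE(pending_work, 1);	/* create pending work */
		smp_mb();			/* order store above vs load below */
		if (READ_ONCE(vcpu_cpu) >= 0)
			ipi_cpu(vcpu_cpu);	/* hypothetical kick */
	}

	static void runner(int cpu)
	{
		WRITE_ONCE(vcpu_cpu, cpu);	/* publish where we run */
		smp_mb();			/* order store above vs load below */
		if (READ_ONCE(pending_work))
			return;			/* handle work before entering the guest */
		enter_guest();			/* hypothetical entry */
	}

At least one side must observe the other's store, so a kick cannot be lost.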
index 9d373f8963ee98a9977f4a796f200c6343b5d67b..c943a051c6e700c2b58416aef857b9e310563678 100644 (file)
@@ -306,10 +306,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        /* copy parameters in */
        hv_ptr = kvmppc_get_gpr(vcpu, 4);
        regs_ptr = kvmppc_get_gpr(vcpu, 5);
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
        err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
                                              hv_ptr, regs_ptr);
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        if (err)
                return H_PARAMETER;
 
@@ -410,10 +410,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
                byteswap_hv_regs(&l2_hv);
                byteswap_pt_regs(&l2_regs);
        }
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
        err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
                                               hv_ptr, regs_ptr);
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        if (err)
                return H_AUTHORITY;
 
@@ -600,16 +600,16 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
                        goto not_found;
 
                /* Write what was loaded into our buffer back to the L1 guest */
-               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
                rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
-               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                if (rc)
                        goto not_found;
        } else {
                /* Load the data to be stored from the L1 guest into our buf */
-               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
                rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
-               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                if (rc)
                        goto not_found;
 
index 34a801c3604adcee59ea7641a8ded0d51247dd1a..7bf9e6ca5c2df60506bb2225a92c1914c71d5b26 100644 (file)
@@ -137,12 +137,15 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
        svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
        svcpu->in_use = 0;
        svcpu_put(svcpu);
-#endif
 
        /* Disable AIL if supported */
-       if (cpu_has_feature(CPU_FTR_HVMODE) &&
-           cpu_has_feature(CPU_FTR_ARCH_207S))
-               mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
+       if (cpu_has_feature(CPU_FTR_HVMODE)) {
+               if (cpu_has_feature(CPU_FTR_ARCH_207S))
+                       mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
+               if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
+                       mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) & ~FSCR_SCV);
+       }
+#endif
 
        vcpu->cpu = smp_processor_id();
 #ifdef CONFIG_PPC_BOOK3S_32
@@ -165,6 +168,14 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
        memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
        to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
        svcpu_put(svcpu);
+
+       /* Enable AIL if supported */
+       if (cpu_has_feature(CPU_FTR_HVMODE)) {
+               if (cpu_has_feature(CPU_FTR_ARCH_207S))
+                       mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);
+               if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
+                       mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_SCV);
+       }
 #endif
 
        if (kvmppc_is_split_real(vcpu))
@@ -174,11 +185,6 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
        kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
        kvmppc_save_tm_pr(vcpu);
 
-       /* Enable AIL if supported */
-       if (cpu_has_feature(CPU_FTR_HVMODE) &&
-           cpu_has_feature(CPU_FTR_ARCH_207S))
-               mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);
-
        vcpu->cpu = -1;
 }
 
@@ -1037,6 +1043,8 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
 
 void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr)
 {
+       if (fscr & FSCR_SCV)
+               fscr &= ~FSCR_SCV; /* SCV must not be enabled */
        if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) {
                /* TAR got dropped, drop it in shadow too */
                kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
index 1f10e7dfcdd05bd3aaff4f49a78ffcf08436c855..dc4f51ac84bc6012a068eb4c9793e344a86b9221 100644 (file)
@@ -281,6 +281,22 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
        return EMULATE_DONE;
 }
 
+static int kvmppc_h_pr_set_mode(struct kvm_vcpu *vcpu)
+{
+       unsigned long mflags = kvmppc_get_gpr(vcpu, 4);
+       unsigned long resource = kvmppc_get_gpr(vcpu, 5);
+
+       if (resource == H_SET_MODE_RESOURCE_ADDR_TRANS_MODE) {
+               /* KVM PR does not provide AIL!=0 to guests */
+               if (mflags == 0)
+                       kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+               else
+                       kvmppc_set_gpr(vcpu, 3, H_UNSUPPORTED_FLAG_START - 63);
+               return EMULATE_DONE;
+       }
+       return EMULATE_FAIL;
+}
+
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
 {
@@ -384,6 +400,8 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
                return kvmppc_h_pr_logical_ci_load(vcpu);
        case H_LOGICAL_CI_STORE:
                return kvmppc_h_pr_logical_ci_store(vcpu);
+       case H_SET_MODE:
+               return kvmppc_h_pr_set_mode(vcpu);
        case H_XIRR:
        case H_CPPR:
        case H_EOI:
@@ -421,6 +439,7 @@ int kvmppc_hcall_impl_pr(unsigned long cmd)
        case H_CEDE:
        case H_LOGICAL_CI_LOAD:
        case H_LOGICAL_CI_STORE:
+       case H_SET_MODE:
 #ifdef CONFIG_KVM_XICS
        case H_XIRR:
        case H_CPPR:
@@ -447,6 +466,7 @@ static unsigned int default_hcall_list[] = {
        H_BULK_REMOVE,
        H_PUT_TCE,
        H_CEDE,
+       H_SET_MODE,
 #ifdef CONFIG_KVM_XICS
        H_XIRR,
        H_CPPR,
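With H_SET_MODE handled (and advertised) by KVM PR as above, a pseries guest can negotiate the address-translation mode at boot. A hedged sketch of the guest-side call; plpar_set_mode() is the usual wrapper in plpar_wrappers.h, but this exact call site is illustrative:

	/* Request AIL=0; the PR handler above accepts only mflags == 0. */
	long rc = plpar_set_mode(0, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0);

	if (rc != H_SUCCESS)
		pr_warn("H_SET_MODE(ADDR_TRANS_MODE) failed: %ld\n", rc);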
index 0f847f1e5ddd0ba6590642548fe799f6335ca015..6808bda0dbc10c114a9b1d2fab1b4d63d24a6224 100644 (file)
@@ -229,9 +229,9 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
         */
        args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;
 
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
        rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        if (rc)
                goto fail;
 
index 9772b176e406b03565273a9fe10d73d72b85ec53..533c4232e5abfd926c859fcdfb45c7fee658682a 100644 (file)
@@ -425,9 +425,9 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
                return EMULATE_DONE;
        }
 
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
        rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size);
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        if (rc)
                return EMULATE_DO_MMIO;
 
@@ -705,6 +705,23 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = 1;
                break;
 #endif
+       case KVM_CAP_PPC_AIL_MODE_3:
+               r = 0;
+               /*
+                * KVM PR, POWER7, and some POWER9s don't support AIL=3 mode.
+                * The POWER9s can support it if the guest runs in hash mode,
+                * but QEMU doesn't necessarily query the capability in time
+                * to know.
+               if (hv_enabled) {
+                       if (kvmhv_on_pseries()) {
+                               if (pseries_reloc_on_exception())
+                                       r = 1;
+                       } else if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+                                 !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
+                               r = 1;
+                       }
+               }
+               break;
        default:
                r = 0;
                break;
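Userspace discovers the new KVM_CAP_PPC_AIL_MODE_3 capability through the standard check-extension ioctl. A minimal sketch, assuming headers new enough to define the capability (error handling elided):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		int r = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_AIL_MODE_3);

		printf("AIL=3 %s\n", r == 1 ? "supported" : "unsupported");
		return 0;
	}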
index 8e301cd8925b2bde870fd8cddd933a6128b4924e..4d221d033804ef89638e1fd199e5a8fc8c397e69 100644 (file)
@@ -255,7 +255,7 @@ void __init mem_init(void)
 #endif
 
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
-       set_max_mapnr(max_low_pfn);
+       set_max_mapnr(max_pfn);
 
        kasan_late_init();
 
index b9b7fefbb64b9c7298ac718da6864b8b5b4c6a32..13022d734951b99e312a8074658d66b044558c6a 100644 (file)
@@ -1436,7 +1436,7 @@ int find_and_online_cpu_nid(int cpu)
        if (new_nid < 0 || !node_possible(new_nid))
                new_nid = first_online_node;
 
-       if (NODE_DATA(new_nid) == NULL) {
+       if (!node_online(new_nid)) {
 #ifdef CONFIG_MEMORY_HOTPLUG
                /*
                 * Need to ensure that NODE_DATA is initialized for a node from
index 2f46e31c76129799eb13378b31498ae76cbe5611..4f53d0b97539bb6cd31fadfbc5eab06a8054ffc8 100644 (file)
@@ -3,11 +3,11 @@
 obj-y                          += callchain.o callchain_$(BITS).o perf_regs.o
 obj-$(CONFIG_COMPAT)           += callchain_32.o
 
-obj-$(CONFIG_PPC_PERF_CTRS)    += core-book3s.o bhrb.o
+obj-$(CONFIG_PPC_PERF_CTRS)    += core-book3s.o
 obj64-$(CONFIG_PPC_PERF_CTRS)  += ppc970-pmu.o power5-pmu.o \
                                   power5+-pmu.o power6-pmu.o power7-pmu.o \
                                   isa207-common.o power8-pmu.o power9-pmu.o \
-                                  generic-compat-pmu.o power10-pmu.o
+                                  generic-compat-pmu.o power10-pmu.o bhrb.o
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
 
 obj-$(CONFIG_PPC_POWERNV)      += imc-pmu.o
index d3398100a60fd36babe6c6c0f13126c4b164ba82..c6d51e7093cf1191d4af1527d07a8d9e9f979b42 100644 (file)
@@ -91,8 +91,8 @@ extern u64 PERF_REG_EXTENDED_MASK;
 
 /* Table of alternatives, sorted by column 0 */
 static const unsigned int power10_event_alternatives[][MAX_ALT] = {
-       { PM_CYC_ALT,                   PM_CYC },
        { PM_INST_CMPL_ALT,             PM_INST_CMPL },
+       { PM_CYC_ALT,                   PM_CYC },
 };
 
 static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
index c9eb5232e68b6ab7b220882b4c502c15f3bb9feb..c393e837648e2efe34287615ef6fd2155a2b59d5 100644 (file)
@@ -133,11 +133,11 @@ int p9_dd22_bl_ev[] = {
 
 /* Table of alternatives, sorted by column 0 */
 static const unsigned int power9_event_alternatives[][MAX_ALT] = {
-       { PM_INST_DISP,                 PM_INST_DISP_ALT },
-       { PM_RUN_CYC_ALT,               PM_RUN_CYC },
-       { PM_RUN_INST_CMPL_ALT,         PM_RUN_INST_CMPL },
-       { PM_LD_MISS_L1,                PM_LD_MISS_L1_ALT },
        { PM_BR_2PATH,                  PM_BR_2PATH_ALT },
+       { PM_INST_DISP,                 PM_INST_DISP_ALT },
+       { PM_RUN_CYC_ALT,               PM_RUN_CYC },
+       { PM_LD_MISS_L1,                PM_LD_MISS_L1_ALT },
+       { PM_RUN_INST_CMPL_ALT,         PM_RUN_INST_CMPL },
 };
 
 static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
index b97bc179f65ad25f5018060aed91f0a8ef13a914..adcb1a1a2bfe805519e1774101ecdc5c5eb913c9 100644 (file)
@@ -112,7 +112,7 @@ static void __init fill_prstatus(struct elf_prstatus *prstatus, int pir,
                          struct pt_regs *regs)
 {
        memset(prstatus, 0, sizeof(struct elf_prstatus));
-       elf_core_copy_kernel_regs(&(prstatus->pr_reg), regs);
+       elf_core_copy_regs(&(prstatus->pr_reg), regs);
 
        /*
         * Overload PID with PIR value.
index f58728d5f10d2469c2844ee5b1905a9fdaaa7fbb..39962c9055422eea5b9ee23d43b1c67fb520f1cd 100644 (file)
@@ -462,7 +462,6 @@ static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu
 {
        struct papr_scm_perf_stat *stat;
        struct papr_scm_perf_stats *stats;
-       char *statid;
        int index, rc, count;
        u32 available_events;
 
@@ -493,14 +492,12 @@ static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu
 
        for (index = 0, stat = stats->scm_statistic, count = 0;
                     index < available_events; index++, ++stat) {
-               statid = kzalloc(strlen(stat->stat_id) + 1, GFP_KERNEL);
-               if (!statid) {
+               p->nvdimm_events_map[count] = kmemdup_nul(stat->stat_id, 8, GFP_KERNEL);
+               if (!p->nvdimm_events_map[count]) {
                        rc = -ENOMEM;
                        goto out_nvdimm_events_map;
                }
 
-               strcpy(statid, stat->stat_id);
-               p->nvdimm_events_map[count] = statid;
                count++;
        }
        p->nvdimm_events_map[count] = NULL;
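kmemdup_nul() above copies exactly the requested number of bytes and appends a terminating NUL, which suits stat_id: an 8-byte field that is not guaranteed to be NUL-terminated. Roughly equivalent open-coded behavior, as a sketch (the real helper lives in mm/util.c):

	static char *kmemdup_nul_sketch(const char *s, size_t len, gfp_t gfp)
	{
		char *buf = kmalloc(len + 1, gfp);

		if (buf) {
			memcpy(buf, s, len);
			buf[len] = '\0';	/* always terminated, even if s is not */
		}
		return buf;
	}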
index 069d7b3bb142ef58fb9afde20b2905955d204364..955ff8aa1644d03da0a53b6ca7e2f2a465d737ba 100644 (file)
@@ -353,6 +353,14 @@ static void pseries_lpar_idle(void)
        pseries_idle_epilog();
 }
 
+static bool pseries_reloc_on_exception_enabled;
+
+bool pseries_reloc_on_exception(void)
+{
+       return pseries_reloc_on_exception_enabled;
+}
+EXPORT_SYMBOL_GPL(pseries_reloc_on_exception);
+
 /*
  * Enable relocation-on exceptions. This has partition-wide scope and
  * may take a while to complete; if it takes longer than one second we will
@@ -377,6 +385,7 @@ bool pseries_enable_reloc_on_exc(void)
                                        " on exceptions: %ld\n", rc);
                                return false;
                        }
+                       pseries_reloc_on_exception_enabled = true;
                        return true;
                }
 
@@ -404,7 +413,9 @@ void pseries_disable_reloc_on_exc(void)
                        break;
                mdelay(get_longbusy_msecs(rc));
        }
-       if (rc != H_SUCCESS)
+       if (rc == H_SUCCESS)
+               pseries_reloc_on_exception_enabled = false;
+       else
                pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
                        rc);
 }
index 4a7fcde5afc0708e6cce0ca4f31ec893cbcb1795..ec65586cbeb39092886544d7c4213656f78d5dc3 100644 (file)
@@ -27,22 +27,31 @@ struct vas_caps_entry {
 
 /*
  * This function is used to get the notification from the drmgr when
- * QoS credits are changed. Though receiving the target total QoS
- * credits here, get the official QoS capabilities from the hypervisor.
+ * QoS credits are changed.
  */
-static ssize_t update_total_credits_trigger(struct vas_cop_feat_caps *caps,
+static ssize_t update_total_credits_store(struct vas_cop_feat_caps *caps,
                                                const char *buf, size_t count)
 {
        int err;
        u16 creds;
 
        err = kstrtou16(buf, 0, &creds);
+       /*
+        * The user space interface from the management console
+        * notifies the OS with the new QoS credits and then the
+        * hypervisor. So the OS has to use this new credits value
+        * and reconfigure VAS windows (close or reopen depending
+        * on the credits available) instead of depending on the
+        * VAS QoS capabilities from the hypervisor.
+        */
        if (!err)
-               err = vas_reconfig_capabilties(caps->win_type);
+               err = vas_reconfig_capabilties(caps->win_type, creds);
 
        if (err)
                return -EINVAL;
 
+       pr_info("Set QoS total credits %u\n", creds);
+
        return count;
 }
 
@@ -92,13 +101,14 @@ VAS_ATTR_RO(nr_total_credits);
 VAS_ATTR_RO(nr_used_credits);
 
 static struct vas_sysfs_entry update_total_credits_attribute =
-       __ATTR(update_total_credits, 0200, NULL, update_total_credits_trigger);
+       __ATTR(update_total_credits, 0200, NULL, update_total_credits_store);
 
 static struct attribute *vas_def_capab_attrs[] = {
        &nr_total_credits_attribute.attr,
        &nr_used_credits_attribute.attr,
        NULL,
 };
+ATTRIBUTE_GROUPS(vas_def_capab);
 
 static struct attribute *vas_qos_capab_attrs[] = {
        &nr_total_credits_attribute.attr,
@@ -106,6 +116,7 @@ static struct attribute *vas_qos_capab_attrs[] = {
        &update_total_credits_attribute.attr,
        NULL,
 };
+ATTRIBUTE_GROUPS(vas_qos_capab);
 
 static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr,
                             char *buf)
@@ -154,13 +165,13 @@ static const struct sysfs_ops vas_sysfs_ops = {
 static struct kobj_type vas_def_attr_type = {
                .release        =       vas_type_release,
                .sysfs_ops      =       &vas_sysfs_ops,
-               .default_attrs  =       vas_def_capab_attrs,
+               .default_groups =       vas_def_capab_groups,
 };
 
 static struct kobj_type vas_qos_attr_type = {
                .release        =       vas_type_release,
                .sysfs_ops      =       &vas_sysfs_ops,
-               .default_attrs  =       vas_qos_capab_attrs,
+               .default_groups =       vas_qos_capab_groups,
 };
 
 static char *vas_caps_kobj_name(struct vas_caps_entry *centry,
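The ATTRIBUTE_GROUPS() uses above exist to feed kobj_type's default_groups, which replaces the removed default_attrs field. Roughly what ATTRIBUTE_GROUPS(vas_qos_capab) expands to (see include/linux/sysfs.h for the authoritative macro):

	static const struct attribute_group vas_qos_capab_group = {
		.attrs = vas_qos_capab_attrs,
	};

	static const struct attribute_group *vas_qos_capab_groups[] = {
		&vas_qos_capab_group,
		NULL,
	};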
index 1f59d78c77a1fd690a978f619c6017c0c0f78de5..ec643bbdb67fcabf2428f34c1eeaee613a174859 100644 (file)
@@ -779,10 +779,10 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
  * changes. Reconfigure window configurations based on the credit
  * availability from these new capabilities.
  */
-int vas_reconfig_capabilties(u8 type)
+int vas_reconfig_capabilties(u8 type, int new_nr_creds)
 {
        struct vas_cop_feat_caps *caps;
-       int old_nr_creds, new_nr_creds;
+       int old_nr_creds;
        struct vas_caps *vcaps;
        int rc = 0, nr_active_wins;
 
@@ -795,12 +795,6 @@ int vas_reconfig_capabilties(u8 type)
        caps = &vcaps->caps;
 
        mutex_lock(&vas_pseries_mutex);
-       rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vcaps->feat,
-                                     (u64)virt_to_phys(&hv_cop_caps));
-       if (rc)
-               goto out;
-
-       new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
 
        old_nr_creds = atomic_read(&caps->nr_total_credits);
 
@@ -832,7 +826,6 @@ int vas_reconfig_capabilties(u8 type)
                                        false);
        }
 
-out:
        mutex_unlock(&vas_pseries_mutex);
        return rc;
 }
@@ -850,7 +843,7 @@ static int pseries_vas_notifier(struct notifier_block *nb,
        struct of_reconfig_data *rd = data;
        struct device_node *dn = rd->dn;
        const __be32 *intserv = NULL;
-       int len, rc = 0;
+       int new_nr_creds, len, rc = 0;
 
        if ((action == OF_RECONFIG_ATTACH_NODE) ||
                (action == OF_RECONFIG_DETACH_NODE))
@@ -862,7 +855,15 @@ static int pseries_vas_notifier(struct notifier_block *nb,
        if (!intserv)
                return NOTIFY_OK;
 
-       rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE);
+       rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
+                                       vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
+                                       (u64)virt_to_phys(&hv_cop_caps));
+       if (!rc) {
+               new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
+               rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
+                                               new_nr_creds);
+       }
+
        if (rc)
                pr_err("Failed reconfig VAS capabilities with DLPAR\n");
 
index 34177881e998031daac8ac4d6b19c290717456d4..333ffa2f9f426758510481e3edde8812d81930f1 100644 (file)
@@ -135,7 +135,7 @@ struct pseries_vas_window {
 };
 
 int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
-int vas_reconfig_capabilties(u8 type);
+int vas_reconfig_capabilties(u8 type, int new_nr_creds);
 int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
 
 #ifdef CONFIG_PPC_VAS
index 34592d00dde8c65316b7b4f7325aa7e05fc4ff21..f6ef358d8a2cf6eb628a054ef96794a682a12d3d 100644 (file)
@@ -38,7 +38,7 @@ config SOC_VIRT
        select SIFIVE_PLIC
        select PM_GENERIC_DOMAINS if PM
        select PM_GENERIC_DOMAINS_OF if PM && OF
-       select RISCV_SBI_CPUIDLE if CPU_IDLE
+       select RISCV_SBI_CPUIDLE if CPU_IDLE && RISCV_SBI
        help
          This enables support for QEMU Virt Machine.
 
index 854320e17b285028526621903b312951684e546d..ccaac3371cf99a7fbb960428831b6fffbdf28c1d 100644 (file)
@@ -7,7 +7,7 @@
                reg = <0x0 0x41000000 0x0 0xF0>;
                microchip,sync-update-mask = /bits/ 32 <0>;
                #pwm-cells = <2>;
-               clocks = <&clkcfg CLK_FIC3>;
+               clocks = <&fabric_clk3>;
                status = "disabled";
        };
 
                reg = <0x0 0x44000000 0x0 0x1000>;
                #address-cells = <1>;
                #size-cells = <0>;
-               clocks = <&clkcfg CLK_FIC3>;
+               clocks = <&fabric_clk3>;
                interrupt-parent = <&plic>;
                interrupts = <122>;
                clock-frequency = <100000>;
                status = "disabled";
        };
+
+       fabric_clk3: fabric-clk3 {
+               compatible = "fixed-clock";
+               #clock-cells = <0>;
+               clock-frequency = <62500000>;
+       };
+
+       fabric_clk1: fabric-clk1 {
+               compatible = "fixed-clock";
+               #clock-cells = <0>;
+               clock-frequency = <125000000>;
+       };
 };
index cd2fe80fa81a6edc70da902d05f66ccd2e6f76c6..3392153dd0f11e6d9b9026f3b391aa704883e8fb 100644 (file)
@@ -45,7 +45,7 @@
 };
 
 &refclk {
-       clock-frequency = <600000000>;
+       clock-frequency = <125000000>;
 };
 
 &mmuart1 {
index c5c9d1360de074a3bd9e81199e07029e104b91d0..cf2f55e1dcb679776bf0e1aa84e67fab4064577d 100644 (file)
                };
        };
 
-       refclk: msspllclk {
+       refclk: mssrefclk {
                compatible = "fixed-clock";
                #clock-cells = <0>;
        };
 
                clkcfg: clkcfg@20002000 {
                        compatible = "microchip,mpfs-clkcfg";
-                       reg = <0x0 0x20002000 0x0 0x1000>;
+                       reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>;
                        clocks = <&refclk>;
                        #clock-cells = <1>;
                };
 
                gpio1: gpio@20121000 {
                        compatible = "microchip,mpfs-gpio";
-                       reg = <000 0x20121000 0x0 0x1000>;
+                       reg = <0x0 0x20121000 0x0 0x1000>;
                        interrupt-parent = <&plic>;
                        interrupt-controller;
                        #interrupt-cells = <1>;
                        reg = <0x0 0x20124000 0x0 0x1000>;
                        interrupt-parent = <&plic>;
                        interrupts = <80>, <81>;
-                       clocks = <&clkcfg CLK_RTC>;
-                       clock-names = "rtc";
+                       clocks = <&clkcfg CLK_RTC>, <&clkcfg CLK_RTCREF>;
+                       clock-names = "rtc", "rtcref";
                        status = "disabled";
                };
 
                                        <0 0 0 3 &pcie_intc 2>,
                                        <0 0 0 4 &pcie_intc 3>;
                        interrupt-map-mask = <0 0 0 7>;
-                       clocks = <&clkcfg CLK_FIC0>, <&clkcfg CLK_FIC1>, <&clkcfg CLK_FIC3>;
+                       clocks = <&fabric_clk1>, <&fabric_clk1>, <&fabric_clk3>;
                        clock-names = "fic0", "fic1", "fic3";
                        ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>;
                        msi-parent = <&pcie>;
index aad45d7f498fd90f8ddc851827a31e2265e0a67b..5c638fd5b35c78fb0e914e0f8724c01056f43d6a 100644 (file)
                        clocks = <&prci FU540_PRCI_CLK_TLCLK>;
                        status = "disabled";
                };
-               dma: dma@3000000 {
+               dma: dma-controller@3000000 {
                        compatible = "sifive,fu540-c000-pdma";
                        reg = <0x0 0x3000000 0x0 0x8000>;
                        interrupt-parent = <&plic0>;
index 30e3017f22bc778ae25ed2bad6f0b26553de0f9a..0cc17db8aaba804a2a9c045104e0fcf26011797f 100644 (file)
@@ -101,6 +101,7 @@ CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_INPUT=y
 CONFIG_VIRTIO_MMIO=y
 CONFIG_RPMSG_CHAR=y
+CONFIG_RPMSG_CTRL=y
 CONFIG_RPMSG_VIRTIO=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
index 7e5efdc3829d1165eb44a9a7f58a7f4b60c135fe..6cd9d84d3e1387e6177b850bb1ef60ea8a543edd 100644 (file)
@@ -93,6 +93,7 @@ CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_INPUT=y
 CONFIG_VIRTIO_MMIO=y
 CONFIG_RPMSG_CHAR=y
+CONFIG_RPMSG_CTRL=y
 CONFIG_RPMSG_VIRTIO=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
index d3804a2f9aad33fb8089691ca5f5ccd847452b51..1aaea81fb1413eeb87dc23cbd9346055fb6f6ef8 100644 (file)
@@ -30,8 +30,8 @@
 typedef u32 bug_insn_t;
 
 #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
-#define __BUG_ENTRY_ADDR       RISCV_INT " 1b - 2b"
-#define __BUG_ENTRY_FILE       RISCV_INT " %0 - 2b"
+#define __BUG_ENTRY_ADDR       RISCV_INT " 1b - ."
+#define __BUG_ENTRY_FILE       RISCV_INT " %0 - ."
 #else
 #define __BUG_ENTRY_ADDR       RISCV_PTR " 1b"
 #define __BUG_ENTRY_FILE       RISCV_PTR " %0"
index 78da839657e52401f34b93acde2bc1cd31fd6cac..cd4bbcecb0fbf04c0ddf3ef1a9928576dd51c385 100644 (file)
@@ -193,9 +193,6 @@ struct kvm_vcpu_arch {
 
        /* Don't run the VCPU (blocked) */
        bool pause;
-
-       /* SRCU lock index for in-kernel run loop */
-       int srcu_idx;
 };
 
 static inline void kvm_arch_hardware_unsetup(void) {}
index 0b552873a5778b397a3b25ca8e64124c57d4a5c3..765004b605132abebbc3fbe9a5819d2ce14e81d4 100644 (file)
@@ -104,7 +104,7 @@ static int patch_text_cb(void *data)
        struct patch_insn *patch = data;
        int ret = 0;
 
-       if (atomic_inc_return(&patch->cpu_count) == 1) {
+       if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
                ret =
                    patch_text_nosync(patch->addr, &patch->insn,
                                            GET_INSN_LENGTH(patch->insn));
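The patch_text_cb() change above moves the instruction patching from the first CPU to reach the callback to the last one, so every other CPU is already spinning inside stop_machine() before any text is rewritten. The overall shape, as a simplified sketch (fences and error handling mostly omitted):

	static int patch_text_cb_sketch(void *data)
	{
		struct patch_insn *patch = data;

		if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
			/* Last CPU in: all others are parked, safe to patch. */
			patch_text_nosync(patch->addr, &patch->insn,
					  GET_INSN_LENGTH(patch->insn));
			atomic_inc(&patch->cpu_count);	/* release the spinners */
		} else {
			while (atomic_read(&patch->cpu_count) <= num_online_cpus())
				cpu_relax();
			smp_mb();	/* see the patched text before returning */
		}
		return 0;
	}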
index 624166004e36c637fb5fbd8966f3412f72a84294..7461f964d20a92e579e80c2485a41be9da2b051a 100644 (file)
@@ -38,14 +38,16 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
                       sizeof(kvm_vcpu_stats_desc),
 };
 
-#define KVM_RISCV_ISA_ALLOWED  (riscv_isa_extension_mask(a) | \
-                                riscv_isa_extension_mask(c) | \
-                                riscv_isa_extension_mask(d) | \
-                                riscv_isa_extension_mask(f) | \
-                                riscv_isa_extension_mask(i) | \
-                                riscv_isa_extension_mask(m) | \
-                                riscv_isa_extension_mask(s) | \
-                                riscv_isa_extension_mask(u))
+#define KVM_RISCV_ISA_DISABLE_ALLOWED  (riscv_isa_extension_mask(d) | \
+                                       riscv_isa_extension_mask(f))
+
+#define KVM_RISCV_ISA_DISABLE_NOT_ALLOWED      (riscv_isa_extension_mask(a) | \
+                                               riscv_isa_extension_mask(c) | \
+                                               riscv_isa_extension_mask(i) | \
+                                               riscv_isa_extension_mask(m))
+
+#define KVM_RISCV_ISA_ALLOWED (KVM_RISCV_ISA_DISABLE_ALLOWED | \
+                              KVM_RISCV_ISA_DISABLE_NOT_ALLOWED)
 
 static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
 {
@@ -219,7 +221,8 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
        switch (reg_num) {
        case KVM_REG_RISCV_CONFIG_REG(isa):
                if (!vcpu->arch.ran_atleast_once) {
-                       vcpu->arch.isa = reg_val;
+                       /* Ignore the disable request for these extensions */
+                       vcpu->arch.isa = reg_val | KVM_RISCV_ISA_DISABLE_NOT_ALLOWED;
                        vcpu->arch.isa &= riscv_isa_extension_base(NULL);
                        vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
                        kvm_riscv_vcpu_fp_reset(vcpu);
@@ -653,8 +656,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
                                     vcpu->arch.isa);
        kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);
 
-       csr_write(CSR_HGATP, 0);
-
        csr->vsstatus = csr_read(CSR_VSSTATUS);
        csr->vsie = csr_read(CSR_VSIE);
        csr->vstvec = csr_read(CSR_VSTVEC);
@@ -726,13 +727,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
        /* Mark this VCPU ran at least once */
        vcpu->arch.ran_atleast_once = true;
 
-       vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
 
        /* Process MMIO value returned from user-space */
        if (run->exit_reason == KVM_EXIT_MMIO) {
                ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
                if (ret) {
-                       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+                       kvm_vcpu_srcu_read_unlock(vcpu);
                        return ret;
                }
        }
@@ -741,13 +742,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
        if (run->exit_reason == KVM_EXIT_RISCV_SBI) {
                ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
                if (ret) {
-                       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+                       kvm_vcpu_srcu_read_unlock(vcpu);
                        return ret;
                }
        }
 
        if (run->immediate_exit) {
-               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                return -EINTR;
        }
 
@@ -786,7 +787,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                 */
                vcpu->mode = IN_GUEST_MODE;
 
-               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                smp_mb__after_srcu_read_unlock();
 
                /*
@@ -804,7 +805,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                        vcpu->mode = OUTSIDE_GUEST_MODE;
                        local_irq_enable();
                        preempt_enable();
-                       vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+                       kvm_vcpu_srcu_read_lock(vcpu);
                        continue;
                }
 
@@ -848,7 +849,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
                preempt_enable();
 
-               vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
 
                ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
        }
@@ -857,7 +858,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
        vcpu_put(vcpu);
 
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
 
        return ret;
 }
index aa8af129e4bb93ed5c48e988ab7e4071e6ef3273..a72c15d4b42a599a2b640b27af8eb699b4f4724e 100644 (file)
@@ -456,9 +456,9 @@ static int stage2_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
 void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu)
 {
        if (!kvm_arch_vcpu_runnable(vcpu)) {
-               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                kvm_vcpu_halt(vcpu);
-               vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
                kvm_clear_request(KVM_REQ_UNHALT, vcpu);
        }
 }
index 4449a976e5a6bab8fb691b81db972fc82ee5b12a..d4308c5120078223b88f9b401ee849e8bd2020d2 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/err.h>
 #include <linux/kvm_host.h>
 #include <linux/uaccess.h>
+#include <asm/hwcap.h>
 
 #ifdef CONFIG_FPU
 void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
index a09ecb97b89085b40e359affcb9436b8c561969b..d45e7da3f0d324e61c7215ddec8f8a61b7446a9c 100644 (file)
@@ -83,7 +83,7 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
                                     struct kvm_run *run,
-                                    u32 type, u64 flags)
+                                    u32 type, u64 reason)
 {
        unsigned long i;
        struct kvm_vcpu *tmp;
@@ -94,7 +94,8 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
 
        memset(&run->system_event, 0, sizeof(run->system_event));
        run->system_event.type = type;
-       run->system_event.flags = flags;
+       run->system_event.ndata = 1;
+       run->system_event.data[0] = reason;
        run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
 }
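struct kvm_run's system_event gains an ndata count plus a data[] array in this cycle, and the SBI reset reason now travels as data[0] rather than in flags. A sketch of how a VMM might consume it, assuming headers that define the new layout:

	/* Inside the VMM's KVM_RUN exit loop (fragment). */
	if (run->exit_reason == KVM_EXIT_SYSTEM_EVENT &&
	    run->system_event.type == KVM_SYSTEM_EVENT_RESET) {
		unsigned long long reason = 0;

		if (run->system_event.ndata >= 1)
			reason = run->system_event.data[0];
		fprintf(stderr, "guest reset request, reason %llu\n", reason);
	}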
 
index 9535bea8688c097a2895dadd34895e93cde58a14..05ed641a1134c4d7f73b43e059ea374026be0542 100644 (file)
@@ -208,8 +208,25 @@ static void __init setup_bootmem(void)
         * early_init_fdt_reserve_self() since __pa() does
         * not work for DTB pointers that are fixmap addresses
         */
-       if (!IS_ENABLED(CONFIG_BUILTIN_DTB))
-               memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
+       if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) {
+               /*
+                * In case the DTB is not located in a memory region we won't
+                * be able to locate it later on via the linear mapping and
+                * get a segfault when accessing it via __va(dtb_early_pa).
+                * To avoid this situation copy DTB to a memory region.
+                * Note that memblock_phys_alloc will also reserve DTB region.
+                */
+               if (!memblock_is_memory(dtb_early_pa)) {
+                       size_t fdt_size = fdt_totalsize(dtb_early_va);
+                       phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE);
+                       void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size);
+
+                       memcpy(new_dtb_early_va, dtb_early_va, fdt_size);
+                       early_memunmap(new_dtb_early_va, fdt_size);
+                       _dtb_early_pa = new_dtb_early_pa;
+               } else
+                       memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
+       }
 
        early_init_fdt_scan_reserved_mem();
        dma_contiguous_reserve(dma32_phys_limit);
@@ -718,6 +735,7 @@ retry:
                if (!check_l4) {
                        disable_pgtable_l5();
                        check_l4 = true;
+                       memset(early_pg_dir, 0, PAGE_SIZE);
                        goto retry;
                }
                disable_pgtable_l4();
index 77b5a03de13a97558a606d08a1c462cc20f543aa..e084c72104f86f55dad91577800b8952366494c2 100644 (file)
@@ -255,6 +255,10 @@ config HAVE_MARCH_Z15_FEATURES
        def_bool n
        select HAVE_MARCH_Z14_FEATURES
 
+config HAVE_MARCH_Z16_FEATURES
+       def_bool n
+       select HAVE_MARCH_Z15_FEATURES
+
 choice
        prompt "Processor type"
        default MARCH_Z196
@@ -312,6 +316,14 @@ config MARCH_Z15
          and 8561 series). The kernel will be slightly faster but will not
          work on older machines.
 
+config MARCH_Z16
+       bool "IBM z16"
+       select HAVE_MARCH_Z16_FEATURES
+       depends on $(cc-option,-march=z16)
+       help
+         Select this to enable optimizations for IBM z16 (3931 and
+         3932 series).
+
 endchoice
 
 config MARCH_Z10_TUNE
@@ -332,6 +344,9 @@ config MARCH_Z14_TUNE
 config MARCH_Z15_TUNE
        def_bool TUNE_Z15 || MARCH_Z15 && TUNE_DEFAULT
 
+config MARCH_Z16_TUNE
+       def_bool TUNE_Z16 || MARCH_Z16 && TUNE_DEFAULT
+
 choice
        prompt "Tune code generation"
        default TUNE_DEFAULT
@@ -372,6 +387,10 @@ config TUNE_Z15
        bool "IBM z15"
        depends on $(cc-option,-mtune=z15)
 
+config TUNE_Z16
+       bool "IBM z16"
+       depends on $(cc-option,-mtune=z16)
+
 endchoice
 
 config 64BIT
index 7a65bca1e5afcf6c9065860560dabeaa812e6c9e..df325eacf62d261b8b0f97711120cf4ae7acc73f 100644 (file)
@@ -30,6 +30,16 @@ KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector
 KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member)
 KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
 KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
+
+ifdef CONFIG_CC_IS_GCC
+       ifeq ($(call cc-ifversion, -ge, 1200, y), y)
+               ifeq ($(call cc-ifversion, -lt, 1300, y), y)
+                       KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds)
+                       KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, array-bounds)
+               endif
+       endif
+endif
+
 UTS_MACHINE    := s390x
 STACK_SIZE     := $(if $(CONFIG_KASAN),65536,16384)
 CHECKFLAGS     += -D__s390__ -D__s390x__
@@ -42,6 +52,7 @@ mflags-$(CONFIG_MARCH_ZEC12)  := -march=zEC12
 mflags-$(CONFIG_MARCH_Z13)    := -march=z13
 mflags-$(CONFIG_MARCH_Z14)    := -march=z14
 mflags-$(CONFIG_MARCH_Z15)    := -march=z15
+mflags-$(CONFIG_MARCH_Z16)    := -march=z16
 
 export CC_FLAGS_MARCH := $(mflags-y)
 
@@ -54,6 +65,7 @@ cflags-$(CONFIG_MARCH_ZEC12_TUNE)     += -mtune=zEC12
 cflags-$(CONFIG_MARCH_Z13_TUNE)                += -mtune=z13
 cflags-$(CONFIG_MARCH_Z14_TUNE)                += -mtune=z14
 cflags-$(CONFIG_MARCH_Z15_TUNE)                += -mtune=z15
+cflags-$(CONFIG_MARCH_Z16_TUNE)                += -mtune=z16
 
 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
 
index 498bed9b261b876a52122ec415515275b58cdaf5..f6dfde577ce83049db124250e3ebbb3af5c56924 100644 (file)
@@ -499,11 +499,13 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_CHELSIO is not set
 # CONFIG_NET_VENDOR_CISCO is not set
 # CONFIG_NET_VENDOR_CORTINA is not set
+# CONFIG_NET_VENDOR_DAVICOM is not set
 # CONFIG_NET_VENDOR_DEC is not set
 # CONFIG_NET_VENDOR_DLINK is not set
 # CONFIG_NET_VENDOR_EMULEX is not set
 # CONFIG_NET_VENDOR_ENGLEDER is not set
 # CONFIG_NET_VENDOR_EZCHIP is not set
+# CONFIG_NET_VENDOR_FUNGIBLE is not set
 # CONFIG_NET_VENDOR_GOOGLE is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 # CONFIG_NET_VENDOR_INTEL is not set
@@ -588,13 +590,13 @@ CONFIG_MLX5_INFINIBAND=m
 CONFIG_SYNC_FILE=y
 CONFIG_VFIO=m
 CONFIG_VFIO_PCI=m
+CONFIG_MLX5_VFIO_PCI=m
 CONFIG_VFIO_MDEV=m
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_VIRTIO_INPUT=y
 CONFIG_VHOST_NET=m
 CONFIG_VHOST_VSOCK=m
-# CONFIG_SURFACE_PLATFORMS is not set
 CONFIG_S390_CCW_IOMMU=y
 CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
@@ -690,6 +692,7 @@ CONFIG_ENCRYPTED_KEYS=m
 CONFIG_KEY_NOTIFICATIONS=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_FORTIFY_SOURCE=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
@@ -733,6 +736,7 @@ CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
@@ -786,7 +790,6 @@ CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
-CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_DEBUG_INFO_BTF=y
 CONFIG_GDB_SCRIPTS=y
@@ -814,6 +817,7 @@ CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
 CONFIG_DEBUG_PER_CPU_MAPS=y
 CONFIG_KFENCE=y
+CONFIG_KFENCE_DEFERRABLE=y
 CONFIG_KFENCE_STATIC_KEYS=y
 CONFIG_DEBUG_SHIRQ=y
 CONFIG_PANIC_ON_OOPS=y
index 61e36b999f67e631038137d4c648510f84ef3ff3..706df3a4a867f0619aeae6e8be6efefffd8ea26e 100644 (file)
@@ -490,11 +490,13 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_CHELSIO is not set
 # CONFIG_NET_VENDOR_CISCO is not set
 # CONFIG_NET_VENDOR_CORTINA is not set
+# CONFIG_NET_VENDOR_DAVICOM is not set
 # CONFIG_NET_VENDOR_DEC is not set
 # CONFIG_NET_VENDOR_DLINK is not set
 # CONFIG_NET_VENDOR_EMULEX is not set
 # CONFIG_NET_VENDOR_ENGLEDER is not set
 # CONFIG_NET_VENDOR_EZCHIP is not set
+# CONFIG_NET_VENDOR_FUNGIBLE is not set
 # CONFIG_NET_VENDOR_GOOGLE is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 # CONFIG_NET_VENDOR_INTEL is not set
@@ -578,13 +580,13 @@ CONFIG_MLX5_INFINIBAND=m
 CONFIG_SYNC_FILE=y
 CONFIG_VFIO=m
 CONFIG_VFIO_PCI=m
+CONFIG_MLX5_VFIO_PCI=m
 CONFIG_VFIO_MDEV=m
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_VIRTIO_INPUT=y
 CONFIG_VHOST_NET=m
 CONFIG_VHOST_VSOCK=m
-# CONFIG_SURFACE_PLATFORMS is not set
 CONFIG_S390_CCW_IOMMU=y
 CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
@@ -720,6 +722,7 @@ CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
@@ -772,7 +775,6 @@ CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
-CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_DEBUG_INFO_BTF=y
 CONFIG_GDB_SCRIPTS=y
index c55c668dc3c788ba11c6e83803c62ee50a4c75f5..a87fcc45e3071d93b5c869c89b9887236cbb2e27 100644 (file)
@@ -26,6 +26,7 @@ CONFIG_CRASH_DUMP=y
 # CONFIG_S390_GUEST is not set
 # CONFIG_SECCOMP is not set
 # CONFIG_GCC_PLUGINS is not set
+# CONFIG_BLOCK_LEGACY_AUTOLOAD is not set
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 # CONFIG_COMPACTION is not set
@@ -60,7 +61,6 @@ CONFIG_ZFCP=y
 # CONFIG_HID is not set
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_VHOST_MENU is not set
-# CONFIG_SURFACE_PLATFORMS is not set
 # CONFIG_IOMMU_SUPPORT is not set
 # CONFIG_DNOTIFY is not set
 # CONFIG_INOTIFY_USER is not set
@@ -71,10 +71,10 @@ CONFIG_LSM="yama,loadpin,safesetid,integrity"
 CONFIG_XZ_DEC_MICROLZMA=y
 CONFIG_PRINTK_TIME=y
 # CONFIG_SYMBOLIC_ERRNAME is not set
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_DEBUG_INFO_BTF=y
 CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
 CONFIG_PANIC_ON_OOPS=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
index 0b25f28351edc496ddfe728b169a4700e724cd7f..aebe1e22c7befa486bbe269383164f9a792b5c89 100644 (file)
@@ -15,7 +15,8 @@
                "1:     .asciz  \""__FILE__"\"\n"               \
                ".previous\n"                                   \
                ".section __bug_table,\"awM\",@progbits,%2\n"   \
-               "2:     .long   0b-2b,1b-2b\n"                  \
+               "2:     .long   0b-.\n"                         \
+               "       .long   1b-.\n"                         \
                "       .short  %0,%1\n"                        \
                "       .org    2b+%2\n"                        \
                ".previous\n"                                   \
@@ -30,7 +31,7 @@
        asm_inline volatile(                                    \
                "0:     mc      0,0\n"                          \
                ".section __bug_table,\"awM\",@progbits,%1\n"   \
-               "1:     .long   0b-1b\n"                        \
+               "1:     .long   0b-.\n"                         \
                "       .short  %0\n"                           \
                "       .org    1b+%1\n"                        \
                ".previous\n"                                   \
index eabab24b71dd76b1b7680aae3eaf7f1bd16a58f3..2f0a1cacdf858d42f94db5b7f652840af6761645 100644 (file)
@@ -58,7 +58,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 
 static inline bool on_thread_stack(void)
 {
-       return !(((unsigned long)(current->stack) ^ current_stack_pointer()) & ~(THREAD_SIZE - 1));
+       return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
 }
 
 #endif
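
The fix above drops the call parentheses now that current_stack_pointer is a register variable rather than a function. The XOR test itself checks whether two addresses fall in the same THREAD_SIZE-aligned block; a standalone illustration (THREAD_SIZE value assumed for the example):

	#define THREAD_SIZE (1UL << 14)	/* assumption: 16 KiB stacks */

	/* Two addresses share one THREAD_SIZE-aligned block iff XOR-ing
	 * them leaves no bits set above the offset-within-block bits. */
	static inline int same_stack_block(unsigned long a, unsigned long b)
	{
		return !((a ^ b) & ~(THREAD_SIZE - 1));
	}
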
index eee8d96fb38e7c7f1d9563cc759a610fcabaa406..ff1e25d515a855f666278ca3e444ccecb9b1da80 100644 (file)
@@ -200,13 +200,7 @@ unsigned long __get_wchan(struct task_struct *p);
 /* Has task runtime instrumentation enabled ? */
 #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
 
-static __always_inline unsigned long current_stack_pointer(void)
-{
-       unsigned long sp;
-
-       asm volatile("la %0,0(15)" : "=a" (sp));
-       return sp;
-}
+register unsigned long current_stack_pointer asm("r15");
 
 static __always_inline unsigned short stap(void)
 {
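
Replacing the inline-assembly accessor with a global register variable lets every reader compile down to a plain use of %r15, with no call or asm block. A minimal sketch of the pattern (names hypothetical):

	/* GCC/Clang global register variable: binds the name to %r15 for
	 * the whole translation unit, so reads are ordinary register
	 * accesses. */
	register unsigned long sp_reg asm("r15");

	static inline unsigned long read_sp(void)
	{
		return sp_reg;	/* compiles to a register copy */
	}
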
index 275f4258fbd590778dfdf676e0469d69a6eb333b..f8500191993df0b81f971c9a1a52534a7de8c3a8 100644 (file)
@@ -46,7 +46,7 @@ struct stack_frame {
 };
 
 /*
- * Unlike current_stack_pointer() which simply returns current value of %r15
+ * Unlike current_stack_pointer which simply contains the current value of %r15
  * current_frame_address() returns function stack frame address, which matches
  * %r15 upon function invocation. It may differ from %r15 later if function
  * allocates stack for local variables or new stack frame to call other
index b2ef014a9287cb100db8e641d9a1ba5f2e5e1b88..6ebf02e15c85812bf4a42ff4afa5d547f5d9171a 100644 (file)
@@ -54,7 +54,7 @@ static void __do_machine_kdump(void *image)
         * This needs to be done *after* s390_reset_system has set the
         * prefix register of this CPU to zero
         */
-       memcpy((void *) __LC_FPREGS_SAVE_AREA,
+       memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA),
               (void *)(prefix + __LC_FPREGS_SAVE_AREA), 512);
 
        __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
index 7a74ea5f7531b9eee127e5273681da6496115e0b..aa0e0e7fc773e31901eef61e4f7dfc1f763b1f29 100644 (file)
@@ -283,6 +283,10 @@ static int __init setup_elf_platform(void)
        case 0x8562:
                strcpy(elf_platform, "z15");
                break;
+       case 0x3931:
+       case 0x3932:
+               strcpy(elf_platform, "z16");
+               break;
        }
        return 0;
 }
index 9b30beac904db866ba23fecb890784fb8e8eb4a9..af96dc0549a4b57ccce07688cad056c1e3f67fe1 100644 (file)
@@ -1334,11 +1334,11 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
        hrtimer_start(&vcpu->arch.ckc_timer, sltime, HRTIMER_MODE_REL);
        VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
 no_timer:
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        kvm_vcpu_halt(vcpu);
        vcpu->valid_wakeup = false;
        __unset_cpu_idle(vcpu);
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
 
        hrtimer_cancel(&vcpu->arch.ckc_timer);
        return 0;
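
The kvm_vcpu_srcu_read_lock()/unlock() helpers used throughout these hunks essentially wrap the open-coded pattern they replace (modulo optional debugging checks), roughly:

	static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
	{
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
	}

	static inline void kvm_vcpu_srcu_read_unlock(struct kvm_vcpu *vcpu)
	{
		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	}
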
index 156d1c25a3c1ec9a225669013153bf44c90d5363..76ad6408cb2cd0c2a4f5e91cbc3f735a7721e416 100644 (file)
@@ -2384,7 +2384,16 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
                return -EINVAL;
        if (mop->size > MEM_OP_MAX_SIZE)
                return -E2BIG;
-       if (kvm_s390_pv_is_protected(kvm))
+       /*
+        * This is technically a heuristic only; if the kvm->lock is not
+        * taken, it is not guaranteed that the vm is/remains non-protected.
+        * This is ok from a kernel perspective: wrongdoing is detected on
+        * the access, -EFAULT is returned, and the vm may crash the next
+        * time it accesses the memory in question.
+        * There is no sane use case for switching protection state and
+        * doing a memop on two different CPUs at the same time.
+        */
+       if (kvm_s390_pv_get_handle(kvm))
                return -EINVAL;
        if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
                if (access_key_invalid(mop->key))
@@ -4237,14 +4246,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
         * We try to hold kvm->srcu during most of vcpu_run (except when run-
         * ning the guest), so that memslots (and other stuff) are protected
         */
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
 
        do {
                rc = vcpu_pre_run(vcpu);
                if (rc)
                        break;
 
-               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                /*
                 * As PF_VCPU will be used in the fault handler, there
                 * should be no uaccess between guest_enter and guest_exit.
@@ -4281,12 +4290,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                __enable_cpu_timer_accounting(vcpu);
                guest_exit_irqoff();
                local_irq_enable();
-               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
 
                rc = vcpu_post_run(vcpu, exit_reason);
        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
 
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        return rc;
 }
 
index 7f7c0d6af2ce9f9a6a04ee96864e2865262006c3..cc7c9599f43ee8dc8f292eabe57701de1a51dfa1 100644 (file)
@@ -137,12 +137,7 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
        /* Allocate variable storage */
        vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
        vlen += uv_info.guest_virt_base_stor_len;
-       /*
-        * The Create Secure Configuration Ultravisor Call does not support
-        * using large pages for the virtual memory area.
-        * This is a hardware limitation.
-        */
-       kvm->arch.pv.stor_var = vmalloc_no_huge(vlen);
+       kvm->arch.pv.stor_var = vzalloc(vlen);
        if (!kvm->arch.pv.stor_var)
                goto out_err;
        return 0;
index acda4b6fc851824d22c3b7b68715c0905a8edfd1..dada78b92691fa01f8d6a96081743a8ca2dd59fe 100644 (file)
@@ -1091,7 +1091,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
        handle_last_fault(vcpu, vsie_page);
 
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
 
        /* save current guest state of bp isolation override */
        guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
@@ -1133,7 +1133,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
        if (!guest_bp_isolation)
                clear_thread_flag(TIF_ISOLATE_BP_GUEST);
 
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
 
        if (rc == -EINTR) {
                VCPU_EVENT(vcpu, 3, "%s", "machine check");
index 9bb067321ab4ef6aebed472384a7809d852c84ad..5a053b393d5c5a05d1d17dbd638a5258780dde6c 100644 (file)
@@ -147,7 +147,7 @@ static __always_inline struct pt_regs fake_pt_regs(void)
        struct pt_regs regs;
 
        memset(&regs, 0, sizeof(regs));
-       regs.gprs[15] = current_stack_pointer();
+       regs.gprs[15] = current_stack_pointer;
 
        asm volatile(
                "basr   %[psw_addr],0\n"
index af03cacf34ec7d0758b68536678f68868de4e4bc..1ac73917a8d399ef8335744a056252032120aacd 100644 (file)
@@ -1183,6 +1183,7 @@ EXPORT_SYMBOL_GPL(gmap_read_table);
 static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
                                    struct gmap_rmap *rmap)
 {
+       struct gmap_rmap *temp;
        void __rcu **slot;
 
        BUG_ON(!gmap_is_shadow(sg));
@@ -1190,6 +1191,12 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
        if (slot) {
                rmap->next = radix_tree_deref_slot_protected(slot,
                                                        &sg->guest_table_lock);
+               for (temp = rmap->next; temp; temp = temp->next) {
+                       if (temp->raddr == rmap->raddr) {
+                               kfree(rmap);
+                               return;
+                       }
+               }
                radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
        } else {
                rmap->next = NULL;
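
The added loop prevents an rmap entry with the same raddr from being chained into the per-vmaddr list twice; the duplicate is freed instead of inserted. As a generic sketch of the check-before-insert pattern (types and names hypothetical):

	struct node {
		unsigned long key;
		struct node *next;
	};

	/* Return true if key is already present in the list. */
	static bool list_contains(const struct node *head, unsigned long key)
	{
		for (const struct node *n = head; n; n = n->next)
			if (n->key == key)
				return true;
		return false;
	}
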
index 41c6d734a47417647b4115eaf14175f4bebfbe46..adb6991d04554cb49e5b8510823802c538aefab4 100644 (file)
@@ -35,6 +35,7 @@
 #define flush_page_for_dma(addr) \
        sparc32_cachetlb_ops->page_for_dma(addr)
 
+struct page;
 void sparc_flush_page_to_ram(struct page *page);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
index b03269faef714764748b16f4645e9496cb555633..c4344b67628dd677a4bd952d2aa8e783ccc56b39 100644 (file)
@@ -483,7 +483,6 @@ static void ubd_handler(void)
                        if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
                                blk_queue_max_discard_sectors(io_req->req->q, 0);
                                blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
-                               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
                        }
                        blk_mq_end_request(io_req->req, io_req->error);
                        kfree(io_req);
@@ -800,10 +799,8 @@ static int ubd_open_dev(struct ubd *ubd_dev)
        }
        if (ubd_dev->no_trim == 0) {
                ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
-               ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
                blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
                blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
        }
        blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
        return 0;
index b0142e01002e3dd4fc3299f23da6ce2e6aaa3997..7fa8bdd0ed7e91681bc8d959b0e8de50f89761a3 100644 (file)
@@ -878,6 +878,21 @@ config ACRN_GUEST
          IOT with small footprint and real-time features. More details can be
          found in https://projectacrn.org/.
 
+config INTEL_TDX_GUEST
+       bool "Intel TDX (Trust Domain Extensions) - Guest Support"
+       depends on X86_64 && CPU_SUP_INTEL
+       depends on X86_X2APIC
+       select ARCH_HAS_CC_PLATFORM
+       select X86_MEM_ENCRYPT
+       select X86_MCE
+       help
+         Support running as a guest under Intel TDX.  Without this support,
+         the guest kernel cannot boot or run under TDX.
+         TDX includes memory encryption and integrity capabilities
+         which protect the confidentiality and integrity of guest
+         memory contents and CPU state. TDX guests are protected from
+         some attacks from the VMM.
+
 endif #HYPERVISOR_GUEST
 
 source "arch/x86/Kconfig.cpu"
@@ -1816,17 +1831,6 @@ config ARCH_RANDOM
          If supported, this is a high bandwidth, cryptographically
          secure hardware random number generator.
 
-config X86_SMAP
-       def_bool y
-       prompt "Supervisor Mode Access Prevention" if EXPERT
-       help
-         Supervisor Mode Access Prevention (SMAP) is a security
-         feature in newer Intel processors.  There is a small
-         performance cost if this enabled and turned on; there is
-         also a small increase in the kernel size if this is enabled.
-
-         If unsure, say Y.
-
 config X86_UMIP
        def_bool y
        prompt "User Mode Instruction Prevention" if EXPERT
@@ -1866,7 +1870,7 @@ config X86_KERNEL_IBT
          code with them to make this happen.
 
          In addition to building the kernel with IBT, seal all functions that
-         are not indirect call targets, avoiding them ever becomming one.
+         are not indirect call targets, avoiding them ever becoming one.
 
          This requires LTO like objtool runs and will slow down the build. It
          does significantly reduce the number of ENDBR instructions in the
@@ -2838,13 +2842,6 @@ config IA32_EMULATION
          64-bit kernel. You should likely turn this on, unless you're
          100% sure that you don't have any 32-bit programs left.
 
-config IA32_AOUT
-       tristate "IA32 a.out support"
-       depends on IA32_EMULATION
-       depends on BROKEN
-       help
-         Support old a.out binaries in the 32bit emulation.
-
 config X86_X32_ABI
        bool "x32 ABI for 64-bit mode"
        depends on X86_64
index 63d50f65b8283466e0275c3e9e606f9b1d1a55e8..1abd7cc9d6cd98eee795f59ef61c1dd7e4166e7b 100644 (file)
@@ -313,5 +313,6 @@ define archhelp
   echo  ''
   echo  '  kvm_guest.config    - Enable Kconfig items for running this kernel as a KVM guest'
   echo  '  xen.config          - Enable Kconfig items for running this kernel as a Xen guest'
+  echo  '  x86_debug.config    - Enable tip tree debugging options for testing'
 
 endef
index 34c9dbb6a47d678e1f984947b2437523eb2d362e..148ba5c5106e1eae9e924b811658d69b38c5a46c 100644 (file)
@@ -26,6 +26,7 @@
 #include "bitops.h"
 #include "ctype.h"
 #include "cpuflags.h"
+#include "io.h"
 
 /* Useful macros */
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
@@ -35,44 +36,10 @@ extern struct boot_params boot_params;
 
 #define cpu_relax()    asm volatile("rep; nop")
 
-/* Basic port I/O */
-static inline void outb(u8 v, u16 port)
-{
-       asm volatile("outb %0,%1" : : "a" (v), "dN" (port));
-}
-static inline u8 inb(u16 port)
-{
-       u8 v;
-       asm volatile("inb %1,%0" : "=a" (v) : "dN" (port));
-       return v;
-}
-
-static inline void outw(u16 v, u16 port)
-{
-       asm volatile("outw %0,%1" : : "a" (v), "dN" (port));
-}
-static inline u16 inw(u16 port)
-{
-       u16 v;
-       asm volatile("inw %1,%0" : "=a" (v) : "dN" (port));
-       return v;
-}
-
-static inline void outl(u32 v, u16 port)
-{
-       asm volatile("outl %0,%1" : : "a" (v), "dN" (port));
-}
-static inline u32 inl(u16 port)
-{
-       u32 v;
-       asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
-       return v;
-}
-
 static inline void io_delay(void)
 {
        const u16 DELAY_PORT = 0x80;
-       asm volatile("outb %%al,%0" : : "dN" (DELAY_PORT));
+       outb(0, DELAY_PORT);
 }
 
 /* These functions are used to reference data in other segments. */
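
The open-coded inb/outb/... helpers move into "io.h" so they can be routed through a function-pointer table (pio_ops, set up by init_default_io_ops() and overridden by early_tdx_detect() for paravirtualized port I/O). A sketch of the assumed shape (field set and names illustrative, not verbatim):

	struct port_io_ops {
		u8   (*f_inb)(u16 port);
		void (*f_outb)(u8 v, u16 port);
		void (*f_outw)(u16 v, u16 port);
	};

	extern struct port_io_ops pio_ops;

	/* Callers keep using inb()/outb(); the indirection picks the backend. */
	static inline u8 inb(u16 port)          { return pio_ops.f_inb(port); }
	static inline void outb(u8 v, u16 port) { pio_ops.f_outb(v, port); }
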
@@ -110,66 +77,78 @@ typedef unsigned int addr_t;
 
 static inline u8 rdfs8(addr_t addr)
 {
+       u8 *ptr = (u8 *)absolute_pointer(addr);
        u8 v;
-       asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
+       asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*ptr));
        return v;
 }
 static inline u16 rdfs16(addr_t addr)
 {
+       u16 *ptr = (u16 *)absolute_pointer(addr);
        u16 v;
-       asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
+       asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
        return v;
 }
 static inline u32 rdfs32(addr_t addr)
 {
+       u32 *ptr = (u32 *)absolute_pointer(addr);
        u32 v;
-       asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
+       asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
        return v;
 }
 
 static inline void wrfs8(u8 v, addr_t addr)
 {
-       asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
+       u8 *ptr = (u8 *)absolute_pointer(addr);
+       asm volatile("movb %1,%%fs:%0" : "+m" (*ptr) : "qi" (v));
 }
 static inline void wrfs16(u16 v, addr_t addr)
 {
-       asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
+       u16 *ptr = (u16 *)absolute_pointer(addr);
+       asm volatile("movw %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
 }
 static inline void wrfs32(u32 v, addr_t addr)
 {
-       asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
+       u32 *ptr = (u32 *)absolute_pointer(addr);
+       asm volatile("movl %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
 }
 
 static inline u8 rdgs8(addr_t addr)
 {
+       u8 *ptr = (u8 *)absolute_pointer(addr);
        u8 v;
-       asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
+       asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*ptr));
        return v;
 }
 static inline u16 rdgs16(addr_t addr)
 {
+       u16 *ptr = (u16 *)absolute_pointer(addr);
        u16 v;
-       asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
+       asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
        return v;
 }
 static inline u32 rdgs32(addr_t addr)
 {
+       u32 *ptr = (u32 *)absolute_pointer(addr);
        u32 v;
-       asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
+       asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
        return v;
 }
 
 static inline void wrgs8(u8 v, addr_t addr)
 {
-       asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
+       u8 *ptr = (u8 *)absolute_pointer(addr);
+       asm volatile("movb %1,%%gs:%0" : "+m" (*ptr) : "qi" (v));
 }
 static inline void wrgs16(u16 v, addr_t addr)
 {
-       asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
+       u16 *ptr = (u16 *)absolute_pointer(addr);
+       asm volatile("movw %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
 }
 static inline void wrgs32(u32 v, addr_t addr)
 {
-       asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
+       u32 *ptr = (u32 *)absolute_pointer(addr);
+       asm volatile("movl %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
 }
 
 /* Note: these only return true/false, not a signed return value! */
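
absolute_pointer() is used above to launder the fixed segment offsets through a value the optimizer cannot track, which stops the compiler from misclassifying fixed-address accesses as out-of-bounds array references. It behaves roughly like:

	/* Rough equivalent of absolute_pointer()/RELOC_HIDE(): the empty
	 * asm makes the pointer opaque, so no object identity survives. */
	static inline void *absolute_pointer_sketch(unsigned long addr)
	{
		void *p = (void *)addr;

		asm("" : "+r" (p));
		return p;
	}
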
index 6115274fe10fc54c133d098ee8c9fe1d0ce16bfb..19e1905dcbf6fd59f6cc5ca4d89e8997482b935d 100644 (file)
@@ -101,8 +101,10 @@ ifdef CONFIG_X86_64
 endif
 
 vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
+vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o
 
 vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
+vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
 efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
 
 $(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE
index 8bcbcee54aa13703f82bc7e48c02bfdb48351e76..9caf89063e775eb054a857849eb8dd35a0ed9818 100644 (file)
@@ -3,10 +3,9 @@
 #include "misc.h"
 #include "error.h"
 #include "../string.h"
+#include "efi.h"
 
 #include <linux/numa.h>
-#include <linux/efi.h>
-#include <asm/efi.h>
 
 /*
  * Longest parameter of 'acpi=' is 'copy_dsdt', plus an extra '\0'
  */
 struct mem_vector immovable_mem[MAX_NUMNODES*2];
 
-/*
- * Search EFI system tables for RSDP.  If both ACPI_20_TABLE_GUID and
- * ACPI_TABLE_GUID are found, take the former, which has more features.
- */
 static acpi_physical_address
-__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables,
-                   bool efi_64)
+__efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len)
 {
-       acpi_physical_address rsdp_addr = 0;
-
 #ifdef CONFIG_EFI
-       int i;
-
-       /* Get EFI tables from systab. */
-       for (i = 0; i < nr_tables; i++) {
-               acpi_physical_address table;
-               efi_guid_t guid;
-
-               if (efi_64) {
-                       efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables + i;
-
-                       guid  = tbl->guid;
-                       table = tbl->table;
-
-                       if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
-                               debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
-                               return 0;
-                       }
-               } else {
-                       efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables + i;
-
-                       guid  = tbl->guid;
-                       table = tbl->table;
-               }
+       unsigned long rsdp_addr;
+       int ret;
 
-               if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
-                       rsdp_addr = table;
-               else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
-                       return table;
-       }
+       /*
+        * Search EFI system tables for RSDP. ACPI_20_TABLE_GUID is preferred
+        * over ACPI_TABLE_GUID because it has more features.
+        */
+       rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
+                                         ACPI_20_TABLE_GUID);
+       if (rsdp_addr)
+               return (acpi_physical_address)rsdp_addr;
+
+       /* No ACPI_20_TABLE_GUID found, fall back to ACPI_TABLE_GUID. */
+       rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len,
+                                         ACPI_TABLE_GUID);
+       if (rsdp_addr)
+               return (acpi_physical_address)rsdp_addr;
+
+       debug_putstr("Error getting RSDP address.\n");
 #endif
-       return rsdp_addr;
-}
-
-/* EFI/kexec support is 64-bit only. */
-#ifdef CONFIG_X86_64
-static struct efi_setup_data *get_kexec_setup_data_addr(void)
-{
-       struct setup_data *data;
-       u64 pa_data;
-
-       pa_data = boot_params->hdr.setup_data;
-       while (pa_data) {
-               data = (struct setup_data *)pa_data;
-               if (data->type == SETUP_EFI)
-                       return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
-
-               pa_data = data->next;
-       }
-       return NULL;
-}
-
-static acpi_physical_address kexec_get_rsdp_addr(void)
-{
-       efi_system_table_64_t *systab;
-       struct efi_setup_data *esd;
-       struct efi_info *ei;
-       char *sig;
-
-       esd = (struct efi_setup_data *)get_kexec_setup_data_addr();
-       if (!esd)
-               return 0;
-
-       if (!esd->tables) {
-               debug_putstr("Wrong kexec SETUP_EFI data.\n");
-               return 0;
-       }
-
-       ei = &boot_params->efi_info;
-       sig = (char *)&ei->efi_loader_signature;
-       if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
-               debug_putstr("Wrong kexec EFI loader signature.\n");
-               return 0;
-       }
-
-       /* Get systab from boot params. */
-       systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32));
-       if (!systab)
-               error("EFI system table not found in kexec boot_params.");
-
-       return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true);
+       return 0;
 }
-#else
-static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; }
-#endif /* CONFIG_X86_64 */
 
 static acpi_physical_address efi_get_rsdp_addr(void)
 {
 #ifdef CONFIG_EFI
-       unsigned long systab, config_tables;
+       unsigned long cfg_tbl_pa = 0;
+       unsigned int cfg_tbl_len;
+       unsigned long systab_pa;
        unsigned int nr_tables;
-       struct efi_info *ei;
-       bool efi_64;
-       char *sig;
-
-       ei = &boot_params->efi_info;
-       sig = (char *)&ei->efi_loader_signature;
-
-       if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
-               efi_64 = true;
-       } else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4)) {
-               efi_64 = false;
-       } else {
-               debug_putstr("Wrong EFI loader signature.\n");
-               return 0;
-       }
+       enum efi_type et;
+       int ret;
 
-       /* Get systab from boot params. */
-#ifdef CONFIG_X86_64
-       systab = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32);
-#else
-       if (ei->efi_systab_hi || ei->efi_memmap_hi) {
-               debug_putstr("Error getting RSDP address: EFI system table located above 4GB.\n");
+       et = efi_get_type(boot_params);
+       if (et == EFI_TYPE_NONE)
                return 0;
-       }
-       systab = ei->efi_systab;
-#endif
-       if (!systab)
-               error("EFI system table not found.");
 
-       /* Handle EFI bitness properly */
-       if (efi_64) {
-               efi_system_table_64_t *stbl = (efi_system_table_64_t *)systab;
+       systab_pa = efi_get_system_table(boot_params);
+       if (!systab_pa)
+               error("EFI support advertised, but unable to locate system table.");
 
-               config_tables   = stbl->tables;
-               nr_tables       = stbl->nr_tables;
-       } else {
-               efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
+       ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len);
+       if (ret || !cfg_tbl_pa)
+               error("EFI config table not found.");
 
-               config_tables   = stbl->tables;
-               nr_tables       = stbl->nr_tables;
-       }
-
-       if (!config_tables)
-               error("EFI config tables not found.");
-
-       return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64);
+       return __efi_get_rsdp_addr(cfg_tbl_pa, cfg_tbl_len);
 #else
        return 0;
 #endif
@@ -256,14 +158,6 @@ acpi_physical_address get_rsdp_addr(void)
 
        pa = boot_params->acpi_rsdp_addr;
 
-       /*
-        * Try to get EFI data from setup_data. This can happen when we're a
-        * kexec'ed kernel and kexec(1) has passed all the required EFI info to
-        * us.
-        */
-       if (!pa)
-               pa = kexec_get_rsdp_addr();
-
        if (!pa)
                pa = efi_get_rsdp_addr();
 
index 261e81fb95826d0264c8ad3148f6259cc0c3f51d..70a8d1706d0f1fb46db84bdcb2acf140fc763e04 100644 (file)
@@ -1,5 +1,6 @@
 #include "misc.h"
 
-int early_serial_base;
+/* This might be accessed before .bss is cleared, so use .data instead. */
+int early_serial_base __section(".data");
 
 #include "../early_serial_console.c"
diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c
new file mode 100644 (file)
index 0000000..6edd034
--- /dev/null
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Helpers for early access to EFI configuration table.
+ *
+ * Originally derived from arch/x86/boot/compressed/acpi.c
+ */
+
+#include "misc.h"
+
+/**
+ * efi_get_type - Given a pointer to boot_params, determine the type of EFI environment.
+ *
+ * @bp:         pointer to boot_params
+ *
+ * Return: EFI_TYPE_{32,64} for valid EFI environments, EFI_TYPE_NONE otherwise.
+ */
+enum efi_type efi_get_type(struct boot_params *bp)
+{
+       struct efi_info *ei;
+       enum efi_type et;
+       const char *sig;
+
+       ei = &bp->efi_info;
+       sig = (char *)&ei->efi_loader_signature;
+
+       if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+               et = EFI_TYPE_64;
+       } else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4)) {
+               et = EFI_TYPE_32;
+       } else {
+               debug_putstr("No EFI environment detected.\n");
+               et = EFI_TYPE_NONE;
+       }
+
+#ifndef CONFIG_X86_64
+       /*
+        * Existing callers like acpi.c treat this case as an indicator to
+        * fall through to non-EFI rather than an error, so maintain that
+        * functionality here as well.
+        */
+       if (ei->efi_systab_hi || ei->efi_memmap_hi) {
+               debug_putstr("EFI system table is located above 4GB and cannot be accessed.\n");
+               et = EFI_TYPE_NONE;
+       }
+#endif
+
+       return et;
+}
+
+/**
+ * efi_get_system_table - Given a pointer to boot_params, retrieve the physical address
+ *                        of the EFI system table.
+ *
+ * @bp:         pointer to boot_params
+ *
+ * Return: EFI system table address on success. On error, return 0.
+ */
+unsigned long efi_get_system_table(struct boot_params *bp)
+{
+       unsigned long sys_tbl_pa;
+       struct efi_info *ei;
+       enum efi_type et;
+
+       /* Get systab from boot params. */
+       ei = &bp->efi_info;
+#ifdef CONFIG_X86_64
+       sys_tbl_pa = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32);
+#else
+       sys_tbl_pa = ei->efi_systab;
+#endif
+       if (!sys_tbl_pa) {
+               debug_putstr("EFI system table not found.");
+               return 0;
+       }
+
+       return sys_tbl_pa;
+}
+
+/*
+ * The EFI config table address changes to a virtual address after boot,
+ * which may not be accessible to the kexec'd kernel. To address this,
+ * kexec provides the initial physical address via a struct setup_data
+ * entry, which is looked up here and sanity-checked.
+ */
+static struct efi_setup_data *get_kexec_setup_data(struct boot_params *bp,
+                                                  enum efi_type et)
+{
+#ifdef CONFIG_X86_64
+       struct efi_setup_data *esd = NULL;
+       struct setup_data *data;
+       u64 pa_data;
+
+       pa_data = bp->hdr.setup_data;
+       while (pa_data) {
+               data = (struct setup_data *)pa_data;
+               if (data->type == SETUP_EFI) {
+                       esd = (struct efi_setup_data *)(pa_data + sizeof(struct setup_data));
+                       break;
+               }
+
+               pa_data = data->next;
+       }
+
+       /*
+        * Original ACPI code falls back to attempting normal EFI boot in these
+        * cases, so maintain existing behavior by indicating a non-kexec
+        * environment to the caller, but print a debug message.
+        */
+       if (esd && !esd->tables) {
+               debug_putstr("kexec EFI environment missing valid configuration table.\n");
+               return NULL;
+       }
+
+       return esd;
+#endif
+       return NULL;
+}
+
+/**
+ * efi_get_conf_table - Given a pointer to boot_params, locate and return the physical
+ *                      address of EFI configuration table.
+ *
+ * @bp:                 pointer to boot_params
+ * @cfg_tbl_pa:         location to store physical address of config table
+ * @cfg_tbl_len:        location to store number of config table entries
+ *
+ * Return: 0 on success. On error, return params are left unchanged.
+ */
+int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa,
+                      unsigned int *cfg_tbl_len)
+{
+       unsigned long sys_tbl_pa;
+       enum efi_type et;
+       int ret;
+
+       if (!cfg_tbl_pa || !cfg_tbl_len)
+               return -EINVAL;
+
+       sys_tbl_pa = efi_get_system_table(bp);
+       if (!sys_tbl_pa)
+               return -EINVAL;
+
+       /* Handle EFI bitness properly */
+       et = efi_get_type(bp);
+       if (et == EFI_TYPE_64) {
+               efi_system_table_64_t *stbl = (efi_system_table_64_t *)sys_tbl_pa;
+               struct efi_setup_data *esd;
+
+               /* kexec provides an alternative EFI conf table, check for it. */
+               esd = get_kexec_setup_data(bp, et);
+
+               *cfg_tbl_pa = esd ? esd->tables : stbl->tables;
+               *cfg_tbl_len = stbl->nr_tables;
+       } else if (et == EFI_TYPE_32) {
+               efi_system_table_32_t *stbl = (efi_system_table_32_t *)sys_tbl_pa;
+
+               *cfg_tbl_pa = stbl->tables;
+               *cfg_tbl_len = stbl->nr_tables;
+       } else {
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/* Get vendor table address/guid from EFI config table at the given index */
+static int get_vendor_table(void *cfg_tbl, unsigned int idx,
+                           unsigned long *vendor_tbl_pa,
+                           efi_guid_t *vendor_tbl_guid,
+                           enum efi_type et)
+{
+       if (et == EFI_TYPE_64) {
+               efi_config_table_64_t *tbl_entry = (efi_config_table_64_t *)cfg_tbl + idx;
+
+               if (!IS_ENABLED(CONFIG_X86_64) && tbl_entry->table >> 32) {
+                       debug_putstr("Error: EFI config table entry located above 4GB.\n");
+                       return -EINVAL;
+               }
+
+               *vendor_tbl_pa = tbl_entry->table;
+               *vendor_tbl_guid = tbl_entry->guid;
+
+       } else if (et == EFI_TYPE_32) {
+               efi_config_table_32_t *tbl_entry = (efi_config_table_32_t *)cfg_tbl + idx;
+
+               *vendor_tbl_pa = tbl_entry->table;
+               *vendor_tbl_guid = tbl_entry->guid;
+       } else {
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/**
+ * efi_find_vendor_table - Given EFI config table, search it for the physical
+ *                         address of the vendor table associated with GUID.
+ *
+ * @bp:                pointer to boot_params
+ * @cfg_tbl_pa:        pointer to EFI configuration table
+ * @cfg_tbl_len:       number of entries in EFI configuration table
+ * @guid:              GUID of vendor table
+ *
+ * Return: vendor table address on success. On error, return 0.
+ */
+unsigned long efi_find_vendor_table(struct boot_params *bp,
+                                   unsigned long cfg_tbl_pa,
+                                   unsigned int cfg_tbl_len,
+                                   efi_guid_t guid)
+{
+       enum efi_type et;
+       unsigned int i;
+
+       et = efi_get_type(bp);
+       if (et == EFI_TYPE_NONE)
+               return 0;
+
+       for (i = 0; i < cfg_tbl_len; i++) {
+               unsigned long vendor_tbl_pa;
+               efi_guid_t vendor_tbl_guid;
+               int ret;
+
+               ret = get_vendor_table((void *)cfg_tbl_pa, i,
+                                      &vendor_tbl_pa,
+                                      &vendor_tbl_guid, et);
+               if (ret)
+                       return 0;
+
+               if (!efi_guidcmp(guid, vendor_tbl_guid))
+                       return vendor_tbl_pa;
+       }
+
+       return 0;
+}
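
Taken together, a caller locates the config table once and then searches it by GUID; a hypothetical usage sketch mirroring the refactored acpi.c above:

	unsigned long cfg_tbl_pa = 0;
	unsigned int cfg_tbl_len;
	unsigned long table_pa = 0;

	if (!efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len))
		table_pa = efi_find_vendor_table(boot_params, cfg_tbl_pa,
						 cfg_tbl_len, ACPI_20_TABLE_GUID);
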
diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h
new file mode 100644 (file)
index 0000000..7db2f41
--- /dev/null
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_EFI_H
+#define BOOT_COMPRESSED_EFI_H
+
+#if defined(_LINUX_EFI_H) || defined(_ASM_X86_EFI_H)
+#error Please do not include kernel proper namespace headers
+#endif
+
+typedef guid_t efi_guid_t __aligned(__alignof__(u32));
+
+#define EFI_GUID(a, b, c, d...) (efi_guid_t){ {                                        \
+       (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff,  \
+       (b) & 0xff, ((b) >> 8) & 0xff,                                          \
+       (c) & 0xff, ((c) >> 8) & 0xff, d } }
+
+#define ACPI_TABLE_GUID                                EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3,  0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
+#define ACPI_20_TABLE_GUID                     EFI_GUID(0x8868e871, 0xe4f1, 0x11d3,  0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define EFI_CC_BLOB_GUID                       EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42)
+
+#define EFI32_LOADER_SIGNATURE "EL32"
+#define EFI64_LOADER_SIGNATURE "EL64"
+
+/*
+ * Generic EFI table header
+ */
+typedef        struct {
+       u64 signature;
+       u32 revision;
+       u32 headersize;
+       u32 crc32;
+       u32 reserved;
+} efi_table_hdr_t;
+
+#define EFI_CONVENTIONAL_MEMORY                 7
+
+#define EFI_MEMORY_MORE_RELIABLE \
+                               ((u64)0x0000000000010000ULL)    /* higher reliability */
+#define EFI_MEMORY_SP          ((u64)0x0000000000040000ULL)    /* soft reserved */
+
+#define EFI_PAGE_SHIFT         12
+
+typedef struct {
+       u32 type;
+       u32 pad;
+       u64 phys_addr;
+       u64 virt_addr;
+       u64 num_pages;
+       u64 attribute;
+} efi_memory_desc_t;
+
+#define efi_early_memdesc_ptr(map, desc_size, n)                       \
+       (efi_memory_desc_t *)((void *)(map) + ((n) * (desc_size)))
+
+typedef struct {
+       efi_guid_t guid;
+       u64 table;
+} efi_config_table_64_t;
+
+typedef struct {
+       efi_guid_t guid;
+       u32 table;
+} efi_config_table_32_t;
+
+typedef struct {
+       efi_table_hdr_t hdr;
+       u64 fw_vendor;  /* physical addr of CHAR16 vendor string */
+       u32 fw_revision;
+       u32 __pad1;
+       u64 con_in_handle;
+       u64 con_in;
+       u64 con_out_handle;
+       u64 con_out;
+       u64 stderr_handle;
+       u64 stderr;
+       u64 runtime;
+       u64 boottime;
+       u32 nr_tables;
+       u32 __pad2;
+       u64 tables;
+} efi_system_table_64_t;
+
+typedef struct {
+       efi_table_hdr_t hdr;
+       u32 fw_vendor;  /* physical addr of CHAR16 vendor string */
+       u32 fw_revision;
+       u32 con_in_handle;
+       u32 con_in;
+       u32 con_out_handle;
+       u32 con_out;
+       u32 stderr_handle;
+       u32 stderr;
+       u32 runtime;
+       u32 boottime;
+       u32 nr_tables;
+       u32 tables;
+} efi_system_table_32_t;
+
+/* kexec external ABI */
+struct efi_setup_data {
+       u64 fw_vendor;
+       u64 __unused;
+       u64 tables;
+       u64 smbios;
+       u64 reserved[8];
+};
+
+static inline int efi_guidcmp(efi_guid_t left, efi_guid_t right)
+{
+       return memcmp(&left, &right, sizeof(efi_guid_t));
+}
+
+#ifdef CONFIG_EFI
+bool __pure __efi_soft_reserve_enabled(void);
+
+static inline bool __pure efi_soft_reserve_enabled(void)
+{
+       return IS_ENABLED(CONFIG_EFI_SOFT_RESERVE)
+               && __efi_soft_reserve_enabled();
+}
+#else
+static inline bool efi_soft_reserve_enabled(void)
+{
+       return false;
+}
+#endif /* CONFIG_EFI */
+#endif /* BOOT_COMPRESSED_EFI_H */
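
The EFI_GUID() initializer above stores the first three fields little-endian and the trailing eight bytes verbatim. A standalone sketch that prints such a GUID in canonical text form (for example, ACPI_20_TABLE_GUID comes out as 8868e871-e4f1-11d3-bc22-0080c73c8881):

	#include <stdio.h>

	/* Print 16 GUID bytes laid out as by EFI_GUID(): the 32-bit and
	 * two 16-bit leading fields are little-endian, the rest as-is. */
	static void guid_print(const unsigned char b[16])
	{
		printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-"
		       "%02x%02x%02x%02x%02x%02x\n",
		       b[3], b[2], b[1], b[0],
		       b[5], b[4],
		       b[7], b[6],
		       b[8], b[9],
		       b[10], b[11], b[12], b[13], b[14], b[15]);
	}
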
index dea95301196b8550fdd3faa81355bb20eeba93c8..d33f060900d235169f05fdfaff4c7c7bbbae6cc1 100644 (file)
@@ -189,11 +189,11 @@ SYM_FUNC_START(startup_32)
        subl    $32, %eax       /* Encryption bit is always above bit 31 */
        bts     %eax, %edx      /* Set encryption mask for page tables */
        /*
-        * Mark SEV as active in sev_status so that startup32_check_sev_cbit()
-        * will do a check. The sev_status memory will be fully initialized
-        * with the contents of MSR_AMD_SEV_STATUS later in
-        * set_sev_encryption_mask(). For now it is sufficient to know that SEV
-        * is active.
+        * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
+        * startup32_check_sev_cbit() will do a check. sev_enable() will
+        * initialize sev_status with all the bits reported by
+        * MSR_AMD_SEV_STATUS later, but only MSR_AMD64_SEV_ENABLED_BIT
+        * needs to be set for now.
         */
        movl    $1, rva(sev_status)(%ebp)
 1:
@@ -289,7 +289,7 @@ SYM_FUNC_START(startup_32)
        pushl   %eax
 
        /* Enter paged protected Mode, activating Long Mode */
-       movl    $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
+       movl    $CR0_STATE, %eax
        movl    %eax, %cr0
 
        /* Jump from 32bit compatibility mode into 64bit mode. */
@@ -447,6 +447,23 @@ SYM_CODE_START(startup_64)
        call    load_stage1_idt
        popq    %rsi
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+       /*
+        * Now that the stage1 interrupt handlers are set up, #VC exceptions from
+        * CPUID instructions can be properly handled for SEV-ES guests.
+        *
+        * For SEV-SNP, the CPUID table also needs to be set up in advance of any
+        * CPUID instructions being issued, so go ahead and do that now via
+        * sev_enable(), which also handles the rest of the SEV-related
+        * detection/setup, ensuring it is done before any dependent code
+        * runs.
+        */
+       pushq   %rsi
+       movq    %rsi, %rdi              /* real mode address */
+       call    sev_enable
+       popq    %rsi
+#endif
+
        /*
         * paging_prepare() sets up the trampoline and checks if we need to
         * enable 5-level paging.
@@ -558,17 +575,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
        shrq    $3, %rcx
        rep     stosq
 
-/*
- * If running as an SEV guest, the encryption mask is required in the
- * page-table setup code below. When the guest also has SEV-ES enabled
- * set_sev_encryption_mask() will cause #VC exceptions, but the stage2
- * handler can't map its GHCB because the page-table is not set up yet.
- * So set up the encryption mask here while still on the stage1 #VC
- * handler. Then load stage2 IDT and switch to the kernel's own
- * page-table.
- */
        pushq   %rsi
-       call    set_sev_encryption_mask
        call    load_stage2_idt
 
        /* Pass boot_params to initialize_identity_maps() */
@@ -642,12 +649,28 @@ SYM_CODE_START(trampoline_32bit_src)
        movl    $MSR_EFER, %ecx
        rdmsr
        btsl    $_EFER_LME, %eax
+       /* Avoid writing EFER if no change was made (for TDX guest) */
+       jc      1f
        wrmsr
-       popl    %edx
+1:     popl    %edx
        popl    %ecx
 
+#ifdef CONFIG_X86_MCE
+       /*
+        * Preserve CR4.MCE if the kernel will enable #MC support.
+        * Clearing MCE may fault in some environments (that also force #MC
+        * support). Any machine check that occurs before #MC support is fully
+        * configured will crash the system regardless of the CR4.MCE value set
+        * here.
+        */
+       movl    %cr4, %eax
+       andl    $X86_CR4_MCE, %eax
+#else
+       movl    $0, %eax
+#endif
+
        /* Enable PAE and LA57 (if required) paging modes */
-       movl    $X86_CR4_PAE, %eax
+       orl     $X86_CR4_PAE, %eax
        testl   %edx, %edx
        jz      1f
        orl     $X86_CR4_LA57, %eax
@@ -661,8 +684,9 @@ SYM_CODE_START(trampoline_32bit_src)
        pushl   $__KERNEL_CS
        pushl   %eax
 
-       /* Enable paging again */
-       movl    $(X86_CR0_PG | X86_CR0_PE), %eax
+       /* Enable paging again. */
+       movl    %cr0, %eax
+       btsl    $X86_CR0_PG_BIT, %eax
        movl    %eax, %cr0
 
        lret
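
In C terms, the trampoline's CR4 handling above amounts to the following sketch (need_la57 is a hypothetical stand-in for the %edx test; without CONFIG_X86_MCE the starting value is simply 0):

	unsigned long cr4 = native_read_cr4() & X86_CR4_MCE; /* keep only MCE */

	cr4 |= X86_CR4_PAE;		/* always required for long mode */
	if (need_la57)			/* hypothetical: 5-level paging requested */
		cr4 |= X86_CR4_LA57;
	native_write_cr4(cr4);
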
index f7213d0943b82e73ef0181a72def7c3815ab5c0e..44c350d627c79b4b13e29bfe74cbec1e5c69879f 100644 (file)
@@ -90,7 +90,7 @@ static struct x86_mapping_info mapping_info;
 /*
  * Adds the specified range to the identity mappings.
  */
-static void add_identity_map(unsigned long start, unsigned long end)
+void kernel_add_identity_map(unsigned long start, unsigned long end)
 {
        int ret;
 
@@ -157,14 +157,15 @@ void initialize_identity_maps(void *rmode)
         * explicitly here in case the compressed kernel does not touch them,
         * or does not touch all the pages covering them.
         */
-       add_identity_map((unsigned long)_head, (unsigned long)_end);
+       kernel_add_identity_map((unsigned long)_head, (unsigned long)_end);
        boot_params = rmode;
-       add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
+       kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
        cmdline = get_cmd_line_ptr();
-       add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
+       kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
+
+       sev_prep_identity_maps(top_level_pgt);
 
        /* Load the new page-table. */
-       sev_verify_cbit(top_level_pgt);
        write_cr3(top_level_pgt);
 }
 
@@ -246,10 +247,10 @@ static int set_clr_page_flags(struct x86_mapping_info *info,
         * It should already exist, but keep things generic.
         *
         * To map the page just read from it and fault it in if there is no
-        * mapping yet. add_identity_map() can't be called here because that
-        * would unconditionally map the address on PMD level, destroying any
-        * PTE-level mappings that might already exist. Use assembly here so
-        * the access won't be optimized away.
+        * mapping yet. kernel_add_identity_map() can't be called here because
+        * that would unconditionally map the address on PMD level, destroying
+        * any PTE-level mappings that might already exist. Use assembly here
+        * so the access won't be optimized away.
         */
        asm volatile("mov %[address], %%r9"
                     :: [address] "g" (*(unsigned long *)address)
@@ -275,15 +276,31 @@ static int set_clr_page_flags(struct x86_mapping_info *info,
         * Changing encryption attributes of a page requires to flush it from
         * the caches.
         */
-       if ((set | clr) & _PAGE_ENC)
+       if ((set | clr) & _PAGE_ENC) {
                clflush_page(address);
 
+               /*
+                * If the encryption attribute is being cleared, change the page state
+                * to shared in the RMP table.
+                */
+               if (clr)
+                       snp_set_page_shared(__pa(address & PAGE_MASK));
+       }
+
        /* Update PTE */
        pte = *ptep;
        pte = pte_set_flags(pte, set);
        pte = pte_clear_flags(pte, clr);
        set_pte(ptep, pte);
 
+       /*
+        * If the encryption attribute is being set, then change the page state to
+        * private in the RMP entry. The page state change must be done after the PTE
+        * is updated.
+        */
+       if (set & _PAGE_ENC)
+               snp_set_page_private(__pa(address & PAGE_MASK));
+
        /* Flush TLB after changing encryption attribute */
        write_cr3(top_level_pgt);
 
@@ -347,5 +364,5 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code)
         * Error code is sane - now identity map the 2M region around
         * the faulting address.
         */
-       add_identity_map(address, end);
+       kernel_add_identity_map(address, end);
 }
index 9b93567d663a9003d25a8d3f9a3ff6029da28458..6debb816e83dcc85321e80f1c6db00d23ad3351f 100644 (file)
@@ -39,7 +39,23 @@ void load_stage1_idt(void)
        load_boot_idt(&boot_idt_desc);
 }
 
-/* Setup IDT after kernel jumping to  .Lrelocated */
+/*
+ * Set up the IDT after the kernel jumps to .Lrelocated.
+ *
+ * initialize_identity_maps() needs a #PF handler to be set up
+ * in order to be able to fault in identity-mapping ranges; see
+ * do_boot_page_fault().
+ *
+ * This #PF handler setup needs to happen in load_stage2_idt(), where
+ * the IDT is loaded and the #VC IDT entry gets set up too.
+ *
+ * In order to be able to handle #VCs, one needs a GHCB, which
+ * gets set up with an already established pagetable, as done in
+ * initialize_identity_maps(). And there's the catch-22: the boot #VC
+ * handler do_boot_stage2_vc() needs to call early_setup_ghcb() itself
+ * (and, especially, set_page_decrypted()) because the SEV-ES setup code
+ * cannot initialize a GHCB while there's no #PF handler yet...
+ */
 void load_stage2_idt(void)
 {
        boot_idt_desc.address = (unsigned long)boot_idt;
index 411b268bc0a24db0f0eecf24989619a69025c1aa..4a3f223973f40f85bc633359f1a41c92ce6ca084 100644 (file)
 #include "misc.h"
 #include "error.h"
 #include "../string.h"
+#include "efi.h"
 
 #include <generated/compile.h>
 #include <linux/module.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
 #include <linux/ctype.h>
-#include <linux/efi.h>
 #include <generated/utsrelease.h>
-#include <asm/efi.h>
 
 #define _SETUP
 #include <asm/setup.h> /* For COMMAND_LINE_SIZE */
index a63424d13627bcfe935ee1f02f1f6637cf984f1c..a73e4d783cae20b6786969c1505ac3bca32bedd7 100644 (file)
@@ -187,42 +187,6 @@ SYM_CODE_END(startup32_vc_handler)
        .code64
 
 #include "../../kernel/sev_verify_cbit.S"
-SYM_FUNC_START(set_sev_encryption_mask)
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-       push    %rbp
-       push    %rdx
-
-       movq    %rsp, %rbp              /* Save current stack pointer */
-
-       call    get_sev_encryption_bit  /* Get the encryption bit position */
-       testl   %eax, %eax
-       jz      .Lno_sev_mask
-
-       bts     %rax, sme_me_mask(%rip) /* Create the encryption mask */
-
-       /*
-        * Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in
-        * get_sev_encryption_bit() because this function is 32-bit code and
-        * shared between 64-bit and 32-bit boot path.
-        */
-       movl    $MSR_AMD64_SEV, %ecx    /* Read the SEV MSR */
-       rdmsr
-
-       /* Store MSR value in sev_status */
-       shlq    $32, %rdx
-       orq     %rdx, %rax
-       movq    %rax, sev_status(%rip)
-
-.Lno_sev_mask:
-       movq    %rbp, %rsp              /* Restore original stack pointer */
-
-       pop     %rdx
-       pop     %rbp
-#endif
-
-       xor     %rax, %rax
-       RET
-SYM_FUNC_END(set_sev_encryption_mask)
 
        .data
 
index 1cdcaf34ee367bd179ad98ad95dfec56b6df37bb..cf690d8712f4eda895861a802e10f86ccc7d68a1 100644 (file)
@@ -48,12 +48,17 @@ void *memmove(void *dest, const void *src, size_t n);
  */
 struct boot_params *boot_params;
 
+struct port_io_ops pio_ops;
+
 memptr free_mem_ptr;
 memptr free_mem_end_ptr;
 
 static char *vidmem;
 static int vidport;
-static int lines, cols;
+
+/* These might be accessed before .bss is cleared, so use .data instead. */
+static int lines __section(".data");
+static int cols __section(".data");
 
 #ifdef CONFIG_KERNEL_GZIP
 #include "../../../../lib/decompress_inflate.c"
@@ -371,6 +376,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
        lines = boot_params->screen_info.orig_video_lines;
        cols = boot_params->screen_info.orig_video_cols;
 
+       init_default_io_ops();
+
+       /*
+        * Detect TDX guest environment.
+        *
+        * It has to be done before console_init() in order to use
+        * paravirtualized port I/O operations if needed.
+        */
+       early_tdx_detect();
+
        console_init();
 
        /*
index 16ed360b6692dbb05a96cd3f39d1786bcf6e2ae0..4910bf230d7b4afbb44f83d958e0b1902fe26458 100644 (file)
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
 #include <linux/elf.h>
-#include <linux/io.h>
 #include <asm/page.h>
 #include <asm/boot.h>
 #include <asm/bootparam.h>
 #include <asm/desc_defs.h>
 
+#include "tdx.h"
+
 #define BOOT_CTYPE_H
 #include <linux/acpi.h>
 
 #define BOOT_BOOT_H
 #include "../ctype.h"
+#include "../io.h"
+
+#include "efi.h"
 
 #ifdef CONFIG_X86_64
 #define memptr long
@@ -120,17 +124,23 @@ static inline void console_init(void)
 { }
 #endif
 
-void set_sev_encryption_mask(void);
-
 #ifdef CONFIG_AMD_MEM_ENCRYPT
+void sev_enable(struct boot_params *bp);
 void sev_es_shutdown_ghcb(void);
 extern bool sev_es_check_ghcb_fault(unsigned long address);
+void snp_set_page_private(unsigned long paddr);
+void snp_set_page_shared(unsigned long paddr);
+void sev_prep_identity_maps(unsigned long top_level_pgt);
 #else
+static inline void sev_enable(struct boot_params *bp) { }
 static inline void sev_es_shutdown_ghcb(void) { }
 static inline bool sev_es_check_ghcb_fault(unsigned long address)
 {
        return false;
 }
+static inline void snp_set_page_private(unsigned long paddr) { }
+static inline void snp_set_page_shared(unsigned long paddr) { }
+static inline void sev_prep_identity_maps(unsigned long top_level_pgt) { }
 #endif
 
 /* acpi.c */
@@ -151,6 +161,7 @@ static inline int count_immovable_mem_regions(void) { return 0; }
 #ifdef CONFIG_X86_5LEVEL
 extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
 #endif
+extern void kernel_add_identity_map(unsigned long start, unsigned long end);
 
 /* Used by PAGE_KERN* macros: */
 extern pteval_t __default_kernel_pte_mask;
@@ -172,4 +183,47 @@ void boot_stage2_vc(void);
 
 unsigned long sev_verify_cbit(unsigned long cr3);
 
+enum efi_type {
+       EFI_TYPE_64,
+       EFI_TYPE_32,
+       EFI_TYPE_NONE,
+};
+
+#ifdef CONFIG_EFI
+/* helpers for early EFI config table access */
+enum efi_type efi_get_type(struct boot_params *bp);
+unsigned long efi_get_system_table(struct boot_params *bp);
+int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa,
+                      unsigned int *cfg_tbl_len);
+unsigned long efi_find_vendor_table(struct boot_params *bp,
+                                   unsigned long cfg_tbl_pa,
+                                   unsigned int cfg_tbl_len,
+                                   efi_guid_t guid);
+#else
+static inline enum efi_type efi_get_type(struct boot_params *bp)
+{
+       return EFI_TYPE_NONE;
+}
+
+static inline unsigned long efi_get_system_table(struct boot_params *bp)
+{
+       return 0;
+}
+
+static inline int efi_get_conf_table(struct boot_params *bp,
+                                    unsigned long *cfg_tbl_pa,
+                                    unsigned int *cfg_tbl_len)
+{
+       return -ENOENT;
+}
+
+static inline unsigned long efi_find_vendor_table(struct boot_params *bp,
+                                                 unsigned long cfg_tbl_pa,
+                                                 unsigned int cfg_tbl_len,
+                                                 efi_guid_t guid)
+{
+       return 0;
+}
+#endif /* CONFIG_EFI */
+
 #endif /* BOOT_COMPRESSED_MISC_H */
index 6ff7e81b5628456b92779ede7a9c941fe7df6a85..cc9b2529a08634b4249ec65c0d33219f811d08a1 100644 (file)
@@ -6,7 +6,7 @@
 #define TRAMPOLINE_32BIT_PGTABLE_OFFSET        0
 
 #define TRAMPOLINE_32BIT_CODE_OFFSET   PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE     0x70
+#define TRAMPOLINE_32BIT_CODE_SIZE     0x80
 
 #define TRAMPOLINE_32BIT_STACK_END     TRAMPOLINE_32BIT_SIZE
 
index a1733319a22a055262758126bc578a32e7fd3ef0..2ac12ff4111bf8c0dddeafc38680daaf9c0834cc 100644 (file)
@@ -1,11 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "misc.h"
-#include <linux/efi.h>
 #include <asm/e820/types.h>
 #include <asm/processor.h>
-#include <asm/efi.h>
 #include "pgtable.h"
 #include "../string.h"
+#include "efi.h"
 
 #define BIOS_START_MIN         0x20000U        /* 128K, less than this is insane */
 #define BIOS_START_MAX         0x9f000U        /* 640K, absolute maximum */
index 28bcf04c022eaf4e87f92cdfb7f8fd74343750eb..52f989f6acc281f95815bc76e0976348b5b8f635 100644 (file)
 #include <asm/fpu/xcr.h>
 #include <asm/ptrace.h>
 #include <asm/svm.h>
+#include <asm/cpuid.h>
 
 #include "error.h"
+#include "../msr.h"
 
 struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
 struct ghcb *boot_ghcb;
@@ -56,23 +58,19 @@ static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
 
 static inline u64 sev_es_rd_ghcb_msr(void)
 {
-       unsigned long low, high;
+       struct msr m;
 
-       asm volatile("rdmsr" : "=a" (low), "=d" (high) :
-                       "c" (MSR_AMD64_SEV_ES_GHCB));
+       boot_rdmsr(MSR_AMD64_SEV_ES_GHCB, &m);
 
-       return ((high << 32) | low);
+       return m.q;
 }
 
 static inline void sev_es_wr_ghcb_msr(u64 val)
 {
-       u32 low, high;
+       struct msr m;
 
-       low  = val & 0xffffffffUL;
-       high = val >> 32;
-
-       asm volatile("wrmsr" : : "c" (MSR_AMD64_SEV_ES_GHCB),
-                       "a"(low), "d" (high) : "memory");
+       m.q = val;
+       boot_wrmsr(MSR_AMD64_SEV_ES_GHCB, &m);
 }
 
 static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
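
boot_rdmsr()/boot_wrmsr() trade the open-coded rdmsr/wrmsr asm for helpers operating on struct msr, which (as a sketch of the shared definition) lets callers use the 64-bit value or the 32-bit halves interchangeably:

	struct msr {
		union {
			struct {
				u32 l;	/* low half (EAX) */
				u32 h;	/* high half (EDX) */
			};
			u64 q;		/* full 64-bit value */
		};
	};
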
@@ -119,11 +117,54 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
 /* Include code for early handlers */
 #include "../../kernel/sev-shared.c"
 
-static bool early_setup_sev_es(void)
+static inline bool sev_snp_enabled(void)
+{
+       return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
+}
+
+static void __page_state_change(unsigned long paddr, enum psc_op op)
+{
+       u64 val;
+
+       if (!sev_snp_enabled())
+               return;
+
+       /*
+        * If private -> shared then invalidate the page before requesting the
+        * state change in the RMP table.
+        */
+       if (op == SNP_PAGE_STATE_SHARED && pvalidate(paddr, RMP_PG_SIZE_4K, 0))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+
+       /* Issue VMGEXIT to change the page state in RMP table. */
+       sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
+       VMGEXIT();
+
+       /* Read the response of the VMGEXIT. */
+       val = sev_es_rd_ghcb_msr();
+       if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+
+       /*
+        * Now that the page state has been changed in the RMP table, validate
+        * it so that it is consistent with the RMP entry.
+        */
+       if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+}
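
As a rough, standalone illustration (not part of the patch), the MSR-based Page
State Change request built by GHCB_MSR_PSC_REQ_GFN() above packs the operation
and the guest frame number into one GHCB MSR value. The layout assumed here
(request code 0x014 in the low bits, GFN in bits 51:12, operation in bits
55:52, with SNP_PAGE_STATE_PRIVATE=1 and SNP_PAGE_STATE_SHARED=2) follows the
GHCB spec as used above; treat the exact constants as assumptions.

	/* Illustrative sketch of the PSC request encoding; values are examples. */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t paddr = 0x123456000ULL;	/* 4K-aligned physical address */
		uint64_t gfn = paddr >> 12;
		uint64_t op = 1;			/* assumed: 1 = private, 2 = shared */
		uint64_t req = 0x014 | (gfn << 12) | (op << 52);

		printf("GHCB MSR PSC request: %#llx\n", (unsigned long long)req);
		return 0;
	}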
+
+void snp_set_page_private(unsigned long paddr)
+{
+       __page_state_change(paddr, SNP_PAGE_STATE_PRIVATE);
+}
+
+void snp_set_page_shared(unsigned long paddr)
 {
-       if (!sev_es_negotiate_protocol())
-               sev_es_terminate(GHCB_SEV_ES_PROT_UNSUPPORTED);
+       __page_state_change(paddr, SNP_PAGE_STATE_SHARED);
+}
 
+static bool early_setup_ghcb(void)
+{
        if (set_page_decrypted((unsigned long)&boot_ghcb_page))
                return false;
 
@@ -135,6 +176,10 @@ static bool early_setup_sev_es(void)
        /* Initialize lookup tables for the instruction decoder */
        inat_init_tables();
 
+       /* SNP guests require that the GHCB GPA be registered. */
+       if (sev_snp_enabled())
+               snp_register_ghcb_early(__pa(&boot_ghcb_page));
+
        return true;
 }
 
@@ -174,8 +219,8 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
        struct es_em_ctxt ctxt;
        enum es_result result;
 
-       if (!boot_ghcb && !early_setup_sev_es())
-               sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
+       if (!boot_ghcb && !early_setup_ghcb())
+               sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
 
        vc_ghcb_invalidate(boot_ghcb);
        result = vc_init_em_ctxt(&ctxt, regs, exit_code);
@@ -202,5 +247,191 @@ finish:
        if (result == ES_OK)
                vc_finish_insn(&ctxt);
        else if (result != ES_RETRY)
-               sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
+               sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+}
+
+static void enforce_vmpl0(void)
+{
+       u64 attrs;
+       int err;
+
+       /*
+        * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically
+        * higher) privilege level. Here, clear the VMPL1 permission mask of the
+        * GHCB page. If the guest is not running at VMPL0, this will fail.
+        *
+        * If the guest is running at VMPL0, it will succeed. Even though the
+        * operation modifies permission bits, that is currently fine because
+        * Linux SNP guests are supported only at VMPL0, so changes to VMPL1 or
+        * higher permission masks are a don't-care.
+        */
+       attrs = 1;
+       if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, attrs))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0);
+}
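
A small sketch of how the attrs word passed to rmpadjust() above is assumed to
be laid out (target VMPL in bits 7:0, permission mask in bits 15:8, per the
AMD APM; treat the field positions here as an assumption, not a definitive
restatement):

	/* Illustrative only: builds the attrs value used in enforce_vmpl0(). */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t target_vmpl = 1;	/* operate on VMPL1's view of the page */
		uint64_t perm_mask = 0;		/* clear all VMPL1 permissions */
		uint64_t attrs = (perm_mask << 8) | target_vmpl;

		printf("attrs = %#llx\n", (unsigned long long)attrs);	/* 0x1 */
		return 0;
	}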
+
+void sev_enable(struct boot_params *bp)
+{
+       unsigned int eax, ebx, ecx, edx;
+       struct msr m;
+       bool snp;
+
+       /*
+        * Setup/preliminary detection of SNP. This will be sanity-checked
+        * against CPUID/MSR values later.
+        */
+       snp = snp_init(bp);
+
+       /* Check for the SME/SEV support leaf */
+       eax = 0x80000000;
+       ecx = 0;
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+       if (eax < 0x8000001f)
+               return;
+
+       /*
+        * Check for the SME/SEV feature:
+        *   CPUID Fn8000_001F[EAX]
+        *   - Bit 0 - Secure Memory Encryption support
+        *   - Bit 1 - Secure Encrypted Virtualization support
+        *   CPUID Fn8000_001F[EBX]
+        *   - Bits 5:0 - Pagetable bit position used to indicate encryption
+        */
+       eax = 0x8000001f;
+       ecx = 0;
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+       /* Check whether SEV is supported */
+       if (!(eax & BIT(1))) {
+               if (snp)
+                       error("SEV-SNP support indicated by CC blob, but not CPUID.");
+               return;
+       }
+
+       /* Set the SME mask if this is an SEV guest. */
+       boot_rdmsr(MSR_AMD64_SEV, &m);
+       sev_status = m.q;
+       if (!(sev_status & MSR_AMD64_SEV_ENABLED))
+               return;
+
+       /* Negotiate the GHCB protocol version. */
+       if (sev_status & MSR_AMD64_SEV_ES_ENABLED) {
+               if (!sev_es_negotiate_protocol())
+                       sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_PROT_UNSUPPORTED);
+       }
+
+       /*
+        * SNP is supported in v2 of the GHCB spec which mandates support for HV
+        * features.
+        */
+       if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
+               if (!(get_hv_features() & GHCB_HV_FT_SNP))
+                       sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+
+               enforce_vmpl0();
+       }
+
+       if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+               error("SEV-SNP supported indicated by CC blob, but not SEV status MSR.");
+
+       sme_me_mask = BIT_ULL(ebx & 0x3f);
+}
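
The tail of sev_enable() derives the encryption mask from CPUID
Fn8000_001F[EBX] bits 5:0, as the comment above describes. A minimal
user-space sketch of that computation, with a hypothetical EBX value:

	/* Illustrative sketch: deriving sme_me_mask from CPUID Fn8000_001F[EBX]. */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t ebx = 0x8000332f;		/* hypothetical CPUID result */
		unsigned int cbit = ebx & 0x3f;		/* bits 5:0: C-bit position */
		uint64_t sme_me_mask = 1ULL << cbit;

		printf("C-bit %u, mask %#llx\n", cbit, (unsigned long long)sme_me_mask);
		return 0;
	}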
+
+/* Search for Confidential Computing blob in the EFI config table. */
+static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp)
+{
+       unsigned long cfg_table_pa;
+       unsigned int cfg_table_len;
+       int ret;
+
+       ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len);
+       if (ret)
+               return NULL;
+
+       return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa,
+                                                               cfg_table_len,
+                                                               EFI_CC_BLOB_GUID);
+}
+
+/*
+ * Initial setup of SNP relies on information provided by the
+ * Confidential Computing blob, which can be passed to the boot kernel
+ * by firmware/bootloader in the following ways:
+ *
+ * - via an entry in the EFI config table
+ * - via a setup_data structure, as defined by the Linux Boot Protocol
+ *
+ * Scan for the blob in that order.
+ */
+static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+{
+       struct cc_blob_sev_info *cc_info;
+
+       cc_info = find_cc_blob_efi(bp);
+       if (cc_info)
+               goto found_cc_info;
+
+       cc_info = find_cc_blob_setup_data(bp);
+       if (!cc_info)
+               return NULL;
+
+found_cc_info:
+       if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
+               sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+
+       return cc_info;
+}
+
+/*
+ * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks
+ * will verify the SNP CPUID/MSR bits.
+ */
+bool snp_init(struct boot_params *bp)
+{
+       struct cc_blob_sev_info *cc_info;
+
+       if (!bp)
+               return false;
+
+       cc_info = find_cc_blob(bp);
+       if (!cc_info)
+               return false;
+
+       /*
+        * If a SNP-specific Confidential Computing blob is present, then
+        * firmware/bootloader have indicated SNP support. Verifying this
+        * involves CPUID checks which will be more reliable if the SNP
+        * CPUID table is used. See comments over setup_cpuid_table() for
+        * more details.
+        */
+       setup_cpuid_table(cc_info);
+
+       /*
+        * Pass the run-time kernel a pointer to the CC info via boot_params so
+        * the EFI config table doesn't need to be searched again during the
+        * early startup phase.
+        */
+       bp->cc_blob_address = (u32)(unsigned long)cc_info;
+
+       return true;
+}
+
+void sev_prep_identity_maps(unsigned long top_level_pgt)
+{
+       /*
+        * The Confidential Computing blob is used very early in the
+        * uncompressed kernel to find the in-memory CPUID table used to handle
+        * CPUID instructions. Make sure an identity mapping exists so it can
+        * be accessed after the switchover.
+        */
+       if (sev_snp_enabled()) {
+               unsigned long cc_info_pa = boot_params->cc_blob_address;
+               struct cc_blob_sev_info *cc_info;
+
+               kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info));
+
+               cc_info = (struct cc_blob_sev_info *)cc_info_pa;
+               kernel_add_identity_map(cc_info->cpuid_phys, cc_info->cpuid_phys + cc_info->cpuid_len);
+       }
+
+       sev_verify_cbit(top_level_pgt);
 }
diff --git a/arch/x86/boot/compressed/tdcall.S b/arch/x86/boot/compressed/tdcall.S
new file mode 100644 (file)
index 0000000..46d0495
--- /dev/null
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "../../coco/tdx/tdcall.S"
diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c
new file mode 100644 (file)
index 0000000..918a760
--- /dev/null
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "../cpuflags.h"
+#include "../string.h"
+#include "../io.h"
+#include "error.h"
+
+#include <vdso/limits.h>
+#include <uapi/asm/vmx.h>
+
+#include <asm/shared/tdx.h>
+
+/* Called from __tdx_hypercall() for unrecoverable failure */
+void __tdx_hypercall_failed(void)
+{
+       error("TDVMCALL failed. TDX module bug?");
+}
+
+static inline unsigned int tdx_io_in(int size, u16 port)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = EXIT_REASON_IO_INSTRUCTION,
+               .r12 = size,
+               .r13 = 0,
+               .r14 = port,
+       };
+
+       if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
+               return UINT_MAX;
+
+       return args.r11;
+}
+
+static inline void tdx_io_out(int size, u16 port, u32 value)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = EXIT_REASON_IO_INSTRUCTION,
+               .r12 = size,
+               .r13 = 1,
+               .r14 = port,
+               .r15 = value,
+       };
+
+       __tdx_hypercall(&args, 0);
+}
+
+static inline u8 tdx_inb(u16 port)
+{
+       return tdx_io_in(1, port);
+}
+
+static inline void tdx_outb(u8 value, u16 port)
+{
+       tdx_io_out(1, port, value);
+}
+
+static inline void tdx_outw(u16 value, u16 port)
+{
+       tdx_io_out(2, port, value);
+}
+
+void early_tdx_detect(void)
+{
+       u32 eax, sig[3];
+
+       cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, &sig[0], &sig[2], &sig[1]);
+
+       if (memcmp(TDX_IDENT, sig, sizeof(sig)))
+               return;
+
+       /* Use hypercalls instead of I/O instructions */
+       pio_ops.f_inb  = tdx_inb;
+       pio_ops.f_outb = tdx_outb;
+       pio_ops.f_outw = tdx_outw;
+}
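
The out-of-order &sig[0]/&sig[2]/&sig[1] arguments above are deliberate: the
TDX vendor string comes back in EBX, EDX, ECX order, so storing EBX->sig[0],
EDX->sig[1], ECX->sig[2] makes sig[] directly comparable against TDX_IDENT. A
self-contained sketch (register values hypothetical, string as defined in
<asm/shared/tdx.h>):

	/* Illustrative sketch of the signature reassembly above. */
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		unsigned int ebx = 0x65746e49;	/* "Inte" */
		unsigned int edx = 0x5844546c;	/* "lTDX" */
		unsigned int ecx = 0x20202020;	/* "    " */
		unsigned int sig[3] = { ebx, edx, ecx };	/* EBX, EDX, ECX order */

		printf("match = %d\n", !memcmp("IntelTDX    ", sig, sizeof(sig)));
		return 0;
	}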
diff --git a/arch/x86/boot/compressed/tdx.h b/arch/x86/boot/compressed/tdx.h
new file mode 100644 (file)
index 0000000..9055482
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_TDX_H
+#define BOOT_COMPRESSED_TDX_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_INTEL_TDX_GUEST
+void early_tdx_detect(void);
+#else
+static inline void early_tdx_detect(void) { }
+#endif
+
+#endif /* BOOT_COMPRESSED_TDX_H */
index e1478d32de1aef50617f21932e1f7ab1fc237ba6..fed8d13ce2526086883422e7fee4b65de858be94 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/required-features.h>
 #include <asm/msr-index.h>
 #include "string.h"
+#include "msr.h"
 
 static u32 err_flags[NCAPINTS];
 
@@ -130,12 +131,11 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
                /* If this is an AMD and we're only missing SSE+SSE2, try to
                   turn them on */
 
-               u32 ecx = MSR_K7_HWCR;
-               u32 eax, edx;
+               struct msr m;
 
-               asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
-               eax &= ~(1 << 15);
-               asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+               boot_rdmsr(MSR_K7_HWCR, &m);
+               m.l &= ~(1 << 15);
+               boot_wrmsr(MSR_K7_HWCR, &m);
 
                get_cpuflags(); /* Make sure it really did something */
                err = check_cpuflags();
@@ -145,28 +145,28 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
                /* If this is a VIA C3, we might have to enable CX8
                   explicitly */
 
-               u32 ecx = MSR_VIA_FCR;
-               u32 eax, edx;
+               struct msr m;
 
-               asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
-               eax |= (1<<1)|(1<<7);
-               asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+               boot_rdmsr(MSR_VIA_FCR, &m);
+               m.l |= (1 << 1) | (1 << 7);
+               boot_wrmsr(MSR_VIA_FCR, &m);
 
                set_bit(X86_FEATURE_CX8, cpu.flags);
                err = check_cpuflags();
        } else if (err == 0x01 && is_transmeta()) {
                /* Transmeta might have masked feature bits in word 0 */
 
-               u32 ecx = 0x80860004;
-               u32 eax, edx;
+               struct msr m, m_tmp;
                u32 level = 1;
 
-               asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
-               asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx));
+               boot_rdmsr(0x80860004, &m);
+               m_tmp = m;
+               m_tmp.l = ~0;
+               boot_wrmsr(0x80860004, &m_tmp);
                asm("cpuid"
                    : "+a" (level), "=d" (cpu.flags[0])
                    : : "ecx", "ebx");
-               asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+               boot_wrmsr(0x80860004, &m);
 
                err = check_cpuflags();
        } else if (err == 0x01 &&
index a0b75f73dc630d31d766394fc402a07f9336c059..a83d67ec627d1768facd1601e1eee537bcefbc2a 100644 (file)
@@ -71,8 +71,7 @@ int has_eflag(unsigned long mask)
 # define EBX_REG "=b"
 #endif
 
-static inline void cpuid_count(u32 id, u32 count,
-               u32 *a, u32 *b, u32 *c, u32 *d)
+void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d)
 {
        asm volatile(".ifnc %%ebx,%3 ; movl  %%ebx,%3 ; .endif  \n\t"
                     "cpuid                                     \n\t"
index 2e20814d3ce3e8d15e887bad15224e39646c100d..475b8fde90f7da3e80c1dd2d2b41b810c4dcb7a0 100644 (file)
@@ -17,5 +17,6 @@ extern u32 cpu_vendor[3];
 
 int has_eflag(unsigned long mask);
 void get_cpuflags(void);
+void cpuid_count(u32 id, u32 count, u32 *a, u32 *b, u32 *c, u32 *d);
 
 #endif
index 6dbd7e9f74c9caf51a08dcc3cd2924982b2120f1..0352e4589efa2e52636beb9ac169892cce41f619 100644 (file)
@@ -163,7 +163,11 @@ extra_header_fields:
        .long   0x200                           # SizeOfHeaders
        .long   0                               # CheckSum
        .word   IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application)
+#ifdef CONFIG_DXE_MEM_ATTRIBUTES
+       .word   IMAGE_DLL_CHARACTERISTICS_NX_COMPAT     # DllCharacteristics
+#else
        .word   0                               # DllCharacteristics
+#endif
 #ifdef CONFIG_X86_32
        .long   0                               # SizeOfStackReserve
        .long   0                               # SizeOfStackCommit
diff --git a/arch/x86/boot/io.h b/arch/x86/boot/io.h
new file mode 100644 (file)
index 0000000..1108809
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_IO_H
+#define BOOT_IO_H
+
+#include <asm/shared/io.h>
+
+#undef inb
+#undef inw
+#undef inl
+#undef outb
+#undef outw
+#undef outl
+
+struct port_io_ops {
+       u8      (*f_inb)(u16 port);
+       void    (*f_outb)(u8 v, u16 port);
+       void    (*f_outw)(u16 v, u16 port);
+};
+
+extern struct port_io_ops pio_ops;
+
+/*
+ * Use the normal I/O instructions by default.
+ * TDX guests override these to use hypercalls.
+ */
+static inline void init_default_io_ops(void)
+{
+       pio_ops.f_inb  = __inb;
+       pio_ops.f_outb = __outb;
+       pio_ops.f_outw = __outw;
+}
+
+/*
+ * Redirect port I/O operations via pio_ops callbacks.
+ * TDX guests override these callbacks with TDX-specific helpers.
+ */
+#define inb  pio_ops.f_inb
+#define outb pio_ops.f_outb
+#define outw pio_ops.f_outw
+
+#endif
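
A user-space analogue of the indirection this header sets up; the names below
are invented purely for illustration. Call sites keep using inb()/outb(), and
switching guest types is a matter of repointing the ops once:

	/* Illustrative sketch of the pio_ops function-pointer redirection. */
	#include <stdint.h>
	#include <stdio.h>

	struct port_io_ops_sketch {
		uint8_t (*f_inb)(uint16_t port);
		void    (*f_outb)(uint8_t v, uint16_t port);
	};

	static uint8_t native_inb(uint16_t port)  { printf("inb  %#x\n", port); return 0; }
	static void native_outb(uint8_t v, uint16_t port) { printf("outb %#x\n", port); }
	static uint8_t tdx_inb_stub(uint16_t port) { printf("hypercall in %#x\n", port); return 0; }

	static struct port_io_ops_sketch pio;

	#define inb  pio.f_inb
	#define outb pio.f_outb

	int main(void)
	{
		/* Default: raw port I/O (stubbed out here). */
		pio.f_inb  = native_inb;
		pio.f_outb = native_outb;
		outb(0x41, 0x70);

		/* A TDX guest flips the pointer once; every call site follows. */
		pio.f_inb = tdx_inb_stub;
		(void)inb(0x71);
		return 0;
	}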
index e3add857c2c9dfd91806c9c466cc86dfcf046de0..c4ea5258ab558fc1773d7cef85796238b62da026 100644 (file)
@@ -17,6 +17,8 @@
 
 struct boot_params boot_params __attribute__((aligned(16)));
 
+struct port_io_ops pio_ops;
+
 char *HEAP = _end;
 char *heap_end = _end;         /* Default end of heap = no heap */
 
@@ -33,7 +35,7 @@ static void copy_boot_params(void)
                u16 cl_offset;
        };
        const struct old_cmdline * const oldcmd =
-               (const struct old_cmdline *)OLD_CL_ADDRESS;
+               absolute_pointer(OLD_CL_ADDRESS);
 
        BUILD_BUG_ON(sizeof(boot_params) != 4096);
        memcpy(&boot_params.hdr, &hdr, sizeof(hdr));
@@ -133,6 +135,8 @@ static void init_heap(void)
 
 void main(void)
 {
+       init_default_io_ops();
+
        /* First, copy the boot header into the "zeropage" */
        copy_boot_params();
 
diff --git a/arch/x86/boot/msr.h b/arch/x86/boot/msr.h
new file mode 100644 (file)
index 0000000..aed66f7
--- /dev/null
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Helpers/definitions related to MSR access.
+ */
+
+#ifndef BOOT_MSR_H
+#define BOOT_MSR_H
+
+#include <asm/shared/msr.h>
+
+/*
+ * The kernel proper already defines rdmsr()/wrmsr(), but they are not for the
+ * boot kernel since they rely on tracepoint/exception handling infrastructure
+ * that's not available here.
+ */
+static inline void boot_rdmsr(unsigned int reg, struct msr *m)
+{
+       asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg));
+}
+
+static inline void boot_wrmsr(unsigned int reg, const struct msr *m)
+{
+       asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory");
+}
+
+#endif /* BOOT_MSR_H */
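
boot_rdmsr()/boot_wrmsr() rely on struct msr overlaying the 32-bit l/h halves
(the EAX/EDX views used by RDMSR/WRMSR) with a single 64-bit q member. A
self-contained sketch mirroring (not reproducing) that union layout from
<asm/shared/msr.h>:

	/* Illustrative sketch of the l/h/q overlay used by struct msr. */
	#include <stdint.h>
	#include <stdio.h>

	struct msr_sketch {
		union {
			struct {
				uint32_t l;	/* EAX half for RDMSR/WRMSR */
				uint32_t h;	/* EDX half for RDMSR/WRMSR */
			};
			uint64_t q;		/* whole-register view */
		};
	};

	int main(void)
	{
		struct msr_sketch m;

		m.q = 0x1122334455667788ULL;
		/* On x86 (little-endian): l is the low half, h the high half. */
		printf("l=%#x h=%#x\n", m.l, m.h);	/* l=0x55667788 h=0x11223344 */
		return 0;
	}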
index c1ead00017a7fa3938a53bc3e30cacb2d74e4f37..c816acf78b6aa0e36ebab582c4fb6558d91ab19a 100644 (file)
@@ -4,3 +4,5 @@ KASAN_SANITIZE_core.o   := n
 CFLAGS_core.o          += -fno-stack-protector
 
 obj-y += core.o
+
+obj-$(CONFIG_INTEL_TDX_GUEST)  += tdx/
index fc1365dd927e8001971bcb86a2a8a41bb89ef33e..49b44f881484680376a7393dc0f101e4565a4009 100644 (file)
@@ -18,7 +18,15 @@ static u64 cc_mask __ro_after_init;
 
 static bool intel_cc_platform_has(enum cc_attr attr)
 {
-       return false;
+       switch (attr) {
+       case CC_ATTR_GUEST_UNROLL_STRING_IO:
+       case CC_ATTR_HOTPLUG_DISABLED:
+       case CC_ATTR_GUEST_MEM_ENCRYPT:
+       case CC_ATTR_MEM_ENCRYPT:
+               return true;
+       default:
+               return false;
+       }
 }
 
 /*
@@ -57,6 +65,9 @@ static bool amd_cc_platform_has(enum cc_attr attr)
                return (sev_status & MSR_AMD64_SEV_ENABLED) &&
                        !(sev_status & MSR_AMD64_SEV_ES_ENABLED);
 
+       case CC_ATTR_GUEST_SEV_SNP:
+               return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
+
        default:
                return false;
        }
@@ -87,9 +98,18 @@ EXPORT_SYMBOL_GPL(cc_platform_has);
 
 u64 cc_mkenc(u64 val)
 {
+       /*
+        * Both AMD and Intel use a bit in the page table to indicate
+        * encryption status of the page.
+        *
+        * - for AMD, bit *set* means the page is encrypted
+        * - for Intel *clear* means encrypted.
+        */
        switch (vendor) {
        case CC_VENDOR_AMD:
                return val | cc_mask;
+       case CC_VENDOR_INTEL:
+               return val & ~cc_mask;
        default:
                return val;
        }
@@ -97,9 +117,12 @@ u64 cc_mkenc(u64 val)
 
 u64 cc_mkdec(u64 val)
 {
+       /* See comment in cc_mkenc() */
        switch (vendor) {
        case CC_VENDOR_AMD:
                return val & ~cc_mask;
+       case CC_VENDOR_INTEL:
+               return val | cc_mask;
        default:
                return val;
        }
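
Because the two vendors use opposite polarity for the encryption bit,
cc_mkenc() and cc_mkdec() must be exact inverses for each vendor. A small
sketch checking that property, with an assumed mask position (the real mask is
derived at runtime):

	/* Illustrative sketch: enc/dec helpers round-trip for both polarities. */
	#include <assert.h>
	#include <stdint.h>

	#define CC_MASK (1ULL << 51)	/* example bit position only */

	static uint64_t mkenc_amd(uint64_t v)   { return v | CC_MASK;  }	/* set = encrypted */
	static uint64_t mkdec_amd(uint64_t v)   { return v & ~CC_MASK; }
	static uint64_t mkenc_intel(uint64_t v) { return v & ~CC_MASK; }	/* clear = encrypted */
	static uint64_t mkdec_intel(uint64_t v) { return v | CC_MASK;  }

	int main(void)
	{
		uint64_t pte = 0x1234000;

		assert(mkdec_amd(mkenc_amd(pte)) == pte);
		assert(mkenc_intel(mkdec_intel(pte)) == pte);
		return 0;
	}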
diff --git a/arch/x86/coco/tdx/Makefile b/arch/x86/coco/tdx/Makefile
new file mode 100644 (file)
index 0000000..46c5599
--- /dev/null
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += tdx.o tdcall.o
diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S
new file mode 100644 (file)
index 0000000..f9eb113
--- /dev/null
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/asm-offsets.h>
+#include <asm/asm.h>
+#include <asm/frame.h>
+#include <asm/unwind_hints.h>
+
+#include <linux/linkage.h>
+#include <linux/bits.h>
+#include <linux/errno.h>
+
+#include "../../virt/vmx/tdx/tdxcall.S"
+
+/*
+ * Bitmasks of registers exposed to the VMM.
+ */
+#define TDX_R10                BIT(10)
+#define TDX_R11                BIT(11)
+#define TDX_R12                BIT(12)
+#define TDX_R13                BIT(13)
+#define TDX_R14                BIT(14)
+#define TDX_R15                BIT(15)
+
+/*
+ * These registers are clobbered to hold arguments for each
+ * TDVMCALL. They are safe to expose to the VMM.
+ * Each bit in this mask represents a register ID. Bit field
+ * details can be found in TDX GHCI specification, section
+ * titled "TDCALL [TDG.VP.VMCALL] leaf".
+ */
+#define TDVMCALL_EXPOSE_REGS_MASK      ( TDX_R10 | TDX_R11 | \
+                                         TDX_R12 | TDX_R13 | \
+                                         TDX_R14 | TDX_R15 )
+
+/*
+ * __tdx_module_call()  - Used by TDX guests to request services from
+ * the TDX module (does not include VMM services) using the TDCALL instruction.
+ *
+ * Transforms function call register arguments into the TDCALL register ABI.
+ * After the TDCALL operation, TDX module output is saved in @out (if it is
+ * provided by the user).
+ *
+ *-------------------------------------------------------------------------
+ * TDCALL ABI:
+ *-------------------------------------------------------------------------
+ * Input Registers:
+ *
+ * RAX                 - TDCALL Leaf number.
+ * RCX,RDX,R8-R9       - TDCALL Leaf specific input registers.
+ *
+ * Output Registers:
+ *
+ * RAX                 - TDCALL instruction error code.
+ * RCX,RDX,R8-R11      - TDCALL Leaf specific output registers.
+ *
+ *-------------------------------------------------------------------------
+ *
+ * __tdx_module_call() function ABI:
+ *
+ * @fn  (RDI)          - TDCALL Leaf ID,    moved to RAX
+ * @rcx (RSI)          - Input parameter 1, moved to RCX
+ * @rdx (RDX)          - Input parameter 2, moved to RDX
+ * @r8  (RCX)          - Input parameter 3, moved to R8
+ * @r9  (R8)           - Input parameter 4, moved to R9
+ *
+ * @out (R9)           - struct tdx_module_output pointer
+ *                       stored temporarily in R12 (not
+ *                       shared with the TDX module). It
+ *                       can be NULL.
+ *
+ * Return status of TDCALL via RAX.
+ */
+SYM_FUNC_START(__tdx_module_call)
+       FRAME_BEGIN
+       TDX_MODULE_CALL host=0
+       FRAME_END
+       RET
+SYM_FUNC_END(__tdx_module_call)
+
+/*
+ * __tdx_hypercall() - Make hypercalls to a TDX VMM using TDVMCALL leaf
+ * of TDCALL instruction
+ *
+ * Transforms values in the function call argument struct tdx_hypercall_args
+ * @args into the TDCALL register ABI. After the TDCALL operation, VMM output
+ * is saved back in @args.
+ *
+ *-------------------------------------------------------------------------
+ * TD VMCALL ABI:
+ *-------------------------------------------------------------------------
+ *
+ * Input Registers:
+ *
+ * RAX                 - TDCALL instruction leaf number (0 - TDG.VP.VMCALL)
+ * RCX                 - BITMAP which controls which parts of the TD guest's
+ *                       GPRs are passed as-is to the VMM and back.
+ * R10                 - Set to 0 to indicate that the TDCALL follows the
+ *                       standard TDX ABI specification. A non-zero value
+ *                       indicates a vendor-specific ABI.
+ * R11                 - VMCALL sub-function number
+ * RBX, RBP, RDI, RSI  - Used to pass VMCALL sub-function specific arguments.
+ * R8-R9, R12-R15      - Same as above.
+ *
+ * Output Registers:
+ *
+ * RAX                 - TDCALL instruction status (Not related to hypercall
+ *                        output).
+ * R10                 - Hypercall output error code.
+ * R11-R15             - Hypercall sub-function specific output values.
+ *
+ *-------------------------------------------------------------------------
+ *
+ * __tdx_hypercall() function ABI:
+ *
+ * @args  (RDI)        - struct tdx_hypercall_args for input and output
+ * @flags (RSI)        - TDX_HCALL_* flags
+ *
+ * On successful completion, return the hypercall error code.
+ */
+SYM_FUNC_START(__tdx_hypercall)
+       FRAME_BEGIN
+
+       /* Save callee-saved GPRs as mandated by the x86_64 ABI */
+       push %r15
+       push %r14
+       push %r13
+       push %r12
+
+       /* Mangle function call ABI into TDCALL ABI: */
+       /* Set TDCALL leaf ID (TDVMCALL (0)) in RAX */
+       xor %eax, %eax
+
+       /* Copy hypercall registers from arg struct: */
+       movq TDX_HYPERCALL_r10(%rdi), %r10
+       movq TDX_HYPERCALL_r11(%rdi), %r11
+       movq TDX_HYPERCALL_r12(%rdi), %r12
+       movq TDX_HYPERCALL_r13(%rdi), %r13
+       movq TDX_HYPERCALL_r14(%rdi), %r14
+       movq TDX_HYPERCALL_r15(%rdi), %r15
+
+       movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx
+
+       /*
+        * For the idle loop STI needs to be called directly before the TDCALL
+        * that enters idle (EXIT_REASON_HLT case). STI instruction enables
+        * interrupts only one instruction later. If there is a window between
+        * STI and the instruction that emulates the HALT state, there is a
+        * chance for interrupts to happen in this window, which can delay the
+        * HLT operation indefinitely. Since this is the not the desired
+        * result, conditionally call STI before TDCALL.
+        */
+       testq $TDX_HCALL_ISSUE_STI, %rsi
+       jz .Lskip_sti
+       sti
+.Lskip_sti:
+       tdcall
+
+       /*
+        * A non-zero RAX indicates a failure of the TDVMCALL mechanism itself
+        * and that something has gone horribly wrong with the TDX module.
+        *
+        * The return status of the hypercall operation is in a separate
+        * register (in R10). Hypercall errors are a part of normal operation
+        * and are handled by callers.
+        */
+       testq %rax, %rax
+       jne .Lpanic
+
+       /* TDVMCALL leaf return code is in R10 */
+       movq %r10, %rax
+
+       /* Copy hypercall result registers to arg struct if needed */
+       testq $TDX_HCALL_HAS_OUTPUT, %rsi
+       jz .Lout
+
+       movq %r10, TDX_HYPERCALL_r10(%rdi)
+       movq %r11, TDX_HYPERCALL_r11(%rdi)
+       movq %r12, TDX_HYPERCALL_r12(%rdi)
+       movq %r13, TDX_HYPERCALL_r13(%rdi)
+       movq %r14, TDX_HYPERCALL_r14(%rdi)
+       movq %r15, TDX_HYPERCALL_r15(%rdi)
+.Lout:
+       /*
+        * Zero out registers exposed to the VMM to avoid speculative execution
+        * with VMM-controlled values. This needs to include all registers
+        * present in TDVMCALL_EXPOSE_REGS_MASK, except R12-R15, whose
+        * contents are restored from the stack below.
+        */
+       xor %r10d, %r10d
+       xor %r11d, %r11d
+
+       /* Restore callee-saved GPRs as mandated by the x86_64 ABI */
+       pop %r12
+       pop %r13
+       pop %r14
+       pop %r15
+
+       FRAME_END
+
+       RET
+.Lpanic:
+       call __tdx_hypercall_failed
+       /* __tdx_hypercall_failed never returns */
+       REACHABLE
+       jmp .Lpanic
+SYM_FUNC_END(__tdx_hypercall)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
new file mode 100644 (file)
index 0000000..03deb4d
--- /dev/null
@@ -0,0 +1,692 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021-2022 Intel Corporation */
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "tdx: " fmt
+
+#include <linux/cpufeature.h>
+#include <asm/coco.h>
+#include <asm/tdx.h>
+#include <asm/vmx.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
+#include <asm/pgtable.h>
+
+/* TDX module Call Leaf IDs */
+#define TDX_GET_INFO                   1
+#define TDX_GET_VEINFO                 3
+#define TDX_ACCEPT_PAGE                        6
+
+/* TDX hypercall Leaf IDs */
+#define TDVMCALL_MAP_GPA               0x10001
+
+/* MMIO direction */
+#define EPT_READ       0
+#define EPT_WRITE      1
+
+/* Port I/O direction */
+#define PORT_READ      0
+#define PORT_WRITE     1
+
+/* See Exit Qualification for I/O Instructions in VMX documentation */
+#define VE_IS_IO_IN(e)         ((e) & BIT(3))
+#define VE_GET_IO_SIZE(e)      (((e) & GENMASK(2, 0)) + 1)
+#define VE_GET_PORT_NUM(e)     ((e) >> 16)
+#define VE_IS_IO_STRING(e)     ((e) & BIT(4))
+
+/*
+ * Wrapper for standard use of __tdx_hypercall with no output aside from
+ * return code.
+ */
+static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = fn,
+               .r12 = r12,
+               .r13 = r13,
+               .r14 = r14,
+               .r15 = r15,
+       };
+
+       return __tdx_hypercall(&args, 0);
+}
+
+/* Called from __tdx_hypercall() for unrecoverable failure */
+void __tdx_hypercall_failed(void)
+{
+       panic("TDVMCALL failed. TDX module bug?");
+}
+
+/*
+ * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined
+ * independently from but are currently matched 1:1 with VMX EXIT_REASONs.
+ * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and
+ * guest sides of these calls.
+ */
+static u64 hcall_func(u64 exit_reason)
+{
+       return exit_reason;
+}
+
+#ifdef CONFIG_KVM_GUEST
+long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2,
+                      unsigned long p3, unsigned long p4)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = nr,
+               .r11 = p1,
+               .r12 = p2,
+               .r13 = p3,
+               .r14 = p4,
+       };
+
+       return __tdx_hypercall(&args, 0);
+}
+EXPORT_SYMBOL_GPL(tdx_kvm_hypercall);
+#endif
+
+/*
+ * Used for TDX guests to make calls directly to the TD module.  This
+ * should only be used for calls that have no legitimate reason to fail
+ * or where the kernel cannot survive the call failing.
+ */
+static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
+                                  struct tdx_module_output *out)
+{
+       if (__tdx_module_call(fn, rcx, rdx, r8, r9, out))
+               panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
+}
+
+static u64 get_cc_mask(void)
+{
+       struct tdx_module_output out;
+       unsigned int gpa_width;
+
+       /*
+        * The TDINFO TDX module call is used to get the TD execution
+        * environment information such as GPA width, number of available
+        * vCPUs, debug mode information, etc. More details about the ABI can
+        * be found in TDX
+        * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL
+        * [TDG.VP.INFO].
+        *
+        * The GPA width that comes out of this call is critical. TDX guests
+        * can not meaningfully run without it.
+        */
+       tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out);
+
+       gpa_width = out.rcx & GENMASK(5, 0);
+
+       /*
+        * The highest bit of a guest physical address is the "sharing" bit.
+        * Set it for shared pages and clear it for private pages.
+        */
+       return BIT_ULL(gpa_width - 1);
+}
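
As a worked example of the return value above, under an assumed GPA width of
52 the shared bit is bit 51; tdx_early_init() below then trims physical_mask
with cc_mask - 1 so that only bits below the shared bit count as address bits:

	/* Illustrative sketch: shared-bit mask for an assumed GPA width of 52. */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int gpa_width = 52;	/* hypothetical TDG.VP.INFO result */
		uint64_t cc_mask = 1ULL << (gpa_width - 1);

		printf("shared bit mask %#llx\n", (unsigned long long)cc_mask);
		/* physical_mask &= cc_mask - 1 keeps only bits 0..50 as address bits */
		return 0;
	}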
+
+static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = hcall_func(EXIT_REASON_HLT),
+               .r12 = irq_disabled,
+       };
+
+       /*
+        * Emulate HLT operation via hypercall. More info about ABI
+        * can be found in TDX Guest-Host-Communication Interface
+        * (GHCI), section 3.8 TDG.VP.VMCALL<Instruction.HLT>.
+        *
+        * The VMM uses the "IRQ disabled" param to understand IRQ
+        * enabled status (RFLAGS.IF) of the TD guest and to determine
+        * whether or not it should schedule the halted vCPU if an
+        * IRQ becomes pending. E.g. if IRQs are disabled, the VMM
+        * can keep the vCPU in virtual HLT, even if an IRQ is
+        * pending, without hanging/breaking the guest.
+        */
+       return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
+}
+
+static bool handle_halt(void)
+{
+       /*
+        * Since the non-safe halt is mainly used during CPU offlining and
+        * the guest will always stay in the halt state, don't call the STI
+        * instruction (set do_sti to false).
+        */
+       const bool irq_disabled = irqs_disabled();
+       const bool do_sti = false;
+
+       if (__halt(irq_disabled, do_sti))
+               return false;
+
+       return true;
+}
+
+void __cpuidle tdx_safe_halt(void)
+{
+        /*
+         * For do_sti=true case, __tdx_hypercall() function enables
+         * interrupts using the STI instruction before the TDCALL. So
+         * set irq_disabled as false.
+         */
+       const bool irq_disabled = false;
+       const bool do_sti = true;
+
+       /*
+        * Use WARN_ONCE() to report the failure.
+        */
+       if (__halt(irq_disabled, do_sti))
+               WARN_ONCE(1, "HLT instruction emulation failed\n");
+}
+
+static bool read_msr(struct pt_regs *regs)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = hcall_func(EXIT_REASON_MSR_READ),
+               .r12 = regs->cx,
+       };
+
+       /*
+        * Emulate the MSR read via hypercall. More info about ABI
+        * can be found in TDX Guest-Host-Communication Interface
+        * (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
+        */
+       if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
+               return false;
+
+       regs->ax = lower_32_bits(args.r11);
+       regs->dx = upper_32_bits(args.r11);
+       return true;
+}
+
+static bool write_msr(struct pt_regs *regs)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = hcall_func(EXIT_REASON_MSR_WRITE),
+               .r12 = regs->cx,
+               .r13 = (u64)regs->dx << 32 | regs->ax,
+       };
+
+       /*
+        * Emulate the MSR write via hypercall. More info about ABI
+        * can be found in TDX Guest-Host-Communication Interface
+        * (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
+        */
+       return !__tdx_hypercall(&args, 0);
+}
+
+static bool handle_cpuid(struct pt_regs *regs)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = hcall_func(EXIT_REASON_CPUID),
+               .r12 = regs->ax,
+               .r13 = regs->cx,
+       };
+
+       /*
+        * Only allow the VMM to control the range reserved for hypervisor
+        * communication.
+        *
+        * Return all-zeros for any CPUID leaf outside the range. This matches
+        * CPU behaviour for unsupported leaves.
+        */
+       if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
+               regs->ax = regs->bx = regs->cx = regs->dx = 0;
+               return true;
+       }
+
+       /*
+        * Emulate the CPUID instruction via a hypercall. More info about
+        * ABI can be found in TDX Guest-Host-Communication Interface
+        * (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
+        */
+       if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
+               return false;
+
+       /*
+        * As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
+        * EAX, EBX, ECX, EDX registers after the CPUID instruction execution.
+        * So copy the register contents back to pt_regs.
+        */
+       regs->ax = args.r12;
+       regs->bx = args.r13;
+       regs->cx = args.r14;
+       regs->dx = args.r15;
+
+       return true;
+}
+
+static bool mmio_read(int size, unsigned long addr, unsigned long *val)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = hcall_func(EXIT_REASON_EPT_VIOLATION),
+               .r12 = size,
+               .r13 = EPT_READ,
+               .r14 = addr,
+               .r15 = *val,
+       };
+
+       if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
+               return false;
+       *val = args.r11;
+       return true;
+}
+
+static bool mmio_write(int size, unsigned long addr, unsigned long val)
+{
+       return !_tdx_hypercall(hcall_func(EXIT_REASON_EPT_VIOLATION), size,
+                              EPT_WRITE, addr, val);
+}
+
+static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
+{
+       char buffer[MAX_INSN_SIZE];
+       unsigned long *reg, val;
+       struct insn insn = {};
+       enum mmio_type mmio;
+       int size, extend_size;
+       u8 extend_val = 0;
+
+       /* Only in-kernel MMIO is supported */
+       if (WARN_ON_ONCE(user_mode(regs)))
+               return false;
+
+       if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
+               return false;
+
+       if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
+               return false;
+
+       mmio = insn_decode_mmio(&insn, &size);
+       if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
+               return false;
+
+       if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
+               reg = insn_get_modrm_reg_ptr(&insn, regs);
+               if (!reg)
+                       return false;
+       }
+
+       ve->instr_len = insn.length;
+
+       /* Handle writes first */
+       switch (mmio) {
+       case MMIO_WRITE:
+               memcpy(&val, reg, size);
+               return mmio_write(size, ve->gpa, val);
+       case MMIO_WRITE_IMM:
+               val = insn.immediate.value;
+               return mmio_write(size, ve->gpa, val);
+       case MMIO_READ:
+       case MMIO_READ_ZERO_EXTEND:
+       case MMIO_READ_SIGN_EXTEND:
+               /* Reads are handled below */
+               break;
+       case MMIO_MOVS:
+       case MMIO_DECODE_FAILED:
+               /*
+                * MMIO was accessed with an instruction that could not be
+                * decoded or handled properly. It was likely not using io.h
+                * helpers or accessed MMIO accidentally.
+                */
+               return false;
+       default:
+               WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
+               return false;
+       }
+
+       /* Handle reads */
+       if (!mmio_read(size, ve->gpa, &val))
+               return false;
+
+       switch (mmio) {
+       case MMIO_READ:
+               /* Zero-extend for 32-bit operation */
+               extend_size = size == 4 ? sizeof(*reg) : 0;
+               break;
+       case MMIO_READ_ZERO_EXTEND:
+               /* Zero extend based on operand size */
+               extend_size = insn.opnd_bytes;
+               break;
+       case MMIO_READ_SIGN_EXTEND:
+               /* Sign extend based on operand size */
+               extend_size = insn.opnd_bytes;
+               if (size == 1 && val & BIT(7))
+                       extend_val = 0xFF;
+               else if (size > 1 && val & BIT(15))
+                       extend_val = 0xFF;
+               break;
+       default:
+               /* All other cases have to be covered by the first switch() */
+               WARN_ON_ONCE(1);
+               return false;
+       }
+
+       if (extend_size)
+               memset(reg, extend_val, extend_size);
+       memcpy(reg, &val, size);
+       return true;
+}
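
The memset()+memcpy() pair above implements sign/zero extension without
branching per byte: first fill the whole destination with the extension byte,
then overwrite the low bytes with the value read. A standalone sketch of the
MMIO_READ_SIGN_EXTEND path for a 1-byte read (result shown for a little-endian
64-bit host):

	/* Illustrative sketch of the sign-extension logic in handle_mmio(). */
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		unsigned long reg = 0;
		unsigned long val = 0x80;	/* 1-byte MMIO read, sign bit set */
		int size = 1;
		int extend_size = sizeof(reg);	/* insn.opnd_bytes in the real handler */
		uint8_t extend_val = 0;

		if (size == 1 && (val & 0x80))
			extend_val = 0xFF;

		memset(&reg, extend_val, extend_size);	/* fill with the extension byte */
		memcpy(&reg, &val, size);		/* then copy the low bytes */
		printf("reg = %#lx\n", reg);		/* 0xffffffffffffff80 */
		return 0;
	}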
+
+static bool handle_in(struct pt_regs *regs, int size, int port)
+{
+       struct tdx_hypercall_args args = {
+               .r10 = TDX_HYPERCALL_STANDARD,
+               .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION),
+               .r12 = size,
+               .r13 = PORT_READ,
+               .r14 = port,
+       };
+       u64 mask = GENMASK(BITS_PER_BYTE * size - 1, 0);
+       bool success;
+
+       /*
+        * Emulate the I/O read via hypercall. More info about ABI can be found
+        * in TDX Guest-Host-Communication Interface (GHCI) section titled
+        * "TDG.VP.VMCALL<Instruction.IO>".
+        */
+       success = !__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
+
+       /* Update part of the register affected by the emulated instruction */
+       regs->ax &= ~mask;
+       if (success)
+               regs->ax |= args.r11 & mask;
+
+       return success;
+}
+
+static bool handle_out(struct pt_regs *regs, int size, int port)
+{
+       u64 mask = GENMASK(BITS_PER_BYTE * size - 1, 0);
+
+       /*
+        * Emulate the I/O write via hypercall. More info about ABI can be found
+        * in TDX Guest-Host-Communication Interface (GHCI) section titled
+        * "TDG.VP.VMCALL<Instruction.IO>".
+        */
+       return !_tdx_hypercall(hcall_func(EXIT_REASON_IO_INSTRUCTION), size,
+                              PORT_WRITE, port, regs->ax & mask);
+}
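
Both handlers mask off only the part of RAX the instruction actually touches;
GENMASK(BITS_PER_BYTE * size - 1, 0) is equivalent to (1 << 8*size) - 1. A
quick sketch of the resulting masks per access size:

	/* Illustrative sketch: the ax sub-register masks used above. */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		int size;

		for (size = 1; size <= 4; size <<= 1) {
			uint64_t mask = (1ULL << (8 * size)) - 1;

			printf("size %d -> mask %#llx\n", size, (unsigned long long)mask);
		}
		/* size 1 -> 0xff (AL), 2 -> 0xffff (AX), 4 -> 0xffffffff (EAX) */
		return 0;
	}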
+
+/*
+ * Emulate I/O using hypercall.
+ *
+ * Assumes the IO instruction was using ax, which is enforced
+ * by the standard io.h macros.
+ *
+ * Return True on success or False on failure.
+ */
+static bool handle_io(struct pt_regs *regs, u32 exit_qual)
+{
+       int size, port;
+       bool in;
+
+       if (VE_IS_IO_STRING(exit_qual))
+               return false;
+
+       in   = VE_IS_IO_IN(exit_qual);
+       size = VE_GET_IO_SIZE(exit_qual);
+       port = VE_GET_PORT_NUM(exit_qual);
+
+       if (in)
+               return handle_in(regs, size, port);
+       else
+               return handle_out(regs, size, port);
+}
+
+/*
+ * Early #VE exception handler. Only handles a subset of port I/O.
+ * Intended only for earlyprintk. If failed, return false.
+ */
+__init bool tdx_early_handle_ve(struct pt_regs *regs)
+{
+       struct ve_info ve;
+
+       tdx_get_ve_info(&ve);
+
+       if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
+               return false;
+
+       return handle_io(regs, ve.exit_qual);
+}
+
+void tdx_get_ve_info(struct ve_info *ve)
+{
+       struct tdx_module_output out;
+
+       /*
+        * Called during #VE handling to retrieve the #VE info from the
+        * TDX module.
+        *
+        * This has to be called early in #VE handling.  A "nested" #VE which
+        * occurs before this will raise a #DF and is not recoverable.
+        *
+        * The call retrieves the #VE info from the TDX module, which also
+        * clears the "#VE valid" flag. This must be done before anything else
+        * because any #VE that occurs while the valid flag is set will lead to
+        * #DF.
+        *
+        * Note, the TDX module treats virtual NMIs as inhibited if the #VE
+        * valid flag is set. It means that NMI=>#VE will not result in a #DF.
+        */
+       tdx_module_call(TDX_GET_VEINFO, 0, 0, 0, 0, &out);
+
+       /* Transfer the output parameters */
+       ve->exit_reason = out.rcx;
+       ve->exit_qual   = out.rdx;
+       ve->gla         = out.r8;
+       ve->gpa         = out.r9;
+       ve->instr_len   = lower_32_bits(out.r10);
+       ve->instr_info  = upper_32_bits(out.r10);
+}
+
+/* Handle the user initiated #VE */
+static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
+{
+       switch (ve->exit_reason) {
+       case EXIT_REASON_CPUID:
+               return handle_cpuid(regs);
+       default:
+               pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
+               return false;
+       }
+}
+
+/* Handle the kernel #VE */
+static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
+{
+       switch (ve->exit_reason) {
+       case EXIT_REASON_HLT:
+               return handle_halt();
+       case EXIT_REASON_MSR_READ:
+               return read_msr(regs);
+       case EXIT_REASON_MSR_WRITE:
+               return write_msr(regs);
+       case EXIT_REASON_CPUID:
+               return handle_cpuid(regs);
+       case EXIT_REASON_EPT_VIOLATION:
+               return handle_mmio(regs, ve);
+       case EXIT_REASON_IO_INSTRUCTION:
+               return handle_io(regs, ve->exit_qual);
+       default:
+               pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
+               return false;
+       }
+}
+
+bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
+{
+       bool ret;
+
+       if (user_mode(regs))
+               ret = virt_exception_user(regs, ve);
+       else
+               ret = virt_exception_kernel(regs, ve);
+
+       /* After successful #VE handling, move the IP */
+       if (ret)
+               regs->ip += ve->instr_len;
+
+       return ret;
+}
+
+static bool tdx_tlb_flush_required(bool private)
+{
+       /*
+        * TDX guest is responsible for flushing TLB on private->shared
+        * transition. VMM is responsible for flushing on shared->private.
+        *
+        * The VMM _can't_ flush private addresses as it can't generate PAs
+        * with the guest's HKID.  Shared memory isn't subject to integrity
+        * checking, i.e. the VMM doesn't need to flush for its own protection.
+        *
+        * There's no need to flush when converting from shared to private,
+        * as flushing is the VMM's responsibility in this case, e.g. it must
+        * flush to avoid integrity failures in the face of a buggy or
+        * malicious guest.
+        */
+       return !private;
+}
+
+static bool tdx_cache_flush_required(void)
+{
+       /*
+        * AMD SME/SEV can avoid cache flushing if HW enforces cache coherence.
+        * TDX doesn't have such capability.
+        *
+        * Flush cache unconditionally.
+        */
+       return true;
+}
+
+static bool try_accept_one(phys_addr_t *start, unsigned long len,
+                         enum pg_level pg_level)
+{
+       unsigned long accept_size = page_level_size(pg_level);
+       u64 tdcall_rcx;
+       u8 page_size;
+
+       if (!IS_ALIGNED(*start, accept_size))
+               return false;
+
+       if (len < accept_size)
+               return false;
+
+       /*
+        * Pass the page physical address to the TDX module to accept the
+        * pending, private page.
+        *
+        * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
+        */
+       switch (pg_level) {
+       case PG_LEVEL_4K:
+               page_size = 0;
+               break;
+       case PG_LEVEL_2M:
+               page_size = 1;
+               break;
+       case PG_LEVEL_1G:
+               page_size = 2;
+               break;
+       default:
+               return false;
+       }
+
+       tdcall_rcx = *start | page_size;
+       if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
+               return false;
+
+       *start += accept_size;
+       return true;
+}
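
A sketch of the RCX encoding described in the comment above: the
accept-size-aligned physical address, with the page-size code in bits 2:0
(values here are illustrative):

	/* Illustrative sketch of the TDX_ACCEPT_PAGE RCX argument. */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t start = 0x40000000;	/* must be accept-size aligned */
		uint8_t page_size = 2;		/* 0 = 4K, 1 = 2M, 2 = 1G */
		uint64_t tdcall_rcx = start | page_size;

		printf("rcx = %#llx\n", (unsigned long long)tdcall_rcx);
		return 0;
	}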
+
+/*
+ * Inform the VMM of the guest's intent for this physical page: shared with
+ * the VMM or private to the guest.  The VMM is expected to change its mapping
+ * of the page in response.
+ */
+static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
+{
+       phys_addr_t start = __pa(vaddr);
+       phys_addr_t end   = __pa(vaddr + numpages * PAGE_SIZE);
+
+       if (!enc) {
+               /* Set the shared (decrypted) bits: */
+               start |= cc_mkdec(0);
+               end   |= cc_mkdec(0);
+       }
+
+       /*
+        * Notify the VMM about page mapping conversion. More info about ABI
+        * can be found in TDX Guest-Host-Communication Interface (GHCI),
+        * section "TDG.VP.VMCALL<MapGPA>"
+        */
+       if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
+               return false;
+
+       /* A private->shared conversion requires only the MapGPA call. */
+       if (!enc)
+               return true;
+
+       /*
+        * For shared->private conversion, accept the page using
+        * TDX_ACCEPT_PAGE TDX module call.
+        */
+       while (start < end) {
+               unsigned long len = end - start;
+
+               /*
+                * Try larger accepts first. It gives the VMM a chance to keep
+                * 1G/2M SEPT entries where possible and speeds up the process
+                * by cutting the number of hypercalls (if successful).
+                */
+
+               if (try_accept_one(&start, len, PG_LEVEL_1G))
+                       continue;
+
+               if (try_accept_one(&start, len, PG_LEVEL_2M))
+                       continue;
+
+               if (!try_accept_one(&start, len, PG_LEVEL_4K))
+                       return false;
+       }
+
+       return true;
+}
+
+void __init tdx_early_init(void)
+{
+       u64 cc_mask;
+       u32 eax, sig[3];
+
+       cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, &sig[0], &sig[2], &sig[1]);
+
+       if (memcmp(TDX_IDENT, sig, sizeof(sig)))
+               return;
+
+       setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);
+
+       cc_set_vendor(CC_VENDOR_INTEL);
+       cc_mask = get_cc_mask();
+       cc_set_mask(cc_mask);
+
+       /*
+        * All bits above the GPA width are reserved, and the kernel treats the
+        * shared bit as a flag rather than as part of the physical address.
+        *
+        * Adjust the physical mask to cover only valid GPA bits.
+        */
+       physical_mask &= cc_mask - 1;
+
+       x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
+       x86_platform.guest.enc_tlb_flush_required   = tdx_tlb_flush_required;
+       x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
+
+       pr_info("Guest detected\n");
+}
index a4c061fb7c6ea0c3a15201ef369e61014315907c..29b36e9e4e741e2e36c69219a25934d0bfa2f4b4 100644 (file)
@@ -63,7 +63,7 @@ For 32-bit we have the following conventions - kernel is built with
  * for assembly code:
  */
 
-.macro PUSH_REGS rdx=%rdx rax=%rax save_ret=0
+.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
        .if \save_ret
        pushq   %rsi            /* pt_regs->si */
        movq    8(%rsp), %rsi   /* temporarily store the return address in %rsi */
@@ -73,7 +73,7 @@ For 32-bit we have the following conventions - kernel is built with
        pushq   %rsi            /* pt_regs->si */
        .endif
        pushq   \rdx            /* pt_regs->dx */
-       pushq   %rcx            /* pt_regs->cx */
+       pushq   \rcx            /* pt_regs->cx */
        pushq   \rax            /* pt_regs->ax */
        pushq   %r8             /* pt_regs->r8 */
        pushq   %r9             /* pt_regs->r9 */
@@ -99,6 +99,7 @@ For 32-bit we have the following conventions - kernel is built with
         * well before they could be put to use in a speculative execution
         * gadget.
         */
+       xorl    %esi,  %esi     /* nospec si  */
        xorl    %edx,  %edx     /* nospec dx  */
        xorl    %ecx,  %ecx     /* nospec cx  */
        xorl    %r8d,  %r8d     /* nospec r8  */
@@ -114,32 +115,24 @@ For 32-bit we have the following conventions - kernel is built with
 
 .endm
 
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
-       PUSH_REGS rdx=\rdx, rax=\rax, save_ret=\save_ret
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
+       PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret
        CLEAR_REGS
 .endm
 
-.macro POP_REGS pop_rdi=1 skip_r11rcx=0
+.macro POP_REGS pop_rdi=1
        popq %r15
        popq %r14
        popq %r13
        popq %r12
        popq %rbp
        popq %rbx
-       .if \skip_r11rcx
-       popq %rsi
-       .else
        popq %r11
-       .endif
        popq %r10
        popq %r9
        popq %r8
        popq %rax
-       .if \skip_r11rcx
-       popq %rsi
-       .else
        popq %rcx
-       .endif
        popq %rdx
        popq %rsi
        .if \pop_rdi
index 4faac48ebec55ede6ce7cadda7b863b563b74b48..4300ba49b5eeace08b31c83247e0e5a6a37f8d35 100644 (file)
@@ -191,8 +191,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
         * perf profiles. Nothing jumps here.
         */
 syscall_return_via_sysret:
-       /* rcx and r11 are already restored (see code above) */
-       POP_REGS pop_rdi=0 skip_r11rcx=1
+       POP_REGS pop_rdi=0
 
        /*
         * Now all regs are restored except RSP and RDI.
@@ -215,8 +214,13 @@ syscall_return_via_sysret:
 
        popq    %rdi
        popq    %rsp
+SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
+       ANNOTATE_NOENDBR
        swapgs
        sysretq
+SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
+       ANNOTATE_NOENDBR
+       int3
 SYM_CODE_END(entry_SYSCALL_64)
 
 /*
@@ -318,6 +322,14 @@ SYM_CODE_END(ret_from_fork)
 #endif
 .endm
 
+/* Save all registers in pt_regs */
+SYM_CODE_START_LOCAL(push_and_clear_regs)
+       UNWIND_HINT_FUNC
+       PUSH_AND_CLEAR_REGS save_ret=1
+       ENCODE_FRAME_POINTER 8
+       RET
+SYM_CODE_END(push_and_clear_regs)
+
 /**
  * idtentry_body - Macro to emit code calling the C function
  * @cfunc:             C function to be called
@@ -325,7 +337,21 @@ SYM_CODE_END(ret_from_fork)
  */
 .macro idtentry_body cfunc has_error_code:req
 
-       call    error_entry
+       call push_and_clear_regs
+       UNWIND_HINT_REGS
+
+       /*
+        * Call error_entry() and switch to the task stack if from userspace.
+        *
+        * When in XENPV, it is already on the task stack, and it can't fault
+        * for native_iret() nor native_load_gs_index() since XENPV uses its
+        * own pvops for IRET and load_gs_index().  It also doesn't need to
+        * switch CR3, so it can skip invoking error_entry().
+        */
+       ALTERNATIVE "call error_entry; movq %rax, %rsp", \
+               "", X86_FEATURE_XENPV
+
+       ENCODE_FRAME_POINTER
        UNWIND_HINT_REGS
 
        movq    %rsp, %rdi                      /* pt_regs pointer into 1st argument*/
@@ -337,6 +363,9 @@ SYM_CODE_END(ret_from_fork)
 
        call    \cfunc
 
+       /* For some configurations \cfunc ends up being a noreturn. */
+       REACHABLE
+
        jmp     error_return
 .endm
 
@@ -355,6 +384,7 @@ SYM_CODE_START(\asmsym)
        UNWIND_HINT_IRET_REGS offset=\has_error_code*8
        ENDBR
        ASM_CLAC
+       cld
 
        .if \has_error_code == 0
                pushq   $-1                     /* ORIG_RAX: no syscall to restart */
@@ -423,6 +453,7 @@ SYM_CODE_START(\asmsym)
        UNWIND_HINT_IRET_REGS
        ENDBR
        ASM_CLAC
+       cld
 
        pushq   $-1                     /* ORIG_RAX: no syscall to restart */
 
@@ -479,6 +510,7 @@ SYM_CODE_START(\asmsym)
        UNWIND_HINT_IRET_REGS
        ENDBR
        ASM_CLAC
+       cld
 
        /*
         * If the entry is from userspace, switch stacks and treat it as
@@ -505,6 +537,7 @@ SYM_CODE_START(\asmsym)
        call    vc_switch_off_ist
        movq    %rax, %rsp              /* Switch to new stack */
 
+       ENCODE_FRAME_POINTER
        UNWIND_HINT_REGS
 
        /* Update pt_regs */
@@ -541,6 +574,7 @@ SYM_CODE_START(\asmsym)
        UNWIND_HINT_IRET_REGS offset=8
        ENDBR
        ASM_CLAC
+       cld
 
        /* paranoid_entry returns GS information for paranoid_exit in EBX. */
        call    paranoid_entry
@@ -866,7 +900,6 @@ SYM_CODE_END(xen_failsafe_callback)
  */
 SYM_CODE_START_LOCAL(paranoid_entry)
        UNWIND_HINT_FUNC
-       cld
        PUSH_AND_CLEAR_REGS save_ret=1
        ENCODE_FRAME_POINTER 8
 
@@ -980,13 +1013,10 @@ SYM_CODE_START_LOCAL(paranoid_exit)
 SYM_CODE_END(paranoid_exit)
 
 /*
- * Save all registers in pt_regs, and switch GS if needed.
+ * Switch GS and CR3 if needed.
  */
 SYM_CODE_START_LOCAL(error_entry)
        UNWIND_HINT_FUNC
-       cld
-       PUSH_AND_CLEAR_REGS save_ret=1
-       ENCODE_FRAME_POINTER 8
        testb   $3, CS+8(%rsp)
        jz      .Lerror_kernelspace
 
@@ -994,19 +1024,15 @@ SYM_CODE_START_LOCAL(error_entry)
         * We entered from user mode or we're pretending to have entered
         * from user mode due to an IRET fault.
         */
-       SWAPGS
+       swapgs
        FENCE_SWAPGS_USER_ENTRY
        /* We have user CR3.  Change to kernel CR3. */
        SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
+       leaq    8(%rsp), %rdi                   /* arg0 = pt_regs pointer */
 .Lerror_entry_from_usermode_after_swapgs:
        /* Put us onto the real thread stack. */
-       popq    %r12                            /* save return addr in %12 */
-       movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
        call    sync_regs
-       movq    %rax, %rsp                      /* switch stack */
-       ENCODE_FRAME_POINTER
-       pushq   %r12
        RET
 
        /*
@@ -1030,7 +1056,7 @@ SYM_CODE_START_LOCAL(error_entry)
         * gsbase and proceed.  We'll fix up the exception and land in
         * .Lgs_change's error handler with kernel gsbase.
         */
-       SWAPGS
+       swapgs
 
        /*
         * Issue an LFENCE to prevent GS speculation, regardless of whether it is a
@@ -1038,6 +1064,7 @@ SYM_CODE_START_LOCAL(error_entry)
         */
 .Lerror_entry_done_lfence:
        FENCE_SWAPGS_KERNEL_ENTRY
+       leaq    8(%rsp), %rax                   /* return pt_regs pointer */
        RET
 
 .Lbstep_iret:
@@ -1050,7 +1077,7 @@ SYM_CODE_START_LOCAL(error_entry)
         * We came from an IRET to user mode, so we have user
         * gsbase and CR3.  Switch to kernel gsbase and CR3:
         */
-       SWAPGS
+       swapgs
        FENCE_SWAPGS_USER_ENTRY
        SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
@@ -1058,9 +1085,9 @@ SYM_CODE_START_LOCAL(error_entry)
         * Pretend that the exception came from user mode: set up pt_regs
         * as if we faulted immediately after IRET.
         */
-       mov     %rsp, %rdi
+       leaq    8(%rsp), %rdi                   /* arg0 = pt_regs pointer */
        call    fixup_bad_iret
-       mov     %rax, %rsp
+       mov     %rax, %rdi
        jmp     .Lerror_entry_from_usermode_after_swapgs
 SYM_CODE_END(error_entry)
 
@@ -1123,6 +1150,7 @@ SYM_CODE_START(asm_exc_nmi)
         */
 
        ASM_CLAC
+       cld
 
        /* Use %rdx as our temp variable throughout */
        pushq   %rdx
@@ -1142,7 +1170,6 @@ SYM_CODE_START(asm_exc_nmi)
         */
 
        swapgs
-       cld
        FENCE_SWAPGS_USER_ENTRY
        SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
        movq    %rsp, %rdx
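
The net effect of the error_entry() rework above: the C helpers no longer switch stacks behind the asm's back. error_entry() hands the pt_regs location back in %rax and the caller performs the single "movq %rax, %rsp"; likewise fixup_bad_iret() now takes and returns a pt_regs pointer. A minimal user-space sketch of that contract (names and frame layout are illustrative, not the kernel's):

    #include <stdio.h>
    #include <string.h>

    struct pt_regs { unsigned long di, ip, sp; };

    static unsigned char thread_stack[4096];

    /* Copy the exception frame to the thread stack and hand the new
     * location back; the (would-be) asm caller then switches %rsp to
     * the returned pointer itself. */
    static struct pt_regs *sync_regs_sketch(struct pt_regs *eregs)
    {
            struct pt_regs *dst =
                    (struct pt_regs *)(thread_stack + sizeof(thread_stack)) - 1;

            if (dst != eregs)
                    memcpy(dst, eregs, sizeof(*dst));
            return dst;                     /* asm: movq %rax, %rsp */
    }

    int main(void)
    {
            struct pt_regs entry_frame = { .ip = 0x1000, .sp = 0x2000 };
            struct pt_regs *regs = sync_regs_sketch(&entry_frame);

            printf("pt_regs now at %p, ip=%#lx\n", (void *)regs, regs->ip);
            return 0;
    }
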
index 4fdb007cddbd12d6c802915e3dad27f73aa1c392..d1052742ad0cd51c3628978ec53d1a50b12445be 100644 (file)
@@ -50,7 +50,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
        UNWIND_HINT_EMPTY
        ENDBR
        /* Interrupts are off on entry. */
-       SWAPGS
+       swapgs
 
        pushq   %rax
        SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
@@ -83,32 +83,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
        movl    %eax, %eax
 
        pushq   %rax                    /* pt_regs->orig_ax */
-       pushq   %rdi                    /* pt_regs->di */
-       pushq   %rsi                    /* pt_regs->si */
-       pushq   %rdx                    /* pt_regs->dx */
-       pushq   %rcx                    /* pt_regs->cx */
-       pushq   $-ENOSYS                /* pt_regs->ax */
-       pushq   $0                      /* pt_regs->r8  = 0 */
-       xorl    %r8d, %r8d              /* nospec   r8 */
-       pushq   $0                      /* pt_regs->r9  = 0 */
-       xorl    %r9d, %r9d              /* nospec   r9 */
-       pushq   $0                      /* pt_regs->r10 = 0 */
-       xorl    %r10d, %r10d            /* nospec   r10 */
-       pushq   $0                      /* pt_regs->r11 = 0 */
-       xorl    %r11d, %r11d            /* nospec   r11 */
-       pushq   %rbx                    /* pt_regs->rbx */
-       xorl    %ebx, %ebx              /* nospec   rbx */
-       pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
-       xorl    %ebp, %ebp              /* nospec   rbp */
-       pushq   $0                      /* pt_regs->r12 = 0 */
-       xorl    %r12d, %r12d            /* nospec   r12 */
-       pushq   $0                      /* pt_regs->r13 = 0 */
-       xorl    %r13d, %r13d            /* nospec   r13 */
-       pushq   $0                      /* pt_regs->r14 = 0 */
-       xorl    %r14d, %r14d            /* nospec   r14 */
-       pushq   $0                      /* pt_regs->r15 = 0 */
-       xorl    %r15d, %r15d            /* nospec   r15 */
-
+       PUSH_AND_CLEAR_REGS rax=$-ENOSYS
        UNWIND_HINT_REGS
 
        cld
@@ -225,35 +200,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
 SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
        movl    %eax, %eax              /* discard orig_ax high bits */
        pushq   %rax                    /* pt_regs->orig_ax */
-       pushq   %rdi                    /* pt_regs->di */
-       pushq   %rsi                    /* pt_regs->si */
-       xorl    %esi, %esi              /* nospec   si */
-       pushq   %rdx                    /* pt_regs->dx */
-       xorl    %edx, %edx              /* nospec   dx */
-       pushq   %rbp                    /* pt_regs->cx (stashed in bp) */
-       xorl    %ecx, %ecx              /* nospec   cx */
-       pushq   $-ENOSYS                /* pt_regs->ax */
-       pushq   $0                      /* pt_regs->r8  = 0 */
-       xorl    %r8d, %r8d              /* nospec   r8 */
-       pushq   $0                      /* pt_regs->r9  = 0 */
-       xorl    %r9d, %r9d              /* nospec   r9 */
-       pushq   $0                      /* pt_regs->r10 = 0 */
-       xorl    %r10d, %r10d            /* nospec   r10 */
-       pushq   $0                      /* pt_regs->r11 = 0 */
-       xorl    %r11d, %r11d            /* nospec   r11 */
-       pushq   %rbx                    /* pt_regs->rbx */
-       xorl    %ebx, %ebx              /* nospec   rbx */
-       pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
-       xorl    %ebp, %ebp              /* nospec   rbp */
-       pushq   $0                      /* pt_regs->r12 = 0 */
-       xorl    %r12d, %r12d            /* nospec   r12 */
-       pushq   $0                      /* pt_regs->r13 = 0 */
-       xorl    %r13d, %r13d            /* nospec   r13 */
-       pushq   $0                      /* pt_regs->r14 = 0 */
-       xorl    %r14d, %r14d            /* nospec   r14 */
-       pushq   $0                      /* pt_regs->r15 = 0 */
-       xorl    %r15d, %r15d            /* nospec   r15 */
-
+       PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS
        UNWIND_HINT_REGS
 
        movq    %rsp, %rdi
@@ -297,6 +244,8 @@ sysret32_from_system_call:
         * code.  We zero R8-R10 to avoid info leaks.
          */
        movq    RSP-ORIG_RAX(%rsp), %rsp
+SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
+       ANNOTATE_NOENDBR
 
        /*
         * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
@@ -314,6 +263,9 @@ sysret32_from_system_call:
        xorl    %r10d, %r10d
        swapgs
        sysretl
+SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
+       ANNOTATE_NOENDBR
+       int3
 SYM_CODE_END(entry_SYSCALL_compat)
 
 /*
@@ -362,54 +314,25 @@ SYM_CODE_START(entry_INT80_compat)
 
        /* switch to thread stack expects orig_ax and rdi to be pushed */
        pushq   %rax                    /* pt_regs->orig_ax */
-       pushq   %rdi                    /* pt_regs->di */
 
        /* Need to switch before accessing the thread stack. */
-       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
        /* In the Xen PV case we already run on the thread stack. */
        ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
 
-       movq    %rsp, %rdi
+       movq    %rsp, %rax
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
-       pushq   6*8(%rdi)               /* regs->ss */
-       pushq   5*8(%rdi)               /* regs->rsp */
-       pushq   4*8(%rdi)               /* regs->eflags */
-       pushq   3*8(%rdi)               /* regs->cs */
-       pushq   2*8(%rdi)               /* regs->ip */
-       pushq   1*8(%rdi)               /* regs->orig_ax */
-       pushq   (%rdi)                  /* pt_regs->di */
+       pushq   5*8(%rax)               /* regs->ss */
+       pushq   4*8(%rax)               /* regs->rsp */
+       pushq   3*8(%rax)               /* regs->eflags */
+       pushq   2*8(%rax)               /* regs->cs */
+       pushq   1*8(%rax)               /* regs->ip */
+       pushq   0*8(%rax)               /* regs->orig_ax */
 .Lint80_keep_stack:
 
-       pushq   %rsi                    /* pt_regs->si */
-       xorl    %esi, %esi              /* nospec   si */
-       pushq   %rdx                    /* pt_regs->dx */
-       xorl    %edx, %edx              /* nospec   dx */
-       pushq   %rcx                    /* pt_regs->cx */
-       xorl    %ecx, %ecx              /* nospec   cx */
-       pushq   $-ENOSYS                /* pt_regs->ax */
-       pushq   %r8                     /* pt_regs->r8 */
-       xorl    %r8d, %r8d              /* nospec   r8 */
-       pushq   %r9                     /* pt_regs->r9 */
-       xorl    %r9d, %r9d              /* nospec   r9 */
-       pushq   %r10                    /* pt_regs->r10*/
-       xorl    %r10d, %r10d            /* nospec   r10 */
-       pushq   %r11                    /* pt_regs->r11 */
-       xorl    %r11d, %r11d            /* nospec   r11 */
-       pushq   %rbx                    /* pt_regs->rbx */
-       xorl    %ebx, %ebx              /* nospec   rbx */
-       pushq   %rbp                    /* pt_regs->rbp */
-       xorl    %ebp, %ebp              /* nospec   rbp */
-       pushq   %r12                    /* pt_regs->r12 */
-       xorl    %r12d, %r12d            /* nospec   r12 */
-       pushq   %r13                    /* pt_regs->r13 */
-       xorl    %r13d, %r13d            /* nospec   r13 */
-       pushq   %r14                    /* pt_regs->r14 */
-       xorl    %r14d, %r14d            /* nospec   r14 */
-       pushq   %r15                    /* pt_regs->r15 */
-       xorl    %r15d, %r15d            /* nospec   r15 */
-
+       PUSH_AND_CLEAR_REGS rax=$-ENOSYS
        UNWIND_HINT_REGS
 
        cld
index e88791b420eeb85fa143d831870829419c78323e..fc7f458eb3de6351ee25bee5e8b4ae5df8564245 100644 (file)
@@ -302,7 +302,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
        INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
        INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
        INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
-       INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
+       INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
        INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
        INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
        EVENT_EXTRA_END
@@ -5536,7 +5536,11 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con
                        /* Disabled fixed counters which are not in CPUID */
                        c->idxmsk64 &= intel_ctrl;
 
-                       if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES)
+                       /*
+                        * Don't extend the pseudo-encoding to the
+                        * generic counters
+                        */
+                       if (!use_fixed_pseudo_encoding(c->code))
                                c->idxmsk64 |= (1ULL << num_counters) - 1;
                }
                c->idxmsk64 &=
@@ -6212,6 +6216,7 @@ __init int intel_pmu_init(void)
 
        case INTEL_FAM6_ALDERLAKE:
        case INTEL_FAM6_ALDERLAKE_L:
+       case INTEL_FAM6_RAPTORLAKE:
                /*
                 * Alder Lake has 2 types of CPU, core and atom.
                 *
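
The constraint fix above swaps a hard-coded comparison against the ref-cycles mask for a use_fixed_pseudo_encoding() check, so any fixed-counter pseudo-encoding is kept off the generic counters. A hedged sketch of that predicate, assuming it keys off the zero event-select byte that pseudo-encodings such as ref-cycles (0x0300) carry:

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch, not the kernel's definition: fixed-counter pseudo-encodings
     * have a zero event-select byte, so a zero low byte marks an event
     * that must not be extended onto the generic counters. */
    static inline bool use_fixed_pseudo_encoding_sketch(uint64_t code)
    {
            return !(code & 0xff);
    }
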
index c6262b154c3a292ea1b985193c4cd4b79640ad6c..48e5db21142c2257a29233e9037fb881583ad85d 100644 (file)
@@ -40,7 +40,7 @@
  * Model specific counters:
  *     MSR_CORE_C1_RES: CORE C1 Residency Counter
  *                      perf code: 0x00
- *                      Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL
+ *                      Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL
  *                      Scope: Core (each processor core has a MSR)
  *     MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
  *                            perf code: 0x01
  *                            perf code: 0x02
  *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
  *                                             SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- *                                             TGL,TNT,RKL,ADL
+ *                                             TGL,TNT,RKL,ADL,RPL,SPR
  *                            Scope: Core
  *     MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
  *                            perf code: 0x03
  *                            Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
- *                                             ICL,TGL,RKL,ADL
+ *                                             ICL,TGL,RKL,ADL,RPL
  *                            Scope: Core
  *     MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
  *                            perf code: 0x00
  *                            Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
- *                                             KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL
+ *                                             KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL,
+ *                                             RPL,SPR
  *                            Scope: Package (physical package)
  *     MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
  *                            perf code: 0x01
  *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
  *                                             GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL,
- *                                             ADL
+ *                                             ADL,RPL
  *                            Scope: Package (physical package)
  *     MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
  *                            perf code: 0x02
  *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
  *                                             SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- *                                             TGL,TNT,RKL,ADL
+ *                                             TGL,TNT,RKL,ADL,RPL,SPR
  *                            Scope: Package (physical package)
  *     MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
  *                            perf code: 0x03
  *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
- *                                             KBL,CML,ICL,TGL,RKL,ADL
+ *                                             KBL,CML,ICL,TGL,RKL,ADL,RPL
  *                            Scope: Package (physical package)
  *     MSR_PKG_C8_RESIDENCY:  Package C8 Residency Counter.
  *                            perf code: 0x04
  *                            Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
- *                                             ADL
+ *                                             ADL,RPL
  *                            Scope: Package (physical package)
  *     MSR_PKG_C9_RESIDENCY:  Package C9 Residency Counter.
  *                            perf code: 0x05
  *                            Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
- *                                             ADL
+ *                                             ADL,RPL
  *                            Scope: Package (physical package)
  *     MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
  *                            perf code: 0x06
  *                            Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
- *                                             TNT,RKL,ADL
+ *                                             TNT,RKL,ADL,RPL
  *                            Scope: Package (physical package)
  *
  */
@@ -674,12 +675,14 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,             &icl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &icx_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &icx_cstates),
+       X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &icx_cstates),
 
        X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,         &icl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           &icl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &icl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &adl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &adl_cstates),
+       X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,          &adl_cstates),
        { },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
index e497da9bf42707b654dc00d94ad5c998927918b2..7695dcae280e7067db8658827398489efbc04e54 100644 (file)
@@ -1828,6 +1828,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &rkl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &adl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &adl_uncore_init),
+       X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,          &adl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &spr_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &snr_uncore_init),
        {},
index f698a55bde8189884793e40481f060e132eef42b..4262351f52b60b5fd0a0f1bbbf2828c61f33c76d 100644 (file)
 #define PCI_DEVICE_ID_INTEL_ADL_14_IMC         0x4650
 #define PCI_DEVICE_ID_INTEL_ADL_15_IMC         0x4668
 #define PCI_DEVICE_ID_INTEL_ADL_16_IMC         0x4670
+#define PCI_DEVICE_ID_INTEL_RPL_1_IMC          0xA700
+#define PCI_DEVICE_ID_INTEL_RPL_2_IMC          0xA702
+#define PCI_DEVICE_ID_INTEL_RPL_3_IMC          0xA706
+#define PCI_DEVICE_ID_INTEL_RPL_4_IMC          0xA709
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK                        0x000000ff
@@ -1406,6 +1410,22 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = {
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC),
                .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
        },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_1_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_2_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_3_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_4_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
        { /* end: all zeroes */ }
 };
 
index 96c775abe31ff7ebbf0fd39b1c6e65395b78af96..6d759f88315c6ba9bb3bdead7611196a607f43c7 100644 (file)
@@ -103,6 +103,7 @@ static bool test_intel(int idx, void *data)
        case INTEL_FAM6_ROCKETLAKE:
        case INTEL_FAM6_ALDERLAKE:
        case INTEL_FAM6_ALDERLAKE_L:
+       case INTEL_FAM6_RAPTORLAKE:
                if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
                        return true;
                break;
index 8e4d0391ff6c9bafa9bcb7dde33478bd6f569d09..e481056698de145f82c770ca9730168a46738dab 100644 (file)
@@ -5,7 +5,5 @@
 
 obj-$(CONFIG_IA32_EMULATION) := ia32_signal.o
 
-obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
-
 audit-class-$(CONFIG_AUDIT) := audit.o
 obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y)
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
deleted file mode 100644 (file)
index 9bd1524..0000000
+++ /dev/null
@@ -1,325 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  a.out loader for x86-64
- *
- *  Copyright (C) 1991, 1992, 1996  Linus Torvalds
- *  Hacked together by Andi Kleen
- */
-
-#include <linux/module.h>
-
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/a.out.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/binfmts.h>
-#include <linux/personality.h>
-#include <linux/init.h>
-#include <linux/jiffies.h>
-#include <linux/perf_event.h>
-#include <linux/sched/task_stack.h>
-
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-#include <asm/user32.h>
-#include <asm/ia32.h>
-
-#undef WARN_OLD
-
-static int load_aout_binary(struct linux_binprm *);
-static int load_aout_library(struct file *);
-
-static struct linux_binfmt aout_format = {
-       .module         = THIS_MODULE,
-       .load_binary    = load_aout_binary,
-       .load_shlib     = load_aout_library,
-};
-
-static int set_brk(unsigned long start, unsigned long end)
-{
-       start = PAGE_ALIGN(start);
-       end = PAGE_ALIGN(end);
-       if (end <= start)
-               return 0;
-       return vm_brk(start, end - start);
-}
-
-
-/*
- * create_aout_tables() parses the env- and arg-strings in new user
- * memory and creates the pointer tables from them, and puts their
- * addresses on the "stack", returning the new stack pointer value.
- */
-static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
-{
-       u32 __user *argv, *envp, *sp;
-       int argc = bprm->argc, envc = bprm->envc;
-
-       sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p);
-       sp -= envc+1;
-       envp = sp;
-       sp -= argc+1;
-       argv = sp;
-       put_user((unsigned long) envp, --sp);
-       put_user((unsigned long) argv, --sp);
-       put_user(argc, --sp);
-       current->mm->arg_start = (unsigned long) p;
-       while (argc-- > 0) {
-               char c;
-
-               put_user((u32)(unsigned long)p, argv++);
-               do {
-                       get_user(c, p++);
-               } while (c);
-       }
-       put_user(0, argv);
-       current->mm->arg_end = current->mm->env_start = (unsigned long) p;
-       while (envc-- > 0) {
-               char c;
-
-               put_user((u32)(unsigned long)p, envp++);
-               do {
-                       get_user(c, p++);
-               } while (c);
-       }
-       put_user(0, envp);
-       current->mm->env_end = (unsigned long) p;
-       return sp;
-}
-
-/*
- * These are the functions used to load a.out style executables and shared
- * libraries.  There is no binary dependent code anywhere else.
- */
-static int load_aout_binary(struct linux_binprm *bprm)
-{
-       unsigned long error, fd_offset, rlim;
-       struct pt_regs *regs = current_pt_regs();
-       struct exec ex;
-       int retval;
-
-       ex = *((struct exec *) bprm->buf);              /* exec-header */
-       if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
-            N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
-           N_TRSIZE(ex) || N_DRSIZE(ex) ||
-           i_size_read(file_inode(bprm->file)) <
-           ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
-               return -ENOEXEC;
-       }
-
-       fd_offset = N_TXTOFF(ex);
-
-       /* Check initial limits. This avoids letting people circumvent
-        * size limits imposed on them by creating programs with large
-        * arrays in the data or bss.
-        */
-       rlim = rlimit(RLIMIT_DATA);
-       if (rlim >= RLIM_INFINITY)
-               rlim = ~0;
-       if (ex.a_data + ex.a_bss > rlim)
-               return -ENOMEM;
-
-       /* Flush all traces of the currently running executable */
-       retval = begin_new_exec(bprm);
-       if (retval)
-               return retval;
-
-       /* OK, This is the point of no return */
-       set_personality(PER_LINUX);
-       set_personality_ia32(false);
-
-       setup_new_exec(bprm);
-
-       regs->cs = __USER32_CS;
-       regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
-               regs->r13 = regs->r14 = regs->r15 = 0;
-
-       current->mm->end_code = ex.a_text +
-               (current->mm->start_code = N_TXTADDR(ex));
-       current->mm->end_data = ex.a_data +
-               (current->mm->start_data = N_DATADDR(ex));
-       current->mm->brk = ex.a_bss +
-               (current->mm->start_brk = N_BSSADDR(ex));
-
-       retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
-       if (retval < 0)
-               return retval;
-
-       if (N_MAGIC(ex) == OMAGIC) {
-               unsigned long text_addr, map_size;
-
-               text_addr = N_TXTADDR(ex);
-               map_size = ex.a_text+ex.a_data;
-
-               error = vm_brk(text_addr & PAGE_MASK, map_size);
-
-               if (error)
-                       return error;
-
-               error = read_code(bprm->file, text_addr, 32,
-                                 ex.a_text + ex.a_data);
-               if ((signed long)error < 0)
-                       return error;
-       } else {
-#ifdef WARN_OLD
-               static unsigned long error_time, error_time2;
-               if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
-                   (N_MAGIC(ex) != NMAGIC) &&
-                               time_after(jiffies, error_time2 + 5*HZ)) {
-                       printk(KERN_NOTICE "executable not page aligned\n");
-                       error_time2 = jiffies;
-               }
-
-               if ((fd_offset & ~PAGE_MASK) != 0 &&
-                           time_after(jiffies, error_time + 5*HZ)) {
-                       printk(KERN_WARNING
-                              "fd_offset is not page aligned. Please convert "
-                              "program: %pD\n",
-                              bprm->file);
-                       error_time = jiffies;
-               }
-#endif
-
-               if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) {
-                       error = vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
-                       if (error)
-                               return error;
-
-                       read_code(bprm->file, N_TXTADDR(ex), fd_offset,
-                                       ex.a_text+ex.a_data);
-                       goto beyond_if;
-               }
-
-               error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
-                               PROT_READ | PROT_EXEC,
-                               MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
-                               fd_offset);
-
-               if (error != N_TXTADDR(ex))
-                       return error;
-
-               error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
-                               PROT_READ | PROT_WRITE | PROT_EXEC,
-                               MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
-                               fd_offset + ex.a_text);
-               if (error != N_DATADDR(ex))
-                       return error;
-       }
-
-beyond_if:
-       error = set_brk(current->mm->start_brk, current->mm->brk);
-       if (error)
-               return error;
-
-       set_binfmt(&aout_format);
-
-       current->mm->start_stack =
-               (unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
-       /* start thread */
-       loadsegment(fs, 0);
-       loadsegment(ds, __USER32_DS);
-       loadsegment(es, __USER32_DS);
-       load_gs_index(0);
-       (regs)->ip = ex.a_entry;
-       (regs)->sp = current->mm->start_stack;
-       (regs)->flags = 0x200;
-       (regs)->cs = __USER32_CS;
-       (regs)->ss = __USER32_DS;
-       regs->r8 = regs->r9 = regs->r10 = regs->r11 =
-       regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
-       return 0;
-}
-
-static int load_aout_library(struct file *file)
-{
-       unsigned long bss, start_addr, len, error;
-       int retval;
-       struct exec ex;
-       loff_t pos = 0;
-
-       retval = -ENOEXEC;
-       error = kernel_read(file, &ex, sizeof(ex), &pos);
-       if (error != sizeof(ex))
-               goto out;
-
-       /* We come in here for the regular a.out style of shared libraries */
-       if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
-           N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
-           i_size_read(file_inode(file)) <
-           ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
-               goto out;
-       }
-
-       if (N_FLAGS(ex))
-               goto out;
-
-       /* For  QMAGIC, the starting address is 0x20 into the page.  We mask
-          this off to get the starting address for the page */
-
-       start_addr =  ex.a_entry & 0xfffff000;
-
-       if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
-#ifdef WARN_OLD
-               static unsigned long error_time;
-               if (time_after(jiffies, error_time + 5*HZ)) {
-                       printk(KERN_WARNING
-                              "N_TXTOFF is not page aligned. Please convert "
-                              "library: %pD\n",
-                              file);
-                       error_time = jiffies;
-               }
-#endif
-               retval = vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
-               if (retval)
-                       goto out;
-
-               read_code(file, start_addr, N_TXTOFF(ex),
-                         ex.a_text + ex.a_data);
-               retval = 0;
-               goto out;
-       }
-       /* Now use mmap to map the library into memory. */
-       error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
-                       PROT_READ | PROT_WRITE | PROT_EXEC,
-                       MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
-                       N_TXTOFF(ex));
-       retval = error;
-       if (error != start_addr)
-               goto out;
-
-       len = PAGE_ALIGN(ex.a_text + ex.a_data);
-       bss = ex.a_text + ex.a_data + ex.a_bss;
-       if (bss > len) {
-               retval = vm_brk(start_addr + len, bss - len);
-               if (retval)
-                       goto out;
-       }
-       retval = 0;
-out:
-       return retval;
-}
-
-static int __init init_aout_binfmt(void)
-{
-       register_binfmt(&aout_format);
-       return 0;
-}
-
-static void __exit exit_aout_binfmt(void)
-{
-       unregister_binfmt(&aout_format);
-}
-
-module_init(init_aout_binfmt);
-module_exit(exit_aout_binfmt);
-MODULE_LICENSE("GPL");
index 9aff97f0de7fd24074c7ddb17e55bd08746f285c..d937c55e717e655992b32eaefecb3f189d34f045 100644 (file)
 
 /* Asm macros */
 
-#define ACPI_FLUSH_CPU_CACHE() wbinvd()
+/*
+ * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states.
+ * It is required to prevent data loss.
+ *
+ * While running inside a virtual machine, the kernel can bypass cache
+ * flushing: changing the sleep state of a virtual machine doesn't affect
+ * the host system's sleep state and cannot lead to data loss.
+ */
+#define ACPI_FLUSH_CPU_CACHE()                                 \
+do {                                                           \
+       if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))       \
+               wbinvd();                                       \
+} while (0)
 
 int __acpi_acquire_global_lock(unsigned int *lock);
 int __acpi_release_global_lock(unsigned int *lock);
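
X86_FEATURE_HYPERVISOR in the guard above reflects CPUID leaf 1, ECX bit 31, which hypervisors set for their guests. A user-space analogue of the same test (illustrative; the kernel uses cpu_feature_enabled(), not raw CPUID):

    #include <stdbool.h>
    #include <stdio.h>
    #include <cpuid.h>

    static bool running_in_vm(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                    return false;
            return ecx & (1u << 31);        /* hypervisor-present bit */
    }

    int main(void)
    {
            printf("hypervisor: %s\n", running_in_vm() ? "yes" : "no");
            return 0;
    }
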
index 48067af946785b1569a802e3e2811eb67fbd7386..bd8ae0a7010ae572159e20d1c8011219e78cc743 100644 (file)
@@ -328,6 +328,8 @@ struct apic {
 
        /* wakeup_secondary_cpu */
        int     (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
+       /* wakeup secondary CPU using 64-bit wakeup point */
+       int     (*wakeup_secondary_cpu_64)(int apicid, unsigned long start_eip);
 
        void    (*inquire_remote_apic)(int apicid);
 
@@ -488,6 +490,11 @@ static inline unsigned int read_apic_id(void)
        return apic->get_apic_id(reg);
 }
 
+#ifdef CONFIG_X86_64
+typedef int (*wakeup_cpu_handler)(int apicid, unsigned long start_eip);
+extern void acpi_wake_cpu_handler_update(wakeup_cpu_handler handler);
+#endif
+
 extern int default_apic_id_valid(u32 apicid);
 extern int default_acpi_madt_oem_check(char *, char *);
 extern void default_setup_apic_routing(void);
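
acpi_wake_cpu_handler_update() lets the ACPI code install the ->wakeup_secondary_cpu_64 callback on the APIC driver. A hedged sketch of a caller; the handler body is hypothetical, only the hook's signature comes from the header above:

    /* Hypothetical platform handler: wake an AP via a mailbox rather
     * than INIT/SIPI, using the full 64-bit start address. */
    static int example_wakeup_cpu(int apicid, unsigned long start_eip)
    {
            /* ... write start_eip to the wakeup mailbox, ring doorbell ... */
            return 0;
    }

    static void __init example_register_wakeup(void)
    {
            acpi_wake_cpu_handler_update(example_wakeup_cpu);
    }
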
index c878fed3056fd809ffb2c541acb04e44e0bfe853..fbcfec4dc4ccd74d983d84ebad2c6b4d188bde1a 100644 (file)
 
 # define DEFINE_EXTABLE_TYPE_REG \
        ".macro extable_type_reg type:req reg:req\n"                                            \
-       ".set found, 0\n"                                                                       \
-       ".set regnr, 0\n"                                                                       \
+       ".set .Lfound, 0\n"                                                                     \
+       ".set .Lregnr, 0\n"                                                                     \
        ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n"               \
        ".ifc \\reg, %%\\rs\n"                                                                  \
-       ".set found, found+1\n"                                                                 \
-       ".long \\type + (regnr << 8)\n"                                                         \
+       ".set .Lfound, .Lfound+1\n"                                                             \
+       ".long \\type + (.Lregnr << 8)\n"                                                       \
        ".endif\n"                                                                              \
-       ".set regnr, regnr+1\n"                                                                 \
+       ".set .Lregnr, .Lregnr+1\n"                                                             \
        ".endr\n"                                                                               \
-       ".set regnr, 0\n"                                                                       \
+       ".set .Lregnr, 0\n"                                                                     \
        ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n"       \
        ".ifc \\reg, %%\\rs\n"                                                                  \
-       ".set found, found+1\n"                                                                 \
-       ".long \\type + (regnr << 8)\n"                                                         \
+       ".set .Lfound, .Lfound+1\n"                                                             \
+       ".long \\type + (.Lregnr << 8)\n"                                                       \
        ".endif\n"                                                                              \
-       ".set regnr, regnr+1\n"                                                                 \
+       ".set .Lregnr, .Lregnr+1\n"                                                             \
        ".endr\n"                                                                               \
-       ".if (found != 1)\n"                                                                    \
+       ".if (.Lfound != 1)\n"                                                                  \
        ".error \"extable_type_reg: bad register argument\"\n"                                  \
        ".endif\n"                                                                              \
        ".endm\n"
index 981fe923a59fe5983d385ee042e339aa6cfd131f..53e9b0620d969339a6f1aa890256fc7cc93b2968 100644 (file)
@@ -74,6 +74,7 @@ static void sanitize_boot_params(struct boot_params *boot_params)
                        BOOT_PARAM_PRESERVE(hdr),
                        BOOT_PARAM_PRESERVE(e820_table),
                        BOOT_PARAM_PRESERVE(eddbuf),
+                       BOOT_PARAM_PRESERVE(cc_blob_address),
                };
 
                memset(&scratch, 0, sizeof(scratch));
index 4d20a293c6fd420952f7897840b06cafe71a1d0c..a3ec87d198ac8398309e2962b5bcc59eeb074692 100644 (file)
@@ -18,7 +18,7 @@
 #ifdef CONFIG_X86_32
 # define __BUG_REL(val)        ".long " __stringify(val)
 #else
-# define __BUG_REL(val)        ".long " __stringify(val) " - 2b"
+# define __BUG_REL(val)        ".long " __stringify(val) " - ."
 #endif
 
 #ifdef CONFIG_DEBUG_BUGVERBOSE
@@ -78,9 +78,9 @@ do {                                                          \
  */
 #define __WARN_FLAGS(flags)                                    \
 do {                                                           \
-       __auto_type f = BUGFLAG_WARNING|(flags);                \
+       __auto_type __flags = BUGFLAG_WARNING|(flags);          \
        instrumentation_begin();                                \
-       _BUG_FLAGS(ASM_UD2, f, ASM_REACHABLE);                  \
+       _BUG_FLAGS(ASM_UD2, __flags, ASM_REACHABLE);            \
        instrumentation_end();                                  \
 } while (0)
 
index 7516e4199b3c61cc90295a7506b52dff62183743..20fd0acd7d800b58a8b95b2ff807899af94cfae1 100644 (file)
@@ -28,15 +28,13 @@ typedef u16         compat_ipc_pid_t;
 typedef __kernel_fsid_t        compat_fsid_t;
 
 struct compat_stat {
-       compat_dev_t    st_dev;
-       u16             __pad1;
+       u32             st_dev;
        compat_ino_t    st_ino;
        compat_mode_t   st_mode;
        compat_nlink_t  st_nlink;
        __compat_uid_t  st_uid;
        __compat_gid_t  st_gid;
-       compat_dev_t    st_rdev;
-       u16             __pad2;
+       u32             st_rdev;
        u32             st_size;
        u32             st_blksize;
        u32             st_blocks;
index 86e5e4e26fcbefc68c961d9395e163f6d840a878..e89772dc17f10e1643e6bc973d9e486c59381eff 100644 (file)
@@ -36,6 +36,8 @@ extern int _debug_hotplug_cpu(int cpu, int action);
 #endif
 #endif
 
+extern void ap_init_aperfmperf(void);
+
 int mwait_usable(const struct cpuinfo_x86 *);
 
 unsigned int x86_family(unsigned int sig);
index 1261842d006c73a751354201d35a2afdb513c96a..66d3e3b1d24d8dc4f8689db628668d2859718fb7 100644 (file)
@@ -34,14 +34,17 @@ enum cpuid_leafs
        CPUID_8000_001F_EAX,
 };
 
+#define X86_CAP_FMT_NUM "%d:%d"
+#define x86_cap_flag_num(flag) ((flag) >> 5), ((flag) & 31)
+
 #ifdef CONFIG_X86_FEATURE_NAMES
 extern const char * const x86_cap_flags[NCAPINTS*32];
 extern const char * const x86_power_flags[32];
 #define X86_CAP_FMT "%s"
 #define x86_cap_flag(flag) x86_cap_flags[flag]
 #else
-#define X86_CAP_FMT "%d:%d"
-#define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31)
+#define X86_CAP_FMT X86_CAP_FMT_NUM
+#define x86_cap_flag x86_cap_flag_num
 #endif
 
 /*
index 73e643ae94b6f2206d8dac0fdd10477e7c1eb8df..6e0dbbf847f33ed92d1514a4085cc25ceb41fcb2 100644 (file)
 #define X86_FEATURE_INVPCID_SINGLE     ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 #define X86_FEATURE_HW_PSTATE          ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK      ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-/* FREE!                                ( 7*32+10) */
+#define X86_FEATURE_XCOMPACTED         ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
 #define X86_FEATURE_PTI                        ( 7*32+11) /* Kernel Page Table Isolation enabled */
 #define X86_FEATURE_RETPOLINE          ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_LFENCE   ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
 #define X86_FEATURE_VMW_VMMCALL                ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
 #define X86_FEATURE_PVUNLOCK           ( 8*32+20) /* "" PV unlock function */
 #define X86_FEATURE_VCPUPREEMPT                ( 8*32+21) /* "" PV vcpu_is_preempted function */
+#define X86_FEATURE_TDX_GUEST          ( 8*32+22) /* Intel Trust Domain Extensions Guest */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE           ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h
new file mode 100644 (file)
index 0000000..70b2db1
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * CPUID-related helpers/definitions
+ *
+ * Derived from arch/x86/kvm/cpuid.c
+ */
+
+#ifndef _ASM_X86_CPUID_H
+#define _ASM_X86_CPUID_H
+
+static __always_inline bool cpuid_function_is_indexed(u32 function)
+{
+       switch (function) {
+       case 4:
+       case 7:
+       case 0xb:
+       case 0xd:
+       case 0xf:
+       case 0x10:
+       case 0x12:
+       case 0x14:
+       case 0x17:
+       case 0x18:
+       case 0x1d:
+       case 0x1e:
+       case 0x1f:
+       case 0x8000001d:
+               return true;
+       }
+
+       return false;
+}
+
+#endif /* _ASM_X86_CPUID_H */
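
cpuid_function_is_indexed() exists because for these leaves ECX selects a sub-leaf, so a (function, index) pair is needed to identify a result. Leaf 4 shows why, enumerating one cache level per sub-leaf (user-space example using GCC's cpuid.h):

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx, idx;

            /* Same function, different ECX index, different answer. */
            for (idx = 0; idx < 8; idx++) {
                    __cpuid_count(4, idx, eax, ebx, ecx, edx);
                    if (!(eax & 0x1f))
                            break;          /* cache type 0: no more levels */
                    printf("subleaf %u: type %u, level %u\n",
                           idx, eax & 0x1f, (eax >> 5) & 0x7);
            }
            return 0;
    }
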
index 1231d63f836d81386f122c1022a7f5a25d503ef1..36369e76cc631ecbb488c175d3c330cebee88737 100644 (file)
  * cpu_feature_enabled().
  */
 
-#ifdef CONFIG_X86_SMAP
-# define DISABLE_SMAP  0
-#else
-# define DISABLE_SMAP  (1<<(X86_FEATURE_SMAP & 31))
-#endif
-
 #ifdef CONFIG_X86_UMIP
 # define DISABLE_UMIP  0
 #else
 # define DISABLE_SGX   (1 << (X86_FEATURE_SGX & 31))
 #endif
 
+#ifdef CONFIG_INTEL_TDX_GUEST
+# define DISABLE_TDX_GUEST     0
+#else
+# define DISABLE_TDX_GUEST     (1 << (X86_FEATURE_TDX_GUEST & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -79,8 +79,8 @@
 #define DISABLED_MASK5 0
 #define DISABLED_MASK6 0
 #define DISABLED_MASK7 (DISABLE_PTI)
-#define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX)
+#define DISABLED_MASK8 (DISABLE_TDX_GUEST)
+#define DISABLED_MASK9 (DISABLE_SGX)
 #define DISABLED_MASK10        0
 #define DISABLED_MASK11        0
 #define DISABLED_MASK12        0
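
With DISABLE_TDX_GUEST wired into DISABLED_MASK8, cpu_feature_enabled(X86_FEATURE_TDX_GUEST) collapses to a constant 0 whenever CONFIG_INTEL_TDX_GUEST is off, letting the compiler drop the TDX branches entirely. A user-space sketch of the mask trick (names are illustrative):

    #include <stdio.h>

    #define FEATURE_TDX_GUEST       22
    #define DISABLED_MASK           (1u << FEATURE_TDX_GUEST) /* config: off */

    static int runtime_has(int f)
    {
            (void)f;
            return 1;               /* pretend CPUID reports the feature */
    }

    /* The mask test is a compile-time constant, so when the bit is set
     * the && short-circuits and the runtime check is dead code. */
    #define feature_enabled(f) \
            (!(DISABLED_MASK & (1u << (f))) && runtime_has(f))

    int main(void)
    {
            printf("tdx guest enabled: %d\n",
                   feature_enabled(FEATURE_TDX_GUEST));  /* prints 0 */
            return 0;
    }
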
index 98938a68251cc49fc755328edea827fbe52d11df..bed74a0f2932dfdba626b239baddfbd07d06494f 100644 (file)
@@ -357,6 +357,11 @@ static inline u32 efi64_convert_status(efi_status_t status)
                                                   runtime),            \
                                    func, __VA_ARGS__))
 
+#define efi_dxe_call(func, ...)                                                \
+       (efi_is_native()                                                \
+               ? efi_dxe_table->func(__VA_ARGS__)                      \
+               : __efi64_thunk_map(efi_dxe_table, func, __VA_ARGS__))
+
 #else /* CONFIG_EFI_MIXED */
 
 static inline bool efi_is_64bit(void)
index 29fea180a6658e84bd0a094d4e767558cc91b4e1..cb0ff1055ab1632f0c22b1f470ea8d0e57a23ffd 100644 (file)
@@ -116,7 +116,7 @@ extern unsigned int vdso32_enabled;
  * now struct_user_regs, they are different)
  */
 
-#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs)        \
+#define ELF_CORE_COPY_REGS(pr_reg, regs)       \
 do {                                           \
        pr_reg[0] = regs->bx;                   \
        pr_reg[1] = regs->cx;                   \
@@ -128,6 +128,7 @@ do {                                                \
        pr_reg[7] = regs->ds;                   \
        pr_reg[8] = regs->es;                   \
        pr_reg[9] = regs->fs;                   \
+       savesegment(gs, pr_reg[10]);            \
        pr_reg[11] = regs->orig_ax;             \
        pr_reg[12] = regs->ip;                  \
        pr_reg[13] = regs->cs;                  \
@@ -136,18 +137,6 @@ do {                                               \
        pr_reg[16] = regs->ss;                  \
 } while (0);
 
-#define ELF_CORE_COPY_REGS(pr_reg, regs)       \
-do {                                           \
-       ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
-       pr_reg[10] = get_user_gs(regs);         \
-} while (0);
-
-#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs)        \
-do {                                           \
-       ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
-       savesegment(gs, pr_reg[10]);            \
-} while (0);
-
 #define ELF_PLATFORM   (utsname()->machine)
 #define set_personality_64bit()        do { } while (0)
 
index c83b3020350ac264ebd65217bc50e3e9c957376b..6b0f31fb53f7e27e6d21a2076914a808b24c0685 100644 (file)
@@ -162,7 +162,6 @@ static inline bool fpstate_is_confidential(struct fpu_guest *gfpu)
 }
 
 /* prctl */
-struct task_struct;
-extern long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2);
+extern long fpu_xstate_prctl(int option, unsigned long arg2);
 
 #endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
deleted file mode 100644 (file)
index e69de29..0000000
index 7924f27f5c8b14c52b7d1a742eb1459cf48e14ee..72184b0b2219e88502bad42ee77fec36bede52c3 100644 (file)
@@ -632,6 +632,10 @@ DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER,     exc_xen_hypervisor_callback);
 DECLARE_IDTENTRY_RAW(X86_TRAP_OTHER,   exc_xen_unknown_trap);
 #endif
 
+#ifdef CONFIG_INTEL_TDX_GUEST
+DECLARE_IDTENTRY(X86_TRAP_VE,          exc_virtualization_exception);
+#endif
+
 /* Device interrupts common/spurious */
 DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER,   common_interrupt);
 #ifdef CONFIG_X86_LOCAL_APIC
index 048b6d5aff504f394baeca8d3bf1f39e816bfdd5..def6ca121111ce2873749ea418c43169caadb412 100644 (file)
@@ -26,6 +26,7 @@
  *             _G      - parts with extra graphics on
  *             _X      - regular server parts
  *             _D      - micro server parts
+ *             _N,_P   - other mobile parts
  *
  *             Historical OPTDIFFs:
  *
 
 #define INTEL_FAM6_ALDERLAKE           0x97    /* Golden Cove / Gracemont */
 #define INTEL_FAM6_ALDERLAKE_L         0x9A    /* Golden Cove / Gracemont */
+#define INTEL_FAM6_ALDERLAKE_N         0xBE
 
 #define INTEL_FAM6_RAPTORLAKE          0xB7
+#define INTEL_FAM6_RAPTORLAKE_P                0xBA
 
 /* "Small Core" Processors (Atom) */
 
index f6d91ecb80267840e8f3b17e10986a2af28ef6ec..1870b99c3356078e495ad4bcd12ac7e4cab3d4bb 100644 (file)
@@ -44,6 +44,7 @@
 #include <asm/page.h>
 #include <asm/early_ioremap.h>
 #include <asm/pgtable_types.h>
+#include <asm/shared/io.h>
 
 #define build_mmio_read(name, size, type, reg, barrier) \
 static inline type name(const volatile void __iomem *addr) \
@@ -210,8 +211,6 @@ void __iomem *ioremap(resource_size_t offset, unsigned long size);
 extern void iounmap(volatile void __iomem *addr);
 #define iounmap iounmap
 
-extern void set_iounmap_nonlazy(void);
-
 #ifdef __KERNEL__
 
 void memcpy_fromio(void *, const volatile void __iomem *, size_t);
@@ -258,37 +257,23 @@ static inline void slow_down_io(void)
 #endif
 
 #define BUILDIO(bwl, bw, type)                                         \
-static inline void out##bwl(unsigned type value, int port)             \
-{                                                                      \
-       asm volatile("out" #bwl " %" #bw "0, %w1"                       \
-                    : : "a"(value), "Nd"(port));                       \
-}                                                                      \
-                                                                       \
-static inline unsigned type in##bwl(int port)                          \
-{                                                                      \
-       unsigned type value;                                            \
-       asm volatile("in" #bwl " %w1, %" #bw "0"                        \
-                    : "=a"(value) : "Nd"(port));                       \
-       return value;                                                   \
-}                                                                      \
-                                                                       \
-static inline void out##bwl##_p(unsigned type value, int port)         \
+static inline void out##bwl##_p(type value, u16 port)                  \
 {                                                                      \
        out##bwl(value, port);                                          \
        slow_down_io();                                                 \
 }                                                                      \
                                                                        \
-static inline unsigned type in##bwl##_p(int port)                      \
+static inline type in##bwl##_p(u16 port)                               \
 {                                                                      \
-       unsigned type value = in##bwl(port);                            \
+       type value = in##bwl(port);                                     \
        slow_down_io();                                                 \
        return value;                                                   \
 }                                                                      \
                                                                        \
-static inline void outs##bwl(int port, const void *addr, unsigned long count) \
+static inline void outs##bwl(u16 port, const void *addr, unsigned long count) \
 {                                                                      \
        if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) {          \
-               unsigned type *value = (unsigned type *)addr;           \
+               type *value = (type *)addr;                             \
                while (count) {                                         \
                        out##bwl(*value, port);                         \
                        value++;                                        \
@@ -301,10 +286,10 @@ static inline void outs##bwl(int port, const void *addr, unsigned long count) \
        }                                                               \
 }                                                                      \
                                                                        \
-static inline void ins##bwl(int port, void *addr, unsigned long count) \
+static inline void ins##bwl(u16 port, void *addr, unsigned long count) \
 {                                                                      \
        if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) {          \
-               unsigned type *value = (unsigned type *)addr;           \
+               type *value = (type *)addr;                             \
                while (count) {                                         \
                        *value = in##bwl(port);                         \
                        value++;                                        \
@@ -317,13 +302,11 @@ static inline void ins##bwl(int port, void *addr, unsigned long count)    \
        }                                                               \
 }
 
-BUILDIO(b, b, char)
-BUILDIO(w, w, short)
-BUILDIO(l, , int)
+BUILDIO(b, b, u8)
+BUILDIO(w, w, u16)
+BUILDIO(l,  , u32)
+#undef BUILDIO
 
-#define inb inb
-#define inw inw
-#define inl inl
 #define inb_p inb_p
 #define inw_p inw_p
 #define inl_p inl_p
@@ -331,9 +314,6 @@ BUILDIO(l, , int)
 #define insw insw
 #define insl insl
 
-#define outb outb
-#define outw outw
-#define outl outl
 #define outb_p outb_p
 #define outw_p outw_p
 #define outl_p outl_p
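
With the plain in/out helpers moved to asm/shared/io.h, BUILDIO() only generates the _p and string variants here. For reference, the byte case now expands to roughly:

    /* BUILDIO(b, b, u8), _p variants, after expansion (sketch): */
    static inline void outb_p(u8 value, u16 port)
    {
            outb(value, port);
            slow_down_io();
    }

    static inline u8 inb_p(u16 port)
    {
            u8 value = inb(port);

            slow_down_io();
            return value;
    }
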
index 111104d1c2cd1c2650f5e7786fbfe08d7e69ac80..7793e52d6237a42279c716e2b1b94d3fa03e48a6 100644 (file)
@@ -137,14 +137,6 @@ static __always_inline void arch_local_irq_restore(unsigned long flags)
        if (!arch_irqs_disabled_flags(flags))
                arch_local_irq_enable();
 }
-#else
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_XEN_PV
-#define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
-#else
-#define SWAPGS swapgs
-#endif
-#endif
 #endif /* !__ASSEMBLY__ */
 
 #endif
index 3c368b639c0462e2cd8b0757a255e94e4d56f68b..1a6d7e3f6c32c7f88917270b1f478608d6017830 100644 (file)
@@ -118,6 +118,7 @@ KVM_X86_OP_OPTIONAL(mem_enc_register_region)
 KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
 KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
 KVM_X86_OP_OPTIONAL(vm_move_enc_context_from)
+KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
 KVM_X86_OP(get_msr_feature)
 KVM_X86_OP(can_emulate_instruction)
 KVM_X86_OP(apic_init_signal_blocked)
index d23e80a56eb867764527c356fcaa927c55cc7fc2..4ff36610af6ab5252d4956aa57d572bf45ee287a 100644 (file)
@@ -974,12 +974,10 @@ enum hv_tsc_page_status {
        HV_TSC_PAGE_UNSET = 0,
        /* TSC page MSR was written by the guest, update pending */
        HV_TSC_PAGE_GUEST_CHANGED,
-       /* TSC page MSR was written by KVM userspace, update pending */
+       /* TSC page update was triggered from the host side */
        HV_TSC_PAGE_HOST_CHANGED,
        /* TSC page was properly set up and is currently active  */
        HV_TSC_PAGE_SET,
-       /* TSC page is currently being updated and therefore is inactive */
-       HV_TSC_PAGE_UPDATING,
        /* TSC page was set up with an inaccessible GPA */
        HV_TSC_PAGE_BROKEN,
 };
@@ -1052,6 +1050,7 @@ enum kvm_apicv_inhibit {
        APICV_INHIBIT_REASON_X2APIC,
        APICV_INHIBIT_REASON_BLOCKIRQ,
        APICV_INHIBIT_REASON_ABSENT,
+       APICV_INHIBIT_REASON_SEV,
 };
 
 struct kvm_arch {
@@ -1485,6 +1484,7 @@ struct kvm_x86_ops {
        int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
        int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
        int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
+       void (*guest_memory_reclaimed)(struct kvm *kvm);
 
        int (*get_msr_feature)(struct kvm_msr_entry *entry);
 
@@ -1585,8 +1585,9 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
 #define kvm_arch_pmi_in_guest(vcpu) \
        ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
 
-int kvm_mmu_module_init(void);
-void kvm_mmu_module_exit(void);
+void kvm_mmu_x86_module_init(void);
+int kvm_mmu_vendor_module_init(void);
+void kvm_mmu_vendor_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
index 56935ebb1dfe1a418f3b2cb66f2b2cd33de4aef1..57bc74e112f20936d6ee2601443892ecc083b533 100644 (file)
@@ -7,6 +7,8 @@
 #include <linux/interrupt.h>
 #include <uapi/asm/kvm_para.h>
 
+#include <asm/tdx.h>
+
 #ifdef CONFIG_KVM_GUEST
 bool kvm_check_and_clear_guest_paused(void);
 #else
@@ -32,6 +34,10 @@ static inline bool kvm_check_and_clear_guest_paused(void)
 static inline long kvm_hypercall0(unsigned int nr)
 {
        long ret;
+
+       if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+               return tdx_kvm_hypercall(nr, 0, 0, 0, 0);
+
        asm volatile(KVM_HYPERCALL
                     : "=a"(ret)
                     : "a"(nr)
@@ -42,6 +48,10 @@ static inline long kvm_hypercall0(unsigned int nr)
 static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
 {
        long ret;
+
+       if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+               return tdx_kvm_hypercall(nr, p1, 0, 0, 0);
+
        asm volatile(KVM_HYPERCALL
                     : "=a"(ret)
                     : "a"(nr), "b"(p1)
@@ -53,6 +63,10 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
                                  unsigned long p2)
 {
        long ret;
+
+       if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+               return tdx_kvm_hypercall(nr, p1, p2, 0, 0);
+
        asm volatile(KVM_HYPERCALL
                     : "=a"(ret)
                     : "a"(nr), "b"(p1), "c"(p2)
@@ -64,6 +78,10 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
                                  unsigned long p2, unsigned long p3)
 {
        long ret;
+
+       if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+               return tdx_kvm_hypercall(nr, p1, p2, p3, 0);
+
        asm volatile(KVM_HYPERCALL
                     : "=a"(ret)
                     : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
@@ -76,6 +94,10 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
                                  unsigned long p4)
 {
        long ret;
+
+       if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+               return tdx_kvm_hypercall(nr, p1, p2, p3, p4);
+
        asm volatile(KVM_HYPERCALL
                     : "=a"(ret)
                     : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)
index e2c6f433ed100b0b131b8cb8008fa659fb9b97d4..88ceaf3648b32dbee9fc3fba8bfb61548d302328 100644 (file)
@@ -49,9 +49,6 @@ void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
 
 void __init mem_encrypt_free_decrypted_mem(void);
 
-/* Architecture __weak replacement functions */
-void __init mem_encrypt_init(void);
-
 void __init sev_es_init_vc_handling(void);
 
 #define __bss_decrypted __section(".bss..decrypted")
@@ -89,6 +86,9 @@ static inline void mem_encrypt_free_decrypted_mem(void) { }
 
 #endif /* CONFIG_AMD_MEM_ENCRYPT */
 
+/* Architecture __weak replacement functions */
+void __init mem_encrypt_init(void);
+
 /*
  * The __sme_pa() and __sme_pa_nodebug() macros are meant for use when
  * writing to or comparing values from the cr3 register.  Having the
index d6bfdfb0f0afe56710491bc5a19b63858dc205f7..0c3d3440fe27876c0438d16d9dead603498ea3e8 100644 (file)
@@ -131,10 +131,12 @@ extern void __init load_ucode_bsp(void);
 extern void load_ucode_ap(void);
 void reload_early_microcode(void);
 extern bool initrd_gone;
+void microcode_bsp_resume(void);
 #else
 static inline void __init load_ucode_bsp(void)                 { }
 static inline void load_ucode_ap(void)                         { }
 static inline void reload_early_microcode(void)                        { }
+static inline void microcode_bsp_resume(void)                  { }
 #endif
 
 #endif /* _ASM_X86_MICROCODE_H */
index 27516046117a389a80b962579b8c42e6f17e67b9..b8d40ddeab00f9b3989962d095c8d62f73a2d3dc 100644 (file)
@@ -141,7 +141,7 @@ do {                                                \
 #ifdef CONFIG_X86_32
 #define deactivate_mm(tsk, mm)                 \
 do {                                           \
-       lazy_load_gs(0);                        \
+       loadsegment(gs, 0);                     \
 } while (0)
 #else
 #define deactivate_mm(tsk, mm)                 \
diff --git a/arch/x86/include/asm/mmx.h b/arch/x86/include/asm/mmx.h
deleted file mode 100644 (file)
index e69de29..0000000
index b85147d75626e366422b7ed9d290e19ff19c3a4e..d71c7e8b738d2a309335f61b236079721568d85c 100644 (file)
@@ -12,14 +12,17 @@ int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
 /* Structs and defines for the X86 specific MSI message format */
 
 typedef struct x86_msi_data {
-       u32     vector                  :  8,
-               delivery_mode           :  3,
-               dest_mode_logical       :  1,
-               reserved                :  2,
-               active_low              :  1,
-               is_level                :  1;
-
-       u32     dmar_subhandle;
+       union {
+               struct {
+                       u32     vector                  :  8,
+                               delivery_mode           :  3,
+                               dest_mode_logical       :  1,
+                               reserved                :  2,
+                               active_low              :  1,
+                               is_level                :  1;
+               };
+               u32     dmar_subhandle;
+       };
 } __attribute__ ((packed)) arch_msi_msg_data_t;
 #define arch_msi_msg_data      x86_msi_data
 
index 0eb90d21049e84a287f20b5abaf83ba359e0c824..5555b2f9af462e4e67da0a4b69b0df6fe6cd2f52 100644 (file)
 #define TSX_CTRL_RTM_DISABLE           BIT(0)  /* Disable RTM feature */
 #define TSX_CTRL_CPUID_CLEAR           BIT(1)  /* Disable TSX enumeration */
 
-/* SRBDS support */
 #define MSR_IA32_MCU_OPT_CTRL          0x00000123
-#define RNGDS_MITG_DIS                 BIT(0)
+#define RNGDS_MITG_DIS                 BIT(0)  /* SRBDS support */
+#define RTM_ALLOW                      BIT(1)  /* TSX development mode */
 
 #define MSR_IA32_SYSENTER_CS           0x00000174
 #define MSR_IA32_SYSENTER_ESP          0x00000175
 #define MSR_AMD64_SEV                  0xc0010131
 #define MSR_AMD64_SEV_ENABLED_BIT      0
 #define MSR_AMD64_SEV_ES_ENABLED_BIT   1
+#define MSR_AMD64_SEV_SNP_ENABLED_BIT  2
 #define MSR_AMD64_SEV_ENABLED          BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
 #define MSR_AMD64_SEV_ES_ENABLED       BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
+#define MSR_AMD64_SEV_SNP_ENABLED      BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
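
The new SNP bit sits alongside the SEV and SEV-ES bits in MSR_AMD64_SEV, so a guest can distinguish the three flavors with a single MSR read. A hedged sketch of such a check (the helper below is illustrative, not part of this diff):

static bool example_snp_active(void)
{
	u64 status;

	/* MSR_AMD64_SEV reports SEV, SEV-ES and SEV-SNP enablement. */
	rdmsrl(MSR_AMD64_SEV, status);
	return status & MSR_AMD64_SEV_SNP_ENABLED;
}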
 
 #define MSR_AMD64_VIRT_SPEC_CTRL       0xc001011f
 
index d42e6c6b47b1e4791e5da4fac371e41027f77a51..65ec1965cd2810323ab71a8d5cb79851845237c4 100644 (file)
 #include <asm/errno.h>
 #include <asm/cpumask.h>
 #include <uapi/asm/msr.h>
-
-struct msr {
-       union {
-               struct {
-                       u32 l;
-                       u32 h;
-               };
-               u64 q;
-       };
-};
+#include <asm/shared/msr.h>
 
 struct msr_info {
        u32 msr_no;
index 1cb9c17a4cb4b1fba49646749ee2a6400ab6fb93..5c5f1e56c4048db1a725b450e6f700b484d150d2 100644 (file)
@@ -47,6 +47,7 @@ struct nmiaction {
 #define register_nmi_handler(t, fn, fg, n, init...)    \
 ({                                                     \
        static struct nmiaction init fn##_na = {        \
+               .list = LIST_HEAD_INIT(fn##_na.list),   \
                .handler = (fn),                        \
                .name = (n),                            \
                .flags = (fg),                          \
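
Pre-initializing .list with LIST_HEAD_INIT keeps the embedded list_head valid even if the handler is unregistered before it was ever registered. Usage is unchanged; a hypothetical registration, for illustration only:

static int example_nmi(unsigned int cmd, struct pt_regs *regs)
{
	return NMI_DONE;
}

static void __init example_register(void)
{
	register_nmi_handler(NMI_UNKNOWN, example_nmi, 0, "example");
}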
index a0627dfae5412a0b01fb57804fa2e6db454a7134..1307cd689d2a26c893b92c20038f4e24788d2cc4 100644 (file)
@@ -93,6 +93,15 @@ struct irq_routing_table {
        struct irq_info slots[];
 } __attribute__((packed));
 
+struct irt_routing_table {
+       u32 signature;                  /* IRT_SIGNATURE should be here */
+       u8 size;                        /* Number of entries provided */
+       u8 used;                        /* Number of entries actually used */
+       u16 exclusive_irqs;             /* IRQs devoted exclusively to
+                                          PCI usage */
+       struct irq_info slots[];
+} __attribute__((packed));
+
 extern unsigned int pcibios_irq_mask;
 
 extern raw_spinlock_t pci_config_lock;
index a3c33b79fb8659212f637055658dad544ecbdbcb..13c0d63ed55e428226ecd004dd078ed9d412eb55 100644 (file)
@@ -38,9 +38,9 @@
 #define arch_raw_cpu_ptr(ptr)                          \
 ({                                                     \
        unsigned long tcp_ptr__;                        \
-       asm volatile("add " __percpu_arg(1) ", %0"      \
-                    : "=r" (tcp_ptr__)                 \
-                    : "m" (this_cpu_off), "0" (ptr));  \
+       asm ("add " __percpu_arg(1) ", %0"              \
+            : "=r" (tcp_ptr__)                         \
+            : "m" (this_cpu_off), "0" (ptr));          \
        (typeof(*(ptr)) __kernel __force *)tcp_ptr__;   \
 })
 #else
index 58d9e4b1fa0add445ef39ff4032fc4ae5bde6a5c..b06e4c573adddad0d734aeb72cff9afc1744285b 100644 (file)
@@ -241,6 +241,11 @@ struct x86_pmu_capability {
 #define INTEL_PMC_IDX_FIXED_SLOTS      (INTEL_PMC_IDX_FIXED + 3)
 #define INTEL_PMC_MSK_FIXED_SLOTS      (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
 
+static inline bool use_fixed_pseudo_encoding(u64 code)
+{
+       return !(code & 0xff);
+}
+
 /*
  * We model BTS tracing as another fixed-mode PMC.
  *
index 40497a9020c6eb7f42eadc498cdd3b70325e219a..407084d9fd99a2eee1fa70504dcd6cc44196b12b 100644 (file)
@@ -559,10 +559,6 @@ static inline void update_page_count(int level, unsigned long pages) { }
 extern pte_t *lookup_address(unsigned long address, unsigned int *level);
 extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
                                    unsigned int *level);
-
-struct mm_struct;
-extern pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
-                                  unsigned int *level);
 extern pmd_t *lookup_pmd_address(unsigned long address);
 extern phys_addr_t slow_virt_to_phys(void *__address);
 extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
index 1d5f14aff5f6fd975143874cf1010b32c999ee11..2e6c04d8a45b487868b87c1e75c047cf7b509020 100644 (file)
@@ -41,9 +41,6 @@ static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
        return __arch_override_mprotect_pkey(vma, prot, pkey);
 }
 
-extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
-               unsigned long init_val);
-
 #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3)
 
 #define mm_pkey_allocation_map(mm)     (mm->context.pkey_allocation_map)
@@ -118,11 +115,6 @@ int mm_pkey_free(struct mm_struct *mm, int pkey)
        return 0;
 }
 
-extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
-               unsigned long init_val);
-extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
-               unsigned long init_val);
-
 static inline int vma_pkey(struct vm_area_struct *vma)
 {
        unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
index feed36d44d0440f1739f1f63d4c7826331940132..12ef86b19910d3b80050aa3553f1ae1ee2e19672 100644 (file)
@@ -13,6 +13,8 @@ void syscall_init(void);
 #ifdef CONFIG_X86_64
 void entry_SYSCALL_64(void);
 void entry_SYSCALL_64_safe_stack(void);
+void entry_SYSRETQ_unsafe_stack(void);
+void entry_SYSRETQ_end(void);
 long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
 #endif
 
@@ -28,6 +30,8 @@ void entry_SYSENTER_compat(void);
 void __end_entry_SYSENTER_compat(void);
 void entry_SYSCALL_compat(void);
 void entry_SYSCALL_compat_safe_stack(void);
+void entry_SYSRETL_compat_unsafe_stack(void);
+void entry_SYSRETL_compat_end(void);
 void entry_INT80_compat(void);
 #ifdef CONFIG_XEN_PV
 void xen_entry_INT80_compat(void);
@@ -35,11 +39,9 @@ void xen_entry_INT80_compat(void);
 #endif
 
 void x86_configure_nx(void);
-void x86_report_nx(void);
 
 extern int reboot_force;
 
-long do_arch_prctl_common(struct task_struct *task, int option,
-                         unsigned long arg2);
+long do_arch_prctl_common(int option, unsigned long arg2);
 
 #endif /* _ASM_X86_PROTO_H */
index 4357e0f2cd5f702aacc7ec0a21ed2047bb5123cb..f4db78b09c8f0be1e0a904394d48f0a45b246cc8 100644 (file)
@@ -186,9 +186,13 @@ static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs)
        bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
                    regs->ip <  (unsigned long)entry_SYSCALL_64_safe_stack);
 
+       ret = ret || (regs->ip >= (unsigned long)entry_SYSRETQ_unsafe_stack &&
+                     regs->ip <  (unsigned long)entry_SYSRETQ_end);
 #ifdef CONFIG_IA32_EMULATION
        ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
                      regs->ip <  (unsigned long)entry_SYSCALL_compat_safe_stack);
+       ret = ret || (regs->ip >= (unsigned long)entry_SYSRETL_compat_unsafe_stack &&
+                     regs->ip <  (unsigned long)entry_SYSRETL_compat_end);
 #endif
 
        return ret;
index 331474b150f16964e1ba8788e6a623d0214aee62..fd6f6e5b755a7823c50ef4f6e2fe4daaee09caf5 100644 (file)
@@ -25,6 +25,7 @@ struct real_mode_header {
        u32     sev_es_trampoline_start;
 #endif
 #ifdef CONFIG_X86_64
+       u32     trampoline_start64;
        u32     trampoline_pgd;
 #endif
        /* ACPI S3 wakeup */
index 656ed6531d035d20d9aed5aaaddc79a32b065619..2e7890dd58a47714fbba4bbddb68e05605835ee5 100644 (file)
@@ -350,18 +350,6 @@ static inline void __loadsegment_fs(unsigned short value)
 #define savesegment(seg, value)                                \
        asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
 
-/*
- * x86-32 user GS accessors.  This is ugly and could do with some cleaning up.
- */
-#ifdef CONFIG_X86_32
-# define get_user_gs(regs)             (u16)({ unsigned long v; savesegment(gs, v); v; })
-# define set_user_gs(regs, v)          loadsegment(gs, (unsigned long)(v))
-# define task_user_gs(tsk)             ((tsk)->thread.gs)
-# define lazy_save_gs(v)               savesegment(gs, (v))
-# define lazy_load_gs(v)               loadsegment(gs, (v))
-# define load_gs_index(v)              loadsegment(gs, (v))
-#endif /* X86_32 */
-
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 
index 896e48d45828cba79d15d5d9a98ea55fc62f9bbe..7590ac2570b964a80ff0e78f8044a1046626cec7 100644 (file)
@@ -50,7 +50,6 @@ extern unsigned long saved_video_mode;
 extern void reserve_standard_io_resources(void);
 extern void i386_reserve_resources(void);
 extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
-extern unsigned long __startup_secondary_64(void);
 extern void startup_64_setup_env(unsigned long physbase);
 extern void early_setup_idt(void);
 extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
@@ -109,27 +108,19 @@ extern unsigned long _brk_end;
 void *extend_brk(size_t size, size_t align);
 
 /*
- * Reserve space in the brk section.  The name must be unique within
- * the file, and somewhat descriptive.  The size is in bytes.  Must be
- * used at file scope.
+ * Reserve space in the brk section.  The name must be unique within the file,
+ * and somewhat descriptive.  The size is in bytes.
  *
- * (This uses a temp function to wrap the asm so we can pass it the
- * size parameter; otherwise we wouldn't be able to.  We can't use a
- * "section" attribute on a normal variable because it always ends up
- * being @progbits, which ends up allocating space in the vmlinux
- * executable.)
+ * The allocation is done using inline asm (rather than using a section
+ * attribute on a normal variable) in order to allow the use of @nobits, so
+ * that it doesn't take up any space in the vmlinux file.
  */
-#define RESERVE_BRK(name,sz)                                           \
-       static void __section(".discard.text") __noendbr __used notrace \
-       __brk_reservation_fn_##name##__(void) {                         \
-               asm volatile (                                          \
-                       ".pushsection .brk_reservation,\"aw\",@nobits;" \
-                       ".brk." #name ":"                               \
-                       " 1:.skip %c0;"                                 \
-                       " .size .brk." #name ", . - 1b;"                \
-                       " .popsection"                                  \
-                       : : "i" (sz));                                  \
-       }
+#define RESERVE_BRK(name, size)                                                \
+       asm(".pushsection .brk_reservation,\"aw\",@nobits\n\t"          \
+           ".brk." #name ":\n\t"                                       \
+           ".skip " __stringify(size) "\n\t"                           \
+           ".size .brk." #name ", " __stringify(size) "\n\t"           \
+           ".popsection\n\t")
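
Since RESERVE_BRK() is now plain file-scope asm rather than a dummy function wrapping the asm, it reads like a declaration. A hypothetical reservation (name and size are illustrative):

/* Reserve 64K in the .brk section; handed out later via extend_brk(). */
RESERVE_BRK(example_early_pgt, 65536);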
 
 extern void probe_roms(void);
 #ifdef __i386__
index 1b2fd32b42fe4906d19b912486f5f7ba32e84ff4..b8357d6ecd47ef6766a0fe9fe5161f6447228c16 100644 (file)
 #define GHCB_MSR_AP_RESET_HOLD_REQ     0x006
 #define GHCB_MSR_AP_RESET_HOLD_RESP    0x007
 
+/* GHCB GPA Register */
+#define GHCB_MSR_REG_GPA_REQ           0x012
+#define GHCB_MSR_REG_GPA_REQ_VAL(v)                    \
+       /* GHCBData[63:12] */                           \
+       (((u64)((v) & GENMASK_ULL(51, 0)) << 12) |      \
+       /* GHCBData[11:0] */                            \
+       GHCB_MSR_REG_GPA_REQ)
+
+#define GHCB_MSR_REG_GPA_RESP          0x013
+#define GHCB_MSR_REG_GPA_RESP_VAL(v)                   \
+       /* GHCBData[63:12] */                           \
+       (((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
+
+/*
+ * SNP Page State Change Operation
+ *
+ * GHCBData[55:52] - Page operation:
+ *   0x0001    Page assignment, Private
+ *   0x0002    Page assignment, Shared
+ */
+enum psc_op {
+       SNP_PAGE_STATE_PRIVATE = 1,
+       SNP_PAGE_STATE_SHARED,
+};
+
+#define GHCB_MSR_PSC_REQ               0x014
+#define GHCB_MSR_PSC_REQ_GFN(gfn, op)                  \
+       /* GHCBData[55:52] */                           \
+       (((u64)((op) & 0xf) << 52) |                    \
+       /* GHCBData[51:12] */                           \
+       ((u64)((gfn) & GENMASK_ULL(39, 0)) << 12) |     \
+       /* GHCBData[11:0] */                            \
+       GHCB_MSR_PSC_REQ)
+
+#define GHCB_MSR_PSC_RESP              0x015
+#define GHCB_MSR_PSC_RESP_VAL(val)                     \
+       /* GHCBData[63:32] */                           \
+       (((u64)(val) & GENMASK_ULL(63, 32)) >> 32)
+
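
These macros pack a Page State Change request into the GHCB MSR protocol: the operation in GHCBData[55:52], the GFN in GHCBData[51:12] and the request code in GHCBData[11:0]. A hedged sketch of one request/response round trip (sev_es_wr_ghcb_msr(), sev_es_rd_ghcb_msr() and sev_es_terminate() are assumed helpers, not defined in this hunk):

static void example_psc_shared(unsigned long paddr)
{
	u64 val = GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT,
				       SNP_PAGE_STATE_SHARED);

	sev_es_wr_ghcb_msr(val);
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();

	/* A wrong response code or a non-zero error value is fatal. */
	if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP ||
	    GHCB_MSR_PSC_RESP_VAL(val))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
}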
 /* GHCB Hypervisor Feature Request/Response */
 #define GHCB_MSR_HV_FT_REQ             0x080
 #define GHCB_MSR_HV_FT_RESP            0x081
+#define GHCB_MSR_HV_FT_RESP_VAL(v)                     \
+       /* GHCBData[63:12] */                           \
+       (((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
+
+#define GHCB_HV_FT_SNP                 BIT_ULL(0)
+#define GHCB_HV_FT_SNP_AP_CREATION     BIT_ULL(1)
+
+/* SNP Page State Change NAE event */
+#define VMGEXIT_PSC_MAX_ENTRY          253
+
+struct psc_hdr {
+       u16 cur_entry;
+       u16 end_entry;
+       u32 reserved;
+} __packed;
+
+struct psc_entry {
+       u64     cur_page        : 12,
+               gfn             : 40,
+               operation       : 4,
+               pagesize        : 1,
+               reserved        : 7;
+} __packed;
+
+struct snp_psc_desc {
+       struct psc_hdr hdr;
+       struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY];
+} __packed;
+
+/* Guest message request error code */
+#define SNP_GUEST_REQ_INVALID_LEN      BIT_ULL(32)
 
 #define GHCB_MSR_TERM_REQ              0x100
 #define GHCB_MSR_TERM_REASON_SET_POS   12
         /* GHCBData[23:16] */                          \
        ((((u64)reason_val) & 0xff) << 16))
 
+/* Error codes from reason set 0 */
+#define SEV_TERM_SET_GEN               0
 #define GHCB_SEV_ES_GEN_REQ            0
 #define GHCB_SEV_ES_PROT_UNSUPPORTED   1
+#define GHCB_SNP_UNSUPPORTED           2
+
+/* Linux-specific reason codes (used with reason set 1) */
+#define SEV_TERM_SET_LINUX             1
+#define GHCB_TERM_REGISTER             0       /* GHCB GPA registration failure */
+#define GHCB_TERM_PSC                  1       /* Page State Change failure */
+#define GHCB_TERM_PVALIDATE            2       /* Pvalidate failure */
+#define GHCB_TERM_NOT_VMPL0            3       /* SNP guest is not running at VMPL-0 */
+#define GHCB_TERM_CPUID                        4       /* CPUID-validation failure */
+#define GHCB_TERM_CPUID_HV             5       /* CPUID failure during hypervisor fallback */
 
 #define GHCB_RESP_CODE(v)              ((v) & GHCB_MSR_INFO_MASK)
 
index ec060c43358972671d34d445e3b6206e385da1ff..19514524f0f8bac6ae0add43b0ba4f054126a840 100644 (file)
 #include <linux/types.h>
 #include <asm/insn.h>
 #include <asm/sev-common.h>
+#include <asm/bootparam.h>
 
-#define GHCB_PROTO_OUR         0x0001UL
-#define GHCB_PROTOCOL_MAX      1ULL
+#define GHCB_PROTOCOL_MIN      1ULL
+#define GHCB_PROTOCOL_MAX      2ULL
 #define GHCB_DEFAULT_USAGE     0ULL
 
 #define        VMGEXIT()                       { asm volatile("rep; vmmcall\n\r"); }
@@ -42,6 +43,24 @@ struct es_em_ctxt {
        struct es_fault_info fi;
 };
 
+/*
+ * AMD SEV Confidential computing blob structure. The structure is
+ * defined in the OVMF UEFI firmware header:
+ * https://github.com/tianocore/edk2/blob/master/OvmfPkg/Include/Guid/ConfidentialComputingSevSnpBlob.h
+ */
+#define CC_BLOB_SEV_HDR_MAGIC  0x45444d41
+struct cc_blob_sev_info {
+       u32 magic;
+       u16 version;
+       u16 reserved;
+       u64 secrets_phys;
+       u32 secrets_len;
+       u32 rsvd1;
+       u64 cpuid_phys;
+       u32 cpuid_len;
+       u32 rsvd2;
+} __packed;
+
 void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code);
 
 static inline u64 lower_bits(u64 val, unsigned int bits)
@@ -60,6 +79,61 @@ extern void vc_no_ghcb(void);
 extern void vc_boot_ghcb(void);
 extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
 
+/* Software defined (when rFlags.CF = 1) */
+#define PVALIDATE_FAIL_NOUPDATE                255
+
+/* RMP page size */
+#define RMP_PG_SIZE_4K                 0
+
+#define RMPADJUST_VMSA_PAGE_BIT                BIT(16)
+
+/* SNP Guest message request */
+struct snp_req_data {
+       unsigned long req_gpa;
+       unsigned long resp_gpa;
+       unsigned long data_gpa;
+       unsigned int data_npages;
+};
+
+struct sev_guest_platform_data {
+       u64 secrets_gpa;
+};
+
+/*
+ * The secrets page contains a 96-byte field reserved for use by the guest
+ * OS, which uses the area to save the message sequence number for each
+ * VMPCK.
+ *
+ * See the GHCB spec section on the secrets page layout for the format of
+ * this area.
+ */
+struct secrets_os_area {
+       u32 msg_seqno_0;
+       u32 msg_seqno_1;
+       u32 msg_seqno_2;
+       u32 msg_seqno_3;
+       u64 ap_jump_table_pa;
+       u8 rsvd[40];
+       u8 guest_usage[32];
+} __packed;
+
+#define VMPCK_KEY_LEN          32
+
+/* See the SNP spec version 0.9 for secrets page format */
+struct snp_secrets_page_layout {
+       u32 version;
+       u32 imien       : 1,
+           rsvd1       : 31;
+       u32 fms;
+       u32 rsvd2;
+       u8 gosvw[16];
+       u8 vmpck0[VMPCK_KEY_LEN];
+       u8 vmpck1[VMPCK_KEY_LEN];
+       u8 vmpck2[VMPCK_KEY_LEN];
+       u8 vmpck3[VMPCK_KEY_LEN];
+       struct secrets_os_area os_area;
+       u8 rsvd3[3840];
+} __packed;
+
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 extern struct static_key_false sev_es_enable_key;
 extern void __sev_es_ist_enter(struct pt_regs *regs);
@@ -87,12 +161,71 @@ extern enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
                                          struct es_em_ctxt *ctxt,
                                          u64 exit_code, u64 exit_info_1,
                                          u64 exit_info_2);
+static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
+{
+       int rc;
+
+       /* Raw bytes: the "rmpadjust" mnemonic needs binutils 2.36 or newer */
+       asm volatile(".byte 0xF3,0x0F,0x01,0xFE\n\t"
+                    : "=a"(rc)
+                    : "a"(vaddr), "c"(rmp_psize), "d"(attrs)
+                    : "memory", "cc");
+
+       return rc;
+}
+static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
+{
+       bool no_rmpupdate;
+       int rc;
+
+       /* Raw bytes: the "pvalidate" mnemonic needs binutils 2.36 or newer */
+       asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t"
+                    CC_SET(c)
+                    : CC_OUT(c) (no_rmpupdate), "=a"(rc)
+                    : "a"(vaddr), "c"(rmp_psize), "d"(validate)
+                    : "memory", "cc");
+
+       if (no_rmpupdate)
+               return PVALIDATE_FAIL_NOUPDATE;
+
+       return rc;
+}
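
Ordering matters when these helpers are used together: a page must be made private in the RMP (via a Page State Change) before pvalidate() validates it, and must be invalidated before being made shared. A hedged sketch of the validate step (sev_es_terminate() is an assumed helper):

static void example_validate_4k(unsigned long vaddr)
{
	int rc = pvalidate(vaddr, RMP_PG_SIZE_4K, true);

	/* Failure, or a no-op because the page was already validated. */
	if (rc)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
}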
+void setup_ghcb(void);
+void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+                                        unsigned int npages);
+void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+                                       unsigned int npages);
+void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
+void snp_set_memory_shared(unsigned long vaddr, unsigned int npages);
+void snp_set_memory_private(unsigned long vaddr, unsigned int npages);
+void snp_set_wakeup_secondary_cpu(void);
+bool snp_init(struct boot_params *bp);
+void snp_abort(void);
+int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err);
 #else
 static inline void sev_es_ist_enter(struct pt_regs *regs) { }
 static inline void sev_es_ist_exit(void) { }
 static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
 static inline void sev_es_nmi_complete(void) { }
 static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
+static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
+static inline void setup_ghcb(void) { }
+static inline void __init
+early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }
+static inline void __init
+early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }
+static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
+static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) { }
+static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { }
+static inline void snp_set_wakeup_secondary_cpu(void) { }
+static inline bool snp_init(struct boot_params *bp) { return false; }
+static inline void snp_abort(void) { }
+static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input,
+                                         unsigned long *fw_err)
+{
+       return -ENOTTY;
+}
 #endif
 
 #endif
diff --git a/arch/x86/include/asm/shared/io.h b/arch/x86/include/asm/shared/io.h
new file mode 100644 (file)
index 0000000..c0ef921
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_IO_H
+#define _ASM_X86_SHARED_IO_H
+
+#include <linux/types.h>
+
+#define BUILDIO(bwl, bw, type)                                         \
+static inline void __out##bwl(type value, u16 port)                    \
+{                                                                      \
+       asm volatile("out" #bwl " %" #bw "0, %w1"                       \
+                    : : "a"(value), "Nd"(port));                       \
+}                                                                      \
+                                                                       \
+static inline type __in##bwl(u16 port)                                 \
+{                                                                      \
+       type value;                                                     \
+       asm volatile("in" #bwl " %w1, %" #bw "0"                        \
+                    : "=a"(value) : "Nd"(port));                       \
+       return value;                                                   \
+}
+
+BUILDIO(b, b, u8)
+BUILDIO(w, w, u16)
+BUILDIO(l,  , u32)
+#undef BUILDIO
+
+#define inb __inb
+#define inw __inw
+#define inl __inl
+#define outb __outb
+#define outw __outw
+#define outl __outl
+
+#endif
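
The shared header lets the boot/decompressor code and the kernel proper pull in identical port-I/O helpers. A trivial, illustrative use (the port numbers are conventional examples):

static inline u8 example_port_io(void)
{
	outb(0x42, 0x80);	/* write a byte to the POST/delay port */
	return inb(0x64);	/* read the keyboard-controller status port */
}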
diff --git a/arch/x86/include/asm/shared/msr.h b/arch/x86/include/asm/shared/msr.h
new file mode 100644 (file)
index 0000000..1e6ec10
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_MSR_H
+#define _ASM_X86_SHARED_MSR_H
+
+struct msr {
+       union {
+               struct {
+                       u32 l;
+                       u32 h;
+               };
+               u64 q;
+       };
+};
+
+#endif /* _ASM_X86_SHARED_MSR_H */
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
new file mode 100644 (file)
index 0000000..e53f262
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_TDX_H
+#define _ASM_X86_SHARED_TDX_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+#define TDX_HYPERCALL_STANDARD  0
+
+#define TDX_HCALL_HAS_OUTPUT   BIT(0)
+#define TDX_HCALL_ISSUE_STI    BIT(1)
+
+#define TDX_CPUID_LEAF_ID      0x21
+#define TDX_IDENT              "IntelTDX    "
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Used in __tdx_hypercall() to pass down and get back the register values
+ * of the TDCALL instruction when requesting services from the VMM.
+ *
+ * This is a software-only structure and not part of the TDX module/VMM ABI.
+ */
+struct tdx_hypercall_args {
+       u64 r10;
+       u64 r11;
+       u64 r12;
+       u64 r13;
+       u64 r14;
+       u64 r15;
+};
+
+/* Used to request services from the VMM */
+u64 __tdx_hypercall(struct tdx_hypercall_args *args, unsigned long flags);
+
+/* Called from __tdx_hypercall() for unrecoverable failure */
+void __tdx_hypercall_failed(void);
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_X86_SHARED_TDX_H */
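
A TDVMCALL is issued by loading struct tdx_hypercall_args into r10-r15 and calling __tdx_hypercall(). A sketch of a standard (non-vendor-specific) hypercall, mirroring how such a helper is typically written; the wrapper itself is illustrative:

static u64 example_tdvmcall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = fn,		/* sub-function requested from the VMM */
		.r12 = r12,
		.r13 = r13,
		.r14 = r14,
		.r15 = r15,
	};

	/* Returns the VMM's result; args holds the outputs when requested. */
	return __tdx_hypercall(&args, 0);
}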
index d17b39893b7973073f21814e41d841aed5fec11a..bab490379c65f6b29b4ffa110c627790bd126de3 100644 (file)
 
 #ifdef __ASSEMBLY__
 
-#ifdef CONFIG_X86_SMAP
-
 #define ASM_CLAC \
        ALTERNATIVE "", __ASM_CLAC, X86_FEATURE_SMAP
 
 #define ASM_STAC \
        ALTERNATIVE "", __ASM_STAC, X86_FEATURE_SMAP
 
-#else /* CONFIG_X86_SMAP */
-
-#define ASM_CLAC
-#define ASM_STAC
-
-#endif /* CONFIG_X86_SMAP */
-
 #else /* __ASSEMBLY__ */
 
-#ifdef CONFIG_X86_SMAP
-
 static __always_inline void clac(void)
 {
        /* Note: a barrier is implicit in alternative() */
@@ -76,19 +65,6 @@ static __always_inline void smap_restore(unsigned long flags)
 #define ASM_STAC \
        ALTERNATIVE("", __ASM_STAC, X86_FEATURE_SMAP)
 
-#else /* CONFIG_X86_SMAP */
-
-static inline void clac(void) { }
-static inline void stac(void) { }
-
-static inline unsigned long smap_save(void) { return 0; }
-static inline void smap_restore(unsigned long flags) { }
-
-#define ASM_CLAC
-#define ASM_STAC
-
-#endif /* CONFIG_X86_SMAP */
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_SMAP_H */
index 68c257a3de0d393b9ba904526fb485bb5f12ab38..45b18eb94fa1a854933f78ed6df4371d4855bb4d 100644 (file)
@@ -184,14 +184,15 @@ static inline void wbinvd(void)
        native_wbinvd();
 }
 
-#ifdef CONFIG_X86_64
 
 static inline void load_gs_index(unsigned int selector)
 {
+#ifdef CONFIG_X86_64
        native_load_gs_index(selector);
-}
-
+#else
+       loadsegment(gs, selector);
 #endif
+}
 
 #endif /* CONFIG_PARAVIRT_XXL */
 
index ed4f8bb6c2d9c293f2ca39c9917669afcef83f77..2d8dacd026437a2968c29226f7f9aeaefa96f45e 100644 (file)
@@ -26,6 +26,7 @@
            ".align 4                                           \n"     \
            ".globl " STATIC_CALL_TRAMP_STR(name) "             \n"     \
            STATIC_CALL_TRAMP_STR(name) ":                      \n"     \
+           ANNOTATE_NOENDBR                                            \
            insns "                                             \n"     \
            ".byte 0x53, 0x43, 0x54                             \n"     \
            ".type " STATIC_CALL_TRAMP_STR(name) ", @function   \n"     \
@@ -38,6 +39,8 @@
 #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)                       \
        __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
 
+#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)                       \
+       ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)
 
 #define ARCH_ADD_TRAMP_KEY(name)                                       \
        asm(".pushsection .static_call_tramp_key, \"a\"         \n"     \
index 7b132d0312ebfa914507a2d365e0bc59ce952d2b..a800abb1a99255b7ece0f95f2adc9953303da532 100644 (file)
@@ -19,7 +19,6 @@ struct saved_context {
        u16 gs;
        unsigned long cr0, cr2, cr3, cr4;
        u64 misc_enable;
-       bool misc_enable_saved;
        struct saved_msrs saved_msrs;
        struct desc_ptr gdt_desc;
        struct desc_ptr idt;
@@ -28,6 +27,7 @@ struct saved_context {
        unsigned long tr;
        unsigned long safety;
        unsigned long return_address;
+       bool misc_enable_saved;
 } __attribute__((packed));
 
 /* routines for saving/restoring kernel state */
index 35bb35d28733e52b7fe61886558bde878fbf01d3..54df06687d8348acbce916d589c53ddf73198ea2 100644 (file)
  * Image of the saved processor state, used by the low level ACPI suspend to
  * RAM code and by the low level hibernation code.
  *
- * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that
- * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c,
- * still work as required.
+ * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S
+ * and make sure that __save/__restore_processor_state(), defined in
+ * arch/x86/power/cpu.c, still work as required.
+ *
+ * Because the structure is packed, make sure to avoid unaligned members. This
+ * matters for optimisation purposes, but also because tools like kmemleak
+ * only search for pointers that are aligned.
  */
 struct saved_context {
        struct pt_regs regs;
@@ -36,7 +40,6 @@ struct saved_context {
 
        unsigned long cr0, cr2, cr3, cr4;
        u64 misc_enable;
-       bool misc_enable_saved;
        struct saved_msrs saved_msrs;
        unsigned long efer;
        u16 gdt_pad; /* Unused */
@@ -48,6 +51,7 @@ struct saved_context {
        unsigned long tr;
        unsigned long safety;
        unsigned long return_address;
+       bool misc_enable_saved;
 } __attribute__((packed));
 
 #define loaddebug(thread,register) \
index f70a5108d46421d7d5887526790eead68b8fa921..ec623e7da33d5d46ead369fdfc1a31381676cfb3 100644 (file)
@@ -271,6 +271,7 @@ struct vmcb_seg {
        u64 base;
 } __packed;
 
+/* Save area definition for legacy and SEV-MEM guests */
 struct vmcb_save_area {
        struct vmcb_seg es;
        struct vmcb_seg cs;
@@ -282,12 +283,12 @@ struct vmcb_save_area {
        struct vmcb_seg ldtr;
        struct vmcb_seg idtr;
        struct vmcb_seg tr;
-       u8 reserved_1[43];
+       u8 reserved_1[42];
+       u8 vmpl;
        u8 cpl;
        u8 reserved_2[4];
        u64 efer;
-       u8 reserved_3[104];
-       u64 xss;                /* Valid for SEV-ES only */
+       u8 reserved_3[112];
        u64 cr4;
        u64 cr3;
        u64 cr0;
@@ -297,7 +298,9 @@ struct vmcb_save_area {
        u64 rip;
        u8 reserved_4[88];
        u64 rsp;
-       u8 reserved_5[24];
+       u64 s_cet;
+       u64 ssp;
+       u64 isst_addr;
        u64 rax;
        u64 star;
        u64 lstar;
@@ -308,29 +311,145 @@ struct vmcb_save_area {
        u64 sysenter_esp;
        u64 sysenter_eip;
        u64 cr2;
-       u8 reserved_6[32];
+       u8 reserved_5[32];
        u64 g_pat;
        u64 dbgctl;
        u64 br_from;
        u64 br_to;
        u64 last_excp_from;
        u64 last_excp_to;
-
-       /*
-        * The following part of the save area is valid only for
-        * SEV-ES guests when referenced through the GHCB or for
-        * saving to the host save area.
-        */
-       u8 reserved_7[72];
+       u8 reserved_6[72];
        u32 spec_ctrl;          /* Guest version of SPEC_CTRL at 0x2E0 */
-       u8 reserved_7b[4];
+} __packed;
+
+/* Save area definition for SEV-ES and SEV-SNP guests */
+struct sev_es_save_area {
+       struct vmcb_seg es;
+       struct vmcb_seg cs;
+       struct vmcb_seg ss;
+       struct vmcb_seg ds;
+       struct vmcb_seg fs;
+       struct vmcb_seg gs;
+       struct vmcb_seg gdtr;
+       struct vmcb_seg ldtr;
+       struct vmcb_seg idtr;
+       struct vmcb_seg tr;
+       u64 vmpl0_ssp;
+       u64 vmpl1_ssp;
+       u64 vmpl2_ssp;
+       u64 vmpl3_ssp;
+       u64 u_cet;
+       u8 reserved_1[2];
+       u8 vmpl;
+       u8 cpl;
+       u8 reserved_2[4];
+       u64 efer;
+       u8 reserved_3[104];
+       u64 xss;
+       u64 cr4;
+       u64 cr3;
+       u64 cr0;
+       u64 dr7;
+       u64 dr6;
+       u64 rflags;
+       u64 rip;
+       u64 dr0;
+       u64 dr1;
+       u64 dr2;
+       u64 dr3;
+       u64 dr0_addr_mask;
+       u64 dr1_addr_mask;
+       u64 dr2_addr_mask;
+       u64 dr3_addr_mask;
+       u8 reserved_4[24];
+       u64 rsp;
+       u64 s_cet;
+       u64 ssp;
+       u64 isst_addr;
+       u64 rax;
+       u64 star;
+       u64 lstar;
+       u64 cstar;
+       u64 sfmask;
+       u64 kernel_gs_base;
+       u64 sysenter_cs;
+       u64 sysenter_esp;
+       u64 sysenter_eip;
+       u64 cr2;
+       u8 reserved_5[32];
+       u64 g_pat;
+       u64 dbgctl;
+       u64 br_from;
+       u64 br_to;
+       u64 last_excp_from;
+       u64 last_excp_to;
+       u8 reserved_7[80];
        u32 pkru;
-       u8 reserved_7a[20];
-       u64 reserved_8;         /* rax already available at 0x01f8 */
+       u8 reserved_8[20];
+       u64 reserved_9;         /* rax already available at 0x01f8 */
+       u64 rcx;
+       u64 rdx;
+       u64 rbx;
+       u64 reserved_10;        /* rsp already available at 0x01d8 */
+       u64 rbp;
+       u64 rsi;
+       u64 rdi;
+       u64 r8;
+       u64 r9;
+       u64 r10;
+       u64 r11;
+       u64 r12;
+       u64 r13;
+       u64 r14;
+       u64 r15;
+       u8 reserved_11[16];
+       u64 guest_exit_info_1;
+       u64 guest_exit_info_2;
+       u64 guest_exit_int_info;
+       u64 guest_nrip;
+       u64 sev_features;
+       u64 vintr_ctrl;
+       u64 guest_exit_code;
+       u64 virtual_tom;
+       u64 tlb_id;
+       u64 pcpu_id;
+       u64 event_inj;
+       u64 xcr0;
+       u8 reserved_12[16];
+
+       /* Floating point area */
+       u64 x87_dp;
+       u32 mxcsr;
+       u16 x87_ftw;
+       u16 x87_fsw;
+       u16 x87_fcw;
+       u16 x87_fop;
+       u16 x87_ds;
+       u16 x87_cs;
+       u64 x87_rip;
+       u8 fpreg_x87[80];
+       u8 fpreg_xmm[256];
+       u8 fpreg_ymm[256];
+} __packed;
+
+struct ghcb_save_area {
+       u8 reserved_1[203];
+       u8 cpl;
+       u8 reserved_2[116];
+       u64 xss;
+       u8 reserved_3[24];
+       u64 dr7;
+       u8 reserved_4[16];
+       u64 rip;
+       u8 reserved_5[88];
+       u64 rsp;
+       u8 reserved_6[24];
+       u64 rax;
+       u8 reserved_7[264];
        u64 rcx;
        u64 rdx;
        u64 rbx;
-       u64 reserved_9;         /* rsp already available at 0x01d8 */
+       u8 reserved_8[8];
        u64 rbp;
        u64 rsi;
        u64 rdi;
@@ -342,22 +461,24 @@ struct vmcb_save_area {
        u64 r13;
        u64 r14;
        u64 r15;
-       u8 reserved_10[16];
+       u8 reserved_9[16];
        u64 sw_exit_code;
        u64 sw_exit_info_1;
        u64 sw_exit_info_2;
        u64 sw_scratch;
-       u8 reserved_11[56];
+       u8 reserved_10[56];
        u64 xcr0;
        u8 valid_bitmap[16];
        u64 x87_state_gpa;
 } __packed;
 
+#define GHCB_SHARED_BUF_SIZE   2032
+
 struct ghcb {
-       struct vmcb_save_area save;
-       u8 reserved_save[2048 - sizeof(struct vmcb_save_area)];
+       struct ghcb_save_area save;
+       u8 reserved_save[2048 - sizeof(struct ghcb_save_area)];
 
-       u8 shared_buffer[2032];
+       u8 shared_buffer[GHCB_SHARED_BUF_SIZE];
 
        u8 reserved_1[10];
        u16 protocol_version;   /* negotiated SEV-ES/GHCB protocol version */
@@ -365,13 +486,17 @@ struct ghcb {
 } __packed;
 
 
-#define EXPECTED_VMCB_SAVE_AREA_SIZE           1032
+#define EXPECTED_VMCB_SAVE_AREA_SIZE           740
+#define EXPECTED_GHCB_SAVE_AREA_SIZE           1032
+#define EXPECTED_SEV_ES_SAVE_AREA_SIZE         1648
 #define EXPECTED_VMCB_CONTROL_AREA_SIZE                1024
 #define EXPECTED_GHCB_SIZE                     PAGE_SIZE
 
 static inline void __unused_size_checks(void)
 {
        BUILD_BUG_ON(sizeof(struct vmcb_save_area)      != EXPECTED_VMCB_SAVE_AREA_SIZE);
+       BUILD_BUG_ON(sizeof(struct ghcb_save_area)      != EXPECTED_GHCB_SAVE_AREA_SIZE);
+       BUILD_BUG_ON(sizeof(struct sev_es_save_area)    != EXPECTED_SEV_ES_SAVE_AREA_SIZE);
        BUILD_BUG_ON(sizeof(struct vmcb_control_area)   != EXPECTED_VMCB_CONTROL_AREA_SIZE);
        BUILD_BUG_ON(sizeof(struct ghcb)                != EXPECTED_GHCB_SIZE);
 }
@@ -441,7 +566,7 @@ struct vmcb {
 /* GHCB Accessor functions */
 
 #define GHCB_BITMAP_IDX(field)                                                 \
-       (offsetof(struct vmcb_save_area, field) / sizeof(u64))
+       (offsetof(struct ghcb_save_area, field) / sizeof(u64))
 
 #define DEFINE_GHCB_ACCESSORS(field)                                           \
        static inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb)     \
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
new file mode 100644 (file)
index 0000000..020c81a
--- /dev/null
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021-2022 Intel Corporation */
+#ifndef _ASM_X86_TDX_H
+#define _ASM_X86_TDX_H
+
+#include <linux/init.h>
+#include <linux/bits.h>
+#include <asm/ptrace.h>
+#include <asm/shared/tdx.h>
+
+/*
+ * SW-defined error codes.
+ *
+ * Bits 47:40 == 0xFF indicate the Reserved status code class, which is
+ * never used by the TDX module.
+ */
+#define TDX_ERROR                      _BITUL(63)
+#define TDX_SW_ERROR                   (TDX_ERROR | GENMASK_ULL(47, 40))
+#define TDX_SEAMCALL_VMFAILINVALID     (TDX_SW_ERROR | _UL(0xFFFF0000))
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Used to gather the output register values of the TDCALL and SEAMCALL
+ * instructions when requesting services from the TDX module.
+ *
+ * This is a software-only structure and not part of the TDX module/VMM ABI.
+ */
+struct tdx_module_output {
+       u64 rcx;
+       u64 rdx;
+       u64 r8;
+       u64 r9;
+       u64 r10;
+       u64 r11;
+};
+
+/*
+ * Used by the #VE exception handler to gather the #VE exception
+ * info from the TDX module. This is a software-only structure
+ * and not part of the TDX module/VMM ABI.
+ */
+struct ve_info {
+       u64 exit_reason;
+       u64 exit_qual;
+       /* Guest Linear (virtual) Address */
+       u64 gla;
+       /* Guest Physical Address */
+       u64 gpa;
+       u32 instr_len;
+       u32 instr_info;
+};
+
+#ifdef CONFIG_INTEL_TDX_GUEST
+
+void __init tdx_early_init(void);
+
+/* Used to communicate with the TDX module */
+u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
+                     struct tdx_module_output *out);
+
+void tdx_get_ve_info(struct ve_info *ve);
+
+bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
+
+void tdx_safe_halt(void);
+
+bool tdx_early_handle_ve(struct pt_regs *regs);
+
+#else
+
+static inline void tdx_early_init(void) { };
+static inline void tdx_safe_halt(void) { };
+
+static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
+
+#endif /* CONFIG_INTEL_TDX_GUEST */
+
+#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_INTEL_TDX_GUEST)
+long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2,
+                      unsigned long p3, unsigned long p4);
+#else
+static inline long tdx_kvm_hypercall(unsigned int nr, unsigned long p1,
+                                    unsigned long p2, unsigned long p3,
+                                    unsigned long p4)
+{
+       return -ENODEV;
+}
+#endif /* CONFIG_INTEL_TDX_GUEST && CONFIG_KVM_GUEST */
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_X86_TDX_H */
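
Module calls take the same shape as hypercalls but target the TDX module via TDCALL, returning results in struct tdx_module_output. A hedged sketch of fetching #VE information (the leaf number 3 is TDG.VP.VEINFO.GET per the TDX module spec; the register-to-field mapping below follows that spec, not this diff):

#define EXAMPLE_TDX_GET_VEINFO	3

static int example_get_ve_info(struct ve_info *ve)
{
	struct tdx_module_output out;

	if (__tdx_module_call(EXAMPLE_TDX_GET_VEINFO, 0, 0, 0, 0, &out))
		return -EIO;

	/* Exit reason and qualification come back in rcx/rdx. */
	ve->exit_reason = out.rcx;
	ve->exit_qual   = out.rdx;
	ve->gla         = out.r8;
	ve->gpa         = out.r9;
	ve->instr_len   = lower_32_bits(out.r10);
	ve->instr_info  = upper_32_bits(out.r10);

	return 0;
}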
index 9619385bf7494b947330bb1203e30207300bb029..458c891a82736549469c2c09b38848f18634412a 100644 (file)
@@ -212,30 +212,19 @@ static inline long arch_scale_freq_capacity(int cpu)
 }
 #define arch_scale_freq_capacity arch_scale_freq_capacity
 
-extern void arch_scale_freq_tick(void);
-#define arch_scale_freq_tick arch_scale_freq_tick
-
 extern void arch_set_max_freq_ratio(bool turbo_disabled);
-void init_freq_invariance(bool secondary, bool cppc_ready);
+extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
 #else
-static inline void arch_set_max_freq_ratio(bool turbo_disabled)
-{
-}
-static inline void init_freq_invariance(bool secondary, bool cppc_ready)
-{
-}
+static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
+static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
 #endif
 
+extern void arch_scale_freq_tick(void);
+#define arch_scale_freq_tick arch_scale_freq_tick
+
 #ifdef CONFIG_ACPI_CPPC_LIB
 void init_freq_invariance_cppc(void);
 #define arch_init_invariance_cppc init_freq_invariance_cppc
-
-bool amd_set_max_freq_ratio(u64 *ratio);
-#else
-static inline bool amd_set_max_freq_ratio(u64 *ratio)
-{
-       return false;
-}
 #endif
 
 #endif /* _ASM_X86_TOPOLOGY_H */
index 35317c5c551d98eee0fd664ccfcfcfbb7e8cd92a..47ecfff2c83dade6d993461c1b9c7494a05ffb28 100644 (file)
@@ -13,7 +13,7 @@
 #ifdef CONFIG_X86_64
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
 asmlinkage __visible notrace
-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
+struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
 void __init trap_init(void);
 asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
 #endif
index b25d3f82c2f36a9a210863d0540f2adc5744238a..bea5cdcdf53252bf4fed4199ee2c7b11a48416ac 100644 (file)
@@ -10,6 +10,7 @@
 #define SETUP_EFI                      4
 #define SETUP_APPLE_PROPERTIES         5
 #define SETUP_JAILHOUSE                        6
+#define SETUP_CC_BLOB                  7
 
 #define SETUP_INDIRECT                 (1<<31)
 
@@ -187,7 +188,8 @@ struct boot_params {
        __u32 ext_ramdisk_image;                        /* 0x0c0 */
        __u32 ext_ramdisk_size;                         /* 0x0c4 */
        __u32 ext_cmd_line_ptr;                         /* 0x0c8 */
-       __u8  _pad4[116];                               /* 0x0cc */
+       __u8  _pad4[112];                               /* 0x0cc */
+       __u32 cc_blob_address;                          /* 0x13c */
        struct edid_info edid_info;                     /* 0x140 */
        struct efi_info efi_info;                       /* 0x1c0 */
        __u32 alt_mem_k;                                /* 0x1e0 */
index efa969325ede55d39a3b86b7e72eb2f4b62f78a2..f69c168391aa58cf280ba6fb5aca6630e3593d1a 100644 (file)
 #define SVM_VMGEXIT_AP_JUMP_TABLE              0x80000005
 #define SVM_VMGEXIT_SET_AP_JUMP_TABLE          0
 #define SVM_VMGEXIT_GET_AP_JUMP_TABLE          1
+#define SVM_VMGEXIT_PSC                                0x80000010
+#define SVM_VMGEXIT_GUEST_REQUEST              0x80000011
+#define SVM_VMGEXIT_EXT_GUEST_REQUEST          0x80000012
+#define SVM_VMGEXIT_AP_CREATION                        0x80000013
+#define SVM_VMGEXIT_AP_CREATE_ON_INIT          0
+#define SVM_VMGEXIT_AP_CREATE                  1
+#define SVM_VMGEXIT_AP_DESTROY                 2
+#define SVM_VMGEXIT_HV_FEATURES                        0x8000fffd
 #define SVM_VMGEXIT_UNSUPPORTED_EVENT          0x8000ffff
 
 /* Exit code reserved for hypervisor/software use */
        { SVM_VMGEXIT_NMI_COMPLETE,     "vmgexit_nmi_complete" }, \
        { SVM_VMGEXIT_AP_HLT_LOOP,      "vmgexit_ap_hlt_loop" }, \
        { SVM_VMGEXIT_AP_JUMP_TABLE,    "vmgexit_ap_jump_table" }, \
+       { SVM_VMGEXIT_PSC,              "vmgexit_page_state_change" }, \
+       { SVM_VMGEXIT_GUEST_REQUEST,    "vmgexit_guest_request" }, \
+       { SVM_VMGEXIT_EXT_GUEST_REQUEST, "vmgexit_ext_guest_request" }, \
+       { SVM_VMGEXIT_AP_CREATION,      "vmgexit_ap_creation" }, \
+       { SVM_VMGEXIT_HV_FEATURES,      "vmgexit_hypervisor_feature" }, \
        { SVM_EXIT_ERR,         "invalid_guest_state" }
 
 
index c41ef42adbe8a3b13d10442b3d3b590b1ac40836..1a2dc328cb5ee82f465a3e230b9712285d6344e9 100644 (file)
@@ -46,8 +46,6 @@ endif
 # non-deterministic coverage.
 KCOV_INSTRUMENT                := n
 
-CFLAGS_head$(BITS).o   += -fno-stack-protector
-
 CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace
 
 obj-y                  := process_$(BITS).o signal.o
index 0d01e7f5078c25a6b9d1c933691cebe3b6b30240..6d2c50819501ecb09a5b9d84c17cf49d3683ef05 100644 (file)
@@ -65,6 +65,13 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 static bool acpi_support_online_capable;
 #endif
 
+#ifdef CONFIG_X86_64
+/* Physical address of the Multiprocessor Wakeup Structure mailbox */
+static u64 acpi_mp_wake_mailbox_paddr;
+/* Virtual address of the Multiprocessor Wakeup Structure mailbox */
+static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox;
+#endif
+
 #ifdef CONFIG_X86_IO_APIC
 /*
  * Locks related to IOAPIC hotplug
@@ -336,7 +343,60 @@ acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long e
        return 0;
 }
 
-#endif                         /*CONFIG_X86_LOCAL_APIC */
+#ifdef CONFIG_X86_64
+static int acpi_wakeup_cpu(int apicid, unsigned long start_ip)
+{
+       /*
+        * Remap mailbox memory only for the first call to acpi_wakeup_cpu().
+        *
+        * Wakeup of secondary CPUs is fully serialized in the core code.
+        * No need to protect acpi_mp_wake_mailbox from concurrent accesses.
+        */
+       if (!acpi_mp_wake_mailbox) {
+               acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr,
+                                               sizeof(*acpi_mp_wake_mailbox),
+                                               MEMREMAP_WB);
+       }
+
+       /*
+        * Mailbox memory is shared between the firmware and the OS. The
+        * firmware listens on the mailbox command address, and once it
+        * receives the wakeup command, it boots the CPU associated with the
+        * given apicid.
+        *
+        * The values of 'apic_id' and 'wakeup_vector' must be visible to the
+        * firmware before the wakeup command is visible.  smp_store_release()
+        * ensures ordering and visibility.
+        */
+       acpi_mp_wake_mailbox->apic_id       = apicid;
+       acpi_mp_wake_mailbox->wakeup_vector = start_ip;
+       smp_store_release(&acpi_mp_wake_mailbox->command,
+                         ACPI_MP_WAKE_COMMAND_WAKEUP);
+
+       /*
+        * Wait for the CPU to wake up.
+        *
+        * The CPU being woken up is essentially in a spin loop waiting to be
+        * woken up. It should not take long for it to wake up and acknowledge
+        * by zeroing out ->command.
+        *
+        * The ACPI specification doesn't provide any guidance on how long the
+        * kernel has to wait for a wakeup acknowledgement. It also doesn't
+        * provide a way to cancel a wakeup request if it takes too long.
+        *
+        * In a TDX environment, the VMM has control over how long it takes to
+        * wake up a secondary CPU. It can postpone scheduling the secondary
+        * vCPU indefinitely. Giving up on the wakeup request and reporting an
+        * error would open a possible attack vector for the VMM: it could wake
+        * up a secondary CPU when the kernel doesn't expect it. So wait until
+        * the wakeup request gets a positive result.
+        */
+       while (READ_ONCE(acpi_mp_wake_mailbox->command))
+               cpu_relax();
+
+       return 0;
+}
+#endif /* CONFIG_X86_64 */
+#endif /* CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_IO_APIC
 #define MP_ISA_BUS             0
@@ -1083,6 +1143,29 @@ static int __init acpi_parse_madt_lapic_entries(void)
        }
        return 0;
 }
+
+#ifdef CONFIG_X86_64
+static int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
+                                    const unsigned long end)
+{
+       struct acpi_madt_multiproc_wakeup *mp_wake;
+
+       if (!IS_ENABLED(CONFIG_SMP))
+               return -ENODEV;
+
+       mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
+       if (BAD_MADT_ENTRY(mp_wake, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(&header->common);
+
+       acpi_mp_wake_mailbox_paddr = mp_wake->base_address;
+
+       acpi_wake_cpu_handler_update(acpi_wakeup_cpu);
+
+       return 0;
+}
+#endif                         /* CONFIG_X86_64 */
 #endif                         /* CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_IO_APIC
@@ -1278,6 +1361,14 @@ static void __init acpi_process_madt(void)
 
                                smp_found_config = 1;
                        }
+
+#ifdef CONFIG_X86_64
+                       /*
+                        * Parse MADT MP Wake entry.
+                        */
+                       acpi_table_parse_madt(ACPI_MADT_TYPE_MULTIPROC_WAKEUP,
+                                             acpi_parse_mp_wake, 1);
+#endif
                }
                if (error == -EINVAL) {
                        /*
index df1644d9b3b66c635a8bf99241c3fd15be97fcbd..8b8cbf22461a4feff2249a2d731b538a31c30501 100644 (file)
@@ -50,20 +50,17 @@ int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
        return err;
 }
 
-bool amd_set_max_freq_ratio(u64 *ratio)
+static void amd_set_max_freq_ratio(void)
 {
        struct cppc_perf_caps perf_caps;
        u64 highest_perf, nominal_perf;
        u64 perf_ratio;
        int rc;
 
-       if (!ratio)
-               return false;
-
        rc = cppc_get_perf_caps(0, &perf_caps);
        if (rc) {
                pr_debug("Could not retrieve perf counters (%d)\n", rc);
-               return false;
+               return;
        }
 
        highest_perf = amd_get_highest_perf();
@@ -71,7 +68,7 @@ bool amd_set_max_freq_ratio(u64 *ratio)
 
        if (!highest_perf || !nominal_perf) {
                pr_debug("Could not retrieve highest or nominal performance\n");
-               return false;
+               return;
        }
 
        perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
@@ -79,25 +76,27 @@ bool amd_set_max_freq_ratio(u64 *ratio)
        perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
        if (!perf_ratio) {
                pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
-               return false;
+               return;
        }
 
-       *ratio = perf_ratio;
-       arch_set_max_freq_ratio(false);
-
-       return true;
+       freq_invariance_set_perf_ratio(perf_ratio, false);
 }
 
 static DEFINE_MUTEX(freq_invariance_lock);
 
 void init_freq_invariance_cppc(void)
 {
-       static bool secondary;
+       static bool init_done;
 
-       mutex_lock(&freq_invariance_lock);
+       if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+               return;
 
-       init_freq_invariance(secondary, true);
-       secondary = true;
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+               return;
 
+       mutex_lock(&freq_invariance_lock);
+       if (!init_done)
+               amd_set_max_freq_ratio();
+       init_done = true;
        mutex_unlock(&freq_invariance_lock);
 }
index b70344bf660083bbeb998bc0b74737a21047d46b..3c8f2c797a9865760d08f2fd8f918c805d65d648 100644 (file)
@@ -2551,6 +2551,16 @@ u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid)
 }
 EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid);
 
+#ifdef CONFIG_X86_64
+void __init acpi_wake_cpu_handler_update(wakeup_cpu_handler handler)
+{
+       struct apic **drv;
+
+       for (drv = __apicdrivers; drv < __apicdrivers_end; drv++)
+               (*drv)->wakeup_secondary_cpu_64 = handler;
+}
+#endif
+
 /*
  * Override the generic EOI implementation with an optimized version.
  * Only called during early boot when only one CPU is active and with
index c1bb384935b0555597c2fa500cbef9141d895c83..a868b76cd3d4241eea784dc2c1486d5bbba1cd11 100644 (file)
@@ -65,6 +65,7 @@
 #include <asm/irq_remapping.h>
 #include <asm/hw_irq.h>
 #include <asm/apic.h>
+#include <asm/pgtable.h>
 
 #define        for_each_ioapic(idx)            \
        for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
@@ -2677,6 +2678,19 @@ static struct resource * __init ioapic_setup_resources(void)
        return res;
 }
 
+static void io_apic_set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
+{
+       pgprot_t flags = FIXMAP_PAGE_NOCACHE;
+
+       /*
+        * Ensure fixmaps for IOAPIC MMIO respect memory encryption pgprot
+        * bits, just like normal ioremap():
+        */
+       flags = pgprot_decrypted(flags);
+
+       __set_fixmap(idx, phys, flags);
+}
+
 void __init io_apic_init_mappings(void)
 {
        unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
@@ -2709,7 +2723,7 @@ fake_ioapic_page:
                                      __func__, PAGE_SIZE, PAGE_SIZE);
                        ioapic_phys = __pa(ioapic_phys);
                }
-               set_fixmap_nocache(idx, ioapic_phys);
+               io_apic_set_fixmap(idx, ioapic_phys);
                apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
                        __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
                        ioapic_phys);
@@ -2838,7 +2852,7 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
        ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
        ioapics[idx].mp_config.apicaddr = address;
 
-       set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+       io_apic_set_fixmap(FIX_IO_APIC_BASE_0 + idx, address);
        if (bad_ioapic_register(idx)) {
                clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
                return -ENODEV;
index f5a48e66e4f5462f83c4a76e89554377372964c6..48285522796454b4b368e8ff1c0287e4bef7d574 100644 (file)
@@ -199,7 +199,13 @@ static void __init uv_tsc_check_sync(void)
        int mmr_shift;
        char *state;
 
-       /* Different returns from different UV BIOS versions */
+       /* UV5 guarantees synced TSCs; do not zero TSC_ADJUST */
+       if (!is_uv(UV2|UV3|UV4)) {
+               mark_tsc_async_resets("UV5+");
+               return;
+       }
+
+       /* UV2/3/4: UV BIOS TSC sync state is available */
        mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR);
        mmr_shift =
                is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT;
@@ -1340,7 +1346,7 @@ static void __init decode_gam_params(unsigned long ptr)
 static void __init decode_gam_rng_tbl(unsigned long ptr)
 {
        struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr;
-       unsigned long lgre = 0;
+       unsigned long lgre = 0, gend = 0;
        int index = 0;
        int sock_min = 999999, pnode_min = 99999;
        int sock_max = -1, pnode_max = -1;
@@ -1374,6 +1380,9 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
                        flag, size, suffix[order],
                        gre->type, gre->nasid, gre->sockid, gre->pnode);
 
+               if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
+                       gend = (unsigned long)gre->limit << UV_GAM_RANGE_SHFT;
+
                /* update to next range start */
                lgre = gre->limit;
                if (sock_min > gre->sockid)
@@ -1391,7 +1400,8 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
        _max_pnode      = pnode_max;
        _gr_table_len   = index;
 
-       pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n", index, _min_socket, _max_socket, _min_pnode, _max_pnode);
+       pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x), pnodes(min:%x,max:%x), gap_end(%d)\n",
+         index, _min_socket, _max_socket, _min_pnode, _max_pnode, fls64(gend));
 }
 
 /* Walk through UVsystab decoding the fields */
index 9fb0a2f8b62a2da72cd1915eb73ea6a5880984a1..437308004ef2e4f1345947ce318c62bc56a6036f 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/bootparam.h>
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
+#include <asm/tdx.h>
 
 #ifdef CONFIG_XEN
 #include <xen/interface/xen.h>
@@ -65,6 +66,22 @@ static void __used common(void)
        OFFSET(XEN_vcpu_info_arch_cr2, vcpu_info, arch.cr2);
 #endif
 
+       BLANK();
+       OFFSET(TDX_MODULE_rcx, tdx_module_output, rcx);
+       OFFSET(TDX_MODULE_rdx, tdx_module_output, rdx);
+       OFFSET(TDX_MODULE_r8,  tdx_module_output, r8);
+       OFFSET(TDX_MODULE_r9,  tdx_module_output, r9);
+       OFFSET(TDX_MODULE_r10, tdx_module_output, r10);
+       OFFSET(TDX_MODULE_r11, tdx_module_output, r11);
+
+       BLANK();
+       OFFSET(TDX_HYPERCALL_r10, tdx_hypercall_args, r10);
+       OFFSET(TDX_HYPERCALL_r11, tdx_hypercall_args, r11);
+       OFFSET(TDX_HYPERCALL_r12, tdx_hypercall_args, r12);
+       OFFSET(TDX_HYPERCALL_r13, tdx_hypercall_args, r13);
+       OFFSET(TDX_HYPERCALL_r14, tdx_hypercall_args, r14);
+       OFFSET(TDX_HYPERCALL_r15, tdx_hypercall_args, r15);
+
        BLANK();
        OFFSET(BP_scratch, boot_params, scratch);
        OFFSET(BP_secure_boot, boot_params, secure_boot);
index 9ca008f9e9b1ad8449b0053ab4a99e51e7c30bf7..1f60a2b279368e94c79f64ea43f736baf966f39f 100644 (file)
  * Copyright (C) 2017 Intel Corp.
  * Author: Len Brown <len.brown@intel.com>
  */
-
+#include <linux/cpufreq.h>
 #include <linux/delay.h>
 #include <linux/ktime.h>
 #include <linux/math64.h>
 #include <linux/percpu.h>
-#include <linux/cpufreq.h>
-#include <linux/smp.h>
-#include <linux/sched/isolation.h>
 #include <linux/rcupdate.h>
+#include <linux/sched/isolation.h>
+#include <linux/sched/topology.h>
+#include <linux/smp.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/cpu.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
 
 #include "cpu.h"
 
-struct aperfmperf_sample {
-       unsigned int    khz;
-       atomic_t        scfpending;
-       ktime_t time;
-       u64     aperf;
-       u64     mperf;
+struct aperfmperf {
+       seqcount_t      seq;
+       unsigned long   last_update;
+       u64             acnt;
+       u64             mcnt;
+       u64             aperf;
+       u64             mperf;
 };
 
-static DEFINE_PER_CPU(struct aperfmperf_sample, samples);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples) = {
+       .seq = SEQCNT_ZERO(cpu_samples.seq)
+};
 
-#define APERFMPERF_CACHE_THRESHOLD_MS  10
-#define APERFMPERF_REFRESH_DELAY_MS    10
-#define APERFMPERF_STALE_THRESHOLD_MS  1000
+static void init_counter_refs(void)
+{
+       u64 aperf, mperf;
+
+       rdmsrl(MSR_IA32_APERF, aperf);
+       rdmsrl(MSR_IA32_MPERF, mperf);
 
+       this_cpu_write(cpu_samples.aperf, aperf);
+       this_cpu_write(cpu_samples.mperf, mperf);
+}
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
 /*
- * aperfmperf_snapshot_khz()
- * On the current CPU, snapshot APERF, MPERF, and jiffies
- * unless we already did it within 10ms
- * calculate kHz, save snapshot
+ * APERF/MPERF frequency ratio computation.
+ *
+ * The scheduler wants to do frequency invariant accounting and needs a <1
+ * ratio to account for the 'current' frequency, corresponding to
+ * freq_curr / freq_max.
+ *
+ * Since the frequency freq_curr on x86 is controlled by a micro-controller and
+ * our P-state setting is little more than a request/hint, we need to observe
+ * the effective frequency 'BusyMHz', i.e. the average frequency over a time
+ * interval after discarding idle time. This is given by:
+ *
+ *   BusyMHz = delta_APERF / delta_MPERF * freq_base
+ *
+ * where freq_base is the max non-turbo P-state.
+ *
+ * The freq_max term has to be set to a somewhat arbitrary value, because we
+ * can't know which turbo states will be available at a given point in time:
+ * it all depends on the thermal headroom of the entire package. We set it to
+ * the turbo level with 4 cores active.
+ *
+ * Benchmarks show that's a good compromise between the 1C turbo ratio
+ * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
+ * which would ignore the entire turbo range (a conspicuous part, making
+ * freq_curr/freq_max always maxed out).
+ *
+ * An exception to the heuristic above is the Atom uarch, where we choose the
+ * highest turbo level for freq_max since Atoms are generally oriented towards
+ * power efficiency.
+ *
+ * Setting freq_max to anything less than the 1C turbo ratio causes the ratio
+ * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
  */
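
A standalone sketch of the BusyMHz arithmetic described above (hypothetical helper with illustrative numbers, overflow ignored; this is not kernel code):

    #include <stdint.h>

    /* BusyMHz = delta_APERF / delta_MPERF * freq_base */
    static uint64_t busy_khz(uint64_t acnt, uint64_t mcnt, uint64_t base_khz)
    {
            if (!mcnt)                      /* MPERF may not have advanced */
                    return 0;
            return acnt * base_khz / mcnt;
    }

    /* e.g. acnt = 3000000, mcnt = 2000000, base 2000000 kHz -> 3000000 kHz busy */
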
-static void aperfmperf_snapshot_khz(void *dummy)
+
+DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
+
+static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
+static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
+
+void arch_set_max_freq_ratio(bool turbo_disabled)
 {
-       u64 aperf, aperf_delta;
-       u64 mperf, mperf_delta;
-       struct aperfmperf_sample *s = this_cpu_ptr(&samples);
-       unsigned long flags;
+       arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
+                                       arch_turbo_freq_ratio;
+}
+EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
 
-       local_irq_save(flags);
-       rdmsrl(MSR_IA32_APERF, aperf);
-       rdmsrl(MSR_IA32_MPERF, mperf);
-       local_irq_restore(flags);
+static bool __init turbo_disabled(void)
+{
+       u64 misc_en;
+       int err;
+
+       err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
+       if (err)
+               return false;
+
+       return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
+}
+
+static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
+{
+       int err;
+
+       err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
+       if (err)
+               return false;
+
+       err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
+       if (err)
+               return false;
+
+       *base_freq = (*base_freq >> 16) & 0x3F;     /* max P state */
+       *turbo_freq = *turbo_freq & 0x3F;           /* 1C turbo    */
+
+       return true;
+}
+
+#define X86_MATCH(model)                                       \
+       X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,            \
+               INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
+
+static const struct x86_cpu_id has_knl_turbo_ratio_limits[] __initconst = {
+       X86_MATCH(XEON_PHI_KNL),
+       X86_MATCH(XEON_PHI_KNM),
+       {}
+};
+
+static const struct x86_cpu_id has_skx_turbo_ratio_limits[] __initconst = {
+       X86_MATCH(SKYLAKE_X),
+       {}
+};
+
+static const struct x86_cpu_id has_glm_turbo_ratio_limits[] __initconst = {
+       X86_MATCH(ATOM_GOLDMONT),
+       X86_MATCH(ATOM_GOLDMONT_D),
+       X86_MATCH(ATOM_GOLDMONT_PLUS),
+       {}
+};
+
+static bool __init knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
+                                         int num_delta_fratio)
+{
+       int fratio, delta_fratio, found;
+       int err, i;
+       u64 msr;
+
+       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+       if (err)
+               return false;
+
+       *base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */
+
+       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
+       if (err)
+               return false;
+
+       fratio = (msr >> 8) & 0xFF;
+       i = 16;
+       found = 0;
+       do {
+               if (found >= num_delta_fratio) {
+                       *turbo_freq = fratio;
+                       return true;
+               }
+
+               delta_fratio = (msr >> (i + 5)) & 0x7;
+
+               if (delta_fratio) {
+                       found += 1;
+                       fratio -= delta_fratio;
+               }
+
+               i += 8;
+       } while (i < 64);
+
+       return true;
+}
+
+static bool __init skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
+{
+       u64 ratios, counts;
+       u32 group_size;
+       int err, i;
+
+       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+       if (err)
+               return false;
+
+       *base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */
+
+       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
+       if (err)
+               return false;
+
+       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
+       if (err)
+               return false;
+
+       for (i = 0; i < 64; i += 8) {
+               group_size = (counts >> i) & 0xFF;
+               if (group_size >= size) {
+                       *turbo_freq = (ratios >> i) & 0xFF;
+                       return true;
+               }
+       }
+
+       return false;
+}
 
-       aperf_delta = aperf - s->aperf;
-       mperf_delta = mperf - s->mperf;
+static bool __init core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
+{
+       u64 msr;
+       int err;
+
+       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
+       if (err)
+               return false;
+
+       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
+       if (err)
+               return false;
+
+       *base_freq = (*base_freq >> 8) & 0xFF;    /* max P state */
+       *turbo_freq = (msr >> 24) & 0xFF;         /* 4C turbo    */
+
+       /* The CPU may have fewer than 4 cores */
+       if (!*turbo_freq)
+               *turbo_freq = msr & 0xFF;         /* 1C turbo    */
+
+       return true;
+}
+
+static bool __init intel_set_max_freq_ratio(void)
+{
+       u64 base_freq, turbo_freq;
+       u64 turbo_ratio;
 
+       if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
+               goto out;
+
+       if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
+           skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
+               goto out;
+
+       if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
+           knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
+               goto out;
+
+       if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
+           skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
+               goto out;
+
+       if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
+               goto out;
+
+       return false;
+
+out:
        /*
-        * There is no architectural guarantee that MPERF
-        * increments faster than we can read it.
+        * Some hypervisors advertise X86_FEATURE_APERFMPERF
+        * but then fill all MSRs with zeroes.
+        * Some CPUs have turbo boost but don't declare any turbo ratio
+        * in MSR_TURBO_RATIO_LIMIT.
         */
-       if (mperf_delta == 0)
-               return;
+       if (!base_freq || !turbo_freq) {
+               pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
+               return false;
+       }
 
-       s->time = ktime_get();
-       s->aperf = aperf;
-       s->mperf = mperf;
-       s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
-       atomic_set_release(&s->scfpending, 0);
+       turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
+       if (!turbo_ratio) {
+               pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
+               return false;
+       }
+
+       arch_turbo_freq_ratio = turbo_ratio;
+       arch_set_max_freq_ratio(turbo_disabled());
+
+       return true;
 }
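
To put numbers on the ratio (illustrative values only, not from this patch): a part with a base ratio of 24 (2400 MHz) and a 4C turbo ratio of 34 (3400 MHz) gets arch_turbo_freq_ratio = 34 * 1024 / 24 = 1450, i.e. freq_max is taken to be roughly 1.42x the base frequency in SCHED_CAPACITY_SCALE units.
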
 
-static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait)
+#ifdef CONFIG_PM_SLEEP
+static struct syscore_ops freq_invariance_syscore_ops = {
+       .resume = init_counter_refs,
+};
+
+static void register_freq_invariance_syscore_ops(void)
 {
-       s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu));
-       struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu);
+       register_syscore_ops(&freq_invariance_syscore_ops);
+}
+#else
+static inline void register_freq_invariance_syscore_ops(void) {}
+#endif
 
-       /* Don't bother re-computing within the cache threshold time. */
-       if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
-               return true;
+static void freq_invariance_enable(void)
+{
+       if (static_branch_unlikely(&arch_scale_freq_key)) {
+               WARN_ON_ONCE(1);
+               return;
+       }
+       static_branch_enable(&arch_scale_freq_key);
+       register_freq_invariance_syscore_ops();
+       pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
+}
+
+void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled)
+{
+       arch_turbo_freq_ratio = ratio;
+       arch_set_max_freq_ratio(turbo_disabled);
+       freq_invariance_enable();
+}
+
+static void __init bp_init_freq_invariance(void)
+{
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+               return;
 
-       if (!atomic_xchg(&s->scfpending, 1) || wait)
-               smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait);
+       if (intel_set_max_freq_ratio())
+               freq_invariance_enable();
+}
 
-       /* Return false if the previous iteration was too long ago. */
-       return time_delta <= APERFMPERF_STALE_THRESHOLD_MS;
+static void disable_freq_invariance_workfn(struct work_struct *work)
+{
+       static_branch_disable(&arch_scale_freq_key);
 }
 
-unsigned int aperfmperf_get_khz(int cpu)
+static DECLARE_WORK(disable_freq_invariance_work,
+                   disable_freq_invariance_workfn);
+
+DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
+
+static void scale_freq_tick(u64 acnt, u64 mcnt)
 {
-       if (!cpu_khz)
-               return 0;
+       u64 freq_scale;
 
-       if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
-               return 0;
+       if (!arch_scale_freq_invariant())
+               return;
 
-       if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
-               return 0;
+       if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
+               goto error;
 
-       if (rcu_is_idle_cpu(cpu))
-               return 0; /* Idle CPUs are completely uninteresting. */
+       if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
+               goto error;
 
-       aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
-       return per_cpu(samples.khz, cpu);
+       freq_scale = div64_u64(acnt, mcnt);
+       if (!freq_scale)
+               goto error;
+
+       if (freq_scale > SCHED_CAPACITY_SCALE)
+               freq_scale = SCHED_CAPACITY_SCALE;
+
+       this_cpu_write(arch_freq_scale, freq_scale);
+       return;
+
+error:
+       pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
+       schedule_work(&disable_freq_invariance_work);
 }
+#else
+static inline void bp_init_freq_invariance(void) { }
+static inline void scale_freq_tick(u64 acnt, u64 mcnt) { }
+#endif /* CONFIG_X86_64 && CONFIG_SMP */
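
Continuing the illustrative numbers above, a standalone sketch of the tick-time ratio computation (overflow checks elided; the real scale_freq_tick() bails to the error path instead):

    #include <stdint.h>

    #define SCHED_CAPACITY_SHIFT    10
    #define SCHED_CAPACITY_SCALE    (1ULL << SCHED_CAPACITY_SHIFT)

    /* freq_scale = (acnt / mcnt) / (max_freq_ratio / 1024), in 1024ths of capacity */
    static uint64_t freq_scale(uint64_t acnt, uint64_t mcnt, uint64_t max_freq_ratio)
    {
            uint64_t scale = (acnt << (2 * SCHED_CAPACITY_SHIFT)) / (mcnt * max_freq_ratio);

            return scale > SCHED_CAPACITY_SCALE ? SCHED_CAPACITY_SCALE : scale;
    }

    /* acnt == mcnt (busy at base freq), ratio 1450 -> scale ~723, i.e. ~71% capacity */
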
 
-void arch_freq_prepare_all(void)
+void arch_scale_freq_tick(void)
 {
-       ktime_t now = ktime_get();
-       bool wait = false;
-       int cpu;
+       struct aperfmperf *s = this_cpu_ptr(&cpu_samples);
+       u64 acnt, mcnt, aperf, mperf;
 
-       if (!cpu_khz)
+       if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
                return;
 
-       if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
-               return;
+       rdmsrl(MSR_IA32_APERF, aperf);
+       rdmsrl(MSR_IA32_MPERF, mperf);
+       acnt = aperf - s->aperf;
+       mcnt = mperf - s->mperf;
 
-       for_each_online_cpu(cpu) {
-               if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
-                       continue;
-               if (rcu_is_idle_cpu(cpu))
-                       continue; /* Idle CPUs are completely uninteresting. */
-               if (!aperfmperf_snapshot_cpu(cpu, now, false))
-                       wait = true;
-       }
+       s->aperf = aperf;
+       s->mperf = mperf;
+
+       raw_write_seqcount_begin(&s->seq);
+       s->last_update = jiffies;
+       s->acnt = acnt;
+       s->mcnt = mcnt;
+       raw_write_seqcount_end(&s->seq);
 
-       if (wait)
-               msleep(APERFMPERF_REFRESH_DELAY_MS);
+       scale_freq_tick(acnt, mcnt);
 }
 
+/*
+ * Discard samples older than the defined maximum sample age of 20ms (HZ/50
+ * jiffies). There is no point in sending IPIs in such a case: if the
+ * scheduler tick was not running, the CPU is either idle or isolated.
+ */
+#define MAX_SAMPLE_AGE ((unsigned long)HZ / 50)
+
 unsigned int arch_freq_get_on_cpu(int cpu)
 {
-       struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu);
+       struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu);
+       unsigned int seq, freq;
+       unsigned long last;
+       u64 acnt, mcnt;
 
-       if (!cpu_khz)
-               return 0;
+       if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+               goto fallback;
 
-       if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
-               return 0;
+       do {
+               seq = raw_read_seqcount_begin(&s->seq);
+               last = s->last_update;
+               acnt = s->acnt;
+               mcnt = s->mcnt;
+       } while (read_seqcount_retry(&s->seq, seq));
 
-       if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
-               return 0;
+       /*
+        * Bail on invalid count and when the last update was too long ago,
+        * which covers idle and NOHZ full CPUs.
+        */
+       if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE)
+               goto fallback;
+
+       return div64_u64((cpu_khz * acnt), mcnt);
+
+fallback:
+       freq = cpufreq_quick_get(cpu);
+       return freq ? freq : cpu_khz;
+}
 
-       if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
-               return per_cpu(samples.khz, cpu);
+static int __init bp_init_aperfmperf(void)
+{
+       if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+               return 0;
 
-       msleep(APERFMPERF_REFRESH_DELAY_MS);
-       atomic_set(&s->scfpending, 1);
-       smp_mb(); /* ->scfpending before smp_call_function_single(). */
-       smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
+       init_counter_refs();
+       bp_init_freq_invariance();
+       return 0;
+}
+early_initcall(bp_init_aperfmperf);
 
-       return per_cpu(samples.khz, cpu);
+void ap_init_aperfmperf(void)
+{
+       if (cpu_feature_enabled(X86_FEATURE_APERFMPERF))
+               init_counter_refs();
 }
index 6296e1ebed1dbef3e7a1d147d14bf720dcd0cb5e..d879a6c93609a6f655bd76f1efbb7750b68de5b7 100644 (file)
@@ -446,6 +446,13 @@ void update_srbds_msr(void)
        if (srbds_mitigation == SRBDS_MITIGATION_UCODE_NEEDED)
                return;
 
+       /*
+        * An MDS_NO CPU for which the SRBDS mitigation is not needed due to
+        * TSX being disabled won't have received the SRBDS MSR microcode.
+        */
+       if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL))
+               return;
+
        rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
 
        switch (srbds_mitigation) {
index ed4417500700416768b6096a905e3e5d090b4d98..2e9142797c99786054a3182f281a53094f4d999c 100644 (file)
@@ -60,6 +60,7 @@
 #include <asm/uv/uv.h>
 #include <asm/sigframe.h>
 #include <asm/traps.h>
+#include <asm/sev.h>
 
 #include "cpu.h"
 
@@ -298,13 +299,6 @@ static int __init cachesize_setup(char *str)
 }
 __setup("cachesize=", cachesize_setup);
 
-static int __init x86_sep_setup(char *s)
-{
-       setup_clear_cpu_cap(X86_FEATURE_SEP);
-       return 1;
-}
-__setup("nosep", x86_sep_setup);
-
 /* Standard macro to see if a specific flag is changeable */
 static inline int flag_is_changeable_p(u32 flag)
 {
@@ -376,26 +370,12 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 }
 #endif
 
-static __init int setup_disable_smep(char *arg)
-{
-       setup_clear_cpu_cap(X86_FEATURE_SMEP);
-       return 1;
-}
-__setup("nosmep", setup_disable_smep);
-
 static __always_inline void setup_smep(struct cpuinfo_x86 *c)
 {
        if (cpu_has(c, X86_FEATURE_SMEP))
                cr4_set_bits(X86_CR4_SMEP);
 }
 
-static __init int setup_disable_smap(char *arg)
-{
-       setup_clear_cpu_cap(X86_FEATURE_SMAP);
-       return 1;
-}
-__setup("nosmap", setup_disable_smap);
-
 static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 {
        unsigned long eflags = native_save_fl();
@@ -403,14 +383,8 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
        /* This should have been cleared long ago */
        BUG_ON(eflags & X86_EFLAGS_AC);
 
-       if (cpu_has(c, X86_FEATURE_SMAP)) {
-#ifdef CONFIG_X86_SMAP
+       if (cpu_has(c, X86_FEATURE_SMAP))
                cr4_set_bits(X86_CR4_SMAP);
-#else
-               clear_cpu_cap(c, X86_FEATURE_SMAP);
-               cr4_clear_bits(X86_CR4_SMAP);
-#endif
-       }
 }
 
 static __always_inline void setup_umip(struct cpuinfo_x86 *c)
@@ -1368,8 +1342,8 @@ static void detect_nopl(void)
 static void __init cpu_parse_early_param(void)
 {
        char arg[128];
-       char *argptr = arg;
-       int arglen, res, bit;
+       char *argptr = arg, *opt;
+       int arglen, taint = 0;
 
 #ifdef CONFIG_X86_32
        if (cmdline_find_option_bool(boot_command_line, "no387"))
@@ -1397,21 +1371,61 @@ static void __init cpu_parse_early_param(void)
                return;
 
        pr_info("Clearing CPUID bits:");
-       do {
-               res = get_option(&argptr, &bit);
-               if (res == 0 || res == 3)
-                       break;
 
-               /* If the argument was too long, the last bit may be cut off */
-               if (res == 1 && arglen >= sizeof(arg))
-                       break;
+       while (argptr) {
+               bool found __maybe_unused = false;
+               unsigned int bit;
 
-               if (bit >= 0 && bit < NCAPINTS * 32) {
-                       pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+               opt = strsep(&argptr, ",");
+
+               /*
+                * Handle naked numbers first for feature flags which don't
+                * have names.
+                */
+               if (!kstrtouint(opt, 10, &bit)) {
+                       if (bit < NCAPINTS * 32) {
+
+#ifdef CONFIG_X86_FEATURE_NAMES
+                               /* empty-string, i.e., ""-defined feature flags */
+                               if (!x86_cap_flags[bit])
+                                       pr_cont(" " X86_CAP_FMT_NUM, x86_cap_flag_num(bit));
+                               else
+#endif
+                                       pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+
+                               setup_clear_cpu_cap(bit);
+                               taint++;
+                       }
+                       /*
+                        * The assumption is that there are no feature names consisting
+                        * only of numbers, so go to the next argument.
+                        */
+                       continue;
+               }
+
+#ifdef CONFIG_X86_FEATURE_NAMES
+               for (bit = 0; bit < 32 * NCAPINTS; bit++) {
+                       if (!x86_cap_flag(bit))
+                               continue;
+
+                       if (strcmp(x86_cap_flag(bit), opt))
+                               continue;
+
+                       pr_cont(" %s", opt);
                        setup_clear_cpu_cap(bit);
+                       taint++;
+                       found = true;
+                       break;
                }
-       } while (res == 2);
+
+               if (!found)
+                       pr_cont(" (unknown: %s)", opt);
+#endif
+       }
        pr_cont("\n");
+
+       if (taint)
+               add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
 }
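
The user-visible effect of the rewrite above: clearcpuid= now accepts a comma-separated mix of feature names and raw bit numbers instead of numbers only, and any use of it taints the kernel. A hypothetical invocation mixing both forms:

    clearcpuid=smap,xsaves,440
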
 
 /*
@@ -1855,15 +1869,9 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
        validate_apic_and_package_id(c);
        x86_spec_ctrl_setup_ap();
        update_srbds_msr();
-}
 
-static __init int setup_noclflush(char *arg)
-{
-       setup_clear_cpu_cap(X86_FEATURE_CLFLUSH);
-       setup_clear_cpu_cap(X86_FEATURE_CLFLUSHOPT);
-       return 1;
+       tsx_ap_init();
 }
-__setup("noclflush", setup_noclflush);
 
 void print_cpu_info(struct cpuinfo_x86 *c)
 {
@@ -2124,6 +2132,9 @@ void cpu_init_exception_handling(void)
 
        load_TR_desc();
 
+       /* GHCB needs to be setup to handle #VC. */
+       setup_ghcb();
+
        /* Finally load the IDT */
        load_current_idt();
 }
index ee6f23f7587d402cc6c1f4b0e6b7358dcb78a7f6..2a8e584fc991384813cab20de6b1746fbe8f1e38 100644 (file)
@@ -55,11 +55,10 @@ enum tsx_ctrl_states {
 extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;
 
 extern void __init tsx_init(void);
-extern void tsx_enable(void);
-extern void tsx_disable(void);
-extern void tsx_clear_cpuid(void);
+void tsx_ap_init(void);
 #else
 static inline void tsx_init(void) { }
+static inline void tsx_ap_init(void) { }
 #endif /* CONFIG_CPU_SUP_INTEL */
 
 extern void get_cpu_cap(struct cpuinfo_x86 *c);
index 8321c43554a1d2e2cb615b4f9d6af463eb0f082f..f7a5370a9b3b83d4ed129e44879d774d6a8be6e4 100644 (file)
@@ -717,13 +717,6 @@ static void init_intel(struct cpuinfo_x86 *c)
 
        init_intel_misc_features(c);
 
-       if (tsx_ctrl_state == TSX_CTRL_ENABLE)
-               tsx_enable();
-       else if (tsx_ctrl_state == TSX_CTRL_DISABLE)
-               tsx_disable();
-       else if (tsx_ctrl_state == TSX_CTRL_RTM_ALWAYS_ABORT)
-               tsx_clear_cpuid();
-
        split_lock_init();
        bus_lock_init();
 
index 1940d305db1c0fc6549dc38569121c85c5d62e14..1c87501e0fa3dd82fd6c290f562d062bb54d10d6 100644 (file)
@@ -1294,10 +1294,23 @@ out_free:
        kfree(bank);
 }
 
+static void __threshold_remove_device(struct threshold_bank **bp)
+{
+       unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
+
+       for (bank = 0; bank < numbanks; bank++) {
+               if (!bp[bank])
+                       continue;
+
+               threshold_remove_bank(bp[bank]);
+               bp[bank] = NULL;
+       }
+       kfree(bp);
+}
+
 int mce_threshold_remove_device(unsigned int cpu)
 {
        struct threshold_bank **bp = this_cpu_read(threshold_banks);
-       unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
 
        if (!bp)
                return 0;
@@ -1308,13 +1321,7 @@ int mce_threshold_remove_device(unsigned int cpu)
         */
        this_cpu_write(threshold_banks, NULL);
 
-       for (bank = 0; bank < numbanks; bank++) {
-               if (bp[bank]) {
-                       threshold_remove_bank(bp[bank]);
-                       bp[bank] = NULL;
-               }
-       }
-       kfree(bp);
+       __threshold_remove_device(bp);
        return 0;
 }
 
@@ -1351,15 +1358,14 @@ int mce_threshold_create_device(unsigned int cpu)
                if (!(this_cpu_read(bank_map) & (1 << bank)))
                        continue;
                err = threshold_create_bank(bp, cpu, bank);
-               if (err)
-                       goto out_err;
+               if (err) {
+                       __threshold_remove_device(bp);
+                       return err;
+               }
        }
        this_cpu_write(threshold_banks, bp);
 
        if (thresholding_irq_en)
                mce_threshold_vector = amd_threshold_interrupt;
        return 0;
-out_err:
-       mce_threshold_remove_device(cpu);
-       return err;
 }
index 981496e6bc0e41d44ee8f4802ef70deb10cbb8d8..d775fcd74e98d268d4251402d10c1cbf5a52e66f 100644 (file)
@@ -69,7 +69,9 @@ DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
 
 struct mce_bank {
        u64                     ctl;                    /* subevents to enable */
-       bool                    init;                   /* initialise bank? */
+
+       __u64 init                      : 1,            /* initialise bank? */
+             __reserved_1              : 63;
 };
 static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
 
index 1add869353497d690dc4cf0ca6054b41823f8b5a..00483d1c27e4f4a55acd87129ded713332fe43b5 100644 (file)
@@ -301,85 +301,65 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
        }
 }
 
-static __always_inline int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
+/* See the "Machine Check Error Handling" section of the AMD PPR(s). */
+static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
 {
-       u64 mcx_cfg;
+       char *panic_msg = NULL;
+       int ret;
 
        /*
-        * We need to look at the following bits:
-        * - "succor" bit (data poisoning support), and
-        * - TCC bit (Task Context Corrupt)
-        * in MCi_STATUS to determine error severity.
+        * Default return value: Action required, the error must be handled
+        * immediately.
         */
-       if (!mce_flags.succor)
-               return MCE_PANIC_SEVERITY;
-
-       mcx_cfg = mce_rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(m->bank));
-
-       /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
-       if ((mcx_cfg & MCI_CONFIG_MCAX) &&
-           (m->status & MCI_STATUS_TCC) &&
-           (err_ctx == IN_KERNEL))
-               return MCE_PANIC_SEVERITY;
-
-        /* ...otherwise invoke hwpoison handler. */
-       return MCE_AR_SEVERITY;
-}
-
-/*
- * See AMD Error Scope Hierarchy table in a newer BKDG. For example
- * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
- */
-static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
-{
-       enum context ctx = error_context(m, regs);
+       ret = MCE_AR_SEVERITY;
 
        /* Processor Context Corrupt, no need to fumble too much, die! */
-       if (m->status & MCI_STATUS_PCC)
-               return MCE_PANIC_SEVERITY;
-
-       if (m->status & MCI_STATUS_UC) {
-
-               if (ctx == IN_KERNEL)
-                       return MCE_PANIC_SEVERITY;
+       if (m->status & MCI_STATUS_PCC) {
+               panic_msg = "Processor Context Corrupt";
+               ret = MCE_PANIC_SEVERITY;
+               goto out;
+       }
 
-               /*
-                * On older systems where overflow_recov flag is not present, we
-                * should simply panic if an error overflow occurs. If
-                * overflow_recov flag is present and set, then software can try
-                * to at least kill process to prolong system operation.
-                */
-               if (mce_flags.overflow_recov) {
-                       if (mce_flags.smca)
-                               return mce_severity_amd_smca(m, ctx);
-
-                       /* kill current process */
-                       return MCE_AR_SEVERITY;
-               } else {
-                       /* at least one error was not logged */
-                       if (m->status & MCI_STATUS_OVER)
-                               return MCE_PANIC_SEVERITY;
-               }
-
-               /*
-                * For any other case, return MCE_UC_SEVERITY so that we log the
-                * error and exit #MC handler.
-                */
-               return MCE_UC_SEVERITY;
+       if (m->status & MCI_STATUS_DEFERRED) {
+               ret = MCE_DEFERRED_SEVERITY;
+               goto out;
        }
 
        /*
-        * deferred error: poll handler catches these and adds to mce_ring so
-        * memory-failure can take recovery actions.
+        * If the UC bit is not set, the system either corrected or deferred
+        * the error. No action will be required after logging the error.
         */
-       if (m->status & MCI_STATUS_DEFERRED)
-               return MCE_DEFERRED_SEVERITY;
+       if (!(m->status & MCI_STATUS_UC)) {
+               ret = MCE_KEEP_SEVERITY;
+               goto out;
+       }
 
        /*
-        * corrected error: poll handler catches these and passes responsibility
-        * of decoding the error to EDAC
+        * On MCA overflow, without the MCA overflow recovery feature the
+        * system will not be able to recover, so panic.
         */
-       return MCE_KEEP_SEVERITY;
+       if ((m->status & MCI_STATUS_OVER) && !mce_flags.overflow_recov) {
+               panic_msg = "Overflowed uncorrected error without MCA Overflow Recovery";
+               ret = MCE_PANIC_SEVERITY;
+               goto out;
+       }
+
+       if (!mce_flags.succor) {
+               panic_msg = "Uncorrected error without MCA Recovery";
+               ret = MCE_PANIC_SEVERITY;
+               goto out;
+       }
+
+       if (error_context(m, regs) == IN_KERNEL) {
+               panic_msg = "Uncorrected unrecoverable error in kernel context";
+               ret = MCE_PANIC_SEVERITY;
+       }
+
+out:
+       if (msg && panic_msg)
+               *msg = panic_msg;
+
+       return ret;
 }
 
 static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
index f955d25076bab97e4791fc488b75c7683ba0ef14..239ff5fcec6a20df6ebac18b695e046f6fa55a22 100644 (file)
@@ -758,9 +758,9 @@ static struct subsys_interface mc_cpu_interface = {
 };
 
 /**
- * mc_bp_resume - Update boot CPU microcode during resume.
+ * microcode_bsp_resume - Update boot CPU microcode during resume.
  */
-static void mc_bp_resume(void)
+void microcode_bsp_resume(void)
 {
        int cpu = smp_processor_id();
        struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -772,7 +772,7 @@ static void mc_bp_resume(void)
 }
 
 static struct syscore_ops mc_syscore_ops = {
-       .resume                 = mc_bp_resume,
+       .resume                 = microcode_bsp_resume,
 };
 
 static int mc_cpu_starting(unsigned int cpu)
index 4eec8889b0ff1de48787fd162985bf930b89fd11..099b6f0d96bdc1c369b493867bc703fa228ab892 100644 (file)
@@ -84,14 +84,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
 
        if (cpu_has(c, X86_FEATURE_TSC)) {
-               unsigned int freq = aperfmperf_get_khz(cpu);
-
-               if (!freq)
-                       freq = cpufreq_quick_get(cpu);
-               if (!freq)
-                       freq = cpu_khz;
-               seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
-                          freq / 1000, (freq % 1000));
+               unsigned int freq = arch_freq_get_on_cpu(cpu);
+
+               seq_printf(m, "cpu MHz\t\t: %u.%03u\n", freq / 1000, (freq % 1000));
        }
 
        /* Cache size */
index 83f901e2c2df9e9b99b97bf9bf9fea271cdcf427..f276aff521e8b851b0cf2641aaec1adef59a4c15 100644 (file)
@@ -341,14 +341,14 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 
        /* Check whether cpus belong to parent ctrl group */
        cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
-       if (cpumask_weight(tmpmask)) {
+       if (!cpumask_empty(tmpmask)) {
                rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
                return -EINVAL;
        }
 
        /* Check whether cpus are dropped from this group */
        cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
-       if (cpumask_weight(tmpmask)) {
+       if (!cpumask_empty(tmpmask)) {
                /* Give any dropped cpus to parent rdtgroup */
                cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
                update_closid_rmid(tmpmask, prgrp);
@@ -359,7 +359,7 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
         * and update per-cpu rmid
         */
        cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
-       if (cpumask_weight(tmpmask)) {
+       if (!cpumask_empty(tmpmask)) {
                head = &prgrp->mon.crdtgrp_list;
                list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
                        if (crgrp == rdtgrp)
@@ -394,7 +394,7 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 
        /* Check whether cpus are dropped from this group */
        cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
-       if (cpumask_weight(tmpmask)) {
+       if (!cpumask_empty(tmpmask)) {
                /* Can't drop from default group */
                if (rdtgrp == &rdtgroup_default) {
                        rdt_last_cmd_puts("Can't drop CPUs from default group\n");
@@ -413,12 +413,12 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
         * and update per-cpu closid/rmid.
         */
        cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
-       if (cpumask_weight(tmpmask)) {
+       if (!cpumask_empty(tmpmask)) {
                list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
                        if (r == rdtgrp)
                                continue;
                        cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
-                       if (cpumask_weight(tmpmask1))
+                       if (!cpumask_empty(tmpmask1))
                                cpumask_rdtgrp_clear(r, tmpmask1);
                }
                update_closid_rmid(tmpmask, rdtgrp);
@@ -488,7 +488,7 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 
        /* check that user didn't specify any offline cpus */
        cpumask_andnot(tmpmask, newmask, cpu_online_mask);
-       if (cpumask_weight(tmpmask)) {
+       if (!cpumask_empty(tmpmask)) {
                ret = -EINVAL;
                rdt_last_cmd_puts("Can only assign online CPUs\n");
                goto unlock;
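
The substitution repeated throughout this file is a small but real win: cpumask_weight() popcounts the whole mask just to compare the result against zero, while cpumask_empty() can stop at the first set bit, and the emptiness check reads as intended. The pattern in isolation (sketch only):

    /* before: full popcount, result compared against 0 */
    if (cpumask_weight(tmpmask))
            ...

    /* after: short-circuits on the first set bit */
    if (!cpumask_empty(tmpmask))
            ...
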
index 9c7a5f049292966362384cb81e396630ff3e7a92..ec7bbac3a9f29aa4f34b5d9ba0de4d9c58381cc0 100644 (file)
@@ -19,7 +19,7 @@
 
 enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED;
 
-void tsx_disable(void)
+static void tsx_disable(void)
 {
        u64 tsx;
 
@@ -39,7 +39,7 @@ void tsx_disable(void)
        wrmsrl(MSR_IA32_TSX_CTRL, tsx);
 }
 
-void tsx_enable(void)
+static void tsx_enable(void)
 {
        u64 tsx;
 
@@ -58,7 +58,7 @@ void tsx_enable(void)
        wrmsrl(MSR_IA32_TSX_CTRL, tsx);
 }
 
-static bool __init tsx_ctrl_is_supported(void)
+static bool tsx_ctrl_is_supported(void)
 {
        u64 ia32_cap = x86_read_arch_cap_msr();
 
@@ -84,7 +84,45 @@ static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
        return TSX_CTRL_ENABLE;
 }
 
-void tsx_clear_cpuid(void)
+/*
+ * Disabling TSX is not a trivial business.
+ *
+ * First of all, there's a CPUID bit: X86_FEATURE_RTM_ALWAYS_ABORT
+ * which says that TSX is practically disabled (all transactions are
+ * aborted by default). When that bit is set, the kernel unconditionally
+ * disables TSX.
+ *
+ * In order to do that, however, it needs to dance a bit:
+ *
+ * 1. The first method to disable it is through MSR_TSX_FORCE_ABORT and
+ * the MSR is present only when *two* CPUID bits are set:
+ *
+ * - X86_FEATURE_RTM_ALWAYS_ABORT
+ * - X86_FEATURE_TSX_FORCE_ABORT
+ *
+ * 2. The second method is for CPUs which do not have the above-mentioned
+ * MSR: those use a different MSR - MSR_IA32_TSX_CTRL and disable TSX
+ * through that one. Those CPUs can also have the initially mentioned
+ * CPUID bit X86_FEATURE_RTM_ALWAYS_ABORT set and for those the same strategy
+ * applies: TSX gets disabled unconditionally.
+ *
+ * When either of the two methods is present, the kernel disables TSX and
+ * clears the respective RTM and HLE feature flags.
+ *
+ * An additional twist in the whole thing is late microcode loading which,
+ * when done, may cause the X86_FEATURE_RTM_ALWAYS_ABORT CPUID bit to be
+ * set after the update.
+ *
+ * A subsequent hotplug operation on any logical CPU except the BSP will
+ * cause the supported CPUID feature bits to be re-detected and, if RTM and
+ * HLE suddenly disappear while userspace had already consulted them before
+ * the update, funny explosions will happen. Long story short: the kernel
+ * doesn't modify CPUID feature bits after booting.
+ *
+ * That's why this function's call in init_intel() doesn't clear the
+ * feature flags.
+ */
+static void tsx_clear_cpuid(void)
 {
        u64 msr;
 
@@ -97,6 +135,39 @@ void tsx_clear_cpuid(void)
                rdmsrl(MSR_TSX_FORCE_ABORT, msr);
                msr |= MSR_TFA_TSX_CPUID_CLEAR;
                wrmsrl(MSR_TSX_FORCE_ABORT, msr);
+       } else if (tsx_ctrl_is_supported()) {
+               rdmsrl(MSR_IA32_TSX_CTRL, msr);
+               msr |= TSX_CTRL_CPUID_CLEAR;
+               wrmsrl(MSR_IA32_TSX_CTRL, msr);
+       }
+}
+
+/*
+ * Disable TSX development mode
+ *
+ * When the microcode released in Feb 2022 is applied, TSX will be disabled by
+ * default on some processors. MSR 0x122 (TSX_CTRL) and MSR 0x123
+ * (IA32_MCU_OPT_CTRL) can be used to re-enable TSX for development, but doing
+ * so is not recommended for production deployments. In particular, applying
+ * MD_CLEAR flows for mitigation of the Intel TSX Asynchronous Abort (TAA)
+ * transient execution attack may not be effective on these processors when
+ * Intel TSX is enabled with updated microcode.
+ */
+static void tsx_dev_mode_disable(void)
+{
+       u64 mcu_opt_ctrl;
+
+       /* Check if RTM_ALLOW exists */
+       if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() ||
+           !cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL))
+               return;
+
+       rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_opt_ctrl);
+
+       if (mcu_opt_ctrl & RTM_ALLOW) {
+               mcu_opt_ctrl &= ~RTM_ALLOW;
+               wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_opt_ctrl);
+               setup_force_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT);
        }
 }
 
@@ -105,14 +176,14 @@ void __init tsx_init(void)
        char arg[5] = {};
        int ret;
 
+       tsx_dev_mode_disable();
+
        /*
-        * Hardware will always abort a TSX transaction if both CPUID bits
-        * RTM_ALWAYS_ABORT and TSX_FORCE_ABORT are set. In this case, it is
-        * better not to enumerate CPUID.RTM and CPUID.HLE bits. Clear them
-        * here.
+        * Hardware will always abort a TSX transaction when the CPUID bit
+        * RTM_ALWAYS_ABORT is set. In this case, it is better not to enumerate
+        * CPUID.RTM and CPUID.HLE bits. Clear them here.
         */
-       if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) &&
-           boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
+       if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)) {
                tsx_ctrl_state = TSX_CTRL_RTM_ALWAYS_ABORT;
                tsx_clear_cpuid();
                setup_clear_cpu_cap(X86_FEATURE_RTM);
@@ -175,3 +246,16 @@ void __init tsx_init(void)
                setup_force_cpu_cap(X86_FEATURE_HLE);
        }
 }
+
+void tsx_ap_init(void)
+{
+       tsx_dev_mode_disable();
+
+       if (tsx_ctrl_state == TSX_CTRL_ENABLE)
+               tsx_enable();
+       else if (tsx_ctrl_state == TSX_CTRL_DISABLE)
+               tsx_disable();
+       else if (tsx_ctrl_state == TSX_CTRL_RTM_ALWAYS_ABORT)
+               /* See comment over that function for more details. */
+               tsx_clear_cpuid();
+}
index a7f617a3981d451c002317ac9bb64ab3baeaaa27..97529552dd2496421cd7f684c38e7a3629ad40c5 100644 (file)
@@ -37,7 +37,6 @@ static ssize_t __copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
        } else
                memcpy(buf, vaddr + offset, csize);
 
-       set_iounmap_nonlazy();
        iounmap((void __iomem *)vaddr);
        return csize;
 }
index c049561f373a0c6b1f7a7aa8a8346f0f898c1304..e28ab0ecc5378be41a8aa66b88987931412251ff 100644 (file)
@@ -41,17 +41,7 @@ struct fpu_state_config fpu_user_cfg __ro_after_init;
  */
 struct fpstate init_fpstate __ro_after_init;
 
-/*
- * Track whether the kernel is using the FPU state
- * currently.
- *
- * This flag is used:
- *
- *   - by IRQ context code to potentially use the FPU
- *     if it's unused.
- *
- *   - to debug kernel_fpu_begin()/end() correctness
- */
+/* Track in-kernel FPU usage */
 static DEFINE_PER_CPU(bool, in_kernel_fpu);
 
 /*
@@ -59,42 +49,37 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
  */
 DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
 
-static bool kernel_fpu_disabled(void)
-{
-       return this_cpu_read(in_kernel_fpu);
-}
-
-static bool interrupted_kernel_fpu_idle(void)
-{
-       return !kernel_fpu_disabled();
-}
-
-/*
- * Were we in user mode (or vm86 mode) when we were
- * interrupted?
- *
- * Doing kernel_fpu_begin/end() is ok if we are running
- * in an interrupt context from user mode - we'll just
- * save the FPU state as required.
- */
-static bool interrupted_user_mode(void)
-{
-       struct pt_regs *regs = get_irq_regs();
-       return regs && user_mode(regs);
-}
-
 /*
  * Can we use the FPU in kernel mode with the
  * whole "kernel_fpu_begin/end()" sequence?
- *
- * It's always ok in process context (ie "not interrupt")
- * but it is sometimes ok even from an irq.
  */
 bool irq_fpu_usable(void)
 {
-       return !in_interrupt() ||
-               interrupted_user_mode() ||
-               interrupted_kernel_fpu_idle();
+       if (WARN_ON_ONCE(in_nmi()))
+               return false;
+
+       /* In kernel FPU usage already active? */
+       if (this_cpu_read(in_kernel_fpu))
+               return false;
+
+       /*
+        * When not in NMI or hard interrupt context, FPU can be used in:
+        *
+        * - Task context except from within fpregs_lock()'ed critical
+        *   regions.
+        *
+        * - Soft interrupt processing context which cannot happen
+        *   while in a fpregs_lock()'ed critical region.
+        */
+       if (!in_hardirq())
+               return true;
+
+       /*
+        * In hard interrupt context it's safe when soft interrupts
+        * are enabled, which means the interrupt did not hit in
+        * a fpregs_lock()'ed critical region.
+        */
+       return !softirq_count();
 }
 EXPORT_SYMBOL(irq_fpu_usable);
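
For context, the canonical caller-side pattern this predicate gates, as a minimal sketch (simd_checksum()/scalar_checksum() are hypothetical names, not from this patch):

    static void checksum_region(const void *buf, size_t len)
    {
            if (irq_fpu_usable()) {
                    kernel_fpu_begin();             /* claims the FPU, disables preemption */
                    simd_checksum(buf, len);        /* hypothetical SIMD fast path */
                    kernel_fpu_end();
            } else {
                    scalar_checksum(buf, len);      /* hypothetical integer fallback */
            }
    }
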
 
index 39e1c8626ab999fea588e7558927724dd2f3b457..c8340156bfd2aadc490b5d0a663879ded2bb991f 100644 (file)
@@ -142,7 +142,8 @@ static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
         * Non-compacted format and legacy features use the cached fixed
         * offsets.
         */
-       if (!cpu_feature_enabled(X86_FEATURE_XSAVES) || xfeature <= XFEATURE_SSE)
+       if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
+           xfeature <= XFEATURE_SSE)
                return xstate_offsets[xfeature];
 
        /*
@@ -369,12 +370,12 @@ static void __init setup_init_fpu_buf(void)
        /*
         * All components are now in init state. Read the state back so
         * that init_fpstate contains all non-zero init state. This only
-        * works with XSAVE, but not with XSAVEOPT and XSAVES because
+        * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
         * those use the init optimization which skips writing data for
         * components in init state.
         *
         * XSAVE could be used, but that would require to reshuffle the
-        * data when XSAVES is available because XSAVES uses xstate
+        * data when XSAVEC/S is available because XSAVEC/S uses xstate
         * compaction. But doing so is a pointless exercise because most
         * components have an all zeros init state except for the legacy
         * ones (FP and SSE). Those can be saved with FXSAVE into the
@@ -584,7 +585,8 @@ static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
  */
 static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
 {
-       bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+       bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
+       bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
        unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
        int i;
 
@@ -595,7 +597,7 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
                 * Supervisor state components can be managed only by
                 * XSAVES.
                 */
-               if (!compacted && xfeature_is_supervisor(i)) {
+               if (!xsaves && xfeature_is_supervisor(i)) {
                        XSTATE_WARN_ON(1);
                        return false;
                }
@@ -612,8 +614,11 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
  * the size of the *user* states.  If we use it to size a buffer
  * that we use 'XSAVES' on, we could potentially overflow the
  * buffer because 'XSAVES' saves system states too.
+ *
+ * This also takes compaction into account. So this works for
+ * XSAVEC as well.
  */
-static unsigned int __init get_xsaves_size(void)
+static unsigned int __init get_compacted_size(void)
 {
        unsigned int eax, ebx, ecx, edx;
        /*
@@ -623,6 +628,10 @@ static unsigned int __init get_xsaves_size(void)
         *    containing all the state components
         *    corresponding to bits currently set in
         *    XCR0 | IA32_XSS.
+        *
+        * When XSAVES is not available but XSAVEC is (virt), then there
+        * are no supervisor states, but XSAVEC still uses compacted
+        * format.
         */
        cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
        return ebx;
@@ -632,13 +641,13 @@ static unsigned int __init get_xsaves_size(void)
  * Get the total size of the enabled xstates without the independent supervisor
  * features.
  */
-static unsigned int __init get_xsaves_size_no_independent(void)
+static unsigned int __init get_xsave_compacted_size(void)
 {
        u64 mask = xfeatures_mask_independent();
        unsigned int size;
 
        if (!mask)
-               return get_xsaves_size();
+               return get_compacted_size();
 
        /* Disable independent features. */
        wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
@@ -647,7 +656,7 @@ static unsigned int __init get_xsaves_size_no_independent(void)
         * Ask the hardware what size is required of the buffer.
         * This is the size required for the task->fpu buffer.
         */
-       size = get_xsaves_size();
+       size = get_compacted_size();
 
        /* Re-enable independent features so XSAVES will work on them again. */
        wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
@@ -687,20 +696,21 @@ static int __init init_xstate_size(void)
 {
        /* Recompute the context size for enabled features: */
        unsigned int user_size, kernel_size, kernel_default_size;
-       bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+       bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
 
        /* Uncompacted user space size */
        user_size = get_xsave_size_user();
 
        /*
-        * XSAVES kernel size includes supervisor states and
-        * uses compacted format when available.
+        * XSAVES kernel size includes supervisor states and uses compacted
+        * format. XSAVEC uses compacted format, but does not save
+        * supervisor states.
         *
-        * XSAVE does not support supervisor states so
-        * kernel and user size is identical.
+        * XSAVE[OPT] do not support supervisor states, so the kernel and
+        * user sizes are identical.
         */
        if (compacted)
-               kernel_size = get_xsaves_size_no_independent();
+               kernel_size = get_xsave_compacted_size();
        else
                kernel_size = user_size;
 
@@ -813,8 +823,11 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
        if (!cpu_feature_enabled(X86_FEATURE_XFD))
                fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;
 
-       fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
-                             XFEATURE_MASK_SUPERVISOR_SUPPORTED;
+       if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
+               fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
+       else
+               fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
+                                       XFEATURE_MASK_SUPERVISOR_SUPPORTED;
 
        fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
        fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
@@ -837,6 +850,11 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
         */
        init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;
 
+       /* Set up compaction feature bit */
+       if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
+           cpu_feature_enabled(X86_FEATURE_XSAVES))
+               setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);
+
        /* Enable xstate instructions to be able to continue with initialization: */
        fpu__init_cpu_xstate();
 
@@ -873,7 +891,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
        pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
                fpu_kernel_cfg.max_features,
                fpu_kernel_cfg.max_size,
-               boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
+               boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
        return;
 
 out_disable:
@@ -917,7 +935,7 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
        if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
                return NULL;
 
-       if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
+       if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
                if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
                        return NULL;
        }
@@ -1215,7 +1233,7 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
        }
 
        for (i = 0; i < XFEATURE_MAX; i++) {
-               u64 mask = ((u64)1 << i);
+               mask = BIT_ULL(i);
 
                if (hdr.xfeatures & mask) {
                        void *dst = __raw_xsave_addr(xsave, i);
@@ -1525,7 +1543,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
         * vendors into extending XFD for the pre AMX states, especially
         * AVX512.
         */
-       bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+       bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
        struct fpu *fpu = &current->group_leader->thread.fpu;
        struct fpu_state_perm *perm;
        unsigned int ksize, usize;
@@ -1687,16 +1705,13 @@ EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
  * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
  * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
  */
-long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
+long fpu_xstate_prctl(int option, unsigned long arg2)
 {
        u64 __user *uptr = (u64 __user *)arg2;
        u64 permitted, supported;
        unsigned long idx = arg2;
        bool guest = false;
 
-       if (tsk != current)
-               return -EPERM;
-
        switch (option) {
        case ARCH_GET_XCOMP_SUPP:
                supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
index d22ace092ca290d21b3707c050e83f89284de33f..5ad47031383b54c69c129d097e0f18ae68585c55 100644 (file)
@@ -16,7 +16,7 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
         * XRSTORS requires these bits set in xcomp_bv, or it will
         * trigger #GP:
         */
-       if (cpu_feature_enabled(X86_FEATURE_XSAVES))
+       if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED))
                xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT;
 }
 
@@ -79,6 +79,7 @@ static inline u64 xfeatures_mask_independent(void)
 /* These macros all use (%edi)/(%rdi) as the single memory argument. */
 #define XSAVE          ".byte " REX_PREFIX "0x0f,0xae,0x27"
 #define XSAVEOPT       ".byte " REX_PREFIX "0x0f,0xae,0x37"
+#define XSAVEC         ".byte " REX_PREFIX "0x0f,0xc7,0x27"
 #define XSAVES         ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
 #define XRSTOR         ".byte " REX_PREFIX "0x0f,0xae,0x2f"
 #define XRSTORS                ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
@@ -97,9 +98,11 @@ static inline u64 xfeatures_mask_independent(void)
                     : "memory")
 
 /*
- * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
- * format and supervisor states in addition to modified optimization in
- * XSAVEOPT.
+ * If XSAVES is enabled, it replaces XSAVEC because it supports supervisor
+ * states in addition to XSAVEC.
+ *
+ * Otherwise if XSAVEC is enabled, it replaces XSAVEOPT because it supports
+ * compacted storage format in addition to XSAVEOPT.
  *
  * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
  * supports modified optimization which is not supported by XSAVE.
@@ -111,8 +114,9 @@ static inline u64 xfeatures_mask_independent(void)
  * address of the instruction where we might get an exception at.
  */
 #define XSTATE_XSAVE(st, lmask, hmask, err)                            \
-       asm volatile(ALTERNATIVE_2(XSAVE,                               \
+       asm volatile(ALTERNATIVE_3(XSAVE,                               \
                                   XSAVEOPT, X86_FEATURE_XSAVEOPT,      \
+                                  XSAVEC,   X86_FEATURE_XSAVEC,        \
                                   XSAVES,   X86_FEATURE_XSAVES)        \
                     "\n"                                               \
                     "xor %[err], %[err]\n"                             \
index 4f5ecbbaae77c7f6f74eb40d82681c94a063a99c..bd4a34100ed0cd53acf56114dcb976ec6186a669 100644 (file)
@@ -40,6 +40,7 @@
 #include <asm/extable.h>
 #include <asm/trapnr.h>
 #include <asm/sev.h>
+#include <asm/tdx.h>
 
 /*
  * Manage page tables very early on.
@@ -143,7 +144,20 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdv
        if (sme_get_me_mask()) {
                vaddr = (unsigned long)__start_bss_decrypted;
                vaddr_end = (unsigned long)__end_bss_decrypted;
+
                for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+                       /*
+                        * On SNP, transition the page to shared in the RMP table so that
+                        * it is consistent with the page table attribute change.
+                        *
+                        * __start_bss_decrypted has a virtual address in the high range
+                        * mapping (kernel .text). PVALIDATE, by way of
+                        * early_snp_set_memory_shared(), requires a valid virtual
+                        * address but the kernel is currently running off of the identity
+                        * mapping so use __pa() to get a *currently* valid virtual address.
+                        */
+                       early_snp_set_memory_shared(__pa(vaddr), __pa(vaddr), PTRS_PER_PMD);
+
                        i = pmd_index(vaddr);
                        pmd[i] -= sme_get_me_mask();
                }
@@ -192,9 +206,6 @@ unsigned long __head __startup_64(unsigned long physaddr,
        if (load_delta & ~PMD_PAGE_MASK)
                for (;;);
 
-       /* Activate Secure Memory Encryption (SME) if supported and enabled */
-       sme_enable(bp);
-
        /* Include the SME encryption mask in the fixup value */
        load_delta += sme_get_me_mask();
 
@@ -308,15 +319,6 @@ unsigned long __head __startup_64(unsigned long physaddr,
        return sme_postprocess_startup(bp, pmd);
 }
 
-unsigned long __startup_secondary_64(void)
-{
-       /*
-        * Return the SME encryption mask (if SME is active) to be used as a
-        * modifier for the initial pgdir entry programmed into CR3.
-        */
-       return sme_get_me_mask();
-}
-
 /* Wipe all early page tables except for the kernel symbol map */
 static void __init reset_early_page_tables(void)
 {
@@ -416,6 +418,9 @@ void __init do_early_exception(struct pt_regs *regs, int trapnr)
            trapnr == X86_TRAP_VC && handle_vc_boot_ghcb(regs))
                return;
 
+       if (trapnr == X86_TRAP_VE && tdx_early_handle_ve(regs))
+               return;
+
        early_fixup_exception(regs, trapnr);
 }
 
@@ -514,6 +519,9 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 
        idt_setup_early_handler();
 
+       /* Needed before cc_platform_has() can be used for TDX */
+       tdx_early_init();
+
        copy_bootdata(__va(real_mode_data));
 
        /*
@@ -600,8 +608,10 @@ static void startup_64_load_idt(unsigned long physbase)
 void early_setup_idt(void)
 {
        /* VMM Communication Exception */
-       if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+       if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
+               setup_ghcb();
                set_bringup_idt_handler(bringup_idt_table, X86_TRAP_VC, vc_boot_ghcb);
+       }
 
        bringup_idt_descr.address = (unsigned long)bringup_idt_table;
        native_load_idt(&bringup_idt_descr);
index b8e3019547a5d8271ed1a4bf8c7104ea4de3cafb..92c4afa2b7298d2a45f6fc3e67843fdb73354bfe 100644 (file)
@@ -65,10 +65,39 @@ SYM_CODE_START_NOALIGN(startup_64)
        leaq    (__end_init_task - FRAME_SIZE)(%rip), %rsp
 
        leaq    _text(%rip), %rdi
+
+       /*
+        * initial_gs points to initial fixed_percpu_data struct with storage for
+        * the stack protector canary. Global pointer fixups are needed at this
+        * stage, so apply them as is done in fixup_pointer(), and initialize %gs
+        * such that the canary can be accessed at %gs:40 for subsequent C calls.
+        */
+       movl    $MSR_GS_BASE, %ecx
+       movq    initial_gs(%rip), %rax
+       movq    $_text, %rdx
+       subq    %rdx, %rax
+       addq    %rdi, %rax
+       movq    %rax, %rdx
+       shrq    $32,  %rdx
+       wrmsr
+
        pushq   %rsi
        call    startup_64_setup_env
        popq    %rsi
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+       /*
+        * Activate SEV/SME memory encryption if supported/enabled. This needs to
+        * be done now, since this also includes setup of the SEV-SNP CPUID table,
+        * which needs to be done before any CPUID instructions are executed in
+        * subsequent code.
+        */
+       movq    %rsi, %rdi
+       pushq   %rsi
+       call    sme_enable
+       popq    %rsi
+#endif
+
        /* Now switch to __KERNEL_CS so IRET works reliably */
        pushq   $__KERNEL_CS
        leaq    .Lon_kernel_cs(%rip), %rax
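
The subq/addq pair above applies the same relocation that early boot C code performs with fixup_pointer() in head64.c, roughly:

    /* Translate a link-time address to where it currently lives while the
     * kernel still runs from the identity mapping: subtract the link-time
     * base (_text) and add the physical load address passed in %rdi. */
    extern char _text[];

    static inline void *fixup_pointer(void *ptr, unsigned long physaddr)
    {
            return ptr - (void *)_text + (void *)physaddr;
    }
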
@@ -134,16 +163,32 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
         * Retrieve the modifier (SME encryption mask if SME is active) to be
         * added to the initial pgdir entry that will be programmed into CR3.
         */
-       pushq   %rsi
-       call    __startup_secondary_64
-       popq    %rsi
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+       movq    sme_me_mask, %rax
+#else
+       xorq    %rax, %rax
+#endif
 
        /* Form the CR3 value being sure to include the CR3 modifier */
        addq    $(init_top_pgt - __START_KERNEL_map), %rax
 1:
 
+#ifdef CONFIG_X86_MCE
+       /*
+        * Preserve CR4.MCE if the kernel will enable #MC support.
+        * Clearing MCE may fault in some environments (that also force #MC
+        * support). Any machine check that occurs before #MC support is fully
+        * configured will crash the system regardless of the CR4.MCE value set
+        * here.
+        */
+       movq    %cr4, %rcx
+       andl    $X86_CR4_MCE, %ecx
+#else
+       movl    $0, %ecx
+#endif
+
        /* Enable PAE mode, PGE and LA57 */
-       movl    $(X86_CR4_PAE | X86_CR4_PGE), %ecx
+       orl     $(X86_CR4_PAE | X86_CR4_PGE), %ecx
 #ifdef CONFIG_X86_5LEVEL
        testl   $1, __pgtable_l5_enabled(%rip)
        jz      1f
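
In C terms, the CR4 value assembled above amounts to the following kernel-flavored sketch (LA57 is OR-ed in just after this when 5-level paging is enabled):

    /* Carry over only MCE from the current CR4 when #MC support is built
     * in, then enable PAE and PGE. */
    unsigned long cr4 = 0;

    if (IS_ENABLED(CONFIG_X86_MCE))
            cr4 = native_read_cr4() & X86_CR4_MCE;
    cr4 |= X86_CR4_PAE | X86_CR4_PGE;
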
@@ -249,13 +294,23 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
        /* Setup EFER (Extended Feature Enable Register) */
        movl    $MSR_EFER, %ecx
        rdmsr
+       /*
+        * Preserve current value of EFER for comparison and to skip
+        * EFER writes if no change was made (for TDX guest)
+        */
+       movl    %eax, %edx
        btsl    $_EFER_SCE, %eax        /* Enable System Call */
        btl     $20,%edi                /* No Execute supported? */
        jnc     1f
        btsl    $_EFER_NX, %eax
        btsq    $_PAGE_BIT_NX,early_pmd_flags(%rip)
-1:     wrmsr                           /* Make changes effective */
 
+       /* Avoid writing EFER if no change was made (for TDX guest) */
+1:     cmpl    %edx, %eax
+       je      1f
+       xor     %edx, %edx
+       wrmsr                           /* Make changes effective */
+1:
        /* Setup cr0 */
        movl    $CR0_STATE, %eax
        /* Make changes effective */
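
Restated as C, the EFER sequence above is (sketch; nx_supported stands in for the CPUID 0x80000001 EDX bit 20 test done on %edi):

    /* In a TDX guest a WRMSR to EFER would trap, so only write when
     * SCE/NX actually change a bit. */
    u64 old, new;

    rdmsrl(MSR_EFER, old);
    new = old | EFER_SCE;
    if (nx_supported)
            new |= EFER_NX;
    if (new != old)
            wrmsrl(MSR_EFER, new);
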
index 608eb63bf0444c9bf0636715a630e4b445350ff1..a58c6bc1cd68c2be54ebbe5bb149fa9c954b38d7 100644 (file)
@@ -69,6 +69,9 @@ static const __initconst struct idt_data early_idts[] = {
         */
        INTG(X86_TRAP_PF,               asm_exc_page_fault),
 #endif
+#ifdef CONFIG_INTEL_TDX_GUEST
+       INTG(X86_TRAP_VE,               asm_exc_virtualization_exception),
+#endif
 };
 
 /*
index a22deb58f86d2e6d2f5709af95671be9c63c39c6..8b1c45c9cda8771a446aed8b4a62849dda08e77e 100644 (file)
@@ -69,6 +69,7 @@ static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __align
 DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
 static int has_steal_clock = 0;
 
+static int has_guest_poll = 0;
 /*
  * No need for any "IO delay" on KVM
  */
@@ -706,14 +707,26 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
 
 static int kvm_suspend(void)
 {
+       u64 val = 0;
+
        kvm_guest_cpu_offline(false);
 
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
+       if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
+               rdmsrl(MSR_KVM_POLL_CONTROL, val);
+       has_guest_poll = !(val & 1);
+#endif
        return 0;
 }
 
 static void kvm_resume(void)
 {
        kvm_cpu_online(raw_smp_processor_id());
+
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
+       if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll)
+               wrmsrl(MSR_KVM_POLL_CONTROL, 0);
+#endif
 }
 
 static struct syscore_ops kvm_syscore_ops = {
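
The intent of the suspend/resume pair above, sketched out (per KVM's MSR documentation, bit 0 of MSR_KVM_POLL_CONTROL is 1 when host-side polling is enabled; the cpuidle-haltpoll driver clears it, and the setting does not survive suspend):

    u64 val;

    /* Suspend: remember whether the guest had taken over idle polling. */
    rdmsrl(MSR_KVM_POLL_CONTROL, val);
    has_guest_poll = !(val & 1);

    /* Resume: the MSR is back at its reset value, so re-disable host-side
     * polling if the guest owned it before suspend. */
    if (has_guest_poll)
            wrmsrl(MSR_KVM_POLL_CONTROL, 0);
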
index e73f7df362f5d178b008dc8e15ff4eb6d4f7af5e..cec0bfa3bc04fa21fbf741d5c6fca0464956bf1d 100644 (file)
@@ -157,7 +157,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
        struct nmi_desc *desc = nmi_to_desc(type);
        unsigned long flags;
 
-       if (!action->handler)
+       if (WARN_ON_ONCE(!action->handler || !list_empty(&action->list)))
                return -EINVAL;
 
        raw_spin_lock_irqsave(&desc->lock, flags);
@@ -177,7 +177,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
                list_add_rcu(&action->list, &desc->head);
        else
                list_add_tail_rcu(&action->list, &desc->head);
-       
+
        raw_spin_unlock_irqrestore(&desc->lock, flags);
        return 0;
 }
@@ -186,7 +186,7 @@ EXPORT_SYMBOL(__register_nmi_handler);
 void unregister_nmi_handler(unsigned int type, const char *name)
 {
        struct nmi_desc *desc = nmi_to_desc(type);
-       struct nmiaction *n;
+       struct nmiaction *n, *found = NULL;
        unsigned long flags;
 
        raw_spin_lock_irqsave(&desc->lock, flags);
@@ -200,12 +200,16 @@ void unregister_nmi_handler(unsigned int type, const char *name)
                        WARN(in_nmi(),
                                "Trying to free NMI (%s) from NMI context!\n", n->name);
                        list_del_rcu(&n->list);
+                       found = n;
                        break;
                }
        }
 
        raw_spin_unlock_irqrestore(&desc->lock, flags);
-       synchronize_rcu();
+       if (found) {
+               synchronize_rcu();
+               INIT_LIST_HEAD(&found->list);
+       }
 }
 EXPORT_SYMBOL_GPL(unregister_nmi_handler);
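
With this fix, the same nmiaction can be registered again after unregister_nmi_handler() (which now re-initializes its list node), while registering one that is still live WARNs and returns -EINVAL instead of corrupting the list. A kernel-context sketch:

    static int my_nmi(unsigned int cmd, struct pt_regs *regs)
    {
            return NMI_DONE;        /* not our NMI */
    }

    static int my_attach(void)
    {
            /* One static struct nmiaction is bound to this call site, so
             * calling my_attach() after my_detach() re-registers the same
             * action; the INIT_LIST_HEAD() above is what makes that safe. */
            return register_nmi_handler(NMI_LOCAL, my_nmi, 0, "my_nmi");
    }

    static void my_detach(void)
    {
            unregister_nmi_handler(NMI_LOCAL, "my_nmi");
    }

Calling my_attach() twice without a detach in between now fails with a WARN and -EINVAL rather than double-adding the entry.
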
 
index 36e84d9042606476e0c69a5554853feff3052632..319fef37d9dce41f8109c768f58f627649dbac99 100644 (file)
@@ -21,6 +21,7 @@
 #include <asm/sections.h>
 #include <asm/io.h>
 #include <asm/setup_arch.h>
+#include <asm/sev.h>
 
 static struct resource system_rom_resource = {
        .name   = "System ROM",
@@ -197,11 +198,21 @@ static int __init romchecksum(const unsigned char *rom, unsigned long length)
 
 void __init probe_roms(void)
 {
-       const unsigned char *rom;
        unsigned long start, length, upper;
+       const unsigned char *rom;
        unsigned char c;
        int i;
 
+       /*
+        * The ROM memory range is not part of the e820 table and is therefore not
+        * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted
+        * memory, and SNP requires encrypted memory to be validated before access.
+        * Do that here.
+        */
+       snp_prep_memory(video_rom_resource.start,
+                       ((system_rom_resource.end + 1) - video_rom_resource.start),
+                       SNP_PAGE_STATE_PRIVATE);
+
        /* video rom */
        upper = adapter_rom_resources[0].start;
        for (start = video_rom_resource.start; start < upper; start += 2048) {
index b370767f5b191e1556b3665a3b8b5f032decf0e6..cbe6aa3f649d8415cded6748a1eb8a45b4d730e5 100644 (file)
@@ -46,6 +46,7 @@
 #include <asm/proto.h>
 #include <asm/frame.h>
 #include <asm/unwind.h>
+#include <asm/tdx.h>
 
 #include "process.h"
 
@@ -160,6 +161,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
        savesegment(ds, p->thread.ds);
 #else
        p->thread.sp0 = (unsigned long) (childregs + 1);
+       savesegment(gs, p->thread.gs);
        /*
         * Clear all status flags including IF and set fixed bit. 64bit
         * does not have this initialization as the frame does not contain
@@ -191,10 +193,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
        if (sp)
                childregs->sp = sp;
 
-#ifdef CONFIG_X86_32
-       task_user_gs(p) = get_user_gs(current_pt_regs());
-#endif
-
        if (unlikely(p->flags & PF_IO_WORKER)) {
                /*
                 * An IO thread is a user space thread, but it doesn't
@@ -334,7 +332,7 @@ static int get_cpuid_mode(void)
        return !test_thread_flag(TIF_NOCPUID);
 }
 
-static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
+static int set_cpuid_mode(unsigned long cpuid_enabled)
 {
        if (!boot_cpu_has(X86_FEATURE_CPUID_FAULT))
                return -ENODEV;
@@ -405,7 +403,7 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
 }
 
 /**
- * tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
+ * native_tss_update_io_bitmap - Update I/O bitmap before exiting to user mode
  */
 void native_tss_update_io_bitmap(void)
 {
@@ -873,6 +871,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
        } else if (prefer_mwait_c1_over_halt(c)) {
                pr_info("using mwait in idle threads\n");
                x86_idle = mwait_idle;
+       } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+               pr_info("using TDX aware idle routine\n");
+               x86_idle = tdx_safe_halt;
        } else
                x86_idle = default_idle;
 }
@@ -985,20 +986,19 @@ unsigned long __get_wchan(struct task_struct *p)
        return addr;
 }
 
-long do_arch_prctl_common(struct task_struct *task, int option,
-                         unsigned long arg2)
+long do_arch_prctl_common(int option, unsigned long arg2)
 {
        switch (option) {
        case ARCH_GET_CPUID:
                return get_cpuid_mode();
        case ARCH_SET_CPUID:
-               return set_cpuid_mode(task, arg2);
+               return set_cpuid_mode(arg2);
        case ARCH_GET_XCOMP_SUPP:
        case ARCH_GET_XCOMP_PERM:
        case ARCH_REQ_XCOMP_PERM:
        case ARCH_GET_XCOMP_GUEST_PERM:
        case ARCH_REQ_XCOMP_GUEST_PERM:
-               return fpu_xstate_prctl(task, option, arg2);
+               return fpu_xstate_prctl(option, arg2);
        }
 
        return -EINVAL;
index 26edb1cd07a4332e6f9ab500cab316781fc62b26..2f314b170c9f0737d6ee673ed380d894a6978911 100644 (file)
@@ -63,10 +63,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned short gs;
 
-       if (user_mode(regs))
-               gs = get_user_gs(regs);
-       else
-               savesegment(gs, gs);
+       savesegment(gs, gs);
 
        show_ip(regs, log_lvl);
 
@@ -114,7 +111,7 @@ void release_thread(struct task_struct *dead_task)
 void
 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 {
-       set_user_gs(regs, 0);
+       loadsegment(gs, 0);
        regs->fs                = 0;
        regs->ds                = __USER_DS;
        regs->es                = __USER_DS;
@@ -177,7 +174,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         * used %fs or %gs (it does not today), or if the kernel is
         * running inside of a hypervisor layer.
         */
-       lazy_save_gs(prev->gs);
+       savesegment(gs, prev->gs);
 
        /*
         * Load the per-thread Thread-Local Storage descriptor.
@@ -208,7 +205,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         * Restore %gs if needed (which is common)
         */
        if (prev->gs | next->gs)
-               lazy_load_gs(next->gs);
+               loadsegment(gs, next->gs);
 
        this_cpu_write(current_task, next_p);
 
@@ -222,5 +219,5 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 {
-       return do_arch_prctl_common(current, option, arg2);
+       return do_arch_prctl_common(option, arg2);
 }
index e459253649be23f6ad942a0ae76b7d3987aa9215..1962008fe7437f89be48e2511199e0356348573f 100644 (file)
@@ -844,7 +844,7 @@ SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 
        ret = do_arch_prctl_64(current, option, arg2);
        if (ret == -EINVAL)
-               ret = do_arch_prctl_common(current, option, arg2);
+               ret = do_arch_prctl_common(option, arg2);
 
        return ret;
 }
@@ -852,7 +852,7 @@ SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 #ifdef CONFIG_IA32_EMULATION
 COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 {
-       return do_arch_prctl_common(current, option, arg2);
+       return do_arch_prctl_common(option, arg2);
 }
 #endif
 
index 98d10ef605717820e802540b86fdb5c1b051bd38..37c12fb92906b64cfb2e0b7e96a20a2ffa57cfca 100644 (file)
@@ -170,9 +170,9 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
                retval = *pt_regs_access(task_pt_regs(task), offset);
        else {
                if (task == current)
-                       retval = get_user_gs(task_pt_regs(task));
+                       savesegment(gs, retval);
                else
-                       retval = task_user_gs(task);
+                       retval = task->thread.gs;
        }
        return retval;
 }
@@ -210,7 +210,7 @@ static int set_segment_reg(struct task_struct *task,
                break;
 
        case offsetof(struct user_regs_struct, gs):
-               task_user_gs(task) = value;
+               task->thread.gs = value;
        }
 
        return 0;
index c95b9ac5a4571a30181af4a3e4082bd7af7ed601..249981bf3d8aa4a5a8e973142b7f81bb65d9f3db 100644 (file)
@@ -756,6 +756,30 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
        return 0;
 }
 
+void x86_configure_nx(void)
+{
+       if (boot_cpu_has(X86_FEATURE_NX))
+               __supported_pte_mask |= _PAGE_NX;
+       else
+               __supported_pte_mask &= ~_PAGE_NX;
+}
+
+static void __init x86_report_nx(void)
+{
+       if (!boot_cpu_has(X86_FEATURE_NX)) {
+               printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
+                      "missing in CPU!\n");
+       } else {
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+               printk(KERN_INFO "NX (Execute Disable) protection: active\n");
+#else
+               /* 32bit non-PAE kernel, NX cannot be used */
+               printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
+                      "cannot be enabled: non-PAE kernel!\n");
+#endif
+       }
+}
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -896,9 +920,7 @@ void __init setup_arch(char **cmdline_p)
        /*
         * x86_configure_nx() is called before parse_early_param() to detect
         * whether hardware doesn't support NX (so that the early EHCI debug
-        * console setup can safely call set_fixmap()). It may then be called
-        * again from within noexec_setup() during parsing early parameters
-        * to honor the respective command line option.
+        * console setup can safely call set_fixmap()).
         */
        x86_configure_nx();
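
The capability x86_configure_nx() keys off is CPUID.80000001H:EDX bit 20, the same bit the EFER setup in head_64.S tests with "btl $20, %edi". A standalone sketch:

    #include <stdbool.h>
    #include <cpuid.h>

    static bool cpu_has_nx(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
                    return false;
            return edx & (1u << 20);        /* NX / Execute Disable */
    }
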
 
index ce987688bbc0520382fb0e2a8cc6db100be35c49..b478edf43bec2a8f9eb50c2f68316f71aca3605a 100644 (file)
 #define has_cpuflag(f) boot_cpu_has(f)
 #endif
 
+/* I/O parameters for CPUID-related helpers */
+struct cpuid_leaf {
+       u32 fn;
+       u32 subfn;
+       u32 eax;
+       u32 ebx;
+       u32 ecx;
+       u32 edx;
+};
+
+/*
+ * Individual entries of the SNP CPUID table, as defined by the SNP
+ * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
+ */
+struct snp_cpuid_fn {
+       u32 eax_in;
+       u32 ecx_in;
+       u64 xcr0_in;
+       u64 xss_in;
+       u32 eax;
+       u32 ebx;
+       u32 ecx;
+       u32 edx;
+       u64 __reserved;
+} __packed;
+
+/*
+ * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
+ * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
+ * of 64 entries per CPUID table.
+ */
+#define SNP_CPUID_COUNT_MAX 64
+
+struct snp_cpuid_table {
+       u32 count;
+       u32 __reserved1;
+       u64 __reserved2;
+       struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
+} __packed;
+
+/*
+ * Since feature negotiation related variables are set early in the boot
+ * process they must reside in the .data section so as not to be zeroed
+ * out when the .bss section is later cleared.
+ *
+ * GHCB protocol version negotiated with the hypervisor.
+ */
+static u16 ghcb_version __ro_after_init;
+
+/* Copy of the SNP firmware's CPUID page. */
+static struct snp_cpuid_table cpuid_table_copy __ro_after_init;
+
+/*
+ * These will be initialized based on CPUID table so that non-present
+ * all-zero leaves (for sparse tables) can be differentiated from
+ * invalid/out-of-range leaves. This is needed since all-zero leaves
+ * still need to be post-processed.
+ */
+static u32 cpuid_std_range_max __ro_after_init;
+static u32 cpuid_hyp_range_max __ro_after_init;
+static u32 cpuid_ext_range_max __ro_after_init;
+
 static bool __init sev_es_check_cpu_features(void)
 {
        if (!has_cpuflag(X86_FEATURE_RDRAND)) {
@@ -24,15 +86,12 @@ static bool __init sev_es_check_cpu_features(void)
        return true;
 }
 
-static void __noreturn sev_es_terminate(unsigned int reason)
+static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason)
 {
        u64 val = GHCB_MSR_TERM_REQ;
 
-       /*
-        * Tell the hypervisor what went wrong - only reason-set 0 is
-        * currently supported.
-        */
-       val |= GHCB_SEV_TERM_REASON(0, reason);
+       /* Tell the hypervisor what went wrong. */
+       val |= GHCB_SEV_TERM_REASON(set, reason);
 
        /* Request Guest Termination from Hypervisor */
        sev_es_wr_ghcb_msr(val);
@@ -42,6 +101,42 @@ static void __noreturn sev_es_terminate(unsigned int reason)
                asm volatile("hlt\n" : : : "memory");
 }
 
+/*
+ * The hypervisor features are available from GHCB version 2 onward.
+ */
+static u64 get_hv_features(void)
+{
+       u64 val;
+
+       if (ghcb_version < 2)
+               return 0;
+
+       sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);
+       VMGEXIT();
+
+       val = sev_es_rd_ghcb_msr();
+       if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP)
+               return 0;
+
+       return GHCB_MSR_HV_FT_RESP_VAL(val);
+}
+
+static void snp_register_ghcb_early(unsigned long paddr)
+{
+       unsigned long pfn = paddr >> PAGE_SHIFT;
+       u64 val;
+
+       sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
+       VMGEXIT();
+
+       val = sev_es_rd_ghcb_msr();
+
+       /* If the response GPA is not ours then abort the guest */
+       if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
+           (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
+}
+
 static bool sev_es_negotiate_protocol(void)
 {
        u64 val;
@@ -54,10 +149,12 @@ static bool sev_es_negotiate_protocol(void)
        if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
                return false;
 
-       if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTO_OUR ||
-           GHCB_MSR_PROTO_MIN(val) > GHCB_PROTO_OUR)
+       if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
+           GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
                return false;
 
+       ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);
+
        return true;
 }
 
@@ -104,10 +201,7 @@ static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt
 
        if (ret == 1) {
                u64 info = ghcb->save.sw_exit_info_2;
-               unsigned long v;
-
-               info = ghcb->save.sw_exit_info_2;
-               v = info & SVM_EVTINJ_VEC_MASK;
+               unsigned long v = info & SVM_EVTINJ_VEC_MASK;
 
                /* Check if exception information from hypervisor is sane. */
                if ((info & SVM_EVTINJ_VALID) &&
@@ -130,7 +224,7 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
                                   u64 exit_info_1, u64 exit_info_2)
 {
        /* Fill in protocol and format specifiers */
-       ghcb->protocol_version = GHCB_PROTOCOL_MAX;
+       ghcb->protocol_version = ghcb_version;
        ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;
 
        ghcb_set_sw_exit_code(ghcb, exit_code);
@@ -150,6 +244,290 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
        return verify_exception_info(ghcb, ctxt);
 }
 
+static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
+{
+       u64 val;
+
+       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx));
+       VMGEXIT();
+       val = sev_es_rd_ghcb_msr();
+       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
+               return -EIO;
+
+       *reg = (val >> 32);
+
+       return 0;
+}
+
+static int sev_cpuid_hv(struct cpuid_leaf *leaf)
+{
+       int ret;
+
+       /*
+        * MSR protocol does not support fetching non-zero subfunctions, but is
+        * sufficient to handle current early-boot cases. Should that change,
+        * make sure to report an error rather than ignoring the index and
+        * grabbing random values. If this issue arises in the future, handling
+        * can be added here to use GHCB-page protocol for cases that occur late
+        * enough in boot that GHCB page is available.
+        */
+       if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn)
+               return -EINVAL;
+
+       ret =         __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax);
+       ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx);
+       ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx);
+       ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx);
+
+       return ret;
+}
+
+/*
+ * This may be called early while still running on the initial identity
+ * mapping. Use RIP-relative addressing to obtain the correct address
+ * while running with the initial identity mapping as well as the
+ * switch-over to kernel virtual addresses later.
+ */
+static const struct snp_cpuid_table *snp_cpuid_get_table(void)
+{
+       void *ptr;
+
+       asm ("lea cpuid_table_copy(%%rip), %0"
+            : "=r" (ptr)
+            : "p" (&cpuid_table_copy));
+
+       return ptr;
+}
+
+/*
+ * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
+ * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
+ * and 1 based on the corresponding features enabled by a particular
+ * combination of XCR0 and XSS registers so that a guest can look up the
+ * version corresponding to the features currently enabled in its XCR0/XSS
+ * registers. The only values that differ between these versions/table
+ * entries is the enabled XSAVE area size advertised via EBX.
+ *
+ * While hypervisors may choose to make use of this support, it is more
+ * robust/secure for a guest to simply find the entry corresponding to the
+ * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
+ * XSAVE area size using subfunctions 2 through 64, as documented in APM
+ * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
+ *
+ * Since base/legacy XSAVE area size is documented as 0x240, use that value
+ * directly rather than relying on the base size in the CPUID table.
+ *
+ * Return: XSAVE area size on success, 0 otherwise.
+ */
+static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
+{
+       const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+       u64 xfeatures_found = 0;
+       u32 xsave_size = 0x240;
+       int i;
+
+       for (i = 0; i < cpuid_table->count; i++) {
+               const struct snp_cpuid_fn *e = &cpuid_table->fn[i];
+
+               if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
+                       continue;
+               if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
+                       continue;
+               if (xfeatures_found & (BIT_ULL(e->ecx_in)))
+                       continue;
+
+               xfeatures_found |= (BIT_ULL(e->ecx_in));
+
+               if (compacted)
+                       xsave_size += e->eax;
+               else
+                       xsave_size = max(xsave_size, e->eax + e->ebx);
+       }
+
+       /*
+        * Either the guest set unsupported XCR0/XSS bits, or the corresponding
+        * entries in the CPUID table were not present. This is not a valid
+        * state to be in.
+        */
+       if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
+               return 0;
+
+       return xsave_size;
+}
+
+static bool
+snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
+{
+       const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+       int i;
+
+       for (i = 0; i < cpuid_table->count; i++) {
+               const struct snp_cpuid_fn *e = &cpuid_table->fn[i];
+
+               if (e->eax_in != leaf->fn)
+                       continue;
+
+               if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
+                       continue;
+
+               /*
+                * For 0xD subfunctions 0 and 1, only use the entry corresponding
+                * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
+                * See the comments above snp_cpuid_calc_xsave_size() for more
+                * details.
+                */
+               if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
+                       if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
+                               continue;
+
+               leaf->eax = e->eax;
+               leaf->ebx = e->ebx;
+               leaf->ecx = e->ecx;
+               leaf->edx = e->edx;
+
+               return true;
+       }
+
+       return false;
+}
+
+static void snp_cpuid_hv(struct cpuid_leaf *leaf)
+{
+       if (sev_cpuid_hv(leaf))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
+}
+
+static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
+{
+       struct cpuid_leaf leaf_hv = *leaf;
+
+       switch (leaf->fn) {
+       case 0x1:
+               snp_cpuid_hv(&leaf_hv);
+
+               /* initial APIC ID */
+               leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
+               /* APIC enabled bit */
+               leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));
+
+               /* OSXSAVE enabled bit */
+               if (native_read_cr4() & X86_CR4_OSXSAVE)
+                       leaf->ecx |= BIT(27);
+               break;
+       case 0x7:
+               /* OSPKE enabled bit */
+               leaf->ecx &= ~BIT(4);
+               if (native_read_cr4() & X86_CR4_PKE)
+                       leaf->ecx |= BIT(4);
+               break;
+       case 0xB:
+               leaf_hv.subfn = 0;
+               snp_cpuid_hv(&leaf_hv);
+
+               /* extended APIC ID */
+               leaf->edx = leaf_hv.edx;
+               break;
+       case 0xD: {
+               bool compacted = false;
+               u64 xcr0 = 1, xss = 0;
+               u32 xsave_size;
+
+               if (leaf->subfn != 0 && leaf->subfn != 1)
+                       return 0;
+
+               if (native_read_cr4() & X86_CR4_OSXSAVE)
+                       xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+               if (leaf->subfn == 1) {
+                       /* Get XSS value if XSAVES is enabled. */
+                       if (leaf->eax & BIT(3)) {
+                               unsigned long lo, hi;
+
+                               asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
+                                                    : "c" (MSR_IA32_XSS));
+                               xss = (hi << 32) | lo;
+                       }
+
+                       /*
+                        * The PPR and APM aren't clear on what size should be
+                        * encoded in 0xD:0x1:EBX when compaction is not enabled
+                        * by either XSAVEC (feature bit 1) or XSAVES (feature
+                        * bit 3) since SNP-capable hardware has these feature
+                        * bits fixed as 1. KVM sets it to 0 in this case, but
+                        * to avoid this becoming an issue it's safer to simply
+                        * treat this as unsupported for SNP guests.
+                        */
+                       if (!(leaf->eax & (BIT(1) | BIT(3))))
+                               return -EINVAL;
+
+                       compacted = true;
+               }
+
+               xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
+               if (!xsave_size)
+                       return -EINVAL;
+
+               leaf->ebx = xsave_size;
+               }
+               break;
+       case 0x8000001E:
+               snp_cpuid_hv(&leaf_hv);
+
+               /* extended APIC ID */
+               leaf->eax = leaf_hv.eax;
+               /* compute ID */
+               leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));
+               /* node ID */
+               leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
+               break;
+       default:
+               /* No fix-ups needed, use values as-is. */
+               break;
+       }
+
+       return 0;
+}
+
+/*
+ * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
+ * should be treated as fatal by caller.
+ */
+static int snp_cpuid(struct cpuid_leaf *leaf)
+{
+       const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+
+       if (!cpuid_table->count)
+               return -EOPNOTSUPP;
+
+       if (!snp_cpuid_get_validated_func(leaf)) {
+               /*
+                * Some hypervisors will avoid keeping track of CPUID entries
+                * where all values are zero, since they can be handled the
+                * same as out-of-range values (all-zero). This is useful here
+                * as well as it allows virtually all guest configurations to
+                * work using a single SNP CPUID table.
+                *
+                * To allow for this, there is a need to distinguish between
+                * out-of-range entries and in-range zero entries, since the
+                * CPUID table entries are only a template that may need to be
+                * augmented with additional values for things like
+                * CPU-specific information during post-processing. So if it's
+                * not in the table, set the values to zero. Then, if they are
+                * within a valid CPUID range, proceed with post-processing
+                * using zeros as the initial values. Otherwise, skip
+                * post-processing and just return zeros immediately.
+                */
+               leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;
+
+               /* Skip post-processing for out-of-range zero leaves. */
+               if (!(leaf->fn <= cpuid_std_range_max ||
+                     (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
+                     (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
+                       return 0;
+       }
+
+       return snp_cpuid_postprocess(leaf);
+}
+
 /*
  * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
  * page yet, so it only supports the MSR based communication with the
@@ -157,40 +535,33 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
  */
 void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
 {
+       unsigned int subfn = lower_bits(regs->cx, 32);
        unsigned int fn = lower_bits(regs->ax, 32);
-       unsigned long val;
+       struct cpuid_leaf leaf;
+       int ret;
 
        /* Only CPUID is supported via MSR protocol */
        if (exit_code != SVM_EXIT_CPUID)
                goto fail;
 
-       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
-       VMGEXIT();
-       val = sev_es_rd_ghcb_msr();
-       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
-               goto fail;
-       regs->ax = val >> 32;
+       leaf.fn = fn;
+       leaf.subfn = subfn;
 
-       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX));
-       VMGEXIT();
-       val = sev_es_rd_ghcb_msr();
-       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
-               goto fail;
-       regs->bx = val >> 32;
+       ret = snp_cpuid(&leaf);
+       if (!ret)
+               goto cpuid_done;
 
-       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX));
-       VMGEXIT();
-       val = sev_es_rd_ghcb_msr();
-       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
+       if (ret != -EOPNOTSUPP)
                goto fail;
-       regs->cx = val >> 32;
 
-       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX));
-       VMGEXIT();
-       val = sev_es_rd_ghcb_msr();
-       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
+       if (sev_cpuid_hv(&leaf))
                goto fail;
-       regs->dx = val >> 32;
+
+cpuid_done:
+       regs->ax = leaf.eax;
+       regs->bx = leaf.ebx;
+       regs->cx = leaf.ecx;
+       regs->dx = leaf.edx;
 
        /*
         * This is a VC handler and the #VC is only raised when SEV-ES is
@@ -221,7 +592,7 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
 
 fail:
        /* Terminate the guest */
-       sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
+       sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
 }
 
 static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
@@ -481,12 +852,37 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
        return ret;
 }
 
+static int vc_handle_cpuid_snp(struct pt_regs *regs)
+{
+       struct cpuid_leaf leaf;
+       int ret;
+
+       leaf.fn = regs->ax;
+       leaf.subfn = regs->cx;
+       ret = snp_cpuid(&leaf);
+       if (!ret) {
+               regs->ax = leaf.eax;
+               regs->bx = leaf.ebx;
+               regs->cx = leaf.ecx;
+               regs->dx = leaf.edx;
+       }
+
+       return ret;
+}
+
 static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
                                      struct es_em_ctxt *ctxt)
 {
        struct pt_regs *regs = ctxt->regs;
        u32 cr4 = native_read_cr4();
        enum es_result ret;
+       int snp_cpuid_ret;
+
+       snp_cpuid_ret = vc_handle_cpuid_snp(regs);
+       if (!snp_cpuid_ret)
+               return ES_OK;
+       if (snp_cpuid_ret != -EOPNOTSUPP)
+               return ES_VMM_ERROR;
 
        ghcb_set_rax(ghcb, regs->ax);
        ghcb_set_rcx(ghcb, regs->cx);
@@ -538,3 +934,67 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
 
        return ES_OK;
 }
+
+struct cc_setup_data {
+       struct setup_data header;
+       u32 cc_blob_address;
+};
+
+/*
+ * Search for a Confidential Computing blob passed in as a setup_data entry
+ * via the Linux Boot Protocol.
+ */
+static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
+{
+       struct cc_setup_data *sd = NULL;
+       struct setup_data *hdr;
+
+       hdr = (struct setup_data *)bp->hdr.setup_data;
+
+       while (hdr) {
+               if (hdr->type == SETUP_CC_BLOB) {
+                       sd = (struct cc_setup_data *)hdr;
+                       return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address;
+               }
+               hdr = (struct setup_data *)hdr->next;
+       }
+
+       return NULL;
+}
+
+/*
+ * Initialize the kernel's copy of the SNP CPUID table, and set up the
+ * pointer that will be used to access it.
+ *
+ * Maintaining a direct mapping of the SNP CPUID table used by firmware would
+ * be possible as an alternative, but the approach is brittle since the
+ * mapping needs to be updated in sync with all the changes to virtual memory
+ * layout and related mapping facilities throughout the boot process.
+ */
+static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
+{
+       const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
+       int i;
+
+       if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE)
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);
+
+       cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys;
+       if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX)
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);
+
+       cpuid_table = snp_cpuid_get_table();
+       memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table));
+
+       /* Initialize CPUID ranges for range-checking. */
+       for (i = 0; i < cpuid_table->count; i++) {
+               const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
+
+               if (fn->eax_in == 0x0)
+                       cpuid_std_range_max = fn->eax;
+               else if (fn->eax_in == 0x40000000)
+                       cpuid_hyp_range_max = fn->eax;
+               else if (fn->eax_in == 0x80000000)
+                       cpuid_ext_range_max = fn->eax;
+       }
+}
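
For reference, the MSR-protocol request issued per register by __sev_cpuid_hv() above packs its fields like this (sketch following the GHCB specification's MSR protocol; one VMGEXIT per register):

    /* Request code 0x004 in bits 11:0, register selector (0=EAX, 1=EBX,
     * 2=ECX, 3=EDX) in bits 31:30, CPUID function in bits 63:32. The
     * response carries the requested register value in bits 63:32. */
    static inline unsigned long long ghcb_cpuid_req(unsigned int fn,
                                                    unsigned int reg_idx)
    {
            return 0x004ULL |
                   ((unsigned long long)(reg_idx & 3) << 30) |
                   ((unsigned long long)fn << 32);
    }
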
index e6d316a01fdd464bc2307e84f8362a8638712778..c05f0124c41096d948fa2be351f92f4c266c2cbe 100644 (file)
 #include <linux/memblock.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/cpumask.h>
+#include <linux/efi.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/stacktrace.h>
 #include <asm/svm.h>
 #include <asm/smp.h>
 #include <asm/cpu.h>
+#include <asm/apic.h>
+#include <asm/cpuid.h>
+#include <asm/cmdline.h>
 
 #define DR7_RESET_VALUE        0x400
 
+/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
+#define AP_INIT_CS_LIMIT               0xffff
+#define AP_INIT_DS_LIMIT               0xffff
+#define AP_INIT_LDTR_LIMIT             0xffff
+#define AP_INIT_GDTR_LIMIT             0xffff
+#define AP_INIT_IDTR_LIMIT             0xffff
+#define AP_INIT_TR_LIMIT               0xffff
+#define AP_INIT_RFLAGS_DEFAULT         0x2
+#define AP_INIT_DR6_DEFAULT            0xffff0ff0
+#define AP_INIT_GPAT_DEFAULT           0x0007040600070406ULL
+#define AP_INIT_XCR0_DEFAULT           0x1
+#define AP_INIT_X87_FTW_DEFAULT                0x5555
+#define AP_INIT_X87_FCW_DEFAULT                0x0040
+#define AP_INIT_CR0_DEFAULT            0x60000010
+#define AP_INIT_MXCSR_DEFAULT          0x1f80
+
 /* For early boot hypervisor communication in SEV-ES enabled guests */
 static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
 
@@ -41,7 +64,10 @@ static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
  * Needs to be in the .data section because we need it NULL before bss is
  * cleared
  */
-static struct ghcb __initdata *boot_ghcb;
+static struct ghcb *boot_ghcb __section(".data");
+
+/* Bitmap of SEV features supported by the hypervisor */
+static u64 sev_hv_features __ro_after_init;
 
 /* #VC handler runtime per-CPU data */
 struct sev_es_runtime_data {
@@ -87,6 +113,15 @@ struct ghcb_state {
 static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
 DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
 
+static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
+
+struct sev_config {
+       __u64 debug             : 1,
+             __reserved        : 63;
+};
+
+static struct sev_config sev_cfg __read_mostly;
+
 static __always_inline bool on_vc_stack(struct pt_regs *regs)
 {
        unsigned long sp = regs->sp;
@@ -523,13 +558,68 @@ void noinstr __sev_es_nmi_complete(void)
        __sev_put_ghcb(&state);
 }
 
-static u64 get_jump_table_addr(void)
+static u64 __init get_secrets_page(void)
+{
+       u64 pa_data = boot_params.cc_blob_address;
+       struct cc_blob_sev_info info;
+       void *map;
+
+       /*
+        * The CC blob contains the address of the secrets page, check if the
+        * blob is present.
+        */
+       if (!pa_data)
+               return 0;
+
+       map = early_memremap(pa_data, sizeof(info));
+       if (!map) {
+               pr_err("Unable to locate SNP secrets page: failed to map the Confidential Computing blob.\n");
+               return 0;
+       }
+       memcpy(&info, map, sizeof(info));
+       early_memunmap(map, sizeof(info));
+
+       /* smoke-test the secrets page passed */
+       if (!info.secrets_phys || info.secrets_len != PAGE_SIZE)
+               return 0;
+
+       return info.secrets_phys;
+}
+
+static u64 __init get_snp_jump_table_addr(void)
+{
+       struct snp_secrets_page_layout *layout;
+       void __iomem *mem;
+       u64 pa, addr;
+
+       pa = get_secrets_page();
+       if (!pa)
+               return 0;
+
+       mem = ioremap_encrypted(pa, PAGE_SIZE);
+       if (!mem) {
+               pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
+               return 0;
+       }
+
+       layout = (__force struct snp_secrets_page_layout *)mem;
+
+       addr = layout->os_area.ap_jump_table_pa;
+       iounmap(mem);
+
+       return addr;
+}
+
+static u64 __init get_jump_table_addr(void)
 {
        struct ghcb_state state;
        unsigned long flags;
        struct ghcb *ghcb;
        u64 ret = 0;
 
+       if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return get_snp_jump_table_addr();
+
        local_irq_save(flags);
 
        ghcb = __sev_get_ghcb(&state);
@@ -553,7 +643,496 @@ static u64 get_jump_table_addr(void)
        return ret;
 }
 
-int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
+static void pvalidate_pages(unsigned long vaddr, unsigned int npages, bool validate)
+{
+       unsigned long vaddr_end;
+       int rc;
+
+       vaddr = vaddr & PAGE_MASK;
+       vaddr_end = vaddr + (npages << PAGE_SHIFT);
+
+       while (vaddr < vaddr_end) {
+               rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
+               if (WARN(rc, "Failed to validate address 0x%lx ret %d", vaddr, rc))
+                       sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+
+               vaddr = vaddr + PAGE_SIZE;
+       }
+}
+
+static void __init early_set_pages_state(unsigned long paddr, unsigned int npages, enum psc_op op)
+{
+       unsigned long paddr_end;
+       u64 val;
+
+       paddr = paddr & PAGE_MASK;
+       paddr_end = paddr + (npages << PAGE_SHIFT);
+
+       while (paddr < paddr_end) {
+               /*
+                * Use the MSR protocol because this function can be called before
+                * the GHCB is established.
+                */
+               sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
+               VMGEXIT();
+
+               val = sev_es_rd_ghcb_msr();
+
+               if (WARN(GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP,
+                        "Wrong PSC response code: 0x%x\n",
+                        (unsigned int)GHCB_RESP_CODE(val)))
+                       goto e_term;
+
+               if (WARN(GHCB_MSR_PSC_RESP_VAL(val),
+                        "Failed to change page state to '%s' paddr 0x%lx error 0x%llx\n",
+                        op == SNP_PAGE_STATE_PRIVATE ? "private" : "shared",
+                        paddr, GHCB_MSR_PSC_RESP_VAL(val)))
+                       goto e_term;
+
+               paddr = paddr + PAGE_SIZE;
+       }
+
+       return;
+
+e_term:
+       sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+}
+
+void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+                                        unsigned int npages)
+{
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return;
+
+       /*
+        * Ask the hypervisor to mark the memory pages as private in the RMP
+        * table.
+        */
+       early_set_pages_state(paddr, npages, SNP_PAGE_STATE_PRIVATE);
+
+       /* Validate the memory pages after they've been added in the RMP table. */
+       pvalidate_pages(vaddr, npages, true);
+}
+
+void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+                                       unsigned int npages)
+{
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return;
+
+       /* Invalidate the memory pages before they are marked shared in the RMP table. */
+       pvalidate_pages(vaddr, npages, false);
+
+       /* Ask the hypervisor to mark the memory pages shared in the RMP table. */
+       early_set_pages_state(paddr, npages, SNP_PAGE_STATE_SHARED);
+}
+
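+/*
+ * Round the byte count up to whole 4K pages before the state change;
+ * e.g. sz = 0x5800 yields PAGE_ALIGN(0x5800) >> PAGE_SHIFT = 6 pages.
+ */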
+void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op)
+{
+       unsigned long vaddr, npages;
+
+       vaddr = (unsigned long)__va(paddr);
+       npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+
+       if (op == SNP_PAGE_STATE_PRIVATE)
+               early_snp_set_memory_private(vaddr, paddr, npages);
+       else if (op == SNP_PAGE_STATE_SHARED)
+               early_snp_set_memory_shared(vaddr, paddr, npages);
+       else
+               WARN(1, "invalid memory op %d\n", op);
+}
+
+static int vmgexit_psc(struct snp_psc_desc *desc)
+{
+       int cur_entry, end_entry, ret = 0;
+       struct snp_psc_desc *data;
+       struct ghcb_state state;
+       struct es_em_ctxt ctxt;
+       unsigned long flags;
+       struct ghcb *ghcb;
+
+       /*
+        * __sev_get_ghcb() needs to run with IRQs disabled because it is using
+        * a per-CPU GHCB.
+        */
+       local_irq_save(flags);
+
+       ghcb = __sev_get_ghcb(&state);
+       if (!ghcb) {
+               ret = 1;
+               goto out_unlock;
+       }
+
+       /* Copy the input desc into GHCB shared buffer */
+       data = (struct snp_psc_desc *)ghcb->shared_buffer;
+       memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
+
+       /*
+        * As per the GHCB specification, the hypervisor can resume the guest
+        * before processing all the entries. Check whether all the entries
+        * are processed. If not, then keep retrying. Note, the hypervisor
+        * will update the data memory directly to indicate the status, so
+        * reference the data->hdr everywhere.
+        *
+        * The strategy here is to wait for the hypervisor to change the page
+        * state in the RMP table before guest accesses the memory pages. If the
+        * page state change was not successful, then later memory access will
+        * result in a crash.
+        */
+       cur_entry = data->hdr.cur_entry;
+       end_entry = data->hdr.end_entry;
+
+       while (data->hdr.cur_entry <= data->hdr.end_entry) {
+               ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
+
+               /* The hypervisor advances data->hdr.cur_entry as it processes entries. */
+               ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
+
+               /*
+                * Page State Change VMGEXIT can pass error code through
+                * exit_info_2.
+                */
+               if (WARN(ret || ghcb->save.sw_exit_info_2,
+                        "SNP: PSC failed ret=%d exit_info_2=%llx\n",
+                        ret, ghcb->save.sw_exit_info_2)) {
+                       ret = 1;
+                       goto out;
+               }
+
+               /* Verify that reserved bit is not set */
+               if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
+                       ret = 1;
+                       goto out;
+               }
+
+               /*
+                * Sanity check that entry processing is not going backwards.
+                * This will happen only if the hypervisor is tricking us.
+                */
+               if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
+"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
+                        end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
+                       ret = 1;
+                       goto out;
+               }
+       }
+
+out:
+       __sev_put_ghcb(&state);
+
+out_unlock:
+       local_irq_restore(flags);
+
+       return ret;
+}
+
+static void __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
+                             unsigned long vaddr_end, int op)
+{
+       struct psc_hdr *hdr;
+       struct psc_entry *e;
+       unsigned long pfn;
+       int i;
+
+       hdr = &data->hdr;
+       e = data->entries;
+
+       memset(data, 0, sizeof(*data));
+       i = 0;
+
+       while (vaddr < vaddr_end) {
+               if (is_vmalloc_addr((void *)vaddr))
+                       pfn = vmalloc_to_pfn((void *)vaddr);
+               else
+                       pfn = __pa(vaddr) >> PAGE_SHIFT;
+
+               e->gfn = pfn;
+               e->operation = op;
+               hdr->end_entry = i;
+
+               /*
+                * Current SNP implementation doesn't keep track of the RMP page
+                * size so use 4K for simplicity.
+                */
+               e->pagesize = RMP_PG_SIZE_4K;
+
+               vaddr = vaddr + PAGE_SIZE;
+               e++;
+               i++;
+       }
+
+       if (vmgexit_psc(data))
+               sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+}
+
+static void set_pages_state(unsigned long vaddr, unsigned int npages, int op)
+{
+       unsigned long vaddr_end, next_vaddr;
+       struct snp_psc_desc *desc;
+
+       desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT);
+       if (!desc)
+               panic("SNP: failed to allocate memory for PSC descriptor\n");
+
+       vaddr = vaddr & PAGE_MASK;
+       vaddr_end = vaddr + (npages << PAGE_SHIFT);
+
+       while (vaddr < vaddr_end) {
+               /* Calculate the last vaddr that fits in one struct snp_psc_desc. */
+               next_vaddr = min_t(unsigned long, vaddr_end,
+                                  (VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr);
+
+               __set_pages_state(desc, vaddr, next_vaddr, op);
+
+               vaddr = next_vaddr;
+       }
+
+       kfree(desc);
+}
+
+void snp_set_memory_shared(unsigned long vaddr, unsigned int npages)
+{
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return;
+
+       pvalidate_pages(vaddr, npages, false);
+
+       set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
+}
+
+void snp_set_memory_private(unsigned long vaddr, unsigned int npages)
+{
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return;
+
+       set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
+
+       pvalidate_pages(vaddr, npages, true);
+}
+
+static int snp_set_vmsa(void *va, bool vmsa)
+{
+       u64 attrs;
+
+       /*
+        * Running at VMPL0 allows the kernel to change the VMSA bit for a page
+        * using the RMPADJUST instruction. However, for the instruction to
+        * succeed it must target the permissions of a lesser privileged
+        * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST
+        * instruction in the AMD64 APM Volume 3).
+        */
+       attrs = 1;
+       if (vmsa)
+               attrs |= RMPADJUST_VMSA_PAGE_BIT;
+
+       return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
+}
+
+#define __ATTR_BASE            (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
+#define INIT_CS_ATTRIBS                (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
+#define INIT_DS_ATTRIBS                (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
+
+#define INIT_LDTR_ATTRIBS      (SVM_SELECTOR_P_MASK | 2)
+#define INIT_TR_ATTRIBS                (SVM_SELECTOR_P_MASK | 3)
+
+static void *snp_alloc_vmsa_page(void)
+{
+       struct page *p;
+
+       /*
+        * Allocate VMSA page to work around the SNP erratum where the CPU will
+        * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
+        * collides with the RMP entry of the VMSA page. The recommended workaround
+        * is to not use a large page.
+        *
+        * Allocate an 8k page which is also 8k-aligned.
+        */
+       p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
+       if (!p)
+               return NULL;
+
+       split_page(p, 1);
+
+       /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
+       __free_page(p);
+
+       return page_address(p + 1);
+}
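
The allocation trick above rests on a simple parity argument: of two
adjacent 4K pages in an 8K-aligned block, the second always sits at an odd
page index and therefore can never be 2M- (512-page) or 1G-aligned. A
user-space sketch, with aligned_alloc() standing in for the kernel's
alloc_pages()/split_page():

#include <stdio.h>
#include <stdlib.h>

#define SZ_4K 4096UL

int main(void)
{
        /* 8K-aligned 8K block, as alloc_pages(..., 1) would return. */
        void *block = aligned_alloc(2 * SZ_4K, 2 * SZ_4K);
        char *vmsa;

        if (!block)
                return 1;

        vmsa = (char *)block + SZ_4K;   /* keep the second 4K page */

        /* An odd page index is never divisible by 512, so never 2M-aligned. */
        printf("page index %lu (parity %lu)\n",
               (unsigned long)vmsa / SZ_4K,
               ((unsigned long)vmsa / SZ_4K) & 1);

        free(block);    /* user space frees the whole block instead */
        return 0;
}
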
+
+static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa)
+{
+       int err;
+
+       err = snp_set_vmsa(vmsa, false);
+       if (err)
+               pr_err("clear VMSA page failed (%u), leaking page\n", err);
+       else
+               free_page((unsigned long)vmsa);
+}
+
+static int wakeup_cpu_via_vmgexit(int apic_id, unsigned long start_ip)
+{
+       struct sev_es_save_area *cur_vmsa, *vmsa;
+       struct ghcb_state state;
+       unsigned long flags;
+       struct ghcb *ghcb;
+       u8 sipi_vector;
+       int cpu, ret;
+       u64 cr4;
+
+       /*
+        * The hypervisor SNP feature support check has happened earlier;
+        * just check for the AP_CREATION one here.
+        */
+       if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
+               return -EOPNOTSUPP;
+
+       /*
+        * Verify the desired start IP against the known trampoline start IP
+        * to catch any new trampolines that may be introduced in the future
+        * and would require a new protected guest entry point.
+        */
+       if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
+                     "Unsupported SNP start_ip: %lx\n", start_ip))
+               return -EINVAL;
+
+       /* Override start_ip with known protected guest start IP */
+       start_ip = real_mode_header->sev_es_trampoline_start;
+
+       /* Find the logical CPU for the APIC ID */
+       for_each_present_cpu(cpu) {
+               if (arch_match_cpu_phys_id(cpu, apic_id))
+                       break;
+       }
+       if (cpu >= nr_cpu_ids)
+               return -EINVAL;
+
+       cur_vmsa = per_cpu(sev_vmsa, cpu);
+
+       /*
+        * A new VMSA is created each time because there is no guarantee that
+        * the current VMSA is the kernel's or that the vCPU is not running. If
+        * an attempt were made to use the current VMSA with a running vCPU, a
+        * #VMEXIT of that vCPU would wipe out all of the settings being done
+        * here.
+        */
+       vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page();
+       if (!vmsa)
+               return -ENOMEM;
+
+       /* CR4 should maintain the MCE value */
+       cr4 = native_read_cr4() & X86_CR4_MCE;
+
+       /* Set the CS value based on the start_ip converted to a SIPI vector */
+       sipi_vector             = (start_ip >> 12);
+       vmsa->cs.base           = sipi_vector << 12;
+       vmsa->cs.limit          = AP_INIT_CS_LIMIT;
+       vmsa->cs.attrib         = INIT_CS_ATTRIBS;
+       vmsa->cs.selector       = sipi_vector << 8;
+
+       /* Set the RIP value based on start_ip */
+       vmsa->rip               = start_ip & 0xfff;
+
+       /* Set AP INIT defaults as documented in the APM */
+       vmsa->ds.limit          = AP_INIT_DS_LIMIT;
+       vmsa->ds.attrib         = INIT_DS_ATTRIBS;
+       vmsa->es                = vmsa->ds;
+       vmsa->fs                = vmsa->ds;
+       vmsa->gs                = vmsa->ds;
+       vmsa->ss                = vmsa->ds;
+
+       vmsa->gdtr.limit        = AP_INIT_GDTR_LIMIT;
+       vmsa->ldtr.limit        = AP_INIT_LDTR_LIMIT;
+       vmsa->ldtr.attrib       = INIT_LDTR_ATTRIBS;
+       vmsa->idtr.limit        = AP_INIT_IDTR_LIMIT;
+       vmsa->tr.limit          = AP_INIT_TR_LIMIT;
+       vmsa->tr.attrib         = INIT_TR_ATTRIBS;
+
+       vmsa->cr4               = cr4;
+       vmsa->cr0               = AP_INIT_CR0_DEFAULT;
+       vmsa->dr7               = DR7_RESET_VALUE;
+       vmsa->dr6               = AP_INIT_DR6_DEFAULT;
+       vmsa->rflags            = AP_INIT_RFLAGS_DEFAULT;
+       vmsa->g_pat             = AP_INIT_GPAT_DEFAULT;
+       vmsa->xcr0              = AP_INIT_XCR0_DEFAULT;
+       vmsa->mxcsr             = AP_INIT_MXCSR_DEFAULT;
+       vmsa->x87_ftw           = AP_INIT_X87_FTW_DEFAULT;
+       vmsa->x87_fcw           = AP_INIT_X87_FCW_DEFAULT;
+
+       /* SVME must be set. */
+       vmsa->efer              = EFER_SVME;
+
+       /*
+        * Set the SNP-specific fields for this VMSA:
+        *   VMPL level
+        *   SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
+        */
+       vmsa->vmpl              = 0;
+       vmsa->sev_features      = sev_status >> 2;
+
+       /* Switch the page over to a VMSA page now that it is initialized */
+       ret = snp_set_vmsa(vmsa, true);
+       if (ret) {
+               pr_err("set VMSA page failed (%u)\n", ret);
+               free_page((unsigned long)vmsa);
+
+               return -EINVAL;
+       }
+
+       /* Issue VMGEXIT AP Creation NAE event */
+       local_irq_save(flags);
+
+       ghcb = __sev_get_ghcb(&state);
+
+       vc_ghcb_invalidate(ghcb);
+       ghcb_set_rax(ghcb, vmsa->sev_features);
+       ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
+       ghcb_set_sw_exit_info_1(ghcb, ((u64)apic_id << 32) | SVM_VMGEXIT_AP_CREATE);
+       ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
+
+       sev_es_wr_ghcb_msr(__pa(ghcb));
+       VMGEXIT();
+
+       if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
+           lower_32_bits(ghcb->save.sw_exit_info_1)) {
+               pr_err("SNP AP Creation error\n");
+               ret = -EINVAL;
+       }
+
+       __sev_put_ghcb(&state);
+
+       local_irq_restore(flags);
+
+       /* Perform cleanup if there was an error */
+       if (ret) {
+               snp_cleanup_vmsa(vmsa);
+               vmsa = NULL;
+       }
+
+       /* Free up any previous VMSA page */
+       if (cur_vmsa)
+               snp_cleanup_vmsa(cur_vmsa);
+
+       /* Record the current VMSA page */
+       per_cpu(sev_vmsa, cpu) = vmsa;
+
+       return ret;
+}
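
The CS/RIP setup in wakeup_cpu_via_vmgexit() mirrors real-mode SIPI
semantics: bits [19:12] of start_ip become the SIPI vector, CS.base is the
vector shifted left by 12, the selector is base >> 4, and RIP carries the
low 12 bits, so CS.base + RIP lands exactly on start_ip. A runnable check
of that arithmetic, with a made-up trampoline address:

#include <assert.h>
#include <stdio.h>

int main(void)
{
        unsigned long start_ip = 0x9a000;       /* hypothetical trampoline IP */
        unsigned char sipi_vector = start_ip >> 12;
        unsigned long cs_base = (unsigned long)sipi_vector << 12;
        unsigned int cs_sel = (unsigned int)sipi_vector << 8;
        unsigned long rip = start_ip & 0xfff;

        assert(cs_base + rip == start_ip);      /* holds for sub-1M start IPs */
        printf("vector=%#x cs.base=%#lx cs.sel=%#x rip=%#lx\n",
               sipi_vector, cs_base, cs_sel, rip);
        return 0;
}
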
+
+void snp_set_wakeup_secondary_cpu(void)
+{
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return;
+
+       /*
+        * Always set this override if SNP is enabled. This makes it the
+        * required method to start APs under SNP. If the hypervisor does
+        * not support AP creation, then no APs will be started.
+        */
+       apic->wakeup_secondary_cpu = wakeup_cpu_via_vmgexit;
+}
+
+int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
 {
        u16 startup_cs, startup_ip;
        phys_addr_t jump_table_pa;
@@ -644,15 +1223,39 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
        return ret;
 }
 
-/*
- * This function runs on the first #VC exception after the kernel
- * switched to virtual addresses.
- */
-static bool __init sev_es_setup_ghcb(void)
+static void snp_register_per_cpu_ghcb(void)
+{
+       struct sev_es_runtime_data *data;
+       struct ghcb *ghcb;
+
+       data = this_cpu_read(runtime_data);
+       ghcb = &data->ghcb_page;
+
+       snp_register_ghcb_early(__pa(ghcb));
+}
+
+void setup_ghcb(void)
 {
+       if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
+               return;
+
        /* First make sure the hypervisor talks a supported protocol. */
        if (!sev_es_negotiate_protocol())
-               return false;
+               sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+
+       /*
+        * Check whether the runtime #VC exception handler is active. It uses
+        * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
+        *
+        * If SNP is active, register the per-CPU GHCB page so that the runtime
+        * exception handler can use it.
+        */
+       if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
+               if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+                       snp_register_per_cpu_ghcb();
+
+               return;
+       }
 
        /*
         * Clear the boot_ghcb. The first exception comes in before the bss
@@ -663,7 +1266,9 @@ static bool __init sev_es_setup_ghcb(void)
        /* Alright - Make the boot-ghcb public */
        boot_ghcb = &boot_ghcb_page;
 
-       return true;
+       /* The SNP guest requires the GHCB GPA to be registered. */
+       if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               snp_register_ghcb_early(__pa(&boot_ghcb_page));
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -766,6 +1371,17 @@ void __init sev_es_init_vc_handling(void)
        if (!sev_es_check_cpu_features())
                panic("SEV-ES CPU Features missing");
 
+       /*
+        * SNP is supported in v2 of the GHCB spec which mandates support for HV
+        * features.
+        */
+       if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
+               sev_hv_features = get_hv_features();
+
+               if (!(sev_hv_features & GHCB_HV_FT_SNP))
+                       sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+       }
+
        /* Enable SEV-ES special handling */
        static_branch_enable(&sev_es_enable_key);
 
@@ -1337,7 +1953,7 @@ DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
                show_regs(regs);
 
                /* Ask hypervisor to sev_es_terminate */
-               sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
+               sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
 
                /* If that fails and we get here - just panic */
                panic("Returned from Terminate-Request to Hypervisor\n");
@@ -1383,10 +1999,6 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
        struct es_em_ctxt ctxt;
        enum es_result result;
 
-       /* Do initial setup or terminate the guest */
-       if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
-               sev_es_terminate(GHCB_SEV_ES_GEN_REQ);
-
        vc_ghcb_invalidate(boot_ghcb);
 
        result = vc_init_em_ctxt(&ctxt, regs, exit_code);
@@ -1425,6 +2037,215 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
 fail:
        show_regs(regs);
 
-       while (true)
-               halt();
+       sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+}
+
+/*
+ * Initial setup of SNP relies on information provided by the
+ * Confidential Computing blob, which can be passed to the kernel
+ * in the following ways, depending on how it is booted:
+ *
+ * - when booted via the boot/decompress kernel:
+ *   - via boot_params
+ *
+ * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH):
+ *   - via a setup_data entry, as defined by the Linux Boot Protocol
+ *
+ * Scan for the blob in that order.
+ */
+static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+{
+       struct cc_blob_sev_info *cc_info;
+
+       /* The boot kernel would have passed the CC blob via boot_params. */
+       if (bp->cc_blob_address) {
+               cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address;
+               goto found_cc_info;
+       }
+
+       /*
+        * If the kernel was booted directly, without the use of the
+        * boot/decompression kernel, the CC blob may have been passed via
+        * setup_data instead.
+        */
+       cc_info = find_cc_blob_setup_data(bp);
+       if (!cc_info)
+               return NULL;
+
+found_cc_info:
+       if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
+               snp_abort();
+
+       return cc_info;
+}
+
+bool __init snp_init(struct boot_params *bp)
+{
+       struct cc_blob_sev_info *cc_info;
+
+       if (!bp)
+               return false;
+
+       cc_info = find_cc_blob(bp);
+       if (!cc_info)
+               return false;
+
+       setup_cpuid_table(cc_info);
+
+       /*
+        * The CC blob will be used later to access the secrets page. Cache
+        * it here like the boot kernel does.
+        */
+       bp->cc_blob_address = (u32)(unsigned long)cc_info;
+
+       return true;
+}
+
+void __init snp_abort(void)
+{
+       sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+}
+
+static void dump_cpuid_table(void)
+{
+       const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+       int i = 0;
+
+       pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
+               cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);
+
+       for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
+               const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
+
+               pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
+                       i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
+                       fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
+       }
+}
+
+/*
+ * It is useful from an auditing/testing perspective to provide an easy way
+ * for the guest owner to know that the CPUID table has been initialized as
+ * expected. That initialization, however, happens too early in boot to print
+ * any sort of indicator, and there's not really any other good place to do
+ * it, so do it here.
+ */
+static int __init report_cpuid_table(void)
+{
+       const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+
+       if (!cpuid_table->count)
+               return 0;
+
+       pr_info("Using SNP CPUID table, %d entries present.\n",
+               cpuid_table->count);
+
+       if (sev_cfg.debug)
+               dump_cpuid_table();
+
+       return 0;
+}
+arch_initcall(report_cpuid_table);
+
+static int __init init_sev_config(char *str)
+{
+       char *s;
+
+       while ((s = strsep(&str, ","))) {
+               if (!strcmp(s, "debug")) {
+                       sev_cfg.debug = true;
+                       continue;
+               }
+
+               pr_info("SEV command-line option '%s' was not recognized\n", s);
+       }
+
+       return 1;
+}
+__setup("sev=", init_sev_config);
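
init_sev_config() above is a plain strsep() loop over a comma-separated
option string. A user-space equivalent with a hypothetical "sev=debug,foo"
value shows the same flow, including the complaint about unknown options:

#define _GNU_SOURCE
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        char buf[] = "debug,foo";       /* hypothetical "sev=debug,foo" */
        char *str = buf, *s;
        bool debug = false;

        while ((s = strsep(&str, ","))) {
                if (!strcmp(s, "debug")) {
                        debug = true;
                        continue;
                }
                printf("option '%s' was not recognized\n", s);
        }
        printf("debug=%d\n", debug);
        return 0;
}
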
+
+int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err)
+{
+       struct ghcb_state state;
+       struct es_em_ctxt ctxt;
+       unsigned long flags;
+       struct ghcb *ghcb;
+       int ret;
+
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return -ENODEV;
+
+       if (!fw_err)
+               return -EINVAL;
+
+       /*
+        * __sev_get_ghcb() needs to run with IRQs disabled because it is using
+        * a per-CPU GHCB.
+        */
+       local_irq_save(flags);
+
+       ghcb = __sev_get_ghcb(&state);
+       if (!ghcb) {
+               ret = -EIO;
+               goto e_restore_irq;
+       }
+
+       vc_ghcb_invalidate(ghcb);
+
+       if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
+               ghcb_set_rax(ghcb, input->data_gpa);
+               ghcb_set_rbx(ghcb, input->data_npages);
+       }
+
+       ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
+       if (ret)
+               goto e_put;
+
+       if (ghcb->save.sw_exit_info_2) {
+               /* The number of expected pages is returned in RBX */
+               if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST &&
+                   ghcb->save.sw_exit_info_2 == SNP_GUEST_REQ_INVALID_LEN)
+                       input->data_npages = ghcb_get_rbx(ghcb);
+
+               *fw_err = ghcb->save.sw_exit_info_2;
+
+               ret = -EIO;
+       }
+
+e_put:
+       __sev_put_ghcb(&state);
+e_restore_irq:
+       local_irq_restore(flags);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(snp_issue_guest_request);
+
+static struct platform_device sev_guest_device = {
+       .name           = "sev-guest",
+       .id             = -1,
+};
+
+static int __init snp_init_platform_device(void)
+{
+       struct sev_guest_platform_data data;
+       u64 gpa;
+
+       if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               return -ENODEV;
+
+       gpa = get_secrets_page();
+       if (!gpa)
+               return -ENODEV;
+
+       data.secrets_gpa = gpa;
+       if (platform_device_add_data(&sev_guest_device, &data, sizeof(data)))
+               return -ENODEV;
+
+       if (platform_device_register(&sev_guest_device))
+               return -ENODEV;
+
+       pr_info("SNP guest platform device initialized.\n");
+       return 0;
 }
+device_initcall(snp_init_platform_device);
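
For context, a hedged sketch of the consumer side: a platform driver bound
to the "sev-guest" name can read back the secrets-page GPA registered above
via dev_get_platdata(). This is illustrative only; the real guest driver
lives elsewhere in the tree, and the header providing struct
sev_guest_platform_data is assumed here:

#include <linux/module.h>
#include <linux/platform_device.h>
#include <asm/sev.h>    /* assumed home of struct sev_guest_platform_data */

static int sev_guest_probe(struct platform_device *pdev)
{
        struct sev_guest_platform_data *data = dev_get_platdata(&pdev->dev);

        if (!data)
                return -ENODEV;

        pr_info("sev-guest: secrets page GPA %llx\n", data->secrets_gpa);
        return 0;
}

static struct platform_driver sev_guest_driver = {
        .probe  = sev_guest_probe,
        .driver = { .name = "sev-guest" },
};
module_platform_driver(sev_guest_driver);
MODULE_LICENSE("GPL");
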
index e439eb14325fa131057e93426e5f78aacad96262..9c7265b524c73ac26c0bf419ab2d0a0d5745d3bf 100644 (file)
@@ -93,7 +93,7 @@ static bool restore_sigcontext(struct pt_regs *regs,
                return false;
 
 #ifdef CONFIG_X86_32
-       set_user_gs(regs, sc.gs);
+       loadsegment(gs, sc.gs);
        regs->fs = sc.fs;
        regs->es = sc.es;
        regs->ds = sc.ds;
@@ -146,8 +146,10 @@ __unsafe_setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
                     struct pt_regs *regs, unsigned long mask)
 {
 #ifdef CONFIG_X86_32
-       unsafe_put_user(get_user_gs(regs),
-                                 (unsigned int __user *)&sc->gs, Efault);
+       unsigned int gs;
+       savesegment(gs, gs);
+
+       unsafe_put_user(gs,       (unsigned int __user *)&sc->gs, Efault);
        unsafe_put_user(regs->fs, (unsigned int __user *)&sc->fs, Efault);
        unsafe_put_user(regs->es, (unsigned int __user *)&sc->es, Efault);
        unsafe_put_user(regs->ds, (unsigned int __user *)&sc->ds, Efault);
index 2ef14772dc047d433e54e97a030026a69f6439df..5e7f9532a10d07760b053d6d56bcd30e463b902b 100644 (file)
@@ -56,7 +56,6 @@
 #include <linux/numa.h>
 #include <linux/pgtable.h>
 #include <linux/overflow.h>
-#include <linux/syscore_ops.h>
 
 #include <asm/acpi.h>
 #include <asm/desc.h>
@@ -82,6 +81,7 @@
 #include <asm/spec-ctrl.h>
 #include <asm/hw_irq.h>
 #include <asm/stackprotector.h>
+#include <asm/sev.h>
 
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
@@ -187,7 +187,7 @@ static void smp_callin(void)
         */
        set_cpu_sibling_map(raw_smp_processor_id());
 
-       init_freq_invariance(true, false);
+       ap_init_aperfmperf();
 
        /*
         * Get our bogomips.
@@ -1082,6 +1082,11 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
        unsigned long boot_error = 0;
        unsigned long timeout;
 
+#ifdef CONFIG_X86_64
+       /* If a 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
+       if (apic->wakeup_secondary_cpu_64)
+               start_ip = real_mode_header->trampoline_start64;
+#endif
        idle->thread.sp = (unsigned long)task_pt_regs(idle);
        early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
        initial_code = (unsigned long)start_secondary;
@@ -1123,11 +1128,14 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
 
        /*
         * Wake up a CPU in different cases:
-        * - Use the method in the APIC driver if it's defined
+        * - Use a method from the APIC driver if one is defined, with wakeup
+        *   straight to 64-bit mode preferred over wakeup to real mode.
         * Otherwise,
         * - Use an INIT boot APIC message for APs or NMI for BSP.
         */
-       if (apic->wakeup_secondary_cpu)
+       if (apic->wakeup_secondary_cpu_64)
+               boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip);
+       else if (apic->wakeup_secondary_cpu)
                boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
        else
                boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
@@ -1397,7 +1405,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 {
        smp_prepare_cpus_common();
 
-       init_freq_invariance(false, false);
        smp_sanity_check();
 
        switch (apic_intr_mode) {
@@ -1430,6 +1437,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
        smp_quirk_init_udelay();
 
        speculative_store_bypass_ht_init();
+
+       snp_set_wakeup_secondary_cpu();
 }
 
 void arch_thaw_secondary_cpus_begin(void)
@@ -1847,357 +1856,3 @@ void native_play_dead(void)
 }
 
 #endif
-
-#ifdef CONFIG_X86_64
-/*
- * APERF/MPERF frequency ratio computation.
- *
- * The scheduler wants to do frequency invariant accounting and needs a <1
- * ratio to account for the 'current' frequency, corresponding to
- * freq_curr / freq_max.
- *
- * Since the frequency freq_curr on x86 is controlled by micro-controller and
- * our P-state setting is little more than a request/hint, we need to observe
- * the effective frequency 'BusyMHz', i.e. the average frequency over a time
- * interval after discarding idle time. This is given by:
- *
- *   BusyMHz = delta_APERF / delta_MPERF * freq_base
- *
- * where freq_base is the max non-turbo P-state.
- *
- * The freq_max term has to be set to a somewhat arbitrary value, because we
- * can't know which turbo states will be available at a given point in time:
- * it all depends on the thermal headroom of the entire package. We set it to
- * the turbo level with 4 cores active.
- *
- * Benchmarks show that's a good compromise between the 1C turbo ratio
- * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
- * which would ignore the entire turbo range (a conspicuous part, making
- * freq_curr/freq_max always maxed out).
- *
- * An exception to the heuristic above is the Atom uarch, where we choose the
- * highest turbo level for freq_max since Atom's are generally oriented towards
- * power efficiency.
- *
- * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
- * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
- */
-
-DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
-
-static DEFINE_PER_CPU(u64, arch_prev_aperf);
-static DEFINE_PER_CPU(u64, arch_prev_mperf);
-static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
-static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
-
-void arch_set_max_freq_ratio(bool turbo_disabled)
-{
-       arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
-                                       arch_turbo_freq_ratio;
-}
-EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
-
-static bool turbo_disabled(void)
-{
-       u64 misc_en;
-       int err;
-
-       err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
-       if (err)
-               return false;
-
-       return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
-}
-
-static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
-{
-       int err;
-
-       err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
-       if (err)
-               return false;
-
-       err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
-       if (err)
-               return false;
-
-       *base_freq = (*base_freq >> 16) & 0x3F;     /* max P state */
-       *turbo_freq = *turbo_freq & 0x3F;           /* 1C turbo    */
-
-       return true;
-}
-
-#define X86_MATCH(model)                                       \
-       X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,            \
-               INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
-
-static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
-       X86_MATCH(XEON_PHI_KNL),
-       X86_MATCH(XEON_PHI_KNM),
-       {}
-};
-
-static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
-       X86_MATCH(SKYLAKE_X),
-       {}
-};
-
-static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
-       X86_MATCH(ATOM_GOLDMONT),
-       X86_MATCH(ATOM_GOLDMONT_D),
-       X86_MATCH(ATOM_GOLDMONT_PLUS),
-       {}
-};
-
-static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
-                               int num_delta_fratio)
-{
-       int fratio, delta_fratio, found;
-       int err, i;
-       u64 msr;
-
-       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
-       if (err)
-               return false;
-
-       *base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */
-
-       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
-       if (err)
-               return false;
-
-       fratio = (msr >> 8) & 0xFF;
-       i = 16;
-       found = 0;
-       do {
-               if (found >= num_delta_fratio) {
-                       *turbo_freq = fratio;
-                       return true;
-               }
-
-               delta_fratio = (msr >> (i + 5)) & 0x7;
-
-               if (delta_fratio) {
-                       found += 1;
-                       fratio -= delta_fratio;
-               }
-
-               i += 8;
-       } while (i < 64);
-
-       return true;
-}
-
-static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
-{
-       u64 ratios, counts;
-       u32 group_size;
-       int err, i;
-
-       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
-       if (err)
-               return false;
-
-       *base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */
-
-       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
-       if (err)
-               return false;
-
-       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
-       if (err)
-               return false;
-
-       for (i = 0; i < 64; i += 8) {
-               group_size = (counts >> i) & 0xFF;
-               if (group_size >= size) {
-                       *turbo_freq = (ratios >> i) & 0xFF;
-                       return true;
-               }
-       }
-
-       return false;
-}
-
-static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
-{
-       u64 msr;
-       int err;
-
-       err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
-       if (err)
-               return false;
-
-       err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
-       if (err)
-               return false;
-
-       *base_freq = (*base_freq >> 8) & 0xFF;    /* max P state */
-       *turbo_freq = (msr >> 24) & 0xFF;         /* 4C turbo    */
-
-       /* The CPU may have less than 4 cores */
-       if (!*turbo_freq)
-               *turbo_freq = msr & 0xFF;         /* 1C turbo    */
-
-       return true;
-}
-
-static bool intel_set_max_freq_ratio(void)
-{
-       u64 base_freq, turbo_freq;
-       u64 turbo_ratio;
-
-       if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
-               goto out;
-
-       if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
-           skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
-               goto out;
-
-       if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
-           knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
-               goto out;
-
-       if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
-           skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
-               goto out;
-
-       if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
-               goto out;
-
-       return false;
-
-out:
-       /*
-        * Some hypervisors advertise X86_FEATURE_APERFMPERF
-        * but then fill all MSR's with zeroes.
-        * Some CPUs have turbo boost but don't declare any turbo ratio
-        * in MSR_TURBO_RATIO_LIMIT.
-        */
-       if (!base_freq || !turbo_freq) {
-               pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
-               return false;
-       }
-
-       turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
-       if (!turbo_ratio) {
-               pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
-               return false;
-       }
-
-       arch_turbo_freq_ratio = turbo_ratio;
-       arch_set_max_freq_ratio(turbo_disabled());
-
-       return true;
-}
-
-static void init_counter_refs(void)
-{
-       u64 aperf, mperf;
-
-       rdmsrl(MSR_IA32_APERF, aperf);
-       rdmsrl(MSR_IA32_MPERF, mperf);
-
-       this_cpu_write(arch_prev_aperf, aperf);
-       this_cpu_write(arch_prev_mperf, mperf);
-}
-
-#ifdef CONFIG_PM_SLEEP
-static struct syscore_ops freq_invariance_syscore_ops = {
-       .resume = init_counter_refs,
-};
-
-static void register_freq_invariance_syscore_ops(void)
-{
-       /* Bail out if registered already. */
-       if (freq_invariance_syscore_ops.node.prev)
-               return;
-
-       register_syscore_ops(&freq_invariance_syscore_ops);
-}
-#else
-static inline void register_freq_invariance_syscore_ops(void) {}
-#endif
-
-void init_freq_invariance(bool secondary, bool cppc_ready)
-{
-       bool ret = false;
-
-       if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
-               return;
-
-       if (secondary) {
-               if (static_branch_likely(&arch_scale_freq_key)) {
-                       init_counter_refs();
-               }
-               return;
-       }
-
-       if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-               ret = intel_set_max_freq_ratio();
-       else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
-               if (!cppc_ready) {
-                       return;
-               }
-               ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio);
-       }
-
-       if (ret) {
-               init_counter_refs();
-               static_branch_enable(&arch_scale_freq_key);
-               register_freq_invariance_syscore_ops();
-               pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
-       } else {
-               pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
-       }
-}
-
-static void disable_freq_invariance_workfn(struct work_struct *work)
-{
-       static_branch_disable(&arch_scale_freq_key);
-}
-
-static DECLARE_WORK(disable_freq_invariance_work,
-                   disable_freq_invariance_workfn);
-
-DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
-
-void arch_scale_freq_tick(void)
-{
-       u64 freq_scale;
-       u64 aperf, mperf;
-       u64 acnt, mcnt;
-
-       if (!arch_scale_freq_invariant())
-               return;
-
-       rdmsrl(MSR_IA32_APERF, aperf);
-       rdmsrl(MSR_IA32_MPERF, mperf);
-
-       acnt = aperf - this_cpu_read(arch_prev_aperf);
-       mcnt = mperf - this_cpu_read(arch_prev_mperf);
-
-       this_cpu_write(arch_prev_aperf, aperf);
-       this_cpu_write(arch_prev_mperf, mperf);
-
-       if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
-               goto error;
-
-       if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
-               goto error;
-
-       freq_scale = div64_u64(acnt, mcnt);
-       if (!freq_scale)
-               goto error;
-
-       if (freq_scale > SCHED_CAPACITY_SCALE)
-               freq_scale = SCHED_CAPACITY_SCALE;
-
-       this_cpu_write(arch_freq_scale, freq_scale);
-       return;
-
-error:
-       pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
-       schedule_work(&disable_freq_invariance_work);
-}
-#endif /* CONFIG_X86_64 */
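
The block deleted above implemented the frequency-invariance ratio that its
leading comment derives; the ap_init_aperfmperf() call added earlier in this
file indicates the logic now lives with the APERF/MPERF code. The core
computation was freq_scale = (delta_APERF << 2*SCHED_CAPACITY_SHIFT) /
(delta_MPERF * arch_max_freq_ratio), clamped to SCHED_CAPACITY_SCALE. A
user-space check with made-up counter deltas:

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT    10
#define SCHED_CAPACITY_SCALE    (1ULL << SCHED_CAPACITY_SHIFT)

int main(void)
{
        unsigned long long acnt = 800000, mcnt = 1000000; /* busy at 80% of base */
        unsigned long long ratio = 1280;        /* (turbo/base) * 1024, example */
        unsigned long long scale;

        scale = (acnt << (2 * SCHED_CAPACITY_SHIFT)) / (mcnt * ratio);
        if (scale > SCHED_CAPACITY_SCALE)
                scale = SCHED_CAPACITY_SCALE;

        /* 0.8 busy / 1.25 max turbo ratio -> 655/1024 */
        printf("freq_scale = %llu / %llu\n", scale, SCHED_CAPACITY_SCALE);
        return 0;
}
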
index 531fb4cbb63fd1d1e29ef2cb80d4b27eb0b8ed0c..aa72cefdd5be61552e820f6ff65a949bf424fd97 100644 (file)
@@ -12,10 +12,9 @@ enum insn_type {
 };
 
 /*
- * data16 data16 xorq %rax, %rax - a single 5 byte instruction that clears %rax
- * The REX.W cancels the effect of any data16.
+ * cs cs cs xorl %eax, %eax - a single 5-byte instruction that clears %[er]ax
  */
-static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 };
+static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 };
 
 static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
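
The new 5-byte sequence is easy to sanity-check from user space: the three
0x2e CS-segment prefixes are ignored in 64-bit mode, and the 32-bit xorl
zero-extends its result, clearing all of RAX (x86-64 with GCC/Clang inline
asm assumed):

#include <assert.h>
#include <stdio.h>

int main(void)
{
        unsigned long rax = 0xdeadbeefcafef00dUL;

        asm volatile (".byte 0x2e, 0x2e, 0x2e, 0x31, 0xc0" : "+a" (rax));
        assert(rax == 0);
        printf("5-byte xor cleared rax\n");
        return 0;
}
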
 
index 1563fb9950059d8724f0b7b252aec316e3349245..d62b2cb85ceae55407d02a7ff5678e0df67f3ec3 100644 (file)
@@ -62,6 +62,7 @@
 #include <asm/insn.h>
 #include <asm/insn-eval.h>
 #include <asm/vdso.h>
+#include <asm/tdx.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/x86_init.h>
@@ -686,13 +687,40 @@ static bool try_fixup_enqcmd_gp(void)
 #endif
 }
 
+static bool gp_try_fixup_and_notify(struct pt_regs *regs, int trapnr,
+                                   unsigned long error_code, const char *str)
+{
+       if (fixup_exception(regs, trapnr, error_code, 0))
+               return true;
+
+       current->thread.error_code = error_code;
+       current->thread.trap_nr = trapnr;
+
+       /*
+        * To trust the result from kprobe_running() while potentially
+        * processing a kprobe fault, we have to be non-preemptible.
+        */
+       if (!preemptible() && kprobe_running() &&
+           kprobe_fault_handler(regs, trapnr))
+               return true;
+
+       return notify_die(DIE_GPF, str, regs, error_code, trapnr, SIGSEGV) == NOTIFY_STOP;
+}
+
+static void gp_user_force_sig_segv(struct pt_regs *regs, int trapnr,
+                                  unsigned long error_code, const char *str)
+{
+       current->thread.error_code = error_code;
+       current->thread.trap_nr = trapnr;
+       show_signal(current, SIGSEGV, "", str, regs, error_code);
+       force_sig(SIGSEGV);
+}
+
 DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 {
        char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
        enum kernel_gp_hint hint = GP_NO_HINT;
-       struct task_struct *tsk;
        unsigned long gp_addr;
-       int ret;
 
        if (user_mode(regs) && try_fixup_enqcmd_gp())
                return;
@@ -711,40 +739,18 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
                return;
        }
 
-       tsk = current;
-
        if (user_mode(regs)) {
                if (fixup_iopl_exception(regs))
                        goto exit;
 
-               tsk->thread.error_code = error_code;
-               tsk->thread.trap_nr = X86_TRAP_GP;
-
                if (fixup_vdso_exception(regs, X86_TRAP_GP, error_code, 0))
                        goto exit;
 
-               show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
-               force_sig(SIGSEGV);
+               gp_user_force_sig_segv(regs, X86_TRAP_GP, error_code, desc);
                goto exit;
        }
 
-       if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
-               goto exit;
-
-       tsk->thread.error_code = error_code;
-       tsk->thread.trap_nr = X86_TRAP_GP;
-
-       /*
-        * To be potentially processing a kprobe fault and to trust the result
-        * from kprobe_running(), we have to be non-preemptible.
-        */
-       if (!preemptible() &&
-           kprobe_running() &&
-           kprobe_fault_handler(regs, X86_TRAP_GP))
-               goto exit;
-
-       ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV);
-       if (ret == NOTIFY_STOP)
+       if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc))
                goto exit;
 
        if (error_code)
@@ -892,14 +898,10 @@ sync:
 }
 #endif
 
-struct bad_iret_stack {
-       void *error_entry_ret;
-       struct pt_regs regs;
-};
-
-asmlinkage __visible noinstr
-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs)
 {
+       struct pt_regs tmp, *new_stack;
+
        /*
         * This is called from entry_64.S early in handling a fault
         * caused by a bad iret to user mode.  To handle the fault
@@ -908,19 +910,18 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
         * just below the IRET frame) and we want to pretend that the
         * exception came from the IRET target.
         */
-       struct bad_iret_stack tmp, *new_stack =
-               (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
+       new_stack = (struct pt_regs *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
 
        /* Copy the IRET target to the temporary storage. */
-       __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);
+       __memcpy(&tmp.ip, (void *)bad_regs->sp, 5*8);
 
        /* Copy the remainder of the stack from the current stack. */
-       __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));
+       __memcpy(&tmp, bad_regs, offsetof(struct pt_regs, ip));
 
        /* Update the entry stack */
        __memcpy(new_stack, &tmp, sizeof(tmp));
 
-       BUG_ON(!user_mode(&new_stack->regs));
+       BUG_ON(!user_mode(new_stack));
        return new_stack;
 }
 #endif
@@ -1343,6 +1344,91 @@ DEFINE_IDTENTRY(exc_device_not_available)
        }
 }
 
+#ifdef CONFIG_INTEL_TDX_GUEST
+
+#define VE_FAULT_STR "VE fault"
+
+static void ve_raise_fault(struct pt_regs *regs, long error_code)
+{
+       if (user_mode(regs)) {
+               gp_user_force_sig_segv(regs, X86_TRAP_VE, error_code, VE_FAULT_STR);
+               return;
+       }
+
+       if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code, VE_FAULT_STR))
+               return;
+
+       die_addr(VE_FAULT_STR, regs, error_code, 0);
+}
+
+/*
+ * Virtualization Exceptions (#VE) are delivered to TDX guests due to
+ * specific guest actions which may happen in either user space or the
+ * kernel:
+ *
+ *  * Specific instructions (WBINVD, for example)
+ *  * Specific MSR accesses
+ *  * Specific CPUID leaf accesses
+ *  * Access to specific guest physical addresses
+ *
+ * In the settings that Linux will run in, virtualization exceptions are
+ * never generated on accesses to normal, TD-private memory that has been
+ * accepted (by BIOS or with tdx_enc_status_changed()).
+ *
+ * Syscall entry code has a critical window where the kernel stack is not
+ * yet set up. Any exception in this window leads to hard to debug issues
+ * and can be exploited for privilege escalation. Exceptions in the NMI
+ * entry code also cause issues. Returning from the exception handler with
+ * IRET will re-enable NMIs and nested NMI will corrupt the NMI stack.
+ *
+ * For these reasons, the kernel avoids #VEs during the syscall gap and
+ * the NMI entry code. Entry code paths do not access TD-shared memory,
+ * MMIO regions, use #VE triggering MSRs, instructions, or CPUID leaves
+ * that might generate #VE. VMM can remove memory from TD at any point,
+ * but access to unaccepted (or missing) private memory leads to VM
+ * termination, not to #VE.
+ *
+ * Similarly to page faults and breakpoints, #VEs are allowed in NMI
+ * handlers once the kernel is ready to deal with nested NMIs.
+ *
+ * During #VE delivery, all interrupts, including NMIs, are blocked until
+ * TDGETVEINFO is called. It prevents #VE nesting until the kernel reads
+ * the VE info.
+ *
+ * If a guest kernel action which would normally cause a #VE occurs in
+ * the interrupt-disabled region before TDGETVEINFO, a #DF (double
+ * fault) is delivered to the guest, which will result in an oops.
+ *
+ * The entry code has been audited carefully to ensure it follows these
+ * expectations. Changes to the entry code have to be audited for correctness
+ * against this aspect as well. Similarly to #PF, a #VE in these places would
+ * expose the kernel to privilege escalation or may lead to random crashes.
+ */
+DEFINE_IDTENTRY(exc_virtualization_exception)
+{
+       struct ve_info ve;
+
+       /*
+        * NMIs, machine checks and interrupts will be in a disabled state
+        * until the TDGETVEINFO TDCALL is executed. This ensures that the VE
+        * info cannot be overwritten by a nested #VE.
+        */
+       tdx_get_ve_info(&ve);
+
+       cond_local_irq_enable(regs);
+
+       /*
+        * If tdx_handle_virt_exception() fails to process the #VE
+        * successfully, treat it as #GP(0) and handle it.
+        */
+       if (!tdx_handle_virt_exception(regs, &ve))
+               ve_raise_fault(regs, 0);
+
+       cond_local_irq_disable(regs);
+}
+
+#endif
+
 #ifdef CONFIG_X86_32
 DEFINE_IDTENTRY_SW(iret_error)
 {
index 794fdef2501ab47f58ef375854fea455c3d70304..38185aedf7d1622103e83f7e771eb724a02196f2 100644 (file)
@@ -339,11 +339,11 @@ static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
        struct stack_info *info = &state->stack_info;
        void *addr = (void *)_addr;
 
-       if (!on_stack(info, addr, len) &&
-           (get_stack_info(addr, state->task, info, &state->stack_mask)))
-               return false;
+       if (on_stack(info, addr, len))
+               return true;
 
-       return true;
+       return !get_stack_info(addr, state->task, info, &state->stack_mask) &&
+               on_stack(info, addr, len);
 }
 
 static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
index c21bcd668284259d8f8833205a936106a8010af6..e9e803a4d44cf6ffe8938071a18b305feafdbf81 100644 (file)
@@ -151,7 +151,7 @@ exit_vm86:
 
        memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
 
-       lazy_load_gs(vm86->regs32.gs);
+       loadsegment(gs, vm86->regs32.gs);
 
        regs->pt.ax = retval;
        return;
@@ -325,7 +325,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
  * Save old state
  */
        vm86->saved_sp0 = tsk->thread.sp0;
-       lazy_save_gs(vm86->regs32.gs);
+       savesegment(gs, vm86->regs32.gs);
 
        /* make room for real-mode segments */
        preempt_disable();
index b24ca7f4ed7c8b6603d6d54ec846504c04f0c742..de6d44e07e348fcdae81841f8dd516fdea9781bf 100644 (file)
@@ -19,6 +19,7 @@
 #include <asm/user.h>
 #include <asm/fpu/xstate.h>
 #include <asm/sgx.h>
+#include <asm/cpuid.h>
 #include "cpuid.h"
 #include "lapic.h"
 #include "mmu.h"
@@ -744,24 +745,8 @@ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
        cpuid_count(entry->function, entry->index,
                    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
 
-       switch (function) {
-       case 4:
-       case 7:
-       case 0xb:
-       case 0xd:
-       case 0xf:
-       case 0x10:
-       case 0x12:
-       case 0x14:
-       case 0x17:
-       case 0x18:
-       case 0x1d:
-       case 0x1e:
-       case 0x1f:
-       case 0x8000001d:
+       if (cpuid_function_is_indexed(function))
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-               break;
-       }
 
        return entry;
 }
@@ -887,6 +872,11 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
                union cpuid10_eax eax;
                union cpuid10_edx edx;
 
+               if (!static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
+                       entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+                       break;
+               }
+
                perf_get_x86_pmu_capability(&cap);
 
                /*
@@ -1085,12 +1075,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
        case 0x80000000:
                entry->eax = min(entry->eax, 0x80000021);
                /*
-                * Serializing LFENCE is reported in a multitude of ways,
-                * and NullSegClearsBase is not reported in CPUID on Zen2;
-                * help userspace by providing the CPUID leaf ourselves.
+                * Serializing LFENCE is reported in a multitude of ways, and
+                * NullSegClearsBase is not reported in CPUID on Zen2; help
+                * userspace by providing the CPUID leaf ourselves.
+                *
+                * However, only do it if the host has CPUID leaf 0x8000001d.
+                * QEMU thinks that it can query the host blindly for that
+                * CPUID leaf if KVM reports that it supports 0x8000001d or
+                * above.  The processor merrily returns values from the
+                * highest Intel leaf which QEMU tries to use as the guest's
+                * 0x8000001d.  Even worse, this can result in an infinite
+                * loop if said highest leaf has no subleaves indexed by ECX.
                 */
-               if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC)
-                   || !static_cpu_has_bug(X86_BUG_NULL_SEG))
+               if (entry->eax >= 0x8000001d &&
+                   (static_cpu_has(X86_FEATURE_LFENCE_RDTSC)
+                    || !static_cpu_has_bug(X86_BUG_NULL_SEG)))
                        entry->eax = max(entry->eax, 0x80000021);
                break;
        case 0x80000001:
index 123b677111c58411e38e7fd90aa50c83e0291102..a0702b6be3e8979b894ce33c070bc613411b4adc 100644 (file)
@@ -1135,11 +1135,13 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
        BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
        BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
 
+       mutex_lock(&hv->hv_lock);
+
        if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
+           hv->hv_tsc_page_status == HV_TSC_PAGE_SET ||
            hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET)
-               return;
+               goto out_unlock;
 
-       mutex_lock(&hv->hv_lock);
        if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
                goto out_unlock;
 
@@ -1201,45 +1203,19 @@ out_unlock:
        mutex_unlock(&hv->hv_lock);
 }
 
-void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
+void kvm_hv_request_tsc_page_update(struct kvm *kvm)
 {
        struct kvm_hv *hv = to_kvm_hv(kvm);
-       u64 gfn;
-       int idx;
-
-       if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
-           hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET ||
-           tsc_page_update_unsafe(hv))
-               return;
 
        mutex_lock(&hv->hv_lock);
 
-       if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
-               goto out_unlock;
-
-       /* Preserve HV_TSC_PAGE_GUEST_CHANGED/HV_TSC_PAGE_HOST_CHANGED states */
-       if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET)
-               hv->hv_tsc_page_status = HV_TSC_PAGE_UPDATING;
+       if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET &&
+           !tsc_page_update_unsafe(hv))
+               hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;
 
-       gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
-
-       hv->tsc_ref.tsc_sequence = 0;
-
-       /*
-        * Take the srcu lock as memslots will be accessed to check the gfn
-        * cache generation against the memslots generation.
-        */
-       idx = srcu_read_lock(&kvm->srcu);
-       if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
-                           &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
-               hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
-       srcu_read_unlock(&kvm->srcu, idx);
-
-out_unlock:
        mutex_unlock(&hv->hv_lock);
 }
 
-
 static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 {
        if (!hv_vcpu->enforce_cpuid)
@@ -1938,7 +1914,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
        struct hv_send_ipi_ex send_ipi_ex;
        struct hv_send_ipi send_ipi;
        DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
-       unsigned long valid_bank_mask;
+       u64 valid_bank_mask;
        u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
        u32 vector;
        bool all_cpus;
@@ -1980,7 +1956,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
                valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
                all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
 
-               if (hc->var_cnt != bitmap_weight(&valid_bank_mask, 64))
+               if (hc->var_cnt != bitmap_weight((unsigned long *)&valid_bank_mask, 64))
                        return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
                if (all_cpus)
index e19c00ee9ab33c14974af99d4a65db9432d0a2a1..da2737f2a956c2ab054af924b83012e4960997f6 100644 (file)
@@ -137,7 +137,7 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
 
 void kvm_hv_setup_tsc_page(struct kvm *kvm,
                           struct pvclock_vcpu_time_info *hv_clock);
-void kvm_hv_invalidate_tsc_page(struct kvm *kvm);
+void kvm_hv_request_tsc_page_update(struct kvm *kvm);
 
 void kvm_hv_init_vm(struct kvm *kvm);
 void kvm_hv_destroy_vm(struct kvm *kvm);
index e6cae6f226838c02919e60ff2d35719d90165c42..a335e7f1f69e66881c271ed3d8c48d643e1734cf 100644 (file)
@@ -65,6 +65,30 @@ static __always_inline u64 rsvd_bits(int s, int e)
        return ((2ULL << (e - s)) - 1) << s;
 }
 
+/*
+ * The number of non-reserved physical address bits irrespective of features
+ * that repurpose legal bits, e.g. MKTME.
+ */
+extern u8 __read_mostly shadow_phys_bits;
+
+static inline gfn_t kvm_mmu_max_gfn(void)
+{
+       /*
+        * Note that this uses the host MAXPHYADDR, not the guest's.
+        * EPT/NPT cannot support GPAs that would exceed host.MAXPHYADDR;
+        * assuming KVM is running on bare metal, guest accesses beyond
+        * host.MAXPHYADDR will hit a #PF(RSVD) and never cause a vmexit
+        * (either EPT Violation/Misconfig or #NPF), and so KVM will never
+        * install a SPTE for such addresses.  If KVM is running as a VM
+        * itself, on the other hand, it might see a MAXPHYADDR that is less
+        * than hardware's real MAXPHYADDR.  Using the host MAXPHYADDR
+        * disallows such SPTEs entirely and simplifies the TDP MMU.
+        */
+       int max_gpa_bits = likely(tdp_enabled) ? shadow_phys_bits : 52;
+
+       return (1ULL << (max_gpa_bits - PAGE_SHIFT)) - 1;
+}
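
A quick check of the arithmetic in kvm_mmu_max_gfn(): with the 52-bit
non-TDP fallback and 4K pages (PAGE_SHIFT == 12), the highest GFN is
2^40 - 1:

#include <stdio.h>

int main(void)
{
        int max_gpa_bits = 52, page_shift = 12;
        unsigned long long max_gfn = (1ULL << (max_gpa_bits - page_shift)) - 1;

        printf("max_gfn = %#llx\n", max_gfn);   /* 0xffffffffff */
        return 0;
}
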
+
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
 void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
 
index 8f19ea752704282672674e12777c20173b3b3ecd..45e1573f8f1d3e0497c14f063c6d431dd255eadf 100644 (file)
@@ -473,30 +473,6 @@ retry:
 }
 #endif
 
-static bool spte_has_volatile_bits(u64 spte)
-{
-       if (!is_shadow_present_pte(spte))
-               return false;
-
-       /*
-        * Always atomically update spte if it can be updated
-        * out of mmu-lock, it can ensure dirty bit is not lost,
-        * also, it can help us to get a stable is_writable_pte()
-        * to ensure tlb flush is not missed.
-        */
-       if (spte_can_locklessly_be_made_writable(spte) ||
-           is_access_track_spte(spte))
-               return true;
-
-       if (spte_ad_enabled(spte)) {
-               if ((spte & shadow_accessed_mask) == 0 ||
-                   (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0))
-                       return true;
-       }
-
-       return false;
-}
-
 /* Rules for using mmu_spte_set:
  * Set the sptep from nonpresent to present.
  * Note: the sptep being assigned *must* be either not present
@@ -557,7 +533,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
         * we always atomically update it, see the comments in
         * spte_has_volatile_bits().
         */
-       if (spte_can_locklessly_be_made_writable(old_spte) &&
+       if (is_mmu_writable_spte(old_spte) &&
              !is_writable_pte(new_spte))
                flush = true;
 
@@ -591,7 +567,8 @@ static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
        u64 old_spte = *sptep;
        int level = sptep_to_sp(sptep)->role.level;
 
-       if (!spte_has_volatile_bits(old_spte))
+       if (!is_shadow_present_pte(old_spte) ||
+           !spte_has_volatile_bits(old_spte))
                __update_clear_spte_fast(sptep, 0ull);
        else
                old_spte = __update_clear_spte_slow(sptep, 0ull);
@@ -1187,7 +1164,7 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
        u64 spte = *sptep;
 
        if (!is_writable_pte(spte) &&
-             !(pt_protect && spte_can_locklessly_be_made_writable(spte)))
+           !(pt_protect && is_mmu_writable_spte(spte)))
                return false;
 
        rmap_printk("spte %p %llx\n", sptep, *sptep);
@@ -2804,8 +2781,12 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
                                  const struct kvm_memory_slot *slot)
 {
        unsigned long hva;
-       pte_t *pte;
-       int level;
+       unsigned long flags;
+       int level = PG_LEVEL_4K;
+       pgd_t pgd;
+       p4d_t p4d;
+       pud_t pud;
+       pmd_t pmd;
 
        if (!PageCompound(pfn_to_page(pfn)) && !kvm_is_zone_device_pfn(pfn))
                return PG_LEVEL_4K;
@@ -2820,10 +2801,43 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
         */
        hva = __gfn_to_hva_memslot(slot, gfn);
 
-       pte = lookup_address_in_mm(kvm->mm, hva, &level);
-       if (unlikely(!pte))
-               return PG_LEVEL_4K;
+       /*
+        * Look up the mapping level in the current mm.  The information
+        * may become stale soon, but it is safe to use as long as
+        * 1) mmu_notifier_retry was checked after taking mmu_lock, and
+        * 2) mmu_lock is taken now.
+        *
+        * We still need to disable IRQs to prevent concurrent teardown
+        * of page tables.
+        */
+       local_irq_save(flags);
+
+       pgd = READ_ONCE(*pgd_offset(kvm->mm, hva));
+       if (pgd_none(pgd))
+               goto out;
 
+       p4d = READ_ONCE(*p4d_offset(&pgd, hva));
+       if (p4d_none(p4d) || !p4d_present(p4d))
+               goto out;
+
+       pud = READ_ONCE(*pud_offset(&p4d, hva));
+       if (pud_none(pud) || !pud_present(pud))
+               goto out;
+
+       if (pud_large(pud)) {
+               level = PG_LEVEL_1G;
+               goto out;
+       }
+
+       pmd = READ_ONCE(*pmd_offset(&pud, hva));
+       if (pmd_none(pmd) || !pmd_present(pmd))
+               goto out;
+
+       if (pmd_large(pmd))
+               level = PG_LEVEL_2M;
+
+out:
+       local_irq_restore(flags);
        return level;
 }
 
@@ -2992,9 +3006,15 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
                /*
                 * If MMIO caching is disabled, emulate immediately without
                 * touching the shadow page tables as attempting to install an
-                * MMIO SPTE will just be an expensive nop.
+                * MMIO SPTE will just be an expensive nop.  Do not cache MMIO
+                * whose gfn is greater than host.MAXPHYADDR; any guest that
+                * generates such gfns is running nested and is being tricked
+                * by L0 userspace (you can observe gfn > L1.MAXPHYADDR if
+                * and only if L1's MAXPHYADDR is inaccurate with respect to
+                * the hardware's).
                 */
-               if (unlikely(!shadow_mmio_value)) {
+               if (unlikely(!shadow_mmio_value) ||
+                   unlikely(fault->gfn > kvm_mmu_max_gfn())) {
                        *ret_val = RET_PF_EMULATE;
                        return true;
                }
@@ -3153,8 +3173,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
                 * be removed in the fast path only if the SPTE was
                 * write-protected for dirty-logging or access tracking.
                 */
-               if (fault->write &&
-                   spte_can_locklessly_be_made_writable(spte)) {
+               if (fault->write && is_mmu_writable_spte(spte)) {
                        new_spte |= PT_WRITABLE_MASK;
 
                        /*
@@ -5451,14 +5470,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
        uint i;
 
        if (pcid == kvm_get_active_pcid(vcpu)) {
-               mmu->invlpg(vcpu, gva, mmu->root.hpa);
+               if (mmu->invlpg)
+                       mmu->invlpg(vcpu, gva, mmu->root.hpa);
                tlb_flush = true;
        }
 
        for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
                if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
                    pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) {
-                       mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+                       if (mmu->invlpg)
+                               mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
                        tlb_flush = true;
                }
        }
@@ -5646,6 +5667,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
 {
        struct kvm_mmu_page *sp, *node;
        int nr_zapped, batch = 0;
+       bool unstable;
 
 restart:
        list_for_each_entry_safe_reverse(sp, node,
@@ -5677,11 +5699,12 @@ restart:
                        goto restart;
                }
 
-               if (__kvm_mmu_prepare_zap_page(kvm, sp,
-                               &kvm->arch.zapped_obsolete_pages, &nr_zapped)) {
-                       batch += nr_zapped;
+               unstable = __kvm_mmu_prepare_zap_page(kvm, sp,
+                               &kvm->arch.zapped_obsolete_pages, &nr_zapped);
+               batch += nr_zapped;
+
+               if (unstable)
                        goto restart;
-               }
        }
 
        /*
@@ -6237,12 +6260,24 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
        return 0;
 }
 
-int kvm_mmu_module_init(void)
+/*
+ * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as
+ * its default value of -1 is technically undefined behavior for a boolean.
+ */
+void kvm_mmu_x86_module_init(void)
 {
-       int ret = -ENOMEM;
-
        if (nx_huge_pages == -1)
                __set_nx_huge_pages(get_nx_auto_mode());
+}
+
+/*
+ * The bulk of the MMU initialization is deferred until the vendor module is
+ * loaded as many of the masks/values may be modified by VMX or SVM, i.e. need
+ * to be reset when a potentially different vendor module is loaded.
+ */
+int kvm_mmu_vendor_module_init(void)
+{
+       int ret = -ENOMEM;
 
        /*
         * MMU roles use union aliasing which is, generally speaking, an
@@ -6290,7 +6325,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
        mmu_free_memory_caches(vcpu);
 }
 
-void kvm_mmu_module_exit(void)
+void kvm_mmu_vendor_module_exit(void)
 {
        mmu_destroy_caches();
        percpu_counter_destroy(&kvm_total_used_mmu_pages);
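
The module-init split above exists because nx_huge_pages defaults to -1 ("auto") and must be collapsed to a real boolean exactly once, when kvm.ko loads, while the bulk of MMU setup waits for the vendor module. A sketch of the tri-state resolution, with get_nx_auto_mode() stubbed as an assumption (the real policy keys off the CPU's NX huge page erratum):

  #include <stdbool.h>
  #include <stdio.h>

  static int nx_huge_pages = -1;  /* -1 = auto, 0 = off, 1 = on */

  /* Stub: stands in for KVM's CPU-erratum check. */
  static bool get_nx_auto_mode(void)
  {
          return true;
  }

  static void resolve_nx_huge_pages(void)
  {
          /* Resolve "auto" once at module load; later reads see 0 or 1. */
          if (nx_huge_pages == -1)
                  nx_huge_pages = get_nx_auto_mode();
  }

  int main(void)
  {
          resolve_nx_huge_pages();
          printf("nx_huge_pages = %d\n", nx_huge_pages);
          return 0;
  }
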
index 4739b53c9734d73b2a1f34eba41acededc29f910..e5c0b6db6f2ca33b238cc22b5a093e0c886f1604 100644 (file)
@@ -90,6 +90,34 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
                                     E820_TYPE_RAM);
 }
 
+/*
+ * Returns true if the SPTE has bits that may be set without holding mmu_lock.
+ * The caller is responsible for checking if the SPTE is shadow-present, and
+ * for determining whether or not the caller cares about non-leaf SPTEs.
+ */
+bool spte_has_volatile_bits(u64 spte)
+{
+       /*
+        * Always atomically update an SPTE that can be updated out of
+        * mmu_lock: doing so ensures the Dirty bit is not lost and yields
+        * a stable is_writable_pte() so that a needed TLB flush is not
+        * missed.
+        */
+       if (!is_writable_pte(spte) && is_mmu_writable_spte(spte))
+               return true;
+
+       if (is_access_track_spte(spte))
+               return true;
+
+       if (spte_ad_enabled(spte)) {
+               if (!(spte & shadow_accessed_mask) ||
+                   (is_writable_pte(spte) && !(spte & shadow_dirty_mask)))
+                       return true;
+       }
+
+       return false;
+}
+
 bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
               const struct kvm_memory_slot *slot,
               unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
index 73f12615416f5c39e97eb7639b84d97cf3627fa7..80ab0f5cff01b5fec271c28eeb1a5f97afa5793a 100644 (file)
@@ -201,12 +201,6 @@ static inline bool is_removed_spte(u64 spte)
  */
 extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
 
-/*
- * The number of non-reserved physical address bits irrespective of features
- * that repurpose legal bits, e.g. MKTME.
- */
-extern u8 __read_mostly shadow_phys_bits;
-
 static inline bool is_mmio_spte(u64 spte)
 {
        return (spte & shadow_mmio_mask) == shadow_mmio_value &&
@@ -396,7 +390,7 @@ static inline void check_spte_writable_invariants(u64 spte)
                          "kvm: Writable SPTE is not MMU-writable: %llx", spte);
 }
 
-static inline bool spte_can_locklessly_be_made_writable(u64 spte)
+static inline bool is_mmu_writable_spte(u64 spte)
 {
        return spte & shadow_mmu_writable_mask;
 }
@@ -410,6 +404,8 @@ static inline u64 get_mmio_spte_generation(u64 spte)
        return gen;
 }
 
+bool spte_has_volatile_bits(u64 spte);
+
 bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
               const struct kvm_memory_slot *slot,
               unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
index b1eaf6ec0e0b1fe8c859a44f67f047a01db6225f..f0af385c56e035e74ce9e0d01ed2f0d2ce4c5c20 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/kvm_host.h>
 
 #include "mmu.h"
+#include "spte.h"
 
 /*
  * TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs)
@@ -17,9 +18,38 @@ static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
 {
        return READ_ONCE(*rcu_dereference(sptep));
 }
-static inline void kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 val)
+
+static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
+{
+       return xchg(rcu_dereference(sptep), new_spte);
+}
+
+static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
+{
+       WRITE_ONCE(*rcu_dereference(sptep), new_spte);
+}
+
+static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
+                                        u64 new_spte, int level)
 {
-       WRITE_ONCE(*rcu_dereference(sptep), val);
+       /*
+        * Atomically write the SPTE if it is a shadow-present, leaf SPTE with
+        * volatile bits, i.e. has bits that can be set outside of mmu_lock.
+        * The Writable bit can be set by KVM's fast page fault handler, and
+        * Accessed and Dirty bits can be set by the CPU.
+        *
+        * Note, non-leaf SPTEs do have Accessed bits and those bits are
+        * technically volatile, but KVM doesn't consume the Accessed bit of
+        * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit.  This
+        * logic needs to be reassessed if KVM were to use non-leaf Accessed
+        * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
+        */
+       if (is_shadow_present_pte(old_spte) && is_last_spte(old_spte, level) &&
+           spte_has_volatile_bits(old_spte))
+               return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);
+
+       __kvm_tdp_mmu_write_spte(sptep, new_spte);
+       return old_spte;
 }
 
 /*
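
The helper above pays for an atomic only when it must: a plain WRITE_ONCE of a shadow-present leaf SPTE with volatile bits could race with the CPU's A/D-bit assists or the fast page fault path and silently drop a Dirty bit. A toy C11 sketch of why the value returned by the exchange matters (the bit layout is invented for illustration, not KVM's):

  #include <stdatomic.h>
  #include <stdint.h>
  #include <stdio.h>

  #define SPTE_DIRTY (1ULL << 9)  /* illustrative bit position only */

  int main(void)
  {
          _Atomic uint64_t spte = 0x1000;
          uint64_t old = atomic_load(&spte);

          /* Simulate hardware setting Dirty after 'old' was sampled. */
          atomic_fetch_or(&spte, SPTE_DIRTY);

          /*
           * A plain store would leave the caller holding the stale 'old'
           * and the Dirty bit would be lost; the exchange returns the
           * live value so the caller can propagate it.
           */
          old = atomic_exchange(&spte, 0);
          printf("dirty seen: %s\n", (old & SPTE_DIRTY) ? "yes" : "no");
          return 0;
  }
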
index d71d177ae6b8716e98a2a74f91c5e5a0d85231b1..922b06bf4b94885de0eac5e847f02d16921ebe0d 100644 (file)
@@ -51,7 +51,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
        if (!kvm->arch.tdp_mmu_enabled)
                return;
 
-       flush_workqueue(kvm->arch.tdp_mmu_zap_wq);
+       /* Also waits for any queued work items.  */
        destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
 
        WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages));
@@ -426,9 +426,9 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
        tdp_mmu_unlink_sp(kvm, sp, shared);
 
        for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
-               u64 *sptep = rcu_dereference(pt) + i;
+               tdp_ptep_t sptep = pt + i;
                gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
-               u64 old_child_spte;
+               u64 old_spte;
 
                if (shared) {
                        /*
@@ -440,8 +440,8 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
                         * value to the removed SPTE value.
                         */
                        for (;;) {
-                               old_child_spte = xchg(sptep, REMOVED_SPTE);
-                               if (!is_removed_spte(old_child_spte))
+                               old_spte = kvm_tdp_mmu_write_spte_atomic(sptep, REMOVED_SPTE);
+                               if (!is_removed_spte(old_spte))
                                        break;
                                cpu_relax();
                        }
@@ -455,23 +455,43 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
                         * are guarded by the memslots generation, not by being
                         * unreachable.
                         */
-                       old_child_spte = READ_ONCE(*sptep);
-                       if (!is_shadow_present_pte(old_child_spte))
+                       old_spte = kvm_tdp_mmu_read_spte(sptep);
+                       if (!is_shadow_present_pte(old_spte))
                                continue;
 
                        /*
-                        * Marking the SPTE as a removed SPTE is not
-                        * strictly necessary here as the MMU lock will
-                        * stop other threads from concurrently modifying
-                        * this SPTE. Using the removed SPTE value keeps
-                        * the two branches consistent and simplifies
-                        * the function.
+                        * Use the common helper instead of a raw WRITE_ONCE as
+                        * the SPTE needs to be updated atomically if it can be
+                        * modified by a different vCPU outside of mmu_lock.
+                        * Even though the parent SPTE is !PRESENT, the TLB
+                        * hasn't yet been flushed, and both Intel and AMD
+                        * document that A/D assists can use upper-level PxE
+                        * entries that are cached in the TLB, i.e. the CPU can
+                        * still access the page and mark it dirty.
+                        *
+                        * No retry is needed in the atomic update path as the
+                        * sole concern is dropping a Dirty bit, i.e. no other
+                        * task can zap/remove the SPTE as mmu_lock is held for
+                        * write.  Marking the SPTE as a removed SPTE is not
+                        * strictly necessary for the same reason, but using
+                        * the removed SPTE value keeps the shared/exclusive
+                        * paths consistent and allows the handle_changed_spte()
+                        * call below to hardcode the new value to REMOVED_SPTE.
+                        *
+                        * Note, even though dropping a Dirty bit is the only
+                        * scenario where a non-atomic update could result in a
+                        * functional bug, simply checking the Dirty bit isn't
+                        * sufficient as a fast page fault could read the upper
+                        * level SPTE before it is zapped, and then make this
+                        * target SPTE writable, resume the guest, and set the
+                        * Dirty bit between reading the SPTE above and writing
+                        * it here.
                         */
-                       WRITE_ONCE(*sptep, REMOVED_SPTE);
+                       old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte,
+                                                         REMOVED_SPTE, level);
                }
                handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
-                                   old_child_spte, REMOVED_SPTE, level,
-                                   shared);
+                                   old_spte, REMOVED_SPTE, level, shared);
        }
 
        call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
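
In the shared path above, the xchg-until-not-removed loop guarantees that exactly one thread observes the pre-removal SPTE value even when several threads race to zap. A compressed userspace analogue of that claim-by-exchange pattern (REMOVED_SPTE's real encoding is a special non-present marker, not the placeholder used here):

  #include <stdatomic.h>
  #include <stdint.h>
  #include <stdio.h>

  #define REMOVED_MARKER ((uint64_t)0x5a)  /* placeholder marker value */

  int main(void)
  {
          _Atomic uint64_t spte = 0x1000;
          uint64_t old;

          /*
           * Spin until the previous value is not already the marker,
           * i.e. until this thread, not a concurrent zapper, owns the
           * transition and the real old value.
           */
          for (;;) {
                  old = atomic_exchange(&spte, REMOVED_MARKER);
                  if (old != REMOVED_MARKER)
                          break;
                  /* a cpu_relax() equivalent would go here */
          }
          printf("claimed spte, old value %#llx\n", (unsigned long long)old);
          return 0;
  }
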
@@ -667,14 +687,13 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
                                           KVM_PAGES_PER_HPAGE(iter->level));
 
        /*
-        * No other thread can overwrite the removed SPTE as they
-        * must either wait on the MMU lock or use
-        * tdp_mmu_set_spte_atomic which will not overwrite the
-        * special removed SPTE value. No bookkeeping is needed
-        * here since the SPTE is going from non-present
-        * to non-present.
+        * No other thread can overwrite the removed SPTE as they must either
+        * wait on the MMU lock or use tdp_mmu_set_spte_atomic() which will not
+        * overwrite the special removed SPTE value. No bookkeeping is needed
+        * here since the SPTE is going from non-present to non-present.  Use
+        * the raw write helper to avoid an unnecessary check on volatile bits.
         */
-       kvm_tdp_mmu_write_spte(iter->sptep, 0);
+       __kvm_tdp_mmu_write_spte(iter->sptep, 0);
 
        return 0;
 }
@@ -699,10 +718,13 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
  *                   unless performing certain dirty logging operations.
  *                   Leaving record_dirty_log unset in that case prevents page
  *                   writes from being double counted.
+ *
+ * Returns the old SPTE value, which _may_ differ from @old_spte if the
+ * SPTE had volatile bits.
  */
-static void __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
-                              u64 old_spte, u64 new_spte, gfn_t gfn, int level,
-                              bool record_acc_track, bool record_dirty_log)
+static u64 __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
+                             u64 old_spte, u64 new_spte, gfn_t gfn, int level,
+                             bool record_acc_track, bool record_dirty_log)
 {
        lockdep_assert_held_write(&kvm->mmu_lock);
 
@@ -715,7 +737,7 @@ static void __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
         */
        WARN_ON(is_removed_spte(old_spte) || is_removed_spte(new_spte));
 
-       kvm_tdp_mmu_write_spte(sptep, new_spte);
+       old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);
 
        __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false);
 
@@ -724,6 +746,7 @@ static void __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
        if (record_dirty_log)
                handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte,
                                              new_spte, level);
+       return old_spte;
 }
 
 static inline void _tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
@@ -732,9 +755,10 @@ static inline void _tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
 {
        WARN_ON_ONCE(iter->yielded);
 
-       __tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep, iter->old_spte,
-                          new_spte, iter->gfn, iter->level,
-                          record_acc_track, record_dirty_log);
+       iter->old_spte = __tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep,
+                                           iter->old_spte, new_spte,
+                                           iter->gfn, iter->level,
+                                           record_acc_track, record_dirty_log);
 }
 
 static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
@@ -815,14 +839,15 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
        return iter->yielded;
 }
 
-static inline gfn_t tdp_mmu_max_gfn_host(void)
+static inline gfn_t tdp_mmu_max_gfn_exclusive(void)
 {
        /*
-        * Bound TDP MMU walks at host.MAXPHYADDR, guest accesses beyond that
-        * will hit a #PF(RSVD) and never hit an EPT Violation/Misconfig / #NPF,
-        * and so KVM will never install a SPTE for such addresses.
+        * Bound TDP MMU walks at host.MAXPHYADDR.  KVM disallows memslots with
+        * a gpa range that would exceed the max gfn, and KVM does not create
+        * MMIO SPTEs for "impossible" gfns, instead sending such accesses down
+        * the slow emulation path every time.
         */
-       return 1ULL << (shadow_phys_bits - PAGE_SHIFT);
+       return kvm_mmu_max_gfn() + 1;
 }
 
 static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
@@ -830,7 +855,7 @@ static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
 {
        struct tdp_iter iter;
 
-       gfn_t end = tdp_mmu_max_gfn_host();
+       gfn_t end = tdp_mmu_max_gfn_exclusive();
        gfn_t start = 0;
 
        for_each_tdp_pte_min_level(iter, root, zap_level, start, end) {
@@ -923,7 +948,7 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
 {
        struct tdp_iter iter;
 
-       end = min(end, tdp_mmu_max_gfn_host());
+       end = min(end, tdp_mmu_max_gfn_exclusive());
 
        lockdep_assert_held_write(&kvm->mmu_lock);
 
index eca39f56c23153556c094104b59ea52a1b1e0d91..0604bc29f0b8c947fdeb3291c3f0ce033271064c 100644 (file)
@@ -171,9 +171,12 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
        return true;
 }
 
-static int cmp_u64(const void *a, const void *b)
+static int cmp_u64(const void *pa, const void *pb)
 {
-       return *(__u64 *)a - *(__u64 *)b;
+       u64 a = *(u64 *)pa;
+       u64 b = *(u64 *)pb;
+
+       return (a > b) - (a < b);
 }
 
 void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
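
The old comparator subtracted two u64s and let C truncate the difference to int, so sufficiently far-apart keys could compare as equal or even inverted; sort comparators must return a sign, not a difference. A small demonstration of the failure mode and the branchless fix:

  #include <stdint.h>
  #include <stdio.h>

  static int cmp_u64_buggy(const void *pa, const void *pb)
  {
          /* 64-bit difference truncated to int: wrong for large gaps. */
          return *(const uint64_t *)pa - *(const uint64_t *)pb;
  }

  static int cmp_u64_fixed(const void *pa, const void *pb)
  {
          uint64_t a = *(const uint64_t *)pa;
          uint64_t b = *(const uint64_t *)pb;

          return (a > b) - (a < b);
  }

  int main(void)
  {
          uint64_t a = 1ULL << 32, b = 0;

          /* a - b == 2^32, which truncates to (int)0: "equal", wrongly. */
          printf("buggy: %d, fixed: %d\n",
                 cmp_u64_buggy(&a, &b), cmp_u64_fixed(&a, &b));
          return 0;
  }
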
index 9e66fba1d6a37da38caf441db963b016ca8f37c7..22992b049d380f55f36660e5d866943f8856fddc 100644 (file)
@@ -138,6 +138,15 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
        return sample_period;
 }
 
+static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
+{
+       if (!pmc->perf_event || pmc->is_paused)
+               return;
+
+       perf_event_period(pmc->perf_event,
+                         get_sample_period(pmc, pmc->counter));
+}
+
 void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
 void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
 void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
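
pmc_update_sample_period() lets every counter-write path reprogram the perf sampling period through one helper instead of open-coding the perf_event/is_paused checks. The arithmetic underneath is essentially "counts remaining until the counter wraps"; a sketch under the assumption of a 48-bit counter mask:

  #include <stdint.h>
  #include <stdio.h>

  #define PMC_MASK ((1ULL << 48) - 1)  /* assumed counter width */

  static uint64_t sample_period(uint64_t counter)
  {
          /* Counts left before the counter overflows its width. */
          uint64_t period = (-counter) & PMC_MASK;

          if (!period)
                  period = PMC_MASK + 1;
          return period;
  }

  int main(void)
  {
          /* 256 counts from overflow. */
          printf("period = %llu\n",
                 (unsigned long long)sample_period(0xFFFFFFFFFF00ULL));
          return 0;
  }
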
index a1cf9c31273b7c9176ea0f243a91d3725b28d4f3..421619540ff9d972ee7a441d7404f61fd5e957ba 100644 (file)
@@ -837,7 +837,8 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
                          BIT(APICV_INHIBIT_REASON_IRQWIN) |
                          BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
                          BIT(APICV_INHIBIT_REASON_X2APIC) |
-                         BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
+                         BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
+                         BIT(APICV_INHIBIT_REASON_SEV);
 
        return supported & BIT(reason);
 }
index 24eb935b6f85c309fe0adafc600a4be098e02d16..16a5ebb420cfe428fd451c086f7e72be30a2ef9a 100644 (file)
@@ -45,6 +45,22 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
        [7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
 };
 
+/* duplicated from amd_f17h_perfmon_event_map. */
+static struct kvm_event_hw_type_mapping amd_f17h_event_mapping[] = {
+       [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
+       [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
+       [2] = { 0x60, 0xff, PERF_COUNT_HW_CACHE_REFERENCES },
+       [3] = { 0x64, 0x09, PERF_COUNT_HW_CACHE_MISSES },
+       [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+       [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+       [6] = { 0x87, 0x02, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
+       [7] = { 0x87, 0x01, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
+};
+
+/* amd_pmc_perf_hw_id depends on these being the same size */
+static_assert(ARRAY_SIZE(amd_event_mapping) ==
+            ARRAY_SIZE(amd_f17h_event_mapping));
+
 static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type)
 {
        struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
@@ -140,6 +156,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
 
 static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
 {
+       struct kvm_event_hw_type_mapping *event_mapping;
        u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
        u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
        int i;
@@ -148,15 +165,20 @@ static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
        if (WARN_ON(pmc_is_fixed(pmc)))
                return PERF_COUNT_HW_MAX;
 
+       if (guest_cpuid_family(pmc->vcpu) >= 0x17)
+               event_mapping = amd_f17h_event_mapping;
+       else
+               event_mapping = amd_event_mapping;
+
        for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
-               if (amd_event_mapping[i].eventsel == event_select
-                   && amd_event_mapping[i].unit_mask == unit_mask)
+               if (event_mapping[i].eventsel == event_select
+                   && event_mapping[i].unit_mask == unit_mask)
                        break;
 
        if (i == ARRAY_SIZE(amd_event_mapping))
                return PERF_COUNT_HW_MAX;
 
-       return amd_event_mapping[i].event_type;
+       return event_mapping[i].event_type;
 }
 
 /* check if a PMC is enabled by comparing it against global_ctrl bits. Because
@@ -257,6 +279,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
        if (pmc) {
                pmc->counter += data - pmc_read_counter(pmc);
+               pmc_update_sample_period(pmc);
                return 0;
        }
        /* MSR_EVNTSELn */
index 75fa6dd268f056644446ef5db4c596658d08097b..636c77ef55fc3be26d9b6e2cd8b19f5cfecbc4cc 100644 (file)
@@ -260,6 +260,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
        INIT_LIST_HEAD(&sev->regions_list);
        INIT_LIST_HEAD(&sev->mirror_vms);
 
+       kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV);
+
        return 0;
 
 e_free:
@@ -465,6 +467,7 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
                page_virtual = kmap_atomic(pages[i]);
                clflush_cache_range(page_virtual, PAGE_SIZE);
                kunmap_atomic(page_virtual);
+               cond_resched();
        }
 }
 
@@ -559,12 +562,20 @@ e_unpin:
 
 static int sev_es_sync_vmsa(struct vcpu_svm *svm)
 {
-       struct vmcb_save_area *save = &svm->vmcb->save;
+       struct sev_es_save_area *save = svm->sev_es.vmsa;
 
        /* Check some debug related fields before encrypting the VMSA */
-       if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1))
+       if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1))
                return -EINVAL;
 
+       /*
+        * SEV-ES will use a VMSA that is pointed to by the VMCB, not
+        * the traditional VMSA that is part of the VMCB. Copy the
+        * traditional VMSA as it has been built so far (in prep
+        * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
+        */
+       memcpy(save, &svm->vmcb->save, sizeof(svm->vmcb->save));
+
        /* Sync registers */
        save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
        save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
@@ -592,14 +603,6 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
        save->xss  = svm->vcpu.arch.ia32_xss;
        save->dr6  = svm->vcpu.arch.dr6;
 
-       /*
-        * SEV-ES will use a VMSA that is pointed to by the VMCB, not
-        * the traditional VMSA that is part of the VMCB. Copy the
-        * traditional VMSA as it has been built so far (in prep
-        * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
-        */
-       memcpy(svm->sev_es.vmsa, save, sizeof(*save));
-
        return 0;
 }
 
@@ -1591,24 +1594,51 @@ static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
        atomic_set_release(&src_sev->migration_in_progress, 0);
 }
 
+/* vCPU mutex subclasses.  */
+enum sev_migration_role {
+       SEV_MIGRATION_SOURCE = 0,
+       SEV_MIGRATION_TARGET,
+       SEV_NR_MIGRATION_ROLES,
+};
 
-static int sev_lock_vcpus_for_migration(struct kvm *kvm)
+static int sev_lock_vcpus_for_migration(struct kvm *kvm,
+                                       enum sev_migration_role role)
 {
        struct kvm_vcpu *vcpu;
        unsigned long i, j;
+       bool first = true;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
-               if (mutex_lock_killable(&vcpu->mutex))
+               if (mutex_lock_killable_nested(&vcpu->mutex, role))
                        goto out_unlock;
+
+               if (first) {
+                       /*
+                        * Reset the role to one that avoids colliding with
+                        * the role used for the first vcpu mutex.
+                        */
+                       role = SEV_NR_MIGRATION_ROLES;
+                       first = false;
+               } else {
+                       mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
+               }
        }
 
        return 0;
 
 out_unlock:
+
+       first = true;
        kvm_for_each_vcpu(j, vcpu, kvm) {
                if (i == j)
                        break;
 
+               if (first)
+                       first = false;
+               else
+                       mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_);
+
                mutex_unlock(&vcpu->mutex);
        }
        return -EINTR;
@@ -1618,8 +1648,15 @@ static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
 {
        struct kvm_vcpu *vcpu;
        unsigned long i;
+       bool first = true;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
+               if (first)
+                       first = false;
+               else
+                       mutex_acquire(&vcpu->mutex.dep_map,
+                                     SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_);
+
                mutex_unlock(&vcpu->mutex);
        }
 }
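
The lockdep-subclass juggling above exists only to keep lockdep's held-lock table bounded while every vCPU mutex is taken; the functional core is "lock all vCPUs in order, and on failure unlock exactly the ones already taken". A pthread sketch of that rollback shape (trylock substitutes for mutex_lock_killable_nested(), and the mutex_release()/mutex_acquire() annotations are lockdep-specific and omitted):

  #include <pthread.h>
  #include <stdio.h>

  #define NR_VCPUS 4

  static pthread_mutex_t vcpu_lock[NR_VCPUS];

  static int lock_all_vcpus(void)
  {
          int i, j;

          for (i = 0; i < NR_VCPUS; i++) {
                  if (pthread_mutex_trylock(&vcpu_lock[i]))
                          goto out_unlock;
          }
          return 0;

  out_unlock:
          /* Roll back only the mutexes acquired before the failure. */
          for (j = 0; j < i; j++)
                  pthread_mutex_unlock(&vcpu_lock[j]);
          return -1;
  }

  int main(void)
  {
          for (int i = 0; i < NR_VCPUS; i++)
                  pthread_mutex_init(&vcpu_lock[i], NULL);
          printf("lock_all_vcpus: %d\n", lock_all_vcpus());
          return 0;
  }
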
@@ -1745,10 +1782,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
                charged = true;
        }
 
-       ret = sev_lock_vcpus_for_migration(kvm);
+       ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE);
        if (ret)
                goto out_dst_cgroup;
-       ret = sev_lock_vcpus_for_migration(source_kvm);
+       ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET);
        if (ret)
                goto out_dst_vcpu;
 
@@ -2223,51 +2260,47 @@ int sev_cpu_init(struct svm_cpu_data *sd)
  * Pages used by hardware to hold guest encrypted state must be flushed before
  * returning them to the system.
  */
-static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
-                                  unsigned long len)
+static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
 {
+       int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
+
        /*
-        * If hardware enforced cache coherency for encrypted mappings of the
-        * same physical page is supported, nothing to do.
+        * Note!  The address must be a kernel address, as regular page walk
+        * checks are performed by VM_PAGE_FLUSH, i.e. operating on a user
+        * address is non-deterministic and unsafe.  This function deliberately
+        * takes a pointer to deter passing in a user address.
         */
-       if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
-               return;
+       unsigned long addr = (unsigned long)va;
 
        /*
-        * If the VM Page Flush MSR is supported, use it to flush the page
-        * (using the page virtual address and the guest ASID).
+        * If the CPU enforces cache coherency for encrypted mappings of the
+        * same physical page, CLFLUSHOPT is sufficient. NOTE: a cache flush
+        * is still needed in order to work properly with DMA devices.
         */
-       if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
-               struct kvm_sev_info *sev;
-               unsigned long va_start;
-               u64 start, stop;
-
-               /* Align start and stop to page boundaries. */
-               va_start = (unsigned long)va;
-               start = (u64)va_start & PAGE_MASK;
-               stop = PAGE_ALIGN((u64)va_start + len);
-
-               if (start < stop) {
-                       sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+       if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) {
+               clflush_cache_range(va, PAGE_SIZE);
+               return;
+       }
 
-                       while (start < stop) {
-                               wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
-                                      start | sev->asid);
+       /*
+        * VM Page Flush takes a host virtual address and a guest ASID.  Fall
+        * back to WBINVD if this faults so as not to make any problems worse
+        * by leaving stale encrypted data in the cache.
+        */
+       if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
+               goto do_wbinvd;
 
-                               start += PAGE_SIZE;
-                       }
+       return;
 
-                       return;
-               }
+do_wbinvd:
+       wbinvd_on_all_cpus();
+}
 
-               WARN(1, "Address overflow, using WBINVD\n");
-       }
+void sev_guest_memory_reclaimed(struct kvm *kvm)
+{
+       if (!sev_guest(kvm))
+               return;
 
-       /*
-        * Hardware should always have one of the above features,
-        * but if not, use WBINVD and issue a warning.
-        */
-       WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
        wbinvd_on_all_cpus();
 }
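
The rewritten sev_flush_encrypted_page() works on a single page and degrades gracefully: CLFLUSHOPT when hardware keeps encrypted aliases coherent, the VM_PAGE_FLUSH MSR otherwise, and WBINVD on all CPUs as the last resort. A sketch of that ladder with the capability checks stubbed out (the predicates and the flush helper below are stand-ins, not the real kernel APIs):

  #include <stdbool.h>
  #include <stdio.h>

  /* Stubs standing in for boot_cpu_has()/wrmsrl_safe(). */
  static bool have_sme_coherent(void) { return false; }
  static bool vm_page_flush_ok(unsigned long addr, int asid) { return false; }

  static void flush_encrypted_page(unsigned long addr, int asid)
  {
          if (have_sme_coherent()) {
                  puts("clflushopt the page");
                  return;
          }
          if (vm_page_flush_ok(addr, asid)) {
                  puts("flushed via VM_PAGE_FLUSH MSR");
                  return;
          }
          /* Fall back to the big hammer rather than risk stale lines. */
          puts("wbinvd on all cpus");
  }

  int main(void)
  {
          flush_encrypted_page(0x1000, 1);
          return 0;
  }
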
 
@@ -2281,7 +2314,8 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
        svm = to_svm(vcpu);
 
        if (vcpu->arch.guest_state_protected)
-               sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE);
+               sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
+
        __free_page(virt_to_page(svm->sev_es.vmsa));
 
        if (svm->sev_es.ghcb_sa_free)
@@ -2932,7 +2966,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
                                            sev_enc_bit));
 }
 
-void sev_es_prepare_switch_to_guest(struct vmcb_save_area *hostsa)
+void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
 {
        /*
         * As an SEV-ES guest, hardware will restore the host state on VMEXIT,
index bd4c64b362d24a06c9a2e48de0de9803d700c47f..17d334ef54308229fe64141f3bff7562181af66f 100644 (file)
@@ -1270,8 +1270,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
         */
        vmsave(__sme_page_pa(sd->save_area));
        if (sev_es_guest(vcpu->kvm)) {
-               struct vmcb_save_area *hostsa;
-               hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
+               struct sev_es_save_area *hostsa;
+               hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
 
                sev_es_prepare_switch_to_guest(hostsa);
        }
@@ -3117,8 +3117,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
               "tr:",
               save01->tr.selector, save01->tr.attrib,
               save01->tr.limit, save01->tr.base);
-       pr_err("cpl:            %d                efer:         %016llx\n",
-               save->cpl, save->efer);
+       pr_err("vmpl: %d   cpl:  %d               efer:          %016llx\n",
+              save->vmpl, save->cpl, save->efer);
        pr_err("%-15s %016llx %-13s %016llx\n",
               "cr0:", save->cr0, "cr2:", save->cr2);
        pr_err("%-15s %016llx %-13s %016llx\n",
@@ -4620,6 +4620,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .mem_enc_ioctl = sev_mem_enc_ioctl,
        .mem_enc_register_region = sev_mem_enc_register_region,
        .mem_enc_unregister_region = sev_mem_enc_unregister_region,
+       .guest_memory_reclaimed = sev_guest_memory_reclaimed,
 
        .vm_copy_enc_context_from = sev_vm_copy_enc_context_from,
        .vm_move_enc_context_from = sev_vm_move_enc_context_from,
index f77a7d2d39dd6dfe30b94b5cca2665435f378a00..2d83845b9032fe858f60ba026d20fccc399f7526 100644 (file)
@@ -181,7 +181,7 @@ struct svm_nested_state {
 
 struct vcpu_sev_es_state {
        /* SEV-ES support */
-       struct vmcb_save_area *vmsa;
+       struct sev_es_save_area *vmsa;
        struct ghcb *ghcb;
        struct kvm_host_map ghcb_map;
        bool received_first_sipi;
@@ -609,6 +609,8 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
                                  struct kvm_enc_region *range);
 int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd);
 int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd);
+void sev_guest_memory_reclaimed(struct kvm *kvm);
+
 void pre_sev_run(struct vcpu_svm *svm, int cpu);
 void __init sev_set_cpu_caps(void);
 void __init sev_hardware_setup(void);
@@ -620,7 +622,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
 void sev_es_init_vmcb(struct vcpu_svm *svm);
 void sev_es_vcpu_reset(struct vcpu_svm *svm);
 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
-void sev_es_prepare_switch_to_guest(struct vmcb_save_area *hostsa);
+void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
 void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 
 /* vmenter.S */
index f18744f7ff82c9c85ecfee92ff7cc91bde33e894..856c87563883302e8ee4da0eec769fd8e0f99908 100644 (file)
@@ -4618,6 +4618,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
                kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
        }
 
+       if (vmx->nested.update_vmcs01_apicv_status) {
+               vmx->nested.update_vmcs01_apicv_status = false;
+               kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+       }
+
        if ((vm_exit_reason != -1) &&
            (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
                vmx->nested.need_vmcs12_to_shadow_sync = true;
index bc3f8512bb646d76339886342699d369dd827a5b..b82b6709d7a819090bb28106c0b70362fea5d7a1 100644 (file)
@@ -431,15 +431,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                            !(msr & MSR_PMC_FULL_WIDTH_BIT))
                                data = (s64)(s32)data;
                        pmc->counter += data - pmc_read_counter(pmc);
-                       if (pmc->perf_event && !pmc->is_paused)
-                               perf_event_period(pmc->perf_event,
-                                                 get_sample_period(pmc, data));
+                       pmc_update_sample_period(pmc);
                        return 0;
                } else if ((pmc = get_fixed_pmc(pmu, msr))) {
                        pmc->counter += data - pmc_read_counter(pmc);
-                       if (pmc->perf_event && !pmc->is_paused)
-                               perf_event_period(pmc->perf_event,
-                                                 get_sample_period(pmc, data));
+                       pmc_update_sample_period(pmc);
                        return 0;
                } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
                        if (data == pmc->eventsel)
index 04d170c4b61eb48688b85d60b961d5b539509cb4..610355b9ccceb2798b6409fdb84efbd2987da113 100644 (file)
@@ -4174,6 +4174,11 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+       if (is_guest_mode(vcpu)) {
+               vmx->nested.update_vmcs01_apicv_status = true;
+               return;
+       }
+
        pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
        if (cpu_has_secondary_exec_ctrls()) {
                if (kvm_vcpu_apicv_active(vcpu))
@@ -5467,7 +5472,7 @@ static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
        return vmx->emulation_required && !vmx->rmode.vm86_active &&
-              vcpu->arch.exception.pending;
+              (vcpu->arch.exception.pending || vcpu->arch.exception.injected);
 }
 
 static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
index 9c6bfcd84008be990153a2bffa327ca3e33e996a..b98c7e96697a9a4925909a418831cdf5d5138b24 100644 (file)
@@ -183,6 +183,7 @@ struct nested_vmx {
        bool change_vmcs01_virtual_apic_mode;
        bool reload_vmcs01_apic_access_page;
        bool update_vmcs01_cpu_dirty_logging;
+       bool update_vmcs01_apicv_status;
 
        /*
         * Enlightened VMCS has been enabled. It does not mean that L1 has to
index 0c0ca599a353c0bc40d80dacaf22d83806337209..4790f0d7d40b84293ef7f6d7e5a689c645c5a352 100644 (file)
@@ -2901,7 +2901,7 @@ static void kvm_end_pvclock_update(struct kvm *kvm)
 
 static void kvm_update_masterclock(struct kvm *kvm)
 {
-       kvm_hv_invalidate_tsc_page(kvm);
+       kvm_hv_request_tsc_page_update(kvm);
        kvm_start_pvclock_update(kvm);
        pvclock_update_vm_gtod_copy(kvm);
        kvm_end_pvclock_update(kvm);
@@ -3113,8 +3113,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
                                       offsetof(struct compat_vcpu_info, time));
        if (vcpu->xen.vcpu_time_info_set)
                kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0);
-       if (!v->vcpu_idx)
-               kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
+       kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
        return 0;
 }
 
@@ -6241,7 +6240,7 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp)
        if (data.flags & ~KVM_CLOCK_VALID_FLAGS)
                return -EINVAL;
 
-       kvm_hv_invalidate_tsc_page(kvm);
+       kvm_hv_request_tsc_page_update(kvm);
        kvm_start_pvclock_update(kvm);
        pvclock_update_vm_gtod_copy(kvm);
 
@@ -8926,7 +8925,7 @@ int kvm_arch_init(void *opaque)
        }
        kvm_nr_uret_msrs = 0;
 
-       r = kvm_mmu_module_init();
+       r = kvm_mmu_vendor_module_init();
        if (r)
                goto out_free_percpu;
 
@@ -8974,7 +8973,7 @@ void kvm_arch_exit(void)
        cancel_work_sync(&pvclock_gtod_work);
 #endif
        kvm_x86_ops.hardware_enable = NULL;
-       kvm_mmu_module_exit();
+       kvm_mmu_vendor_module_exit();
        free_percpu(user_return_msrs);
        kmem_cache_destroy(x86_emulator_cache);
 #ifdef CONFIG_KVM_XEN
@@ -9112,7 +9111,7 @@ static void kvm_apicv_init(struct kvm *kvm)
 
        if (!enable_apicv)
                set_or_clear_apicv_inhibit(inhibits,
-                                          APICV_INHIBIT_REASON_ABSENT, true);
+                                          APICV_INHIBIT_REASON_DISABLE, true);
 }
 
 static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
@@ -9890,6 +9889,11 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
                kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
 }
 
+void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
+{
+       static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
+}
+
 static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 {
        if (!lapic_in_kernel(vcpu))
@@ -10016,12 +10020,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
                        vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
                        vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
+                       vcpu->run->system_event.ndata = 0;
                        r = 0;
                        goto out;
                }
                if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
                        vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
                        vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
+                       vcpu->run->system_event.ndata = 0;
                        r = 0;
                        goto out;
                }
@@ -10098,7 +10104,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        /* Store vcpu->apicv_active before vcpu->mode.  */
        smp_store_release(&vcpu->mode, IN_GUEST_MODE);
 
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
 
        /*
         * 1) We should set ->mode before checking ->requests.  Please see
@@ -10129,7 +10135,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                smp_wmb();
                local_irq_enable();
                preempt_enable();
-               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
                r = 1;
                goto cancel_injection;
        }
@@ -10255,7 +10261,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        local_irq_enable();
        preempt_enable();
 
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
 
        /*
         * Profile KVM exit RIPs:
@@ -10285,7 +10291,7 @@ out:
 }
 
 /* Called within kvm->srcu read side.  */
-static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
+static inline int vcpu_block(struct kvm_vcpu *vcpu)
 {
        bool hv_timer;
 
@@ -10301,12 +10307,12 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
                if (hv_timer)
                        kvm_lapic_switch_to_sw_timer(vcpu);
 
-               srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
                        kvm_vcpu_halt(vcpu);
                else
                        kvm_vcpu_block(vcpu);
-               vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
 
                if (hv_timer)
                        kvm_lapic_switch_to_hv_timer(vcpu);
@@ -10348,7 +10354,6 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
 static int vcpu_run(struct kvm_vcpu *vcpu)
 {
        int r;
-       struct kvm *kvm = vcpu->kvm;
 
        vcpu->arch.l1tf_flush_l1d = true;
 
@@ -10356,7 +10361,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
                if (kvm_vcpu_running(vcpu)) {
                        r = vcpu_enter_guest(vcpu);
                } else {
-                       r = vcpu_block(kvm, vcpu);
+                       r = vcpu_block(vcpu);
                }
 
                if (r <= 0)
@@ -10375,9 +10380,9 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
                }
 
                if (__xfer_to_guest_mode_work_pending()) {
-                       srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+                       kvm_vcpu_srcu_read_unlock(vcpu);
                        r = xfer_to_guest_mode_handle_work(vcpu);
-                       vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+                       kvm_vcpu_srcu_read_lock(vcpu);
                        if (r)
                                return r;
                }
@@ -10388,12 +10393,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 
 static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
 {
-       int r;
-
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-       r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-       return r;
+       return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
 }
 
 static int complete_emulated_pio(struct kvm_vcpu *vcpu)
@@ -10485,7 +10485,6 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *kvm_run = vcpu->run;
-       struct kvm *kvm = vcpu->kvm;
        int r;
 
        vcpu_load(vcpu);
@@ -10493,7 +10492,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
        kvm_run->flags = 0;
        kvm_load_guest_fpu(vcpu);
 
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
                if (kvm_run->immediate_exit) {
                        r = -EINTR;
@@ -10505,9 +10504,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                 */
                WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
 
-               srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                kvm_vcpu_block(vcpu);
-               vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
 
                if (kvm_apic_accept_events(vcpu) < 0) {
                        r = 0;
@@ -10568,7 +10567,7 @@ out:
        if (kvm_run->kvm_valid_regs)
                store_regs(vcpu);
        post_kvm_run_save(vcpu);
-       srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
 
        kvm_sigset_deactivate(vcpu);
        vcpu_put(vcpu);
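
Throughout this file, open-coded srcu_read_lock()/srcu_read_unlock() pairs on vcpu->kvm->srcu give way to kvm_vcpu_srcu_read_lock()/kvm_vcpu_srcu_read_unlock(), hiding the vcpu->srcu_idx bookkeeping behind one helper pair. Presumably the wrappers are thin inlines along these lines (a sketch, not the verbatim kvm_host.h definitions):

  static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
  {
          vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
  }

  static inline void kvm_vcpu_srcu_read_unlock(struct kvm_vcpu *vcpu)
  {
          srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
  }

Centralizing the index avoids the recurring bug class where a caller stashes srcu_idx in the wrong place across a block/unblock window.
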
@@ -10986,6 +10985,9 @@ static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
        struct kvm_vcpu *vcpu;
        unsigned long i;
 
+       if (!enable_apicv)
+               return;
+
        down_write(&kvm->arch.apicv_update_lock);
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -11197,8 +11199,21 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
                r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
                if (r < 0)
                        goto fail_mmu_destroy;
-               if (kvm_apicv_activated(vcpu->kvm))
+
+               /*
+                * Defer evaluating inhibits until the vCPU is first run, as
+                * this vCPU will not get notified of any changes until this
+                * vCPU is visible to other vCPUs (marked online and added to
+                * the set of vCPUs).  Opportunistically mark APICv active as
+                * VMX in particular is highly unlikely to have inhibits.
+                * Ignore the current per-VM APICv state so that vCPU creation
+                * is guaranteed to run with a deterministic value, the request
+                * will ensure the vCPU gets the correct state before VM-Entry.
+                */
+               if (enable_apicv) {
                        vcpu->arch.apicv_active = true;
+                       kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+               }
        } else
                static_branch_inc(&kvm_has_noapic_vcpu);
 
@@ -11996,8 +12011,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
 {
-       if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
+       if (change == KVM_MR_CREATE || change == KVM_MR_MOVE) {
+               if ((new->base_gfn + new->npages - 1) > kvm_mmu_max_gfn())
+                       return -EINVAL;
+
                return kvm_alloc_memslot_metadata(kvm, new);
+       }
 
        if (change == KVM_MR_FLAGS_ONLY)
                memcpy(&new->arch, &old->arch, sizeof(old->arch));
@@ -12986,3 +13005,19 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
+
+static int __init kvm_x86_init(void)
+{
+       kvm_mmu_x86_module_init();
+       return 0;
+}
+module_init(kvm_x86_init);
+
+static void __exit kvm_x86_exit(void)
+{
+       /*
+        * If module_init() is implemented, module_exit() must also be
+        * implemented to allow module unload.
+        */
+}
+module_exit(kvm_x86_exit);
index 8ca5ecf16dc47791be73315ee53f2a7f405484f3..9dec1b38a98fcdd756b170fdd1b3a3243640c6c3 100644 (file)
 SYM_FUNC_START(copy_user_generic_unrolled)
        ASM_STAC
        cmpl $8,%edx
-       jb 20f          /* less then 8 bytes, go to byte copy loop */
+       jb .Lcopy_user_short_string_bytes
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
-       jz .L_copy_short_string
+       jz copy_user_short_string
 1:     movq (%rsi),%r8
 2:     movq 1*8(%rsi),%r9
 3:     movq 2*8(%rsi),%r10
@@ -79,37 +79,11 @@ SYM_FUNC_START(copy_user_generic_unrolled)
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
-.L_copy_short_string:
-       movl %edx,%ecx
-       andl $7,%edx
-       shrl $3,%ecx
-       jz 20f
-18:    movq (%rsi),%r8
-19:    movq %r8,(%rdi)
-       leaq 8(%rsi),%rsi
-       leaq 8(%rdi),%rdi
-       decl %ecx
-       jnz 18b
-20:    andl %edx,%edx
-       jz 23f
-       movl %edx,%ecx
-21:    movb (%rsi),%al
-22:    movb %al,(%rdi)
-       incq %rsi
-       incq %rdi
-       decl %ecx
-       jnz 21b
-23:    xor %eax,%eax
-       ASM_CLAC
-       RET
+       jmp copy_user_short_string
 
 30:    shll $6,%ecx
        addl %ecx,%edx
-       jmp 60f
-40:    leal (%rdx,%rcx,8),%edx
-       jmp 60f
-50:    movl %ecx,%edx
-60:    jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
+       jmp .Lcopy_user_handle_tail
 
        _ASM_EXTABLE_CPY(1b, 30b)
        _ASM_EXTABLE_CPY(2b, 30b)
@@ -127,10 +101,6 @@ SYM_FUNC_START(copy_user_generic_unrolled)
        _ASM_EXTABLE_CPY(14b, 30b)
        _ASM_EXTABLE_CPY(15b, 30b)
        _ASM_EXTABLE_CPY(16b, 30b)
-       _ASM_EXTABLE_CPY(18b, 40b)
-       _ASM_EXTABLE_CPY(19b, 40b)
-       _ASM_EXTABLE_CPY(21b, 50b)
-       _ASM_EXTABLE_CPY(22b, 50b)
 SYM_FUNC_END(copy_user_generic_unrolled)
 EXPORT_SYMBOL(copy_user_generic_unrolled)
 
@@ -191,7 +161,7 @@ EXPORT_SYMBOL(copy_user_generic_string)
 SYM_FUNC_START(copy_user_enhanced_fast_string)
        ASM_STAC
        /* CPUs without FSRM should avoid rep movsb for short copies */
-       ALTERNATIVE "cmpl $64, %edx; jb .L_copy_short_string", "", X86_FEATURE_FSRM
+       ALTERNATIVE "cmpl $64, %edx; jb copy_user_short_string", "", X86_FEATURE_FSRM
        movl %edx,%ecx
 1:     rep movsb
        xorl %eax,%eax
@@ -243,6 +213,53 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
 
 SYM_CODE_END(.Lcopy_user_handle_tail)
 
+/*
+ * Finish memcpy of less than 64 bytes.  #AC should already be set.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count (< 64)
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+SYM_CODE_START_LOCAL(copy_user_short_string)
+       movl %edx,%ecx
+       andl $7,%edx
+       shrl $3,%ecx
+       jz .Lcopy_user_short_string_bytes
+18:    movq (%rsi),%r8
+19:    movq %r8,(%rdi)
+       leaq 8(%rsi),%rsi
+       leaq 8(%rdi),%rdi
+       decl %ecx
+       jnz 18b
+.Lcopy_user_short_string_bytes:
+       andl %edx,%edx
+       jz 23f
+       movl %edx,%ecx
+21:    movb (%rsi),%al
+22:    movb %al,(%rdi)
+       incq %rsi
+       incq %rdi
+       decl %ecx
+       jnz 21b
+23:    xor %eax,%eax
+       ASM_CLAC
+       RET
+
+40:    leal (%rdx,%rcx,8),%edx
+       jmp 60f
+50:    movl %ecx,%edx          /* ecx is zerorest also */
+60:    jmp .Lcopy_user_handle_tail
+
+       _ASM_EXTABLE_CPY(18b, 40b)
+       _ASM_EXTABLE_CPY(19b, 40b)
+       _ASM_EXTABLE_CPY(21b, 50b)
+       _ASM_EXTABLE_CPY(22b, 50b)
+SYM_CODE_END(copy_user_short_string)
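
copy_user_short_string is the old .L_copy_short_string body hoisted into a shared local routine so both the unrolled path and the FSRM-gated path can tail-jump to it. Minus the exception-table plumbing, its logic is an 8-byte-word loop followed by a byte loop; a plain C rendering for reference:

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  /* Word-then-byte tail copy for n < 64, mirroring the asm structure. */
  static void short_string_copy(void *dst, const void *src, size_t n)
  {
          unsigned char *d = dst;
          const unsigned char *s = src;

          for (; n >= 8; n -= 8, s += 8, d += 8) {
                  uint64_t w;

                  memcpy(&w, s, 8);  /* movq (%rsi),%r8  */
                  memcpy(d, &w, 8);  /* movq %r8,(%rdi)  */
          }
          while (n--)
                  *d++ = *s++;       /* movb (%rsi),%al; movb %al,(%rdi) */
  }

  int main(void)
  {
          char src[] = "tail copy of fewer than 64 bytes";
          char dst[sizeof(src)];

          short_string_copy(dst, src, sizeof(src));
          puts(dst);
          return 0;
  }
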
+
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
  * This will force destination out of cache for more performance.
index b781d324211bb7ab91b506ea3bf847a7e854d330..21104c41cba04868e54d14f93a3592b9fa2541ba 100644 (file)
@@ -342,9 +342,9 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
  */
 static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
 {
-#ifdef CONFIG_X86_64
        unsigned short sel;
 
+#ifdef CONFIG_X86_64
        switch (seg_reg_idx) {
        case INAT_SEG_REG_IGNORE:
                return 0;
@@ -402,7 +402,8 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
        case INAT_SEG_REG_FS:
                return (unsigned short)(regs->fs & 0xffff);
        case INAT_SEG_REG_GS:
-               return get_user_gs(regs);
+               savesegment(gs, sel);
+               return sel;
        case INAT_SEG_REG_IGNORE:
        default:
                return -EINVAL;
index 2b3eb8c948a3d0d303d206f013ef78f840bf693a..a58f451a7dd32e30215019ca35ffab444c8ccc43 100644 (file)
@@ -11,7 +11,7 @@
 #include <asm/msr.h>
 #include <asm/archrandom.h>
 #include <asm/e820/api.h>
-#include <asm/io.h>
+#include <asm/shared/io.h>
 
 /*
  * When built for the regular kernel, several functions need to be stubbed out
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
deleted file mode 100644 (file)
index e69de29..0000000
index ecb2049c1273f65f544cdf405b17681e05c8966e..b7dfd60243b75c65f79e39a1486163a5d7f21ca6 100644 (file)
@@ -48,6 +48,7 @@ SYM_FUNC_START(__put_user_1)
        cmp %_ASM_BX,%_ASM_CX
        jae .Lbad_put_user
 SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
+       ENDBR
        ASM_STAC
 1:     movb %al,(%_ASM_CX)
        xor %ecx,%ecx
@@ -62,6 +63,7 @@ SYM_FUNC_START(__put_user_2)
        cmp %_ASM_BX,%_ASM_CX
        jae .Lbad_put_user
 SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL)
+       ENDBR
        ASM_STAC
 2:     movw %ax,(%_ASM_CX)
        xor %ecx,%ecx
@@ -76,6 +78,7 @@ SYM_FUNC_START(__put_user_4)
        cmp %_ASM_BX,%_ASM_CX
        jae .Lbad_put_user
 SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL)
+       ENDBR
        ASM_STAC
 3:     movl %eax,(%_ASM_CX)
        xor %ecx,%ecx
@@ -90,6 +93,7 @@ SYM_FUNC_START(__put_user_8)
        cmp %_ASM_BX,%_ASM_CX
        jae .Lbad_put_user
 SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL)
+       ENDBR
        ASM_STAC
 4:     mov %_ASM_AX,(%_ASM_CX)
 #ifdef CONFIG_X86_32
index 5f87bab4fb8d1bb3c5a9cb60e9a297d533a64aae..b2b2366885a2b672960805bbb17a7c33a50952cf 100644 (file)
@@ -31,6 +31,7 @@
        .align RETPOLINE_THUNK_SIZE
 SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
        UNWIND_HINT_EMPTY
+       ANNOTATE_NOENDBR
 
        ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
                      __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
@@ -55,7 +56,6 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
 
        .align RETPOLINE_THUNK_SIZE
 SYM_CODE_START(__x86_indirect_thunk_array)
-       ANNOTATE_NOENDBR // apply_retpolines
 
 #define GEN(reg) THUNK reg
 #include <asm/GEN-for-each-reg.h>
index 0402a749f3a0ee188261cd248f5c5fdb8404817a..0ae6cf804197053e2bc1ab4f8bfc6eede4f13d84 100644 (file)
@@ -119,7 +119,7 @@ void __memcpy_flushcache(void *_dst, const void *_src, size_t size)
 
        /* cache copy and flush to align dest */
        if (!IS_ALIGNED(dest, 8)) {
-               unsigned len = min_t(unsigned, size, ALIGN(dest, 8) - dest);
+               size_t len = min_t(size_t, size, ALIGN(dest, 8) - dest);
 
                memcpy((void *) dest, (void *) source, len);
                clean_cache_range((void *) dest, len);
index b82ca14ba71826a247268de49e4a8d22d81b216b..4a9fd9029a53646c6ecfc14aad4d07a2e5bedb52 100644 (file)
@@ -153,7 +153,7 @@ static long pm_address(u_char FPU_modrm, u_char segment,
        switch (segment) {
        case PREFIX_GS_ - 1:
                /* user gs handling can be lazy, use special accessors */
-               addr->selector = get_user_gs(FPU_info->regs);
+               savesegment(gs, addr->selector);
                break;
        default:
                addr->selector = PM_REG_(segment);
index fe3d3061fc116a7780e28e3f604aef4aa0ea185d..d957dc15b3712890af49639cfe470208609d9f37 100644 (file)
@@ -20,13 +20,12 @@ CFLAGS_REMOVE_mem_encrypt_identity.o        = -pg
 endif
 
 obj-y                          :=  init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
-                                   pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
+                                   pgtable.o physaddr.o tlb.o cpu_entry_area.o maccess.o
 
 obj-y                          += pat/
 
 # Make sure __phys_addr has no stackprotector
 CFLAGS_physaddr.o              := -fno-stack-protector
-CFLAGS_setup_nx.o              := -fno-stack-protector
 CFLAGS_mem_encrypt_identity.o  := -fno-stack-protector
 
 CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace
index 058b2f36b3a6e07f59ca02b6ccba52280694993c..b3ca7d23e4b01c7ae719e954408c234d21d89cdf 100644 (file)
@@ -154,7 +154,7 @@ int __init amd_numa_init(void)
                node_set(nodeid, numa_nodes_parsed);
        }
 
-       if (!nodes_weight(numa_nodes_parsed))
+       if (nodes_empty(numa_nodes_parsed))
                return -ENOENT;
 
        /*
index d0074c6ed31a38ba1778bc776f857a6088b943d3..fad8faa29d042d59ab9ae0f6d89d7aaee5b8a041 100644 (file)
@@ -149,7 +149,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
                unsigned char opcode;
 
                if (user_mode(regs)) {
-                       if (get_user(opcode, instr))
+                       if (get_user(opcode, (unsigned char __user *) instr))
                                break;
                } else {
                        if (get_kernel_nofault(opcode, instr))
index 96d34ebb20a9e1e09e3a9e8eaf41f464a264d87e..61d0ab154f967c0167f0e4d69f41065152009803 100644 (file)
@@ -110,7 +110,6 @@ int force_personality32;
 /*
  * noexec32=on|off
  * Control non executable heap for 32bit processes.
- * To control the stack too use noexec=off
  *
  * on  PROT_READ does not imply PROT_EXEC for 32-bit processes (default)
  * off PROT_READ implies PROT_EXEC
@@ -902,6 +901,8 @@ static void __meminit vmemmap_use_sub_pmd(unsigned long start, unsigned long end
 
 static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
 {
+       const unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
+
        vmemmap_flush_unused_pmd();
 
        /*
@@ -914,8 +915,7 @@ static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long
         * Mark with PAGE_UNUSED the unused parts of the new memmap range
         */
        if (!IS_ALIGNED(start, PMD_SIZE))
-               memset((void *)start, PAGE_UNUSED,
-                       start - ALIGN_DOWN(start, PMD_SIZE));
+               memset((void *)page, PAGE_UNUSED, start - page);
 
        /*
         * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
index 17a492c27306990c2d2d4ed3b0224152ffa2651e..1ad0228f8ceb98032b6ea4e7fac0eb23e1c31d1f 100644 (file)
@@ -242,10 +242,15 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
         * If the page being mapped is in memory and SEV is active then
         * make sure the memory encryption attribute is enabled in the
         * resulting mapping.
+        * In TDX guests, memory is marked private by default. If encryption
+        * is not requested (via the "encrypted" argument), explicitly set the
+        * decrypted attribute on all ioremapped memory.
         */
        prot = PAGE_KERNEL_IO;
        if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
                prot = pgprot_encrypted(prot);
+       else
+               prot = pgprot_decrypted(prot);
 
        switch (pcm) {
        case _PAGE_CACHE_MODE_UC:
index 50d209939c66cb4950d303fb17acc861d86a4d3e..11350e2fd7366fa3b850f0a24b55eced8c3b7327 100644 (file)
@@ -42,7 +42,14 @@ bool force_dma_unencrypted(struct device *dev)
 
 static void print_mem_encrypt_feature_info(void)
 {
-       pr_info("AMD Memory Encryption Features active:");
+       pr_info("Memory Encryption Features active:");
+
+       if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+               pr_cont(" Intel TDX\n");
+               return;
+       }
+
+       pr_cont(" AMD");
 
        /* Secure Memory Encryption */
        if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
@@ -62,6 +69,10 @@ static void print_mem_encrypt_feature_info(void)
        if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
                pr_cont(" SEV-ES");
 
+       /* Secure Nested Paging */
+       if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+               pr_cont(" SEV-SNP");
+
        pr_cont("\n");
 }
 
index 6169053c28541c96e50f5bd7e2276b564571b527..d3c88d9ef8d631a1a0e53c13fe0a3f8429e46324 100644 (file)
@@ -31,6 +31,7 @@
 #include <asm/processor-flags.h>
 #include <asm/msr.h>
 #include <asm/cmdline.h>
+#include <asm/sev.h>
 
 #include "mm_internal.h"
 
@@ -47,6 +48,36 @@ EXPORT_SYMBOL(sme_me_mask);
 /* Buffer used for early in-place encryption by BSP, no locking needed */
 static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);
 
+/*
+ * SNP-specific routine which needs to additionally change the page state from
+ * private to shared before copying the data from the source to the destination,
+ * and restore it after the copy.
+ */
+static inline void __init snp_memcpy(void *dst, void *src, size_t sz,
+                                    unsigned long paddr, bool decrypt)
+{
+       unsigned long npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+
+       if (decrypt) {
+               /*
+                * @paddr needs to be accessed decrypted, mark the page shared in
+                * the RMP table before copying it.
+                */
+               early_snp_set_memory_shared((unsigned long)__va(paddr), paddr, npages);
+
+               memcpy(dst, src, sz);
+
+               /* Restore the page state after the memcpy. */
+               early_snp_set_memory_private((unsigned long)__va(paddr), paddr, npages);
+       } else {
+               /*
+                * @paddr needs to be accessed encrypted, no need for the page state
+                * change.
+                */
+               memcpy(dst, src, sz);
+       }
+}
+
 /*
  * This routine does not change the underlying encryption setting of the
  * page(s) that map this memory. It assumes that eventually the memory is
@@ -95,8 +126,13 @@ static void __init __sme_early_enc_dec(resource_size_t paddr,
                 * Use a temporary buffer, of cache-line multiple size, to
                 * avoid data corruption as documented in the APM.
                 */
-               memcpy(sme_early_buffer, src, len);
-               memcpy(dst, sme_early_buffer, len);
+               if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
+                       snp_memcpy(sme_early_buffer, src, len, paddr, enc);
+                       snp_memcpy(dst, sme_early_buffer, len, paddr, !enc);
+               } else {
+                       memcpy(sme_early_buffer, src, len);
+                       memcpy(dst, sme_early_buffer, len);
+               }
 
                early_memunmap(dst, len);
                early_memunmap(src, len);
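
A hedged sketch of the snp_memcpy() flow above, with the RMP page-state transitions stubbed out:

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

/* Stubbed page-state transitions; the real ones program the RMP table. */
static void toy_set_shared(unsigned long paddr)  { (void)paddr; }
static void toy_set_private(unsigned long paddr) { (void)paddr; }

/*
 * Toy model of snp_memcpy() above: flip the page shared only for the
 * direction that must read the old contents decrypted.
 */
static void toy_snp_memcpy(void *dst, void *src, size_t sz,
                           unsigned long paddr, bool decrypt)
{
        if (decrypt) {
                toy_set_shared(paddr);      /* read the old contents raw */
                memcpy(dst, src, sz);
                toy_set_private(paddr);     /* restore the page state */
        } else {
                memcpy(dst, src, sz);       /* encrypted access, no change */
        }
}
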
@@ -280,11 +316,24 @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
 
 static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
 {
+       /*
+        * To maintain the security guarantees of SEV-SNP guests, make sure
+        * to invalidate the memory before the encryption attribute is cleared.
+        */
+       if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
+               snp_set_memory_shared(vaddr, npages);
 }
 
 /* Return true unconditionally: return value doesn't matter for the SEV side */
 static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
 {
+       /*
+        * After memory is mapped encrypted in the page table, validate it
+        * so that it is consistent with the page table updates.
+        */
+       if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && enc)
+               snp_set_memory_private(vaddr, npages);
+
        if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
                enc_dec_hypercall(vaddr, npages, enc);
 
@@ -322,14 +371,28 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
        clflush_cache_range(__va(pa), size);
 
        /* Encrypt/decrypt the contents in-place */
-       if (enc)
+       if (enc) {
                sme_early_encrypt(pa, size);
-       else
+       } else {
                sme_early_decrypt(pa, size);
 
+               /*
+                * On SNP, the page state change in the RMP table must happen
+                * before the page table updates.
+                */
+               early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1);
+       }
+
        /* Change the page encryption mask. */
        new_pte = pfn_pte(pfn, new_prot);
        set_pte_atomic(kpte, new_pte);
+
+       /*
+        * If the page is set encrypted in the page table, then update the RMP
+        * table to add this page as private.
+        */
+       if (enc)
+               early_snp_set_memory_private((unsigned long)__va(pa), pa, 1);
 }
 
 static int __init early_set_memory_enc_dec(unsigned long vaddr,
index b43bc24d2bb6415e4fe5eb7a4bec1914d5767fb1..f415498d3175cf7f67eef6f20c72a5f247a24a2f 100644 (file)
@@ -45,6 +45,7 @@
 #include <asm/sections.h>
 #include <asm/cmdline.h>
 #include <asm/coco.h>
+#include <asm/sev.h>
 
 #include "mm_internal.h"
 
@@ -509,8 +510,11 @@ void __init sme_enable(struct boot_params *bp)
        bool active_by_default;
        unsigned long me_mask;
        char buffer[16];
+       bool snp;
        u64 msr;
 
+       snp = snp_init(bp);
+
        /* Check for the SME/SEV support leaf */
        eax = 0x80000000;
        ecx = 0;
@@ -542,6 +546,10 @@ void __init sme_enable(struct boot_params *bp)
        sev_status   = __rdmsr(MSR_AMD64_SEV);
        feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
 
+       /* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */
+       if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+               snp_abort();
+
        /* Check if memory encryption is enabled */
        if (feature_mask == AMD_SME_BIT) {
                /*
index 933a2ebad471bcb308e90ad9afd73ec13ddb1374..c3317f0650d81c6ff22e23022afd8fa786538823 100644 (file)
@@ -400,7 +400,7 @@ static void leave_uniprocessor(void)
        int cpu;
        int err;
 
-       if (!cpumask_available(downed_cpus) || cpumask_weight(downed_cpus) == 0)
+       if (!cpumask_available(downed_cpus) || cpumask_empty(downed_cpus))
                return;
        pr_notice("Re-enabling CPUs...\n");
        for_each_cpu(cpu, downed_cpus) {
index 1a02b791d273cb1e9981663d67131f01c211753e..9a9305367fdd16e268eadff2c9f72457fe5e7203 100644 (file)
@@ -123,7 +123,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
         * Continue to fill physical nodes with fake nodes until there is no
         * memory left on any of them.
         */
-       while (nodes_weight(physnode_mask)) {
+       while (!nodes_empty(physnode_mask)) {
                for_each_node_mask(i, physnode_mask) {
                        u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
                        u64 start, limit, end;
@@ -270,7 +270,7 @@ static int __init split_nodes_size_interleave_uniform(struct numa_meminfo *ei,
         * Fill physical nodes with fake nodes of size until there is no memory
         * left on any of them.
         */
-       while (nodes_weight(physnode_mask)) {
+       while (!nodes_empty(physnode_mask)) {
                for_each_node_mask(i, physnode_mask) {
                        u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
                        u64 start, limit, end;
index abf5ed76e4b7bdb9de1d81b10243e5d35b10c148..0656db33574d390e73d2af31ab6815fad1b9922d 100644 (file)
@@ -638,17 +638,6 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
 }
 EXPORT_SYMBOL_GPL(lookup_address);
 
-/*
- * Lookup the page table entry for a virtual address in a given mm. Return a
- * pointer to the entry and the level of the mapping.
- */
-pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
-                           unsigned int *level)
-{
-       return lookup_address_in_pgd(pgd_offset(mm, address), address, level);
-}
-EXPORT_SYMBOL_GPL(lookup_address_in_mm);
-
 static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
                                  unsigned int *level)
 {
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
deleted file mode 100644 (file)
index ed5667f..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/pgtable.h>
-
-#include <asm/proto.h>
-#include <asm/cpufeature.h>
-
-static int disable_nx;
-
-/*
- * noexec = on|off
- *
- * Control non-executable mappings for processes.
- *
- * on      Enable
- * off     Disable
- */
-static int __init noexec_setup(char *str)
-{
-       if (!str)
-               return -EINVAL;
-       if (!strncmp(str, "on", 2)) {
-               disable_nx = 0;
-       } else if (!strncmp(str, "off", 3)) {
-               disable_nx = 1;
-       }
-       x86_configure_nx();
-       return 0;
-}
-early_param("noexec", noexec_setup);
-
-void x86_configure_nx(void)
-{
-       if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
-               __supported_pte_mask |= _PAGE_NX;
-       else
-               __supported_pte_mask &= ~_PAGE_NX;
-}
-
-void __init x86_report_nx(void)
-{
-       if (!boot_cpu_has(X86_FEATURE_NX)) {
-               printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
-                      "missing in CPU!\n");
-       } else {
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-               if (disable_nx) {
-                       printk(KERN_INFO "NX (Execute Disable) protection: "
-                              "disabled by kernel command line option\n");
-               } else {
-                       printk(KERN_INFO "NX (Execute Disable) protection: "
-                              "active\n");
-               }
-#else
-               /* 32bit non-PAE kernel, NX cannot be used */
-               printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
-                      "cannot be enabled: non-PAE kernel!\n");
-#endif
-       }
-}
index 6eb4d91d5365563024f44d8cb92f4df599ef1a0e..d400b6d9d246b93c5a274a579997a3054a5b2e22 100644 (file)
@@ -855,13 +855,11 @@ done:
                        nr_invalidate);
 }
 
-static bool tlb_is_not_lazy(int cpu)
+static bool tlb_is_not_lazy(int cpu, void *data)
 {
        return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
 }
 
-static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
-
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
 EXPORT_PER_CPU_SYMBOL(cpu_tlbstate_shared);
 
@@ -890,36 +888,11 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask,
         * up on the new contents of what used to be page tables, while
         * doing a speculative memory access.
         */
-       if (info->freed_tables) {
+       if (info->freed_tables)
                on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
-       } else {
-               /*
-                * Although we could have used on_each_cpu_cond_mask(),
-                * open-coding it has performance advantages, as it eliminates
-                * the need for indirect calls or retpolines. In addition, it
-                * allows to use a designated cpumask for evaluating the
-                * condition, instead of allocating one.
-                *
-                * This code works under the assumption that there are no nested
-                * TLB flushes, an assumption that is already made in
-                * flush_tlb_mm_range().
-                *
-                * cond_cpumask is logically a stack-local variable, but it is
-                * more efficient to have it off the stack and not to allocate
-                * it on demand. Preemption is disabled and this code is
-                * non-reentrant.
-                */
-               struct cpumask *cond_cpumask = this_cpu_ptr(&flush_tlb_mask);
-               int cpu;
-
-               cpumask_clear(cond_cpumask);
-
-               for_each_cpu(cpu, cpumask) {
-                       if (tlb_is_not_lazy(cpu))
-                               __cpumask_set_cpu(cpu, cond_cpumask);
-               }
-               on_each_cpu_mask(cond_cpumask, flush_tlb_func, (void *)info, true);
-       }
+       else
+               on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
+                               (void *)info, 1, cpumask);
 }
 
 void flush_tlb_multi(const struct cpumask *cpumask,
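
The hunk above trades the open-coded cpumask filtering for on_each_cpu_cond_mask(), which evaluates a per-CPU predicate before dispatching work. A toy model of that control flow, using arrays instead of cpumasks and plain calls instead of IPIs:

#include <stdbool.h>

#define TOY_NCPUS 8

typedef bool (*toy_cond_fn)(int cpu, void *data);
typedef void (*toy_work_fn)(void *data);

/* Toy stand-in for on_each_cpu_cond_mask(): run @work only where @cond holds. */
static void toy_on_each_cpu_cond_mask(toy_cond_fn cond, toy_work_fn work,
                                      void *data, const bool mask[TOY_NCPUS])
{
        for (int cpu = 0; cpu < TOY_NCPUS; cpu++)
                if (mask[cpu] && cond(cpu, data))
                        work(data);   /* the real helper sends an IPI here */
}
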
index 8fe35ed11fd665f1d80b110a516219cdfc4de579..16b6efacf7c6770706f9ef3846a85f6c2af7b2af 100644 (file)
@@ -412,6 +412,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
                EMIT_LFENCE();
                EMIT2(0xFF, 0xE0 + reg);
        } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
+               OPTIMIZER_HIDE_VAR(reg);
                emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
        } else
 #endif
index 97b63e35e1528b116a8d5dc7a4e78598088d6357..a498b847d7403af4d0a4258e117b6cf4795b2e5f 100644 (file)
@@ -25,6 +25,8 @@
 #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
 #define PIRQ_VERSION 0x0100
 
+#define IRT_SIGNATURE  (('$' << 0) + ('I' << 8) + ('R' << 16) + ('T' << 24))
+
 static int broken_hp_bios_irq9;
 static int acer_tm360_irqrouting;
 
@@ -68,30 +70,99 @@ void (*pcibios_disable_irq)(struct pci_dev *dev) = pirq_disable_irq;
  *  and perform checksum verification.
  */
 
-static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
+static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr,
+                                                                u8 *limit)
 {
        struct irq_routing_table *rt;
        int i;
        u8 sum;
 
-       rt = (struct irq_routing_table *) addr;
+       rt = (struct irq_routing_table *)addr;
        if (rt->signature != PIRQ_SIGNATURE ||
            rt->version != PIRQ_VERSION ||
            rt->size % 16 ||
-           rt->size < sizeof(struct irq_routing_table))
+           rt->size < sizeof(struct irq_routing_table) ||
+           (limit && rt->size > limit - addr))
                return NULL;
        sum = 0;
        for (i = 0; i < rt->size; i++)
                sum += addr[i];
        if (!sum) {
-               DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n",
-                       rt);
+               DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%lx\n",
+                   __pa(rt));
                return rt;
        }
        return NULL;
 }
 
+/*
+ * Handle the $IRT PCI IRQ Routing Table format used by AMI for its BCP
+ * (BIOS Configuration Program) external tool meant for tweaking BIOS
+ * structures without the need to rebuild it from sources.  The $IRT
+ * format was invented by AMI before Microsoft came up with its $PIR
+ * format, and a $IRT table is therefore present in some systems that
+ * lack a $PIR table.
+ *
+ * It uses the same PCI BIOS 2.1 format for interrupt routing entries
+ * themselves but has a different simpler header prepended instead,
+ * occupying 8 bytes, where a `$IRT' signature is followed by one byte
+ * specifying the total number of interrupt routing entries allocated in
+ * the table, then one byte specifying the actual number of entries used
+ * (which the BCP tool can take advantage of when modifying the table),
+ * and finally a 16-bit word giving the IRQs devoted exclusively to PCI.
+ * Unlike with the $PIR table there is no alignment guarantee.
+ *
+ * Given the similarity of the two formats the $IRT one is trivial to
+ * convert to the $PIR one, which we do here, except that obviously we
+ * have no information as to the router device to use, but we can handle
+ * it by matching PCI device IDs actually seen on the bus against ones
+ * that our individual routers recognise.
+ *
+ * Reportedly there is another $IRT table format where a 16-bit word
+ * follows the header instead, pointing to interrupt routing entries
+ * in a $PIR table provided elsewhere.  In that case this code will not
+ * be reached, though, as the $PIR table will have been chosen instead.
+ */
+static inline struct irq_routing_table *pirq_convert_irt_table(u8 *addr,
+                                                              u8 *limit)
+{
+       struct irt_routing_table *ir;
+       struct irq_routing_table *rt;
+       u16 size;
+       u8 sum;
+       int i;
+
+       ir = (struct irt_routing_table *)addr;
+       if (ir->signature != IRT_SIGNATURE || !ir->used || ir->size < ir->used)
+               return NULL;
+
+       size = sizeof(*ir) + ir->used * sizeof(ir->slots[0]);
+       if (size > limit - addr)
+               return NULL;
+
+       DBG(KERN_DEBUG "PCI: $IRT Interrupt Routing Table found at 0x%lx\n",
+           __pa(ir));
+
+       size = sizeof(*rt) + ir->used * sizeof(rt->slots[0]);
+       rt = kzalloc(size, GFP_KERNEL);
+       if (!rt)
+               return NULL;
+
+       rt->signature = PIRQ_SIGNATURE;
+       rt->version = PIRQ_VERSION;
+       rt->size = size;
+       rt->exclusive_irqs = ir->exclusive_irqs;
+       for (i = 0; i < ir->used; i++)
+               rt->slots[i] = ir->slots[i];
+
+       addr = (u8 *)rt;
+       sum = 0;
+       for (i = 0; i < size; i++)
+               sum += addr[i];
+       rt->checksum = -sum;
+
+       return rt;
+}
 
 /*
  *  Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
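
A sketch of the 8-byte $IRT header described in the comment above, together with the $PIR-style byte checksum the conversion finishes with (the struct layout is inferred from the comment; the kernel's struct irt_routing_table is authoritative):

#include <stdint.h>

/* Layout per the comment above: signature, total slots, used slots, IRQs. */
struct toy_irt_header {
        uint32_t signature;       /* '$IRT' */
        uint8_t  size;            /* entries allocated */
        uint8_t  used;            /* entries actually used */
        uint16_t exclusive_irqs;  /* IRQs devoted exclusively to PCI */
} __attribute__((packed));

/* $PIR tables must sum to zero byte-wise; pick the checksum accordingly. */
static uint8_t toy_pir_checksum(const uint8_t *table, uint16_t size)
{
        uint8_t sum = 0;

        for (uint16_t i = 0; i < size; i++)
                sum += table[i];
        return (uint8_t)-sum;     /* store this so the total becomes zero */
}
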
@@ -99,17 +170,29 @@ static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
 
 static struct irq_routing_table * __init pirq_find_routing_table(void)
 {
+       u8 * const bios_start = (u8 *)__va(0xf0000);
+       u8 * const bios_end = (u8 *)__va(0x100000);
        u8 *addr;
        struct irq_routing_table *rt;
 
        if (pirq_table_addr) {
-               rt = pirq_check_routing_table((u8 *) __va(pirq_table_addr));
+               rt = pirq_check_routing_table((u8 *)__va(pirq_table_addr),
+                                             NULL);
                if (rt)
                        return rt;
                printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n");
        }
-       for (addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
-               rt = pirq_check_routing_table(addr);
+       for (addr = bios_start;
+            addr < bios_end - sizeof(struct irq_routing_table);
+            addr += 16) {
+               rt = pirq_check_routing_table(addr, bios_end);
+               if (rt)
+                       return rt;
+       }
+       for (addr = bios_start;
+            addr < bios_end - sizeof(struct irt_routing_table);
+            addr++) {
+               rt = pirq_convert_irt_table(addr, bios_end);
                if (rt)
                        return rt;
        }
@@ -135,7 +218,8 @@ static void __init pirq_peer_trick(void)
 #ifdef DEBUG
                {
                        int j;
-                       DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
+                       DBG(KERN_DEBUG "%02x:%02x.%x slot=%02x",
+                           e->bus, e->devfn / 8, e->devfn % 8, e->slot);
                        for (j = 0; j < 4; j++)
                                DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
                        DBG("\n");
@@ -253,6 +337,15 @@ static void write_pc_conf_nybble(u8 base, u8 index, u8 val)
        pc_conf_set(reg, x);
 }
 
+/*
+ * FinALi pirq rules are as follows:
+ *
+ * - bit 0 selects between INTx Routing Table Mapping Registers,
+ *
+ * - bit 3 selects the nibble within the INTx Routing Table Mapping Register,
+ *
+ * - bits 7:4 map to bits 3:0 of the PCI INTx Sensitivity Register.
+ */
 static int pirq_finali_get(struct pci_dev *router, struct pci_dev *dev,
                           int pirq)
 {
@@ -260,11 +353,13 @@ static int pirq_finali_get(struct pci_dev *router, struct pci_dev *dev,
                0, 9, 3, 10, 4, 5, 7, 6, 0, 11, 0, 12, 0, 14, 0, 15
        };
        unsigned long flags;
+       u8 index;
        u8 x;
 
+       index = (pirq & 1) << 1 | (pirq & 8) >> 3;
        raw_spin_lock_irqsave(&pc_conf_lock, flags);
        pc_conf_set(PC_CONF_FINALI_LOCK, PC_CONF_FINALI_LOCK_KEY);
-       x = irqmap[read_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, pirq - 1)];
+       x = irqmap[read_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, index)];
        pc_conf_set(PC_CONF_FINALI_LOCK, 0);
        raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
        return x;
@@ -278,13 +373,15 @@ static int pirq_finali_set(struct pci_dev *router, struct pci_dev *dev,
        };
        u8 val = irqmap[irq];
        unsigned long flags;
+       u8 index;
 
        if (!val)
                return 0;
 
+       index = (pirq & 1) << 1 | (pirq & 8) >> 3;
        raw_spin_lock_irqsave(&pc_conf_lock, flags);
        pc_conf_set(PC_CONF_FINALI_LOCK, PC_CONF_FINALI_LOCK_KEY);
-       write_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, pirq - 1, val);
+       write_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, index, val);
        pc_conf_set(PC_CONF_FINALI_LOCK, 0);
        raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
        return 1;
@@ -293,7 +390,7 @@ static int pirq_finali_set(struct pci_dev *router, struct pci_dev *dev,
 static int pirq_finali_lvl(struct pci_dev *router, struct pci_dev *dev,
                           int pirq, int irq)
 {
-       u8 mask = ~(1u << (pirq - 1));
+       u8 mask = ~((pirq & 0xf0u) >> 4);
        unsigned long flags;
        u8 trig;
 
@@ -579,6 +676,81 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
        return 1;
 }
 
+
+/*
+ *     PIRQ routing for the SiS85C497 AT Bus Controller & Megacell (ATM)
+ *     ISA bridge used with the SiS 85C496/497 486 Green PC VESA/ISA/PCI
+ *     Chipset.
+ *
+ *     There are four PCI INTx#-to-IRQ Link registers provided in the
+ *     SiS85C497 part of the peculiar combined 85C496/497 configuration
+ *     space decoded by the SiS85C496 PCI & CPU Memory Controller (PCM)
+ *     host bridge, at 0xc0/0xc1/0xc2/0xc3 respectively for the PCI INT
+ *     A/B/C/D lines.  Bit 7 enables the respective link if set and bits
+ *     3:0 select the 8259A IRQ line as follows:
+ *
+ *     0000 : Reserved
+ *     0001 : Reserved
+ *     0010 : Reserved
+ *     0011 : IRQ3
+ *     0100 : IRQ4
+ *     0101 : IRQ5
+ *     0110 : IRQ6
+ *     0111 : IRQ7
+ *     1000 : Reserved
+ *     1001 : IRQ9
+ *     1010 : IRQ10
+ *     1011 : IRQ11
+ *     1100 : IRQ12
+ *     1101 : Reserved
+ *     1110 : IRQ14
+ *     1111 : IRQ15
+ *
+ *     We avoid using a reserved value for disabled links, hence the
+ *     choice of IRQ15 for that case.
+ *
+ *     References:
+ *
+ *     "486 Green PC VESA/ISA/PCI Chipset, SiS 85C496/497", Rev 3.0,
+ *     Silicon Integrated Systems Corp., July 1995
+ */
+
+#define PCI_SIS497_INTA_TO_IRQ_LINK    0xc0u
+
+#define PIRQ_SIS497_IRQ_MASK           0x0fu
+#define PIRQ_SIS497_IRQ_ENABLE         0x80u
+
+static int pirq_sis497_get(struct pci_dev *router, struct pci_dev *dev,
+                          int pirq)
+{
+       int reg;
+       u8 x;
+
+       reg = pirq;
+       if (reg >= 1 && reg <= 4)
+               reg += PCI_SIS497_INTA_TO_IRQ_LINK - 1;
+
+       pci_read_config_byte(router, reg, &x);
+       return (x & PIRQ_SIS497_IRQ_ENABLE) ? (x & PIRQ_SIS497_IRQ_MASK) : 0;
+}
+
+static int pirq_sis497_set(struct pci_dev *router, struct pci_dev *dev,
+                          int pirq, int irq)
+{
+       int reg;
+       u8 x;
+
+       reg = pirq;
+       if (reg >= 1 && reg <= 4)
+               reg += PCI_SIS497_INTA_TO_IRQ_LINK - 1;
+
+       pci_read_config_byte(router, reg, &x);
+       x &= ~(PIRQ_SIS497_IRQ_MASK | PIRQ_SIS497_IRQ_ENABLE);
+       x |= irq ? (PIRQ_SIS497_IRQ_ENABLE | irq) : PIRQ_SIS497_IRQ_MASK;
+       pci_write_config_byte(router, reg, x);
+       return 1;
+}
+
 /*
  *     PIRQ routing for SiS 85C503 router used in several SiS chipsets.
  *     We have to deal with the following issues here:
@@ -640,11 +812,12 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
  *                             bit 6-4 are probably unused, not like 5595
  */
 
-#define PIRQ_SIS_IRQ_MASK      0x0f
-#define PIRQ_SIS_IRQ_DISABLE   0x80
-#define PIRQ_SIS_USB_ENABLE    0x40
+#define PIRQ_SIS503_IRQ_MASK   0x0f
+#define PIRQ_SIS503_IRQ_DISABLE        0x80
+#define PIRQ_SIS503_USB_ENABLE 0x40
 
-static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+static int pirq_sis503_get(struct pci_dev *router, struct pci_dev *dev,
+                          int pirq)
 {
        u8 x;
        int reg;
@@ -653,10 +826,11 @@ static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
        if (reg >= 0x01 && reg <= 0x04)
                reg += 0x40;
        pci_read_config_byte(router, reg, &x);
-       return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK);
+       return (x & PIRQ_SIS503_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS503_IRQ_MASK);
 }
 
-static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+static int pirq_sis503_set(struct pci_dev *router, struct pci_dev *dev,
+                          int pirq, int irq)
 {
        u8 x;
        int reg;
@@ -665,8 +839,8 @@ static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
        if (reg >= 0x01 && reg <= 0x04)
                reg += 0x40;
        pci_read_config_byte(router, reg, &x);
-       x &= ~(PIRQ_SIS_IRQ_MASK | PIRQ_SIS_IRQ_DISABLE);
-       x |= irq ? irq: PIRQ_SIS_IRQ_DISABLE;
+       x &= ~(PIRQ_SIS503_IRQ_MASK | PIRQ_SIS503_IRQ_DISABLE);
+       x |= irq ? irq : PIRQ_SIS503_IRQ_DISABLE;
        pci_write_config_byte(router, reg, x);
        return 1;
 }
@@ -958,13 +1132,19 @@ static __init int serverworks_router_probe(struct irq_router *r,
 
 static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
 {
-       if (device != PCI_DEVICE_ID_SI_503)
-               return 0;
-
-       r->name = "SIS";
-       r->get = pirq_sis_get;
-       r->set = pirq_sis_set;
-       return 1;
+       switch (device) {
+       case PCI_DEVICE_ID_SI_496:
+               r->name = "SiS85C497";
+               r->get = pirq_sis497_get;
+               r->set = pirq_sis497_set;
+               return 1;
+       case PCI_DEVICE_ID_SI_503:
+               r->name = "SiS85C503";
+               r->get = pirq_sis503_get;
+               r->set = pirq_sis503_set;
+               return 1;
+       }
+       return 0;
 }
 
 static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
@@ -1084,10 +1264,32 @@ static struct pci_dev *pirq_router_dev;
  *     chipset" ?
  */
 
+static bool __init pirq_try_router(struct irq_router *r,
+                                  struct irq_routing_table *rt,
+                                  struct pci_dev *dev)
+{
+       struct irq_router_handler *h;
+
+       DBG(KERN_DEBUG "PCI: Trying IRQ router for [%04x:%04x]\n",
+           dev->vendor, dev->device);
+
+       for (h = pirq_routers; h->vendor; h++) {
+               /* First look for a router match */
+               if (rt->rtr_vendor == h->vendor &&
+                   h->probe(r, dev, rt->rtr_device))
+                       return true;
+               /* Fall back to a device match */
+               if (dev->vendor == h->vendor &&
+                   h->probe(r, dev, dev->device))
+                       return true;
+       }
+       return false;
+}
+
 static void __init pirq_find_router(struct irq_router *r)
 {
        struct irq_routing_table *rt = pirq_table;
-       struct irq_router_handler *h;
+       struct pci_dev *dev;
 
 #ifdef CONFIG_PCI_BIOS
        if (!rt->signature) {
@@ -1106,50 +1308,94 @@ static void __init pirq_find_router(struct irq_router *r)
        DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n",
            rt->rtr_vendor, rt->rtr_device);
 
-       pirq_router_dev = pci_get_domain_bus_and_slot(0, rt->rtr_bus,
-                                                     rt->rtr_devfn);
-       if (!pirq_router_dev) {
-               DBG(KERN_DEBUG "PCI: Interrupt router not found at "
-                       "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
-               return;
+       /* Use any vendor:device provided by the routing table or try all.  */
+       if (rt->rtr_vendor) {
+               dev = pci_get_domain_bus_and_slot(0, rt->rtr_bus,
+                                                 rt->rtr_devfn);
+               if (dev && pirq_try_router(r, rt, dev))
+                       pirq_router_dev = dev;
+       } else {
+               dev = NULL;
+               for_each_pci_dev(dev) {
+                       if (pirq_try_router(r, rt, dev)) {
+                               pirq_router_dev = dev;
+                               break;
+                       }
+               }
        }
 
-       for (h = pirq_routers; h->vendor; h++) {
-               /* First look for a router match */
-               if (rt->rtr_vendor == h->vendor &&
-                       h->probe(r, pirq_router_dev, rt->rtr_device))
-                       break;
-               /* Fall back to a device match */
-               if (pirq_router_dev->vendor == h->vendor &&
-                       h->probe(r, pirq_router_dev, pirq_router_dev->device))
-                       break;
-       }
-       dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n",
-                pirq_router.name,
-                pirq_router_dev->vendor, pirq_router_dev->device);
+       if (pirq_router_dev)
+               dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n",
+                        pirq_router.name,
+                        pirq_router_dev->vendor, pirq_router_dev->device);
+       else
+               DBG(KERN_DEBUG "PCI: Interrupt router not found at "
+                   "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
 
        /* The device remains referenced for the kernel lifetime */
 }
 
-static struct irq_info *pirq_get_info(struct pci_dev *dev)
+/*
+ * We're supposed to match on the PCI device only and not the function,
+ * but some BIOSes build their tables with the PCI function included
+ * for motherboard devices, so if a complete match is found, then give
+ * it precedence over a slot match.
+ */
+static struct irq_info *pirq_get_dev_info(struct pci_dev *dev)
 {
        struct irq_routing_table *rt = pirq_table;
        int entries = (rt->size - sizeof(struct irq_routing_table)) /
                sizeof(struct irq_info);
+       struct irq_info *slotinfo = NULL;
        struct irq_info *info;
 
        for (info = rt->slots; entries--; info++)
-               if (info->bus == dev->bus->number &&
-                       PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
-                       return info;
-       return NULL;
+               if (info->bus == dev->bus->number) {
+                       if (info->devfn == dev->devfn)
+                               return info;
+                       if (!slotinfo &&
+                           PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+                               slotinfo = info;
+               }
+       return slotinfo;
+}
+
+/*
+ * Buses behind bridges are typically not listed in the PIRQ routing table.
+ * Do the usual dance then: walk the tree of bridges upwards, adjusting the
+ * pin number accordingly on the way, until the originating root-bus device
+ * has been reached, and then use its routing information.
+ */
+static struct irq_info *pirq_get_info(struct pci_dev *dev, u8 *pin)
+{
+       struct pci_dev *temp_dev = dev;
+       struct irq_info *info;
+       u8 temp_pin = *pin;
+       u8 dpin = temp_pin;
+
+       info = pirq_get_dev_info(dev);
+       while (!info && temp_dev->bus->parent) {
+               struct pci_dev *bridge = temp_dev->bus->self;
+
+               temp_pin = pci_swizzle_interrupt_pin(temp_dev, temp_pin);
+               info = pirq_get_dev_info(bridge);
+               if (info)
+                       dev_warn(&dev->dev,
+                                "using bridge %s INT %c to get INT %c\n",
+                                pci_name(bridge),
+                                'A' + temp_pin - 1, 'A' + dpin - 1);
+
+               temp_dev = bridge;
+       }
+       *pin = temp_pin;
+       return info;
 }
 
 static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 {
-       u8 pin;
        struct irq_info *info;
        int i, pirq, newirq;
+       u8 dpin, pin;
        int irq = 0;
        u32 mask;
        struct irq_router *r = &pirq_router;
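
The bridge walk in pirq_get_info() above relies on the conventional INTx swizzle. A sketch of that rotation, with PCI_SLOT() modeled as devfn >> 3 (this mirrors the standard pci_swizzle_interrupt_pin() behaviour):

#include <stdint.h>

/* Rotate INTA..INTD (1..4) by the device's slot number, as a bridge does. */
static uint8_t toy_swizzle_pin(uint8_t devfn, uint8_t pin)
{
        uint8_t slot = devfn >> 3;              /* PCI_SLOT() */

        return (((pin - 1) + slot) % 4) + 1;    /* 1-based pin, mod 4 */
}
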
@@ -1157,8 +1403,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
        char *msg = NULL;
 
        /* Find IRQ pin */
-       pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
-       if (!pin) {
+       pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &dpin);
+       if (!dpin) {
                dev_dbg(&dev->dev, "no interrupt pin\n");
                return 0;
        }
@@ -1171,20 +1417,21 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
        if (!pirq_table)
                return 0;
 
-       info = pirq_get_info(dev);
+       pin = dpin;
+       info = pirq_get_info(dev, &pin);
        if (!info) {
                dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n",
-                       'A' + pin - 1);
+                       'A' + dpin - 1);
                return 0;
        }
        pirq = info->irq[pin - 1].link;
        mask = info->irq[pin - 1].bitmap;
        if (!pirq) {
-               dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin - 1);
+               dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + dpin - 1);
                return 0;
        }
        dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x",
-               'A' + pin - 1, pirq, mask, pirq_table->exclusive_irqs);
+               'A' + dpin - 1, pirq, mask, pirq_table->exclusive_irqs);
        mask &= pcibios_irq_mask;
 
        /* Work around broken HP Pavilion Notebooks which assign USB to
@@ -1226,7 +1473,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
                                newirq = i;
                }
        }
-       dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin - 1, newirq);
+       dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + dpin - 1, newirq);
 
        /* Check if it is hardcoded */
        if ((pirq & 0xf0) == 0xf0) {
@@ -1260,15 +1507,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
                        return 0;
                }
        }
-       dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq);
+       dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n",
+                msg, 'A' + dpin - 1, irq);
 
        /* Update IRQ for all devices with the same pirq value */
        for_each_pci_dev(dev2) {
-               pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
-               if (!pin)
+               pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &dpin);
+               if (!dpin)
                        continue;
 
-               info = pirq_get_info(dev2);
+               pin = dpin;
+               info = pirq_get_info(dev2, &pin);
                if (!info)
                        continue;
                if (info->irq[pin - 1].link == pirq) {
index 9bb1e29411796344a471bb76ca6f85ee59ed6954..b94f727251b6436d3101cb6d0abe04c7fc13a660 100644 (file)
@@ -467,7 +467,6 @@ static __init void xen_setup_pci_msi(void)
                else
                        xen_msi_ops.setup_msi_irqs = xen_setup_msi_irqs;
                xen_msi_ops.teardown_msi_irqs = xen_pv_teardown_msi_irqs;
-               pci_msi_ignore_mask = 1;
        } else if (xen_hvm_domain()) {
                xen_msi_ops.setup_msi_irqs = xen_hvm_setup_msi_irqs;
                xen_msi_ops.teardown_msi_irqs = xen_teardown_msi_irqs;
@@ -481,6 +480,11 @@ static __init void xen_setup_pci_msi(void)
         * in allocating the native domain and never use it.
         */
        x86_init.irqs.create_pci_msi_domain = xen_create_pci_msi_domain;
+       /*
+        * With Xen PIRQ/event channels in use, PCI/MSI[-X] masking is solely
+        * controlled by the hypervisor.
+        */
+       pci_msi_ignore_mask = 1;
 }
 
 #else /* CONFIG_PCI_MSI */
index 147c30a81f15b8fc0fbdb61f632ecf42f36f04ce..1591d67e0bcde3e1b6a97789e1f05ed0b6ea2a5b 100644 (file)
@@ -93,6 +93,9 @@ static const unsigned long * const efi_tables[] = {
 #ifdef CONFIG_LOAD_UEFI_KEYS
        &efi.mokvar_table,
 #endif
+#ifdef CONFIG_EFI_COCO_SECRET
+       &efi.coco_secret,
+#endif
 };
 
 u64 efi_setup;         /* efi setup_data physical address */
index 72c1e42d121df59d8454f84b6ddeb113f5740931..7fe564eaf228aaa7995071f6da18cd8a16617533 100644 (file)
@@ -50,6 +50,7 @@
 #define PVH_DS_SEL             (PVH_GDT_ENTRY_DS * 8)
 
 SYM_CODE_START_LOCAL(pvh_start_xen)
+       UNWIND_HINT_EMPTY
        cld
 
        lgdt (_pa(gdt))
index 1e9ff28bc2e04c270ab594fa82f11dbc3590bc0d..a60af0230e27b32526ade6f5b3906e81d19327cd 100644 (file)
@@ -244,8 +244,10 @@ static inline bool uv_nmi_action_is(const char *action)
 /* Setup which NMI support is present in system */
 static void uv_nmi_setup_mmrs(void)
 {
+       bool new_nmi_method_only = false;
+
        /* First determine arch specific MMRs to handshake with BIOS */
-       if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) {
+       if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) {      /* UV2,3,4 setup */
                uvh_nmi_mmrx = UVH_EVENT_OCCURRED0;
                uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED0_ALIAS;
                uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT;
@@ -255,26 +257,25 @@ static void uv_nmi_setup_mmrs(void)
                uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
                uvh_nmi_mmrx_req_shift = 62;
 
-       } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) {
+       } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) { /* UV5+ setup */
                uvh_nmi_mmrx = UVH_EVENT_OCCURRED1;
                uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED1_ALIAS;
                uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT;
                uvh_nmi_mmrx_type = "OCRD1-EXTIO_INT0";
 
-               uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST;
-               uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
-               uvh_nmi_mmrx_req_shift = 62;
+               new_nmi_method_only = true;             /* Newer NMI always valid on UV5+ */
+               uvh_nmi_mmrx_req = 0;                   /* no request bit to clear */
 
        } else {
-               pr_err("UV:%s:cannot find EVENT_OCCURRED*_EXTIO_INT0\n",
-                       __func__);
+               pr_err("UV:%s:NMI support not available on this system\n", __func__);
                return;
        }
 
        /* Then find out if new NMI is supported */
-       if (likely(uv_read_local_mmr(uvh_nmi_mmrx_supported))) {
-               uv_write_local_mmr(uvh_nmi_mmrx_req,
-                                       1UL << uvh_nmi_mmrx_req_shift);
+       if (new_nmi_method_only || uv_read_local_mmr(uvh_nmi_mmrx_supported)) {
+               if (uvh_nmi_mmrx_req)
+                       uv_write_local_mmr(uvh_nmi_mmrx_req,
+                                               1UL << uvh_nmi_mmrx_req_shift);
                nmi_mmr = uvh_nmi_mmrx;
                nmi_mmr_clear = uvh_nmi_mmrx_clear;
                nmi_mmr_pending = 1UL << uvh_nmi_mmrx_shift;
@@ -985,7 +986,7 @@ static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
 
        /* Clear global flags */
        if (master) {
-               if (cpumask_weight(uv_nmi_cpu_mask))
+               if (!cpumask_empty(uv_nmi_cpu_mask))
                        uv_nmi_cleanup_mask();
                atomic_set(&uv_nmi_cpus_in_nmi, -1);
                atomic_set(&uv_nmi_cpu, -1);
index 9f2b251e83c566cad8117fc5316d2c3e2ac9378a..bb176c72891c933c1d7b77a8ef848f264c838321 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/cpu.h>
 #include <asm/mmu_context.h>
 #include <asm/cpu_device_id.h>
+#include <asm/microcode.h>
 
 #ifdef CONFIG_X86_32
 __visible unsigned long saved_context_ebx;
@@ -40,7 +41,8 @@ static void msr_save_context(struct saved_context *ctxt)
        struct saved_msr *end = msr + ctxt->saved_msrs.num;
 
        while (msr < end) {
-               msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q);
+               if (msr->valid)
+                       rdmsrl(msr->info.msr_no, msr->info.reg.q);
                msr++;
        }
 }
@@ -261,11 +263,18 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
        x86_platform.restore_sched_clock_state();
        mtrr_bp_restore();
        perf_restore_debug_store();
-       msr_restore_context(ctxt);
 
        c = &cpu_data(smp_processor_id());
        if (cpu_has(c, X86_FEATURE_MSR_IA32_FEAT_CTL))
                init_ia32_feat_ctl(c);
+
+       microcode_bsp_resume();
+
+       /*
+        * This needs to happen after the microcode has been updated upon resume
+        * because some of the MSRs are "emulated" in microcode.
+        */
+       msr_restore_context(ctxt);
 }
 
 /* Needed by apm.c */
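
The restructuring above moves the rdmsrl_safe() probe from every suspend to context-build time, so the suspend path can use a plain read gated by the cached valid flag. A toy sketch with the MSR accessors passed in as stubs:

#include <stdbool.h>
#include <stdint.h>

struct toy_saved_msr {
        uint32_t msr_no;
        uint64_t value;
        bool     valid;   /* probed once, at context-build time */
};

/* Setup: mark the MSR valid only if a faulting-safe read succeeds. */
static void toy_build(struct toy_saved_msr *m,
                      bool (*rdmsr_safe)(uint32_t, uint64_t *))
{
        uint64_t dummy;

        m->valid = rdmsr_safe(m->msr_no, &dummy);
}

/* Suspend path: plain read, but only for MSRs that probed as present. */
static void toy_save(struct toy_saved_msr *m, uint64_t (*rdmsr)(uint32_t))
{
        if (m->valid)
                m->value = rdmsr(m->msr_no);
}
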
@@ -424,8 +433,10 @@ static int msr_build_context(const u32 *msr_id, const int num)
        }
 
        for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) {
+               u64 dummy;
+
                msr_array[i].info.msr_no        = msr_id[j];
-               msr_array[i].valid              = false;
+               msr_array[i].valid              = !rdmsrl_safe(msr_id[j], &dummy);
                msr_array[i].info.reg.q         = 0;
        }
        saved_msrs->num   = total_num;
@@ -500,10 +511,24 @@ static int pm_cpu_check(const struct x86_cpu_id *c)
        return ret;
 }
 
+static void pm_save_spec_msr(void)
+{
+       u32 spec_msr_id[] = {
+               MSR_IA32_SPEC_CTRL,
+               MSR_IA32_TSX_CTRL,
+               MSR_TSX_FORCE_ABORT,
+               MSR_IA32_MCU_OPT_CTRL,
+               MSR_AMD64_LS_CFG,
+       };
+
+       msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id));
+}
+
 static int pm_check_save_msr(void)
 {
        dmi_check_system(msr_save_dmi_table);
        pm_cpu_check(msr_save_cpu_table);
+       pm_save_spec_msr();
 
        return 0;
 }
index c5e29db02a4693f007559dc70aacae585c8297f9..41d7669a97ad167f50485fab3ac1ebafe255ddfc 100644 (file)
@@ -67,7 +67,7 @@ void __init reserve_real_mode(void)
        memblock_reserve(0, SZ_1M);
 }
 
-static void sme_sev_setup_real_mode(struct trampoline_header *th)
+static void __init sme_sev_setup_real_mode(struct trampoline_header *th)
 {
 #ifdef CONFIG_AMD_MEM_ENCRYPT
        if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
index 8c1db5bf5d78a9aa7cc4b4bef35f2e31b6998998..2eb62be6d2562bf4044ffb8e1f444b8892aed041 100644 (file)
@@ -24,6 +24,7 @@ SYM_DATA_START(real_mode_header)
        .long   pa_sev_es_trampoline_start
 #endif
 #ifdef CONFIG_X86_64
+       .long   pa_trampoline_start64
        .long   pa_trampoline_pgd;
 #endif
        /* ACPI S3 wakeup */
index cc8391f86cdb61469b2b775a64490e470b2ef534..e38d61d6562e4611c9150b935b018bee46717fe5 100644 (file)
@@ -70,7 +70,7 @@ SYM_CODE_START(trampoline_start)
        movw    $__KERNEL_DS, %dx       # Data segment descriptor
 
        # Enable protected mode
-       movl    $X86_CR0_PE, %eax       # protected mode (PE) bit
+       movl    $(CR0_STATE & ~X86_CR0_PG), %eax
        movl    %eax, %cr0              # into protected mode
 
        # flush prefetch and jump to startup_32
@@ -143,13 +143,24 @@ SYM_CODE_START(startup_32)
        movl    %eax, %cr3
 
        # Set up EFER
+       movl    $MSR_EFER, %ecx
+       rdmsr
+       /*
+        * Skip writing to EFER if the register already has the desired
+        * value (to avoid #VE for the TDX guest).
+        */
+       cmp     pa_tr_efer, %eax
+       jne     .Lwrite_efer
+       cmp     pa_tr_efer + 4, %edx
+       je      .Ldone_efer
+.Lwrite_efer:
        movl    pa_tr_efer, %eax
        movl    pa_tr_efer + 4, %edx
-       movl    $MSR_EFER, %ecx
        wrmsr
 
-       # Enable paging and in turn activate Long Mode
-       movl    $(X86_CR0_PG | X86_CR0_WP | X86_CR0_PE), %eax
+.Ldone_efer:
+       # Enable paging and in turn activate Long Mode.
+       movl    $CR0_STATE, %eax
        movl    %eax, %cr0
 
        /*
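
Expressed in C, the EFER handling added above is a read-compare-write; a sketch with stubbed MSR accessors (the real code is real-mode assembly):

#include <stdint.h>

/* Stub accessors standing in for rdmsr/wrmsr. */
extern uint64_t toy_rdmsr(uint32_t msr);
extern void toy_wrmsr(uint32_t msr, uint64_t val);

#define TOY_MSR_EFER 0xc0000080u

/* Write EFER only when it differs, so a TDX guest takes no #VE for it. */
static void toy_set_efer(uint64_t want)
{
        if (toy_rdmsr(TOY_MSR_EFER) != want)
                toy_wrmsr(TOY_MSR_EFER, want);
}
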
@@ -161,6 +172,19 @@ SYM_CODE_START(startup_32)
        ljmpl   $__KERNEL_CS, $pa_startup_64
 SYM_CODE_END(startup_32)
 
+SYM_CODE_START(pa_trampoline_compat)
+       /*
+        * In compatibility mode.  Prep ESP and DX for startup_32, then disable
+        * paging and complete the switch to legacy 32-bit mode.
+        */
+       movl    $rm_stack_end, %esp
+       movw    $__KERNEL_DS, %dx
+
+       movl    $(CR0_STATE & ~X86_CR0_PG), %eax
+       movl    %eax, %cr0
+       ljmpl   $__KERNEL32_CS, $pa_startup_32
+SYM_CODE_END(pa_trampoline_compat)
+
        .section ".text64","ax"
        .code64
        .balign 4
@@ -169,6 +193,20 @@ SYM_CODE_START(startup_64)
        jmpq    *tr_start(%rip)
 SYM_CODE_END(startup_64)
 
+SYM_CODE_START(trampoline_start64)
+       /*
+        * APs start here on a direct transfer from 64-bit BIOS with identity
+        * mapped page tables.  Load the kernel's GDT in order to gear down to
+        * 32-bit mode (to handle 4-level vs. 5-level paging), and to (re)load
+        * segment registers.  Load the zero IDT so any fault triggers a
+        * shutdown instead of jumping back into BIOS.
+        */
+       lidt    tr_idt(%rip)
+       lgdt    tr_gdt64(%rip)
+
+       ljmpl   *tr_compat(%rip)
+SYM_CODE_END(trampoline_start64)
+
        .section ".rodata","a"
        # Duplicate the global descriptor table
        # so the kernel can live anywhere
@@ -182,6 +220,17 @@ SYM_DATA_START(tr_gdt)
        .quad   0x00cf93000000ffff      # __KERNEL_DS
 SYM_DATA_END_LABEL(tr_gdt, SYM_L_LOCAL, tr_gdt_end)
 
+SYM_DATA_START(tr_gdt64)
+       .short  tr_gdt_end - tr_gdt - 1 # gdt limit
+       .long   pa_tr_gdt
+       .long   0
+SYM_DATA_END(tr_gdt64)
+
+SYM_DATA_START(tr_compat)
+       .long   pa_trampoline_compat
+       .short  __KERNEL32_CS
+SYM_DATA_END(tr_compat)
+
        .bss
        .balign PAGE_SIZE
 SYM_DATA(trampoline_pgd, .space PAGE_SIZE)
index 5033e640f957edf7f690db623da0397cd81c906f..4331c32c47f84438b932a41ac07bf0dc4050213a 100644 (file)
@@ -1,4 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0 */
        .section ".rodata","a"
        .balign 16
-SYM_DATA_LOCAL(tr_idt, .fill 1, 6, 0)
+
+/*
+ * When a bootloader hands off to the kernel in 32-bit mode, an
+ * IDT with a 2-byte limit and a 4-byte base is needed. When a
+ * bootloader hands off to the kernel in 64-bit mode, the base
+ * address extends to 8 bytes. Reserve enough space for either
+ * scenario.
+ */
+SYM_DATA_START_LOCAL(tr_idt)
+       .short  0
+       .quad   0
+SYM_DATA_END(tr_idt)
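
The reservation above corresponds to a pseudo-descriptor whose base widens from 4 to 8 bytes; a sketch of the two shapes as packed toy structs (analogous to, but not, the kernel's struct desc_ptr):

#include <stdint.h>

/* 32-bit handoff: 2-byte limit + 4-byte base = 6 bytes. */
struct toy_idt_desc32 {
        uint16_t limit;
        uint32_t base;
} __attribute__((packed));

/* 64-bit handoff: 2-byte limit + 8-byte base = 10 bytes. */
struct toy_idt_desc64 {
        uint16_t limit;
        uint64_t base;
} __attribute__((packed));

/* The .short 0 / .quad 0 above reserves 10 bytes, enough for either. */
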
index 1d6437e6d2ba3bcf5701a8c49fb117dc36c3e8a2..a6f4d8388ad888fbd10996c548efeab2d195e599 100644 (file)
@@ -62,8 +62,12 @@ static void send_morse(const char *pattern)
        }
 }
 
+struct port_io_ops pio_ops;
+
 void main(void)
 {
+       init_default_io_ops();
+
        /* Kill machine if structures are wrong */
        if (wakeup_header.real_magic != 0x12345678)
                while (1)
diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S
new file mode 100644 (file)
index 0000000..49a5435
--- /dev/null
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/asm-offsets.h>
+#include <asm/tdx.h>
+
+/*
+ * TDCALL and SEAMCALL are supported in Binutils >= 2.36.
+ */
+#define tdcall         .byte 0x66,0x0f,0x01,0xcc
+#define seamcall       .byte 0x66,0x0f,0x01,0xcf
+
+/*
+ * TDX_MODULE_CALL - common helper macro for both
+ *                 TDCALL and SEAMCALL instructions.
+ *
+ * TDCALL   - used by TDX guests to make requests to the
+ *            TDX module and hypercalls to the VMM.
+ * SEAMCALL - used by TDX hosts to make requests to the
+ *            TDX module.
+ */
+.macro TDX_MODULE_CALL host:req
+       /*
+        * R12 will be used as temporary storage for struct tdx_module_output
+        * pointer. Since R12-R15 registers are not used by TDCALL/SEAMCALL
+        * services supported by this function, it can be reused.
+        */
+
+       /* Callee saved, so preserve it */
+       push %r12
+
+       /*
+        * Push output pointer to stack.
+        * After the operation, it will be fetched into R12 register.
+        */
+       push %r9
+
+       /* Mangle function call ABI into TDCALL/SEAMCALL ABI: */
+       /* Move Leaf ID to RAX */
+       mov %rdi, %rax
+       /* Move input 4 to R9 */
+       mov %r8,  %r9
+       /* Move input 3 to R8 */
+       mov %rcx, %r8
+       /* Move input 1 to RCX */
+       mov %rsi, %rcx
+       /* Leave input param 2 in RDX */
+
+       .if \host
+       seamcall
+       /*
+        * SEAMCALL instruction is essentially a VMExit from VMX root
+        * mode to SEAM VMX root mode.  VMfailInvalid (CF=1) indicates
+        * that the targeted SEAM firmware is not loaded or disabled,
+        * or P-SEAMLDR is busy with another SEAMCALL.  %rax is not
+        * changed in this case.
+        *
+        * Set %rax to TDX_SEAMCALL_VMFAILINVALID for VMfailInvalid.
+        * This value will never be used as actual SEAMCALL error code as
+        * it is from the Reserved status code class.
+        */
+       jnc .Lno_vmfailinvalid
+       mov $TDX_SEAMCALL_VMFAILINVALID, %rax
+.Lno_vmfailinvalid:
+
+       .else
+       tdcall
+       .endif
+
+       /*
+        * Fetch the output pointer from the stack into R12 (used as
+        * temporary storage).
+        */
+       pop %r12
+
+       /*
+        * Since this macro can be invoked with NULL as an output pointer,
+        * check if the caller provided an output struct before storing output
+        * registers.
+        *
+        * Update output registers, even if the call failed (RAX != 0).
+        * Other registers may contain details of the failure.
+        */
+       test %r12, %r12
+       jz .Lno_output_struct
+
+       /* Copy result registers to output struct: */
+       movq %rcx, TDX_MODULE_rcx(%r12)
+       movq %rdx, TDX_MODULE_rdx(%r12)
+       movq %r8,  TDX_MODULE_r8(%r12)
+       movq %r9,  TDX_MODULE_r9(%r12)
+       movq %r10, TDX_MODULE_r10(%r12)
+       movq %r11, TDX_MODULE_r11(%r12)
+
+.Lno_output_struct:
+       /* Restore the state of R12 register */
+       pop %r12
+.endm
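
A hedged C-side view of the register contract TDX_MODULE_CALL implements; the output struct mirrors the TDX_MODULE_* offsets used above, and the wrapper declaration is illustrative rather than the kernel's actual prototype:

#include <stdint.h>

/* Result registers copied back by TDX_MODULE_CALL when a struct is given. */
struct toy_tdx_module_output {
        uint64_t rcx;
        uint64_t rdx;
        uint64_t r8;
        uint64_t r9;
        uint64_t r10;
        uint64_t r11;
};

/*
 * Function-call ABI in: leaf in RDI, inputs in RSI/RDX/RCX/R8, output
 * pointer in R9.  TDCALL ABI expected by the module: leaf in RAX,
 * inputs in RCX/RDX/R8/R9; hence the register shuffle in the macro.
 */
extern uint64_t toy_tdcall(uint64_t leaf, uint64_t in1, uint64_t in2,
                           uint64_t in3, uint64_t in4,
                           struct toy_tdx_module_output *out);
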
index 688aa8b6ae29a2f57075996ad05788d084b49466..ba7af2eca755b7f08e4fe9a4c31f350defe80c2d 100644 (file)
@@ -260,8 +260,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
                return 0;
 
        ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
-       if (ctxt == NULL)
+       if (ctxt == NULL) {
+               cpumask_clear_cpu(cpu, xen_cpu_initialized_map);
+               cpumask_clear_cpu(cpu, cpu_callout_mask);
                return -ENOMEM;
+       }
 
        gdt = get_cpu_gdt_rw(cpu);
 
index ac17196e2518775eaac5a1c69d7001203de90885..3a2cd93bf0590e6acb5b2bad7d8a7e282050152f 100644 (file)
@@ -45,6 +45,7 @@ SYM_CODE_END(hypercall_page)
        __INIT
 SYM_CODE_START(startup_xen)
        UNWIND_HINT_EMPTY
+       ANNOTATE_NOENDBR
        cld
 
        /* Clear .bss */
index 45cc0ae0af6f966ed778253b9624d46a783c2152..c7b9f12896f20a6870588cd50e9849e11c992dcf 100644 (file)
@@ -29,7 +29,7 @@
        .if XTENSA_HAVE_COPROCESSOR(x);                                 \
                .align 4;                                               \
        .Lsave_cp_regs_cp##x:                                           \
-               xchal_cp##x##_store a2 a4 a5 a6 a7;                     \
+               xchal_cp##x##_store a2 a3 a4 a5 a6;                     \
                jx      a0;                                             \
        .endif
 
@@ -46,7 +46,7 @@
        .if XTENSA_HAVE_COPROCESSOR(x);                                 \
                .align 4;                                               \
        .Lload_cp_regs_cp##x:                                           \
-               xchal_cp##x##_load a2 a4 a5 a6 a7;                      \
+               xchal_cp##x##_load a2 a3 a4 a5 a6;                      \
                jx      a0;                                             \
        .endif
 
index 0dde21e0d3de4c2836bbce5c7fee361811863ec8..ad1841cecdfb769a32c7ed61fa62d027cde4bbcf 100644 (file)
@@ -40,7 +40,7 @@ static int patch_text_stop_machine(void *data)
 {
        struct patch *patch = data;
 
-       if (atomic_inc_return(&patch->cpu_count) == 1) {
+       if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
                local_patch_text(patch->addr, patch->data, patch->sz);
                atomic_inc(&patch->cpu_count);
        } else {
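
The fix above inverts the rendezvous: the last CPU to arrive does the patching, so every other CPU is already parked. A toy model with C11 atomics (stop_machine, cache maintenance, and cpu_relax() elided):

#include <stdatomic.h>

/* Toy rendezvous: the last CPU to arrive does the patching. */
static void toy_patch_rendezvous(atomic_int *cpu_count, int num_online,
                                 void (*do_patch)(void))
{
        if (atomic_fetch_add(cpu_count, 1) + 1 == num_online) {
                do_patch();                        /* everyone else waits */
                atomic_fetch_add(cpu_count, 1);    /* release the waiters */
        } else {
                while (atomic_load(cpu_count) <= num_online)
                        ;                          /* spin until patched */
        }
}
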
index 81d7c7e8f7e96077e2a26f69d173af19f28a5f18..10b79d3c74e070ed1afdace04ef805e59e4155bd 100644 (file)
@@ -36,24 +36,19 @@ static void rs_poll(struct timer_list *);
 static struct tty_driver *serial_driver;
 static struct tty_port serial_port;
 static DEFINE_TIMER(serial_timer, rs_poll);
-static DEFINE_SPINLOCK(timer_lock);
 
 static int rs_open(struct tty_struct *tty, struct file * filp)
 {
-       spin_lock_bh(&timer_lock);
        if (tty->count == 1)
                mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE);
-       spin_unlock_bh(&timer_lock);
 
        return 0;
 }
 
 static void rs_close(struct tty_struct *tty, struct file * filp)
 {
-       spin_lock_bh(&timer_lock);
        if (tty->count == 1)
                del_timer_sync(&serial_timer);
-       spin_unlock_bh(&timer_lock);
 }
 
 
@@ -73,8 +68,6 @@ static void rs_poll(struct timer_list *unused)
        int rd = 1;
        unsigned char c;
 
-       spin_lock(&timer_lock);
-
        while (simc_poll(0)) {
                rd = simc_read(0, &c, 1);
                if (rd <= 0)
@@ -87,7 +80,6 @@ static void rs_poll(struct timer_list *unused)
                tty_flip_buffer_push(port);
        if (rd)
                mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE);
-       spin_unlock(&timer_lock);
 }
 
 
index 3950ecbc5c263b0bc93575383d0c388ef6d2edde..4e01bb71ad6e07b7176caf277724a35b54fa4056 100644 (file)
@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
 obj-$(CONFIG_BLK_DEV_BSGLIB)   += bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)       += blk-cgroup.o
 obj-$(CONFIG_BLK_CGROUP_RWSTAT)        += blk-cgroup-rwstat.o
+obj-$(CONFIG_BLK_CGROUP_FC_APPID) += blk-cgroup-fc-appid.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)       += blk-throttle.o
 obj-$(CONFIG_BLK_CGROUP_IOPRIO)        += blk-ioprio.o
 obj-$(CONFIG_BLK_CGROUP_IOLATENCY)     += blk-iolatency.o
index d39056630d9c1de07d3923daeb8b80e6ab6a086e..3afb550c0f7b782ad9bc95be4a50e111154994ac 100644 (file)
@@ -65,7 +65,6 @@ int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
                s >>= bb->shift;
                target += (1<<bb->shift) - 1;
                target >>= bb->shift;
-               sectors = target - s;
        }
        /* 'target' is now the first block after the bad range */
 
@@ -345,7 +344,6 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
                s += (1<<bb->shift) - 1;
                s >>= bb->shift;
                target >>= bb->shift;
-               sectors = target - s;
        }
 
        write_seqlock_irq(&bb->lock);
index 13de871fa816993a946360e1f97610652e0ceac1..7bf88e591aaf3ba40d60fa9d68443785f2e95730 100644 (file)
@@ -673,17 +673,17 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
                }
        }
 
-       if (!bdev->bd_openers)
+       if (!atomic_read(&bdev->bd_openers))
                set_init_blocksize(bdev);
        if (test_bit(GD_NEED_PART_SCAN, &disk->state))
                bdev_disk_changed(disk, false);
-       bdev->bd_openers++;
+       atomic_inc(&bdev->bd_openers);
        return 0;
 }
 
 static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
 {
-       if (!--bdev->bd_openers)
+       if (atomic_dec_and_test(&bdev->bd_openers))
                blkdev_flush_mapping(bdev);
        if (bdev->bd_disk->fops->release)
                bdev->bd_disk->fops->release(bdev->bd_disk, mode);
@@ -694,7 +694,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
        struct gendisk *disk = part->bd_disk;
        int ret;
 
-       if (part->bd_openers)
+       if (atomic_read(&part->bd_openers))
                goto done;
 
        ret = blkdev_get_whole(bdev_whole(part), mode);
@@ -708,7 +708,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
        disk->open_partitions++;
        set_init_blocksize(part);
 done:
-       part->bd_openers++;
+       atomic_inc(&part->bd_openers);
        return 0;
 
 out_blkdev_put:
@@ -720,7 +720,7 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
 {
        struct block_device *whole = bdev_whole(part);
 
-       if (--part->bd_openers)
+       if (!atomic_dec_and_test(&part->bd_openers))
                return;
        blkdev_flush_mapping(part);
        whole->bd_disk->open_partitions--;
@@ -899,7 +899,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
         * of the world and we want to avoid long (could be several minute)
         * syncs while holding the mutex.
         */
-       if (bdev->bd_openers == 1)
+       if (atomic_read(&bdev->bd_openers) == 1)
                sync_blockdev(bdev);
 
        mutex_lock(&disk->open_mutex);
@@ -1044,7 +1044,7 @@ void sync_bdevs(bool wait)
                bdev = I_BDEV(inode);
 
                mutex_lock(&bdev->bd_disk->open_mutex);
-               if (!bdev->bd_openers) {
+               if (!atomic_read(&bdev->bd_openers)) {
                        ; /* skip */
                } else if (wait) {
                        /*
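
The hunks above convert bd_openers from a plain int protected by open_mutex into an atomic_t, so readers such as sync_bdevs() and the pre-sync check in blkdev_put() can sample the open count locklessly. A hedged sketch of the pattern ('example_dev' and its hooks are hypothetical, not block-layer API):

    #include <linux/atomic.h>

    struct example_dev {
            atomic_t openers;
    };

    static void example_open(struct example_dev *dev)
    {
            if (atomic_inc_return(&dev->openers) == 1) {
                    /* first opener: one-time setup (cf. set_init_blocksize) */
            }
    }

    static void example_release(struct example_dev *dev)
    {
            if (atomic_dec_and_test(&dev->openers)) {
                    /* last closer: tear down (cf. blkdev_flush_mapping) */
            }
    }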
index 420eda2589c0ec99ea4647233fdd85a32b6b63e8..09574af835662ff1bf998fbd6dcf0df6b53c5b68 100644 (file)
@@ -557,6 +557,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
                                   */
        bfqg->bfqd = bfqd;
        bfqg->active_entities = 0;
+       bfqg->online = true;
        bfqg->rq_pos_tree = RB_ROOT;
 }
 
@@ -585,28 +586,11 @@ static void bfq_group_set_parent(struct bfq_group *bfqg,
        entity->sched_data = &parent->sched_data;
 }
 
-static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
-                                        struct blkcg *blkcg)
+static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg)
 {
-       struct blkcg_gq *blkg;
-
-       blkg = blkg_lookup(blkcg, bfqd->queue);
-       if (likely(blkg))
-               return blkg_to_bfqg(blkg);
-       return NULL;
-}
-
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
-                                    struct blkcg *blkcg)
-{
-       struct bfq_group *bfqg, *parent;
+       struct bfq_group *parent;
        struct bfq_entity *entity;
 
-       bfqg = bfq_lookup_bfqg(bfqd, blkcg);
-
-       if (unlikely(!bfqg))
-               return NULL;
-
        /*
         * Update chain of bfq_groups as we might be handling a leaf group
         * which, along with some of its relatives, has not been hooked yet
@@ -623,8 +607,24 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
                        bfq_group_set_parent(curr_bfqg, parent);
                }
        }
+}
 
-       return bfqg;
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
+{
+       struct blkcg_gq *blkg = bio->bi_blkg;
+       struct bfq_group *bfqg;
+
+       while (blkg) {
+               bfqg = blkg_to_bfqg(blkg);
+               if (bfqg->online) {
+                       bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
+                       return bfqg;
+               }
+               blkg = blkg->parent;
+       }
+       bio_associate_blkg_from_css(bio,
+                               &bfqg_to_blkg(bfqd->root_group)->blkcg->css);
+       return bfqd->root_group;
 }
 
 /**
@@ -714,25 +714,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  * Move bic to blkcg, assuming that bfqd->lock is held, which makes
  * sure that the reference to the cgroup is valid across the call (see
  * comments in bfq_bic_update_cgroup on this issue)
- *
- * NOTE: an alternative approach might have been to store the current
- * cgroup in bfqq and getting a reference to it, reducing the lookup
- * time here, at the price of slightly more complex code.
  */
-static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
-                                               struct bfq_io_cq *bic,
-                                               struct blkcg *blkcg)
+static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+                                    struct bfq_io_cq *bic,
+                                    struct bfq_group *bfqg)
 {
        struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
        struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
-       struct bfq_group *bfqg;
        struct bfq_entity *entity;
 
-       bfqg = bfq_find_set_group(bfqd, blkcg);
-
-       if (unlikely(!bfqg))
-               bfqg = bfqd->root_group;
-
        if (async_bfqq) {
                entity = &async_bfqq->entity;
 
@@ -743,9 +733,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
        }
 
        if (sync_bfqq) {
-               entity = &sync_bfqq->entity;
-               if (entity->sched_data != &bfqg->sched_data)
-                       bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+               if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
+                       /* We are the only user of this bfqq, just move it */
+                       if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
+                               bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+               } else {
+                       struct bfq_queue *bfqq;
+
+                       /*
+                        * The queue was merged to a different queue. Check
+                        * that the merge chain still belongs to the same
+                        * cgroup.
+                        */
+                       for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
+                               if (bfqq->entity.sched_data !=
+                                   &bfqg->sched_data)
+                                       break;
+                       if (bfqq) {
+                               /*
+                                * Some queue changed cgroup so the merge is
+                                * not valid anymore. We cannot easily just
+                                * cancel the merge (by clearing new_bfqq) as
+                                * there may be other processes using this
+                                * queue and holding refs to all queues below
+                                * sync_bfqq->new_bfqq. Similarly if the merge
+                                * already happened, we need to detach from
+                                * bfqq now so that we cannot merge bio to a
+                                * request from the old cgroup.
+                                */
+                               bfq_put_cooperator(sync_bfqq);
+                               bfq_release_process_ref(bfqd, sync_bfqq);
+                               bic_set_bfqq(bic, NULL, 1);
+                       }
+               }
        }
 
        return bfqg;
@@ -754,20 +774,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
 void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
 {
        struct bfq_data *bfqd = bic_to_bfqd(bic);
-       struct bfq_group *bfqg = NULL;
+       struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio);
        uint64_t serial_nr;
 
-       rcu_read_lock();
-       serial_nr = __bio_blkcg(bio)->css.serial_nr;
+       serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr;
 
        /*
         * Check whether blkcg has changed.  The condition may trigger
         * spuriously on a newly created cic but there's no harm.
         */
        if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
-               goto out;
+               return;
 
-       bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
+       /*
+        * New cgroup for this process. Make sure it is linked to the bfq
+        * internal cgroup hierarchy.
+        */
+       bfq_link_bfqg(bfqd, bfqg);
+       __bfq_bic_change_cgroup(bfqd, bic, bfqg);
        /*
         * Update blkg_path for bfq_log_* functions. We cache this
         * path, and update it here, for the following
@@ -820,8 +844,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
         */
        blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
        bic->blkcg_serial_nr = serial_nr;
-out:
-       rcu_read_unlock();
 }
 
 /**
@@ -949,6 +971,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
 
 put_async_queues:
        bfq_put_async_queues(bfqd, bfqg);
+       bfqg->online = false;
 
        spin_unlock_irqrestore(&bfqd->lock, flags);
        /*
@@ -1438,7 +1461,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
        bfq_end_wr_async_queues(bfqd, bfqd->root_group);
 }
 
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
 {
        return bfqd->root_group;
 }
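
bfq_bio_bfqg() above replaces the blkcg-based lookup with a walk up blkg->parent until a still-online bfq group is found, falling back to the root group so a request always lands on a valid group. The same climb-to-nearest-online-ancestor idea in isolation (a sketch; group_online() is a hypothetical predicate standing in for bfqg->online):

    static struct blkcg_gq *nearest_online_blkg(struct blkcg_gq *blkg,
                                                struct blkcg_gq *root)
    {
            while (blkg) {
                    if (group_online(blkg))         /* hypothetical */
                            return blkg;
                    blkg = blkg->parent;    /* parent is always reachable */
            }
            return root;                    /* the root never goes offline */
    }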
index 2e0dd68a3cbee5e55a8f6138481a9833649f8bdc..0d46cb728bbfab177eaf505b2760637ec2e27d97 100644 (file)
@@ -374,7 +374,7 @@ static const unsigned long bfq_activation_stable_merging = 600;
  */
 static const unsigned long bfq_late_stable_merging = 600;
 
-#define RQ_BIC(rq)             icq_to_bic((rq)->elv.priv[0])
+#define RQ_BIC(rq)             ((struct bfq_io_cq *)((rq)->elv.priv[0]))
 #define RQ_BFQQ(rq)            ((rq)->elv.priv[1])
 
 struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
@@ -456,6 +456,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
  */
 void bfq_schedule_dispatch(struct bfq_data *bfqd)
 {
+       lockdep_assert_held(&bfqd->lock);
+
        if (bfqd->queued != 0) {
                bfq_log(bfqd, "schedule dispatch");
                blk_mq_run_hw_queues(bfqd->queue, true);
@@ -569,7 +571,7 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
        struct bfq_entity *entity = &bfqq->entity;
        struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH];
        struct bfq_entity **entities = inline_entities;
-       int depth, level;
+       int depth, level, alloc_depth = BFQ_LIMIT_INLINE_DEPTH;
        int class_idx = bfqq->ioprio_class - 1;
        struct bfq_sched_data *sched_data;
        unsigned long wsum;
@@ -578,15 +580,21 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
        if (!entity->on_st_or_in_serv)
                return false;
 
+retry:
+       spin_lock_irq(&bfqd->lock);
        /* +1 for bfqq entity, root cgroup not included */
        depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1;
-       if (depth > BFQ_LIMIT_INLINE_DEPTH) {
+       if (depth > alloc_depth) {
+               spin_unlock_irq(&bfqd->lock);
+               if (entities != inline_entities)
+                       kfree(entities);
                entities = kmalloc_array(depth, sizeof(*entities), GFP_NOIO);
                if (!entities)
                        return false;
+               alloc_depth = depth;
+               goto retry;
        }
 
-       spin_lock_irq(&bfqd->lock);
        sched_data = entity->sched_data;
        /* Gather our ancestors as we need to traverse them in reverse order */
        level = 0;
@@ -2127,9 +2135,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
        if (!bfqd->last_completed_rq_bfqq ||
            bfqd->last_completed_rq_bfqq == bfqq ||
            bfq_bfqq_has_short_ttime(bfqq) ||
-           bfqq->dispatched > 0 ||
-           now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
-           bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
+           now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC)
                return;
 
        /*
@@ -2202,9 +2208,13 @@ static void bfq_add_request(struct request *rq)
 
        bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
        bfqq->queued[rq_is_sync(rq)]++;
-       bfqd->queued++;
+       /*
+        * Updating of 'bfqd->queued' is protected by 'bfqd->lock'; however, it
+        * may be read without holding the lock in bfq_has_work().
+        */
+       WRITE_ONCE(bfqd->queued, bfqd->queued + 1);
 
-       if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
+       if (bfq_bfqq_sync(bfqq) && RQ_BIC(rq)->requests <= 1) {
                bfq_check_waker(bfqd, bfqq, now_ns);
 
                /*
@@ -2394,7 +2404,11 @@ static void bfq_remove_request(struct request_queue *q,
        if (rq->queuelist.prev != &rq->queuelist)
                list_del_init(&rq->queuelist);
        bfqq->queued[sync]--;
-       bfqd->queued--;
+       /*
+        * Updating of 'bfqd->queued' is protected by 'bfqd->lock'; however, it
+        * may be read without holding the lock in bfq_has_work().
+        */
+       WRITE_ONCE(bfqd->queued, bfqd->queued - 1);
        elv_rb_del(&bfqq->sort_list, rq);
 
        elv_rqhash_del(q, rq);
@@ -2457,10 +2471,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
 
        spin_lock_irq(&bfqd->lock);
 
-       if (bic)
+       if (bic) {
+               /*
+                * Make sure cgroup info is up to date for the current
+                * process before considering the merge.
+                */
+               bfq_bic_update_cgroup(bic, bio);
+
                bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
-       else
+       } else {
                bfqd->bio_bfqq = NULL;
+       }
        bfqd->bio_bic = bic;
 
        ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
@@ -2490,8 +2511,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
        return ELEVATOR_NO_MERGE;
 }
 
-static struct bfq_queue *bfq_init_rq(struct request *rq);
-
 static void bfq_request_merged(struct request_queue *q, struct request *req,
                               enum elv_merge type)
 {
@@ -2500,7 +2519,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
            blk_rq_pos(req) <
            blk_rq_pos(container_of(rb_prev(&req->rb_node),
                                    struct request, rb_node))) {
-               struct bfq_queue *bfqq = bfq_init_rq(req);
+               struct bfq_queue *bfqq = RQ_BFQQ(req);
                struct bfq_data *bfqd;
                struct request *prev, *next_rq;
 
@@ -2552,8 +2571,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
 static void bfq_requests_merged(struct request_queue *q, struct request *rq,
                                struct request *next)
 {
-       struct bfq_queue *bfqq = bfq_init_rq(rq),
-               *next_bfqq = bfq_init_rq(next);
+       struct bfq_queue *bfqq = RQ_BFQQ(rq),
+               *next_bfqq = RQ_BFQQ(next);
 
        if (!bfqq)
                goto remove;
@@ -2758,6 +2777,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
        if (process_refs == 0 || new_process_refs == 0)
                return NULL;
 
+       /*
+        * Make sure merged queues belong to the same parent. Parents could
+        * have changed since the time we decided the two queues are suitable
+        * for merging.
+        */
+       if (new_bfqq->entity.parent != bfqq->entity.parent)
+               return NULL;
+
        bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
                new_bfqq->pid);
 
@@ -2895,9 +2922,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
                                struct bfq_queue *new_bfqq =
                                        bfq_setup_merge(bfqq, stable_merge_bfqq);
 
-                               bic->stably_merged = true;
-                               if (new_bfqq && new_bfqq->bic)
-                                       new_bfqq->bic->stably_merged = true;
+                               if (new_bfqq) {
+                                       bic->stably_merged = true;
+                                       if (new_bfqq->bic)
+                                               new_bfqq->bic->stably_merged =
+                                                                       true;
+                               }
                                return new_bfqq;
                        } else
                                return NULL;
@@ -5039,11 +5069,11 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
        struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
 
        /*
-        * Avoiding lock: a race on bfqd->busy_queues should cause at
+        * Avoiding lock: a race on bfqd->queued should cause at
         * most a call to dispatch for nothing
         */
        return !list_empty_careful(&bfqd->dispatch) ||
-               bfq_tot_busy_queues(bfqd) > 0;
+               READ_ONCE(bfqd->queued);
 }
 
 static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
@@ -5304,7 +5334,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq)
        bfq_put_queue(bfqq);
 }
 
-static void bfq_put_cooperator(struct bfq_queue *bfqq)
+void bfq_put_cooperator(struct bfq_queue *bfqq)
 {
        struct bfq_queue *__bfqq, *next;
 
@@ -5710,14 +5740,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
        struct bfq_queue *bfqq;
        struct bfq_group *bfqg;
 
-       rcu_read_lock();
-
-       bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
-       if (!bfqg) {
-               bfqq = &bfqd->oom_bfqq;
-               goto out;
-       }
-
+       bfqg = bfq_bio_bfqg(bfqd, bio);
        if (!is_sync) {
                async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
                                                  ioprio);
@@ -5763,8 +5786,6 @@ out:
 
        if (bfqq != &bfqd->oom_bfqq && is_sync && !respawn)
                bfqq = bfq_do_or_sched_stable_merge(bfqd, bfqq, bic);
-
-       rcu_read_unlock();
        return bfqq;
 }
 
@@ -6111,6 +6132,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
                                           unsigned int cmd_flags) {}
 #endif /* CONFIG_BFQ_CGROUP_DEBUG */
 
+static struct bfq_queue *bfq_init_rq(struct request *rq);
+
 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
                               bool at_head)
 {
@@ -6126,18 +6149,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
                bfqg_stats_update_legacy_io(q, rq);
 #endif
        spin_lock_irq(&bfqd->lock);
+       bfqq = bfq_init_rq(rq);
        if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
                spin_unlock_irq(&bfqd->lock);
                blk_mq_free_requests(&free);
                return;
        }
 
-       spin_unlock_irq(&bfqd->lock);
-
        trace_block_rq_insert(rq);
 
-       spin_lock_irq(&bfqd->lock);
-       bfqq = bfq_init_rq(rq);
        if (!bfqq || at_head) {
                if (at_head)
                        list_add(&rq->queuelist, &bfqd->dispatch);
@@ -6354,12 +6374,6 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
                bfq_schedule_dispatch(bfqd);
 }
 
-static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
-{
-       bfqq_request_freed(bfqq);
-       bfq_put_queue(bfqq);
-}
-
 /*
  * The processes associated with bfqq may happen to generate their
  * cumulative I/O at a lower rate than the rate at which the device
@@ -6556,7 +6570,9 @@ static void bfq_finish_requeue_request(struct request *rq)
 
                bfq_completed_request(bfqq, bfqd);
        }
-       bfq_finish_requeue_request_body(bfqq);
+       bfqq_request_freed(bfqq);
+       bfq_put_queue(bfqq);
+       RQ_BIC(rq)->requests--;
        spin_unlock_irqrestore(&bfqd->lock, flags);
 
        /*
@@ -6790,6 +6806,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
 
        bfqq_request_allocated(bfqq);
        bfqq->ref++;
+       bic->requests++;
        bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d",
                     rq, bfqq, bfqq->ref);
 
@@ -6886,8 +6903,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq)
        bfq_bfqq_expire(bfqd, bfqq, true, reason);
 
 schedule_dispatch:
-       spin_unlock_irqrestore(&bfqd->lock, flags);
        bfq_schedule_dispatch(bfqd);
+       spin_unlock_irqrestore(&bfqd->lock, flags);
 }
 
 /*
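
Several hunks above apply the standard pattern for a counter that is written under a lock but read locklessly: bfqd->queued is updated with WRITE_ONCE() under bfqd->lock and sampled with READ_ONCE() in bfq_has_work(), where a stale value costs at most one needless dispatch attempt. The pattern in generic form, with hypothetical names:

    struct example_sched {
            spinlock_t lock;
            int queued;             /* written under 'lock', read locklessly */
    };

    static void example_add_request(struct example_sched *s)
    {
            lockdep_assert_held(&s->lock);
            WRITE_ONCE(s->queued, s->queued + 1);   /* pairs with READ_ONCE */
    }

    static bool example_has_work(struct example_sched *s)
    {
            /* racy by design; worst case is one spurious dispatch */
            return READ_ONCE(s->queued) != 0;
    }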
index 3b83e3d1c2e5879af814292ba3d095f1ef3f926b..ca8177d7bf7c685c16b6aa3314d6c4d2155b2e46 100644 (file)
@@ -468,6 +468,7 @@ struct bfq_io_cq {
        struct bfq_queue *stable_merge_bfqq;
 
        bool stably_merged;     /* non splittable if true */
+       unsigned int requests;  /* Number of requests this process has in flight */
 };
 
 /**
@@ -928,6 +929,8 @@ struct bfq_group {
 
        /* reference counter (see comments in bfq_bic_update_cgroup) */
        int ref;
+       /* Is bfq_group still online? */
+       bool online;
 
        struct bfq_entity entity;
        struct bfq_sched_data sched_data;
@@ -979,6 +982,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
 void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
                     bool compensate, enum bfqq_expiration reason);
 void bfq_put_queue(struct bfq_queue *bfqq);
+void bfq_put_cooperator(struct bfq_queue *bfqq);
 void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
 void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
 void bfq_schedule_dispatch(struct bfq_data *bfqd);
@@ -1006,8 +1010,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg);
 void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio);
 void bfq_end_wr_async(struct bfq_data *bfqd);
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
-                                    struct blkcg *blkcg);
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio);
 struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
 struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
 struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
@@ -1100,13 +1103,13 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
                break;                                                  \
        bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH);           \
        blk_add_cgroup_trace_msg((bfqd)->queue,                         \
-                       bfqg_to_blkg(bfqq_group(bfqq))->blkcg,          \
+                       &bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css,    \
                        "%s " fmt, pid_str, ##args);                    \
 } while (0)
 
 #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {                    \
        blk_add_cgroup_trace_msg((bfqd)->queue,                         \
-               bfqg_to_blkg(bfqg)->blkcg, fmt, ##args);                \
+               &bfqg_to_blkg(bfqg)->blkcg->css, fmt, ##args);          \
 } while (0)
 
 #else /* CONFIG_BFQ_GROUP_IOSCHED */
index cdd7b2915c532c1590abafe0019594402b98cbc3..a3893d80dccc9a45443df45eebf707759d070e1e 100644 (file)
@@ -224,24 +224,13 @@ EXPORT_SYMBOL(bio_uninit);
 static void bio_free(struct bio *bio)
 {
        struct bio_set *bs = bio->bi_pool;
-       void *p;
-
-       bio_uninit(bio);
+       void *p = bio;
 
-       if (bs) {
-               bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
+       WARN_ON_ONCE(!bs);
 
-               /*
-                * If we have front padding, adjust the bio pointer before freeing
-                */
-               p = bio;
-               p -= bs->front_pad;
-
-               mempool_free(p, &bs->bio_pool);
-       } else {
-               /* Bio was allocated by bio_kmalloc() */
-               kfree(bio);
-       }
+       bio_uninit(bio);
+       bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
+       mempool_free(p - bs->front_pad, &bs->bio_pool);
 }
 
 /*
@@ -419,6 +408,28 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
        queue_work(bs->rescue_workqueue, &bs->rescue_work);
 }
 
+static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
+               unsigned short nr_vecs, unsigned int opf, gfp_t gfp,
+               struct bio_set *bs)
+{
+       struct bio_alloc_cache *cache;
+       struct bio *bio;
+
+       cache = per_cpu_ptr(bs->cache, get_cpu());
+       if (!cache->free_list) {
+               put_cpu();
+               return NULL;
+       }
+       bio = cache->free_list;
+       cache->free_list = bio->bi_next;
+       cache->nr--;
+       put_cpu();
+
+       bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs, opf);
+       bio->bi_pool = bs;
+       return bio;
+}
+
 /**
  * bio_alloc_bioset - allocate a bio for I/O
  * @bdev:      block device to allocate the bio for (can be %NULL)
@@ -451,6 +462,9 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
  * submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
  * for per bio allocations.
  *
+ * If REQ_ALLOC_CACHE is set, the final put of the bio MUST be done from process
+ * context, not hard/soft IRQ.
+ *
  * Returns: Pointer to new bio on success, NULL on failure.
  */
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
@@ -465,6 +479,21 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
        if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
                return NULL;
 
+       if (opf & REQ_ALLOC_CACHE) {
+               if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
+                       bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
+                                                    gfp_mask, bs);
+                       if (bio)
+                               return bio;
+                       /*
+                        * No cached bio available; the bio returned below is
+                        * marked with REQ_ALLOC_CACHE to participate in the
+                        * per-cpu alloc cache.
+                        */
+               } else {
+                       opf &= ~REQ_ALLOC_CACHE;
+               }
+       }
+
        /*
         * submit_bio_noacct() converts recursion to iteration; this means if
         * we're running beneath it, any bios we allocate and submit will not be
@@ -528,28 +557,28 @@ err_free:
 EXPORT_SYMBOL(bio_alloc_bioset);
 
 /**
- * bio_kmalloc - kmalloc a bio for I/O
+ * bio_kmalloc - kmalloc a bio
+ * @nr_vecs:   number of bio_vecs to allocate
  * @gfp_mask:   the GFP_* mask given to the slab allocator
- * @nr_iovecs: number of iovecs to pre-allocate
  *
- * Use kmalloc to allocate and initialize a bio.
+ * Use kmalloc to allocate a bio (including bvecs).  The bio must be initialized
+ * using bio_init() before use.  To free a bio returned from this function use
+ * kfree() after calling bio_uninit().  A bio returned from this function can
+ * be reused by calling bio_uninit() before calling bio_init() again.
+ *
+ * Note that unlike bio_alloc() or bio_alloc_bioset(), allocations from this
+ * function are not backed by a mempool and can fail.  Do not use this function
+ * for allocations in the file system I/O path.
  *
  * Returns: Pointer to new bio on success, NULL on failure.
  */
-struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
+struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
 {
        struct bio *bio;
 
-       if (nr_iovecs > UIO_MAXIOV)
-               return NULL;
-
-       bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
-       if (unlikely(!bio))
+       if (nr_vecs > UIO_MAXIOV)
                return NULL;
-       bio_init(bio, NULL, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs,
-                0);
-       bio->bi_pool = NULL;
-       return bio;
+       return kmalloc(struct_size(bio, bi_inline_vecs, nr_vecs), gfp_mask);
 }
 EXPORT_SYMBOL(bio_kmalloc);
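
Following the contract documented above, a caller of the new bio_kmalloc() now owns initialization and freeing; a sketch of the implied sequence (bdev and a non-zero nr_vecs are assumed to come from the caller):

    struct bio *bio;

    bio = bio_kmalloc(nr_vecs, GFP_NOIO);
    if (!bio)
            return -ENOMEM;
    bio_init(bio, bdev, bio->bi_inline_vecs, nr_vecs, REQ_OP_READ);

    /* ... add pages, submit, wait ... */

    bio_uninit(bio);
    kfree(bio);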
 
@@ -711,7 +740,7 @@ void bio_put(struct bio *bio)
                        return;
        }
 
-       if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
+       if (bio->bi_opf & REQ_ALLOC_CACHE) {
                struct bio_alloc_cache *cache;
 
                bio_uninit(bio);
@@ -732,14 +761,15 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
        bio_set_flag(bio, BIO_CLONED);
        if (bio_flagged(bio_src, BIO_THROTTLED))
                bio_set_flag(bio, BIO_THROTTLED);
-       if (bio->bi_bdev == bio_src->bi_bdev &&
-           bio_flagged(bio_src, BIO_REMAPPED))
-               bio_set_flag(bio, BIO_REMAPPED);
        bio->bi_ioprio = bio_src->bi_ioprio;
        bio->bi_iter = bio_src->bi_iter;
 
-       bio_clone_blkg_association(bio, bio_src);
-       blkcg_bio_issue_init(bio);
+       if (bio->bi_bdev) {
+               if (bio->bi_bdev == bio_src->bi_bdev &&
+                   bio_flagged(bio_src, BIO_REMAPPED))
+                       bio_set_flag(bio, BIO_REMAPPED);
+               bio_clone_blkg_association(bio, bio_src);
+       }
 
        if (bio_crypt_clone(bio, bio_src, gfp) < 0)
                return -ENOMEM;
@@ -1598,7 +1628,7 @@ EXPORT_SYMBOL(bio_split);
 void bio_trim(struct bio *bio, sector_t offset, sector_t size)
 {
        if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS ||
-                        offset + size > bio->bi_iter.bi_size))
+                        offset + size > bio_sectors(bio)))
                return;
 
        size <<= 9;
@@ -1727,55 +1757,13 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
                flags |= BIOSET_NEED_BVECS;
        if (src->rescue_workqueue)
                flags |= BIOSET_NEED_RESCUER;
+       if (src->cache)
+               flags |= BIOSET_PERCPU_CACHE;
 
        return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags);
 }
 EXPORT_SYMBOL(bioset_init_from_src);
 
-/**
- * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
- * @kiocb:     kiocb describing the IO
- * @bdev:      block device to allocate the bio for (can be %NULL)
- * @nr_vecs:   number of iovecs to pre-allocate
- * @opf:       operation and flags for bio
- * @bs:                bio_set to allocate from
- *
- * Description:
- *    Like @bio_alloc_bioset, but pass in the kiocb. The kiocb is only
- *    used to check if we should dip into the per-cpu bio_set allocation
- *    cache. The allocation uses GFP_KERNEL internally. On return, the
- *    bio is marked BIO_PERCPU_CACHEABLE, and the final put of the bio
- *    MUST be done from process context, not hard/soft IRQ.
- *
- */
-struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
-               unsigned short nr_vecs, unsigned int opf, struct bio_set *bs)
-{
-       struct bio_alloc_cache *cache;
-       struct bio *bio;
-
-       if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
-               return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
-
-       cache = per_cpu_ptr(bs->cache, get_cpu());
-       if (cache->free_list) {
-               bio = cache->free_list;
-               cache->free_list = bio->bi_next;
-               cache->nr--;
-               put_cpu();
-               bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL,
-                        nr_vecs, opf);
-               bio->bi_pool = bs;
-               bio_set_flag(bio, BIO_PERCPU_CACHE);
-               return bio;
-       }
-       put_cpu();
-       bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
-       bio_set_flag(bio, BIO_PERCPU_CACHE);
-       return bio;
-}
-EXPORT_SYMBOL_GPL(bio_alloc_kiocb);
-
 static int __init init_bio(void)
 {
        int i;
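
Taken together, the bio.c hunks fold the old bio_alloc_kiocb() fast path into bio_alloc_bioset(): callers now opt in per allocation by setting REQ_ALLOC_CACHE on a bio_set created with BIOSET_PERCPU_CACHE, instead of per kiocb via IOCB_ALLOC_CACHE. A hedged usage sketch ('example_bs' is a hypothetical bio_set):

    static struct bio_set example_bs;

    /* setup, e.g. on an init path */
    ret = bioset_init(&example_bs, 128, 0,
                      BIOSET_NEED_BVECS | BIOSET_PERCPU_CACHE);

    /* fast path: served from the per-cpu cache when a bio is available */
    bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ | REQ_ALLOC_CACHE,
                           GFP_KERNEL, &example_bs);

    /*
     * Per the comment in bio_alloc_bioset(), the final bio_put() of a
     * REQ_ALLOC_CACHE bio must happen in process context.
     */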
diff --git a/block/blk-cgroup-fc-appid.c b/block/blk-cgroup-fc-appid.c
new file mode 100644 (file)
index 0000000..760a2e1
--- /dev/null
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "blk-cgroup.h"
+
+/**
+ * blkcg_set_fc_appid - set the fc_app_id field associated with a blkcg
+ * @app_id: application identifier
+ * @cgrp_id: cgroup id
+ * @app_id_len: size of application identifier
+ */
+int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
+{
+       struct cgroup *cgrp;
+       struct cgroup_subsys_state *css;
+       struct blkcg *blkcg;
+       int ret  = 0;
+
+       if (app_id_len > FC_APPID_LEN)
+               return -EINVAL;
+
+       cgrp = cgroup_get_from_id(cgrp_id);
+       if (!cgrp)
+               return -ENOENT;
+       css = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
+       if (!css) {
+               ret = -ENOENT;
+               goto out_cgrp_put;
+       }
+       blkcg = css_to_blkcg(css);
+       /*
+        * There is a slight race condition on setting the appid.
+        * Worst case an I/O may not find the right id.
+        * This is no different from the I/O we let pass while obtaining
+        * the vmid from the fabric.
+        * Adding the overhead of a lock is not necessary.
+        */
+       strlcpy(blkcg->fc_app_id, app_id, app_id_len);
+       css_put(css);
+out_cgrp_put:
+       cgroup_put(cgrp);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(blkcg_set_fc_appid);
+
+/**
+ * blkcg_get_fc_appid - get the fc app identifier associated with a bio
+ * @bio: target bio
+ *
+ * On success return the fc_app_id, on failure return NULL
+ */
+char *blkcg_get_fc_appid(struct bio *bio)
+{
+       if (!bio->bi_blkg || bio->bi_blkg->blkcg->fc_app_id[0] == '\0')
+               return NULL;
+       return bio->bi_blkg->blkcg->fc_app_id;
+}
+EXPORT_SYMBOL_GPL(blkcg_get_fc_appid);
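
A hedged sketch of how an FC driver might consume the new helper when building a command for a bio-backed request; 'fc_cmd' and 'fc_cmd_set_appid' are hypothetical driver-side names, and only blkcg_get_fc_appid() comes from the file above:

    static void example_prep_cmd(struct fc_cmd *cmd, struct bio *bio)
    {
            char *app_id = blkcg_get_fc_appid(bio);

            if (app_id)
                    fc_cmd_set_appid(cmd, app_id);  /* tag the I/O */
    }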
index 8dfe62786cd5fa5eae77afed283fcff238070835..40161a3f68d04abbed2facae6099108d93290603 100644 (file)
@@ -59,6 +59,23 @@ static struct workqueue_struct *blkcg_punt_bio_wq;
 
 #define BLKG_DESTROY_BATCH_SIZE  64
 
+/**
+ * blkcg_css - find the current css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This may return a dying css, so it is up to the caller to use tryget logic
+ * to confirm it is alive and well.
+ */
+static struct cgroup_subsys_state *blkcg_css(void)
+{
+       struct cgroup_subsys_state *css;
+
+       css = kthread_blkcg();
+       if (css)
+               return css;
+       return task_css(current, io_cgrp_id);
+}
+
 static bool blkcg_policy_enabled(struct request_queue *q,
                                 const struct blkcg_policy *pol)
 {
@@ -155,6 +172,33 @@ static void blkg_async_bio_workfn(struct work_struct *work)
                blk_finish_plug(&plug);
 }
 
+/**
+ * bio_blkcg_css - return the blkcg CSS associated with a bio
+ * @bio: target bio
+ *
+ * This returns the CSS for the blkcg associated with a bio, or %NULL if not
+ * associated. Callers are expected to either handle %NULL or know association
+ * has been done prior to calling this.
+ */
+struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
+{
+       if (!bio || !bio->bi_blkg)
+               return NULL;
+       return &bio->bi_blkg->blkcg->css;
+}
+EXPORT_SYMBOL_GPL(bio_blkcg_css);
+
+/**
+ * blkcg_parent - get the parent of a blkcg
+ * @blkcg: blkcg of interest
+ *
+ * Return the parent blkcg of @blkcg.  Can be called anytime.
+ */
+static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
+{
+       return css_to_blkcg(blkcg->css.parent);
+}
+
 /**
  * blkg_alloc - allocate a blkg
  * @blkcg: block cgroup the new blkg is associated with
@@ -254,7 +298,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
        struct blkcg_gq *blkg;
        int i, ret;
 
-       WARN_ON_ONCE(!rcu_read_lock_held());
        lockdep_assert_held(&q->queue_lock);
 
        /* request_queue is dying, do not create/recreate a blkg */
@@ -905,7 +948,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
 {
        struct blkg_iostat_set *bis = &blkg->iostat;
        u64 rbytes, wbytes, rios, wios, dbytes, dios;
-       bool has_stats = false;
        const char *dname;
        unsigned seq;
        int i;
@@ -931,14 +973,12 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
        } while (u64_stats_fetch_retry(&bis->sync, seq));
 
        if (rbytes || wbytes || rios || wios) {
-               has_stats = true;
                seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
                        rbytes, wbytes, rios, wios,
                        dbytes, dios);
        }
 
        if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
-               has_stats = true;
                seq_printf(s, " use_delay=%d delay_nsec=%llu",
                        atomic_read(&blkg->use_delay),
                        atomic64_read(&blkg->delay_nsec));
@@ -950,12 +990,10 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
                if (!blkg->pd[i] || !pol->pd_stat_fn)
                        continue;
 
-               if (pol->pd_stat_fn(blkg->pd[i], s))
-                       has_stats = true;
+               pol->pd_stat_fn(blkg->pd[i], s);
        }
 
-       if (has_stats)
-               seq_printf(s, "\n");
+       seq_puts(s, "\n");
 }
 
 static int blkcg_print_stat(struct seq_file *sf, void *v)
@@ -994,6 +1032,13 @@ static struct cftype blkcg_legacy_files[] = {
        { }     /* terminate */
 };
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css)
+{
+       return &css_to_blkcg(css)->cgwb_list;
+}
+#endif
+
 /*
  * blkcg destruction is a three-stage process.
  *
@@ -1015,25 +1060,6 @@ static struct cftype blkcg_legacy_files[] = {
  *    This finally frees the blkcg.
  */
 
-/**
- * blkcg_css_offline - cgroup css_offline callback
- * @css: css of interest
- *
- * This function is called when @css is about to go away.  Here the cgwbs are
- * offlined first and only once writeback associated with the blkcg has
- * finished do we start step 2 (see above).
- */
-static void blkcg_css_offline(struct cgroup_subsys_state *css)
-{
-       struct blkcg *blkcg = css_to_blkcg(css);
-
-       /* this prevents anyone from attaching or migrating to this blkcg */
-       wb_blkcg_offline(blkcg);
-
-       /* put the base online pin allowing step 2 to be triggered */
-       blkcg_unpin_online(blkcg);
-}
-
 /**
  * blkcg_destroy_blkgs - responsible for shooting down blkgs
  * @blkcg: blkcg of interest
@@ -1045,7 +1071,7 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
  *
  * This is the blkcg counterpart of ioc_release_fn().
  */
-void blkcg_destroy_blkgs(struct blkcg *blkcg)
+static void blkcg_destroy_blkgs(struct blkcg *blkcg)
 {
        might_sleep();
 
@@ -1075,6 +1101,57 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg)
        spin_unlock_irq(&blkcg->lock);
 }
 
+/**
+ * blkcg_pin_online - pin online state
+ * @blkcg_css: blkcg of interest
+ *
+ * While pinned, a blkcg is kept online.  This is primarily used to
+ * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
+ * while an associated cgwb is still active.
+ */
+void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css)
+{
+       refcount_inc(&css_to_blkcg(blkcg_css)->online_pin);
+}
+
+/**
+ * blkcg_unpin_online - unpin online state
+ * @blkcg_css: blkcg of interest
+ *
+ * This is primarily used to impedance-match blkg and cgwb lifetimes so
+ * that blkg doesn't go offline while an associated cgwb is still active.
+ * When this count goes to zero, all active cgwbs have finished so the
+ * blkcg can continue destruction by calling blkcg_destroy_blkgs().
+ */
+void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css)
+{
+       struct blkcg *blkcg = css_to_blkcg(blkcg_css);
+
+       do {
+               if (!refcount_dec_and_test(&blkcg->online_pin))
+                       break;
+               blkcg_destroy_blkgs(blkcg);
+               blkcg = blkcg_parent(blkcg);
+       } while (blkcg);
+}
+
+/**
+ * blkcg_css_offline - cgroup css_offline callback
+ * @css: css of interest
+ *
+ * This function is called when @css is about to go away.  Here the cgwbs are
+ * offlined first and only once writeback associated with the blkcg has
+ * finished do we start step 2 (see above).
+ */
+static void blkcg_css_offline(struct cgroup_subsys_state *css)
+{
+       /* this prevents anyone from attaching or migrating to this blkcg */
+       wb_blkcg_offline(css);
+
+       /* put the base online pin allowing step 2 to be triggered */
+       blkcg_unpin_online(css);
+}
+
 static void blkcg_css_free(struct cgroup_subsys_state *css)
 {
        struct blkcg *blkcg = css_to_blkcg(css);
@@ -1163,8 +1240,7 @@ unlock:
 
 static int blkcg_css_online(struct cgroup_subsys_state *css)
 {
-       struct blkcg *blkcg = css_to_blkcg(css);
-       struct blkcg *parent = blkcg_parent(blkcg);
+       struct blkcg *parent = blkcg_parent(css_to_blkcg(css));
 
        /*
         * blkcg_pin_online() is used to delay blkcg offline so that blkgs
@@ -1172,7 +1248,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
         * parent so that offline always happens towards the root.
         */
        if (parent)
-               blkcg_pin_online(parent);
+               blkcg_pin_online(css);
        return 0;
 }
 
@@ -1201,14 +1277,13 @@ int blkcg_init_queue(struct request_queue *q)
        preloaded = !radix_tree_preload(GFP_KERNEL);
 
        /* Make sure the root blkg exists. */
-       rcu_read_lock();
+       /* spin_lock_irq() can serve as an RCU read-side critical section. */
        spin_lock_irq(&q->queue_lock);
        blkg = blkg_create(&blkcg_root, q, new_blkg);
        if (IS_ERR(blkg))
                goto err_unlock;
        q->root_blkg = blkg;
        spin_unlock_irq(&q->queue_lock);
-       rcu_read_unlock();
 
        if (preloaded)
                radix_tree_preload_end();
@@ -1234,7 +1309,6 @@ err_destroy_all:
        return ret;
 err_unlock:
        spin_unlock_irq(&q->queue_lock);
-       rcu_read_unlock();
        if (preloaded)
                radix_tree_preload_end();
        return PTR_ERR(blkg);
@@ -1726,7 +1800,6 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
 void blkcg_maybe_throttle_current(void)
 {
        struct request_queue *q = current->throttle_queue;
-       struct cgroup_subsys_state *css;
        struct blkcg *blkcg;
        struct blkcg_gq *blkg;
        bool use_memdelay = current->use_memdelay;
@@ -1738,12 +1811,7 @@ void blkcg_maybe_throttle_current(void)
        current->use_memdelay = false;
 
        rcu_read_lock();
-       css = kthread_blkcg();
-       if (css)
-               blkcg = css_to_blkcg(css);
-       else
-               blkcg = css_to_blkcg(task_css(current, io_cgrp_id));
-
+       blkcg = css_to_blkcg(blkcg_css());
        if (!blkcg)
                goto out;
        blkg = blkg_lookup(blkcg, q);
@@ -1889,7 +1957,7 @@ void bio_associate_blkg(struct bio *bio)
        rcu_read_lock();
 
        if (bio->bi_blkg)
-               css = &bio_blkcg(bio)->css;
+               css = bio_blkcg_css(bio);
        else
                css = blkcg_css();
 
@@ -1950,6 +2018,22 @@ void blk_cgroup_bio_start(struct bio *bio)
        put_cpu();
 }
 
+bool blk_cgroup_congested(void)
+{
+       struct cgroup_subsys_state *css;
+       bool ret = false;
+
+       rcu_read_lock();
+       for (css = blkcg_css(); css; css = css->parent) {
+               if (atomic_read(&css->cgroup->congestion_count)) {
+                       ret = true;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+       return ret;
+}
+
 static int __init blkcg_init(void)
 {
        blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
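
With blkcg_pin_online()/blkcg_unpin_online() now taking a css and living in blk-cgroup.c, the writeback side is expected to pair them around each cgwb's lifetime, roughly as below (a sketch, not the exact cgwb call sites):

    blkcg_pin_online(blkcg_css);    /* cgwb created: keep the blkcg online */

    /* ... writeback runs against the cgwb ... */

    /* last pin dropped: destroys blkgs and walks toward the root */
    blkcg_unpin_online(blkcg_css);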
index 47e1e38390c965a5b4d796616418b7cdbdae09f3..d4de0a35e0660aff2b0358499e807022a65cd07b 100644 (file)
  */
 
 #include <linux/blk-cgroup.h>
+#include <linux/cgroup.h>
+#include <linux/kthread.h>
 #include <linux/blk-mq.h>
 
+struct blkcg_gq;
+struct blkg_policy_data;
+
+
 /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
 #define BLKG_STAT_CPU_BATCH    (INT_MAX / 2)
 
 #ifdef CONFIG_BLK_CGROUP
 
+enum blkg_iostat_type {
+       BLKG_IOSTAT_READ,
+       BLKG_IOSTAT_WRITE,
+       BLKG_IOSTAT_DISCARD,
+
+       BLKG_IOSTAT_NR,
+};
+
+struct blkg_iostat {
+       u64                             bytes[BLKG_IOSTAT_NR];
+       u64                             ios[BLKG_IOSTAT_NR];
+};
+
+struct blkg_iostat_set {
+       struct u64_stats_sync           sync;
+       struct blkg_iostat              cur;
+       struct blkg_iostat              last;
+};
+
+/* association between a blk cgroup and a request queue */
+struct blkcg_gq {
+       /* Pointer to the associated request_queue */
+       struct request_queue            *q;
+       struct list_head                q_node;
+       struct hlist_node               blkcg_node;
+       struct blkcg                    *blkcg;
+
+       /* all non-root blkcg_gq's are guaranteed to have access to parent */
+       struct blkcg_gq                 *parent;
+
+       /* reference count */
+       struct percpu_ref               refcnt;
+
+       /* is this blkg online? protected by both blkcg and q locks */
+       bool                            online;
+
+       struct blkg_iostat_set __percpu *iostat_cpu;
+       struct blkg_iostat_set          iostat;
+
+       struct blkg_policy_data         *pd[BLKCG_MAX_POLS];
+
+       spinlock_t                      async_bio_lock;
+       struct bio_list                 async_bios;
+       union {
+               struct work_struct      async_bio_work;
+               struct work_struct      free_work;
+       };
+
+       atomic_t                        use_delay;
+       atomic64_t                      delay_nsec;
+       atomic64_t                      delay_start;
+       u64                             last_delay;
+       int                             last_use;
+
+       struct rcu_head                 rcu_head;
+};
+
+struct blkcg {
+       struct cgroup_subsys_state      css;
+       spinlock_t                      lock;
+       refcount_t                      online_pin;
+
+       struct radix_tree_root          blkg_tree;
+       struct blkcg_gq __rcu           *blkg_hint;
+       struct hlist_head               blkg_list;
+
+       struct blkcg_policy_data        *cpd[BLKCG_MAX_POLS];
+
+       struct list_head                all_blkcgs_node;
+#ifdef CONFIG_BLK_CGROUP_FC_APPID
+       char                            fc_app_id[FC_APPID_LEN];
+#endif
+#ifdef CONFIG_CGROUP_WRITEBACK
+       struct list_head                cgwb_list;
+#endif
+};
+
+static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
+{
+       return css ? container_of(css, struct blkcg, css) : NULL;
+}
+
 /*
  * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
  * request_queue (q).  This is used by blkcg policies which need to track
@@ -63,7 +151,7 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
-typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
+typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
                                struct seq_file *s);
 
 struct blkcg_policy {
@@ -122,53 +210,15 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
                   char *input, struct blkg_conf_ctx *ctx);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
-/**
- * blkcg_css - find the current css
- *
- * Find the css associated with either the kthread or the current task.
- * This may return a dying css, so it is up to the caller to use tryget logic
- * to confirm it is alive and well.
- */
-static inline struct cgroup_subsys_state *blkcg_css(void)
-{
-       struct cgroup_subsys_state *css;
-
-       css = kthread_blkcg();
-       if (css)
-               return css;
-       return task_css(current, io_cgrp_id);
-}
-
-/**
- * __bio_blkcg - internal, inconsistent version to get blkcg
- *
- * DO NOT USE.
- * This function is inconsistent and consequently is dangerous to use.  The
- * first part of the function returns a blkcg where a reference is owned by the
- * bio.  This means it does not need to be rcu protected as it cannot go away
- * with the bio owning a reference to it.  However, the latter potentially gets
- * it from task_css().  This can race against task migration and the cgroup
- * dying.  It is also semantically different as it must be called rcu protected
- * and is susceptible to failure when trying to get a reference to it.
- * Therefore, it is not ok to assume that *_get() will always succeed on the
- * blkcg returned here.
- */
-static inline struct blkcg *__bio_blkcg(struct bio *bio)
-{
-       if (bio && bio->bi_blkg)
-               return bio->bi_blkg->blkcg;
-       return css_to_blkcg(blkcg_css());
-}
-
 /**
  * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
  * @return: true if this bio needs to be submitted with the root blkg context.
  *
  * In order to avoid priority inversions we sometimes need to issue a bio as if
  * it were attached to the root blkg, and then backcharge to the actual owning
- * blkg.  The idea is we do bio_blkcg() to look up the actual context for the
- * bio and attach the appropriate blkg to the bio.  Then we call this helper and
- * if it is true run with the root blkg for that queue and then do any
+ * blkg.  The idea is we do bio_blkcg_css() to look up the actual context for
+ * the bio and attach the appropriate blkg to the bio.  Then we call this helper
+ * and, if it returns true, run with the root blkg for that queue and then do any
  * backcharging to the originating cgroup once the io is complete.
  */
 static inline bool bio_issue_as_root_blkg(struct bio *bio)
@@ -457,7 +507,8 @@ struct blkcg_policy_data {
 struct blkcg_policy {
 };
 
-#ifdef CONFIG_BLOCK
+struct blkcg {
+};
 
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
 static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
@@ -471,8 +522,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
 static inline void blkcg_deactivate_policy(struct request_queue *q,
                                           const struct blkcg_policy *pol) { }
 
-static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
-
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
                                                  struct blkcg_policy *pol) { return NULL; }
 static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
@@ -488,7 +537,6 @@ static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { r
 #define blk_queue_for_each_rl(rl, q)   \
        for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
 
-#endif /* CONFIG_BLOCK */
 #endif /* CONFIG_BLK_CGROUP */
 
 #endif /* _BLK_CGROUP_PRIVATE_H */
index 937bb6b863317a96908ae382c61e90fc1782b1b7..80fa73c419a99646de341a64dcc0c49d1b9ec89f 100644 (file)
@@ -50,7 +50,6 @@
 #include "blk-pm.h"
 #include "blk-cgroup.h"
 #include "blk-throttle.h"
-#include "blk-rq-qos.h"
 
 struct dentry *blk_debugfs_root;
 
@@ -315,9 +314,6 @@ void blk_cleanup_queue(struct request_queue *q)
         */
        blk_freeze_queue(q);
 
-       /* cleanup rq qos structures for queue without disk */
-       rq_qos_exit(q);
-
        blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
 
        blk_sync_queue(q);
@@ -592,10 +588,9 @@ static inline int bio_check_eod(struct bio *bio)
            (nr_sectors > maxsector ||
             bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
                pr_info_ratelimited("%s: attempt to access beyond end of device\n"
-                                   "%pg: rw=%d, want=%llu, limit=%llu\n",
-                                   current->comm,
-                                   bio->bi_bdev, bio->bi_opf,
-                                   bio_end_sector(bio), maxsector);
+                                   "%pg: rw=%d, sector=%llu, nr_sectors=%u, limit=%llu\n",
+                                   current->comm, bio->bi_bdev, bio->bi_opf,
+                                   bio->bi_iter.bi_sector, nr_sectors, maxsector);
                return -EIO;
        }
        return 0;
@@ -820,11 +815,11 @@ void submit_bio_noacct(struct bio *bio)
 
        switch (bio_op(bio)) {
        case REQ_OP_DISCARD:
-               if (!blk_queue_discard(q))
+               if (!bdev_max_discard_sectors(bdev))
                        goto not_supported;
                break;
        case REQ_OP_SECURE_ERASE:
-               if (!blk_queue_secure_erase(q))
+               if (!bdev_max_secure_erase_sectors(bdev))
                        goto not_supported;
                break;
        case REQ_OP_ZONE_APPEND:
@@ -893,19 +888,11 @@ void submit_bio(struct bio *bio)
        if (blkcg_punt_bio_submit(bio))
                return;
 
-       /*
-        * If it's a regular read/write or a barrier with data attached,
-        * go through the normal accounting stuff before submission.
-        */
-       if (bio_has_data(bio)) {
-               unsigned int count = bio_sectors(bio);
-
-               if (op_is_write(bio_op(bio))) {
-                       count_vm_events(PGPGOUT, count);
-               } else {
-                       task_io_account_read(bio->bi_iter.bi_size);
-                       count_vm_events(PGPGIN, count);
-               }
+       if (bio_op(bio) == REQ_OP_READ) {
+               task_io_account_read(bio->bi_iter.bi_size);
+               count_vm_events(PGPGIN, bio_sectors(bio));
+       } else if (bio_op(bio) == REQ_OP_WRITE) {
+               count_vm_events(PGPGOUT, bio_sectors(bio));
        }
 
        /*
@@ -1022,21 +1009,22 @@ again:
        }
 }
 
-static unsigned long __part_start_io_acct(struct block_device *part,
-                                         unsigned int sectors, unsigned int op,
-                                         unsigned long start_time)
+unsigned long bdev_start_io_acct(struct block_device *bdev,
+                                unsigned int sectors, unsigned int op,
+                                unsigned long start_time)
 {
        const int sgrp = op_stat_group(op);
 
        part_stat_lock();
-       update_io_ticks(part, start_time, false);
-       part_stat_inc(part, ios[sgrp]);
-       part_stat_add(part, sectors[sgrp], sectors);
-       part_stat_local_inc(part, in_flight[op_is_write(op)]);
+       update_io_ticks(bdev, start_time, false);
+       part_stat_inc(bdev, ios[sgrp]);
+       part_stat_add(bdev, sectors[sgrp], sectors);
+       part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
        part_stat_unlock();
 
        return start_time;
 }
+EXPORT_SYMBOL(bdev_start_io_acct);
 
 /**
  * bio_start_io_acct_time - start I/O accounting for bio based drivers
@@ -1045,8 +1033,8 @@ static unsigned long __part_start_io_acct(struct block_device *part,
  */
 void bio_start_io_acct_time(struct bio *bio, unsigned long start_time)
 {
-       __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
-                            bio_op(bio), start_time);
+       bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+                          bio_op(bio), start_time);
 }
 EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
 
@@ -1058,46 +1046,33 @@ EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
  */
 unsigned long bio_start_io_acct(struct bio *bio)
 {
-       return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
-                                   bio_op(bio), jiffies);
+       return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+                                 bio_op(bio), jiffies);
 }
 EXPORT_SYMBOL_GPL(bio_start_io_acct);
 
-unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
-                                unsigned int op)
-{
-       return __part_start_io_acct(disk->part0, sectors, op, jiffies);
-}
-EXPORT_SYMBOL(disk_start_io_acct);
-
-static void __part_end_io_acct(struct block_device *part, unsigned int op,
-                              unsigned long start_time)
+void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
+                     unsigned long start_time)
 {
        const int sgrp = op_stat_group(op);
        unsigned long now = READ_ONCE(jiffies);
        unsigned long duration = now - start_time;
 
        part_stat_lock();
-       update_io_ticks(part, now, true);
-       part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
-       part_stat_local_dec(part, in_flight[op_is_write(op)]);
+       update_io_ticks(bdev, now, true);
+       part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
+       part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
        part_stat_unlock();
 }
+EXPORT_SYMBOL(bdev_end_io_acct);
 
 void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
-               struct block_device *orig_bdev)
+                             struct block_device *orig_bdev)
 {
-       __part_end_io_acct(orig_bdev, bio_op(bio), start_time);
+       bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
 }
 EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
 
-void disk_end_io_acct(struct gendisk *disk, unsigned int op,
-                     unsigned long start_time)
-{
-       __part_end_io_acct(disk->part0, op, start_time);
-}
-EXPORT_SYMBOL(disk_end_io_acct);
-
 /**
  * blk_lld_busy - Check if underlying low-level drivers of a device are busy
  * @q : the queue of the device being checked
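
The rename from __part_start_io_acct()/__part_end_io_acct() to an exported bdev_start_io_acct()/bdev_end_io_acct() pair lets bio-less drivers account I/O against a block_device directly. A minimal sketch of a caller; everything named mydrv_* below is hypothetical, only the two accounting helpers come from this diff:

        #include <linux/blkdev.h>
        #include <linux/jiffies.h>

        struct mydrv_cmd {                      /* hypothetical driver command */
                struct block_device     *bdev;
                unsigned int            op;     /* REQ_OP_READ or REQ_OP_WRITE */
                unsigned long           start;
        };

        static void mydrv_issue(struct mydrv_cmd *cmd, unsigned int sectors)
        {
                /* bumps ios[], sectors[] and in_flight[] under part_stat_lock() */
                cmd->start = bdev_start_io_acct(cmd->bdev, sectors, cmd->op,
                                                jiffies);
                /* ... queue the command to hardware ... */
        }

        static void mydrv_done(struct mydrv_cmd *cmd)
        {
                /* adds the jiffies delta to nsecs[] and drops in_flight[] */
                bdev_end_io_acct(cmd->bdev, cmd->op, cmd->start);
        }
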
index 7c854584b52b506fece323d6275948a10f9a7849..621abd1b0e4d329655b2ea276bed7005e93522ef 100644 (file)
@@ -152,23 +152,25 @@ static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio)
 
        src_bio->bi_status = enc_bio->bi_status;
 
-       bio_put(enc_bio);
+       bio_uninit(enc_bio);
+       kfree(enc_bio);
        bio_endio(src_bio);
 }
 
 static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
 {
+       unsigned int nr_segs = bio_segments(bio_src);
        struct bvec_iter iter;
        struct bio_vec bv;
        struct bio *bio;
 
-       bio = bio_kmalloc(GFP_NOIO, bio_segments(bio_src));
+       bio = bio_kmalloc(nr_segs, GFP_NOIO);
        if (!bio)
                return NULL;
-       bio->bi_bdev            = bio_src->bi_bdev;
+       bio_init(bio, bio_src->bi_bdev, bio->bi_inline_vecs, nr_segs,
+                bio_src->bi_opf);
        if (bio_flagged(bio_src, BIO_REMAPPED))
                bio_set_flag(bio, BIO_REMAPPED);
-       bio->bi_opf             = bio_src->bi_opf;
        bio->bi_ioprio          = bio_src->bi_ioprio;
        bio->bi_iter.bi_sector  = bio_src->bi_iter.bi_sector;
        bio->bi_iter.bi_size    = bio_src->bi_iter.bi_size;
@@ -177,7 +179,6 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
                bio->bi_io_vec[bio->bi_vcnt++] = bv;
 
        bio_clone_blkg_association(bio, bio_src);
-       blkcg_bio_issue_init(bio);
 
        return bio;
 }
@@ -363,8 +364,8 @@ out_release_keyslot:
        blk_crypto_put_keyslot(slot);
 out_put_enc_bio:
        if (enc_bio)
-               bio_put(enc_bio);
-
+               bio_uninit(enc_bio);
+       kfree(enc_bio);
        return ret;
 }
 
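
The calling-convention change visible here recurs throughout the series: bio_kmalloc() now takes the vec count first and no longer initializes the bio, so callers pair it with bio_init() on the inline vecs and free with bio_uninit() + kfree() rather than bio_put(). A hedged sketch of the pattern, with hypothetical helper names:

        #include <linux/bio.h>
        #include <linux/slab.h>

        static struct bio *example_alloc_bio(struct block_device *bdev,
                                             unsigned int nr_vecs)
        {
                struct bio *bio;

                bio = bio_kmalloc(nr_vecs, GFP_NOIO);
                if (!bio)
                        return NULL;
                bio_init(bio, bdev, bio->bi_inline_vecs, nr_vecs, REQ_OP_READ);
                return bio;
        }

        static void example_free_bio(struct bio *bio)
        {
                bio_uninit(bio);        /* drops blkg/integrity/crypt state */
                kfree(bio);             /* plain kfree, never bio_put() */
        }
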
index 70a0a3d680a3582e99ea1f85dd03b3ccb3f6ebc7..33a11ba971eafeb0ac9e022ffb5032ee06f133be 100644 (file)
@@ -533,8 +533,7 @@ struct ioc_gq {
 
        /* statistics */
        struct iocg_pcpu_stat __percpu  *pcpu_stat;
-       struct iocg_stat                local_stat;
-       struct iocg_stat                desc_stat;
+       struct iocg_stat                stat;
        struct iocg_stat                last_stat;
        u64                             last_stat_abs_vusage;
        u64                             usage_delta_us;
@@ -1371,7 +1370,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
                return true;
        } else {
                if (iocg->indelay_since) {
-                       iocg->local_stat.indelay_us += now->now - iocg->indelay_since;
+                       iocg->stat.indelay_us += now->now - iocg->indelay_since;
                        iocg->indelay_since = 0;
                }
                iocg->delay = 0;
@@ -1419,7 +1418,7 @@ static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay,
 
        /* if debt is paid in full, restore inuse */
        if (!iocg->abs_vdebt) {
-               iocg->local_stat.indebt_us += now->now - iocg->indebt_since;
+               iocg->stat.indebt_us += now->now - iocg->indebt_since;
                iocg->indebt_since = 0;
 
                propagate_weights(iocg, iocg->active, iocg->last_inuse,
@@ -1513,7 +1512,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt,
 
        if (!waitqueue_active(&iocg->waitq)) {
                if (iocg->wait_since) {
-                       iocg->local_stat.wait_us += now->now - iocg->wait_since;
+                       iocg->stat.wait_us += now->now - iocg->wait_since;
                        iocg->wait_since = 0;
                }
                return;
@@ -1641,11 +1640,30 @@ static void iocg_build_inner_walk(struct ioc_gq *iocg,
        }
 }
 
+/* propagate the deltas to the parent */
+static void iocg_flush_stat_upward(struct ioc_gq *iocg)
+{
+       if (iocg->level > 0) {
+               struct iocg_stat *parent_stat =
+                       &iocg->ancestors[iocg->level - 1]->stat;
+
+               parent_stat->usage_us +=
+                       iocg->stat.usage_us - iocg->last_stat.usage_us;
+               parent_stat->wait_us +=
+                       iocg->stat.wait_us - iocg->last_stat.wait_us;
+               parent_stat->indebt_us +=
+                       iocg->stat.indebt_us - iocg->last_stat.indebt_us;
+               parent_stat->indelay_us +=
+                       iocg->stat.indelay_us - iocg->last_stat.indelay_us;
+       }
+
+       iocg->last_stat = iocg->stat;
+}
+
 /* collect per-cpu counters and propagate the deltas to the parent */
-static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
+static void iocg_flush_stat_leaf(struct ioc_gq *iocg, struct ioc_now *now)
 {
        struct ioc *ioc = iocg->ioc;
-       struct iocg_stat new_stat;
        u64 abs_vusage = 0;
        u64 vusage_delta;
        int cpu;
@@ -1661,34 +1679,9 @@ static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
        iocg->last_stat_abs_vusage = abs_vusage;
 
        iocg->usage_delta_us = div64_u64(vusage_delta, ioc->vtime_base_rate);
-       iocg->local_stat.usage_us += iocg->usage_delta_us;
-
-       /* propagate upwards */
-       new_stat.usage_us =
-               iocg->local_stat.usage_us + iocg->desc_stat.usage_us;
-       new_stat.wait_us =
-               iocg->local_stat.wait_us + iocg->desc_stat.wait_us;
-       new_stat.indebt_us =
-               iocg->local_stat.indebt_us + iocg->desc_stat.indebt_us;
-       new_stat.indelay_us =
-               iocg->local_stat.indelay_us + iocg->desc_stat.indelay_us;
-
-       /* propagate the deltas to the parent */
-       if (iocg->level > 0) {
-               struct iocg_stat *parent_stat =
-                       &iocg->ancestors[iocg->level - 1]->desc_stat;
+       iocg->stat.usage_us += iocg->usage_delta_us;
 
-               parent_stat->usage_us +=
-                       new_stat.usage_us - iocg->last_stat.usage_us;
-               parent_stat->wait_us +=
-                       new_stat.wait_us - iocg->last_stat.wait_us;
-               parent_stat->indebt_us +=
-                       new_stat.indebt_us - iocg->last_stat.indebt_us;
-               parent_stat->indelay_us +=
-                       new_stat.indelay_us - iocg->last_stat.indelay_us;
-       }
-
-       iocg->last_stat = new_stat;
+       iocg_flush_stat_upward(iocg);
 }
 
 /* get stat counters ready for reading on all active iocgs */
@@ -1699,13 +1692,13 @@ static void iocg_flush_stat(struct list_head *target_iocgs, struct ioc_now *now)
 
        /* flush leaves and build inner node walk list */
        list_for_each_entry(iocg, target_iocgs, active_list) {
-               iocg_flush_stat_one(iocg, now);
+               iocg_flush_stat_leaf(iocg, now);
                iocg_build_inner_walk(iocg, &inner_walk);
        }
 
        /* keep flushing upwards by walking the inner list backwards */
        list_for_each_entry_safe_reverse(iocg, tiocg, &inner_walk, walk_list) {
-               iocg_flush_stat_one(iocg, now);
+               iocg_flush_stat_upward(iocg);
                list_del_init(&iocg->walk_list);
        }
 }
@@ -2152,16 +2145,16 @@ static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now)
 
                /* flush wait and indebt stat deltas */
                if (iocg->wait_since) {
-                       iocg->local_stat.wait_us += now->now - iocg->wait_since;
+                       iocg->stat.wait_us += now->now - iocg->wait_since;
                        iocg->wait_since = now->now;
                }
                if (iocg->indebt_since) {
-                       iocg->local_stat.indebt_us +=
+                       iocg->stat.indebt_us +=
                                now->now - iocg->indebt_since;
                        iocg->indebt_since = now->now;
                }
                if (iocg->indelay_since) {
-                       iocg->local_stat.indelay_us +=
+                       iocg->stat.indelay_us +=
                                now->now - iocg->indelay_since;
                        iocg->indelay_since = now->now;
                }
@@ -2322,7 +2315,17 @@ static void ioc_timer_fn(struct timer_list *timer)
                                iocg->hweight_donating = hwa;
                                iocg->hweight_after_donation = new_hwi;
                                list_add(&iocg->surplus_list, &surpluses);
-                       } else {
+                       } else if (!iocg->abs_vdebt) {
+                               /*
+                                * @iocg doesn't have enough to donate. Reset
+                                * its inuse to active.
+                                *
+                                * Don't reset debtors as their inuse's are
+                                * owned by debt handling. This shouldn't affect
+                                * donation calculation in any meaningful way

+                                * as @iocg doesn't have a meaningful amount of
+                                * share anyway.
+                                */
                                TRACE_IOCG_PATH(inuse_shortage, iocg, &now,
                                                iocg->inuse, iocg->active,
                                                iocg->hweight_inuse, new_hwi);
@@ -2995,13 +2998,13 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
        kfree(iocg);
 }
 
-static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
 {
        struct ioc_gq *iocg = pd_to_iocg(pd);
        struct ioc *ioc = iocg->ioc;
 
        if (!ioc->enabled)
-               return false;
+               return;
 
        if (iocg->level == 0) {
                unsigned vp10k = DIV64_U64_ROUND_CLOSEST(
@@ -3017,7 +3020,6 @@ static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
                        iocg->last_stat.wait_us,
                        iocg->last_stat.indebt_us,
                        iocg->last_stat.indelay_us);
-       return true;
 }
 
 static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
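
A worked example may help here, since the refactor above splits the old iocg_flush_stat_one() into a leaf step (fold per-cpu vtime into ->stat) and an upward step (push deltas to the parent). Hypothetical numbers, field names from the patch:

        /*
         * A leaf iocg ends a period with stat.usage_us = 700 while
         * last_stat.usage_us = 500. iocg_flush_stat_upward() adds the
         * 200us delta to parent->stat.usage_us and then snapshots
         * last_stat = stat, so a later flush with no new activity
         * propagates nothing. Flushing leaves first and then walking
         * the inner list in reverse moves each delta from leaf to
         * root exactly once.
         */
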
index 2f33932e72e368124331e74ee87df5180b4b15a1..5b676c7cf2b634d038f3f0ba0f6f5bb6230802f4 100644 (file)
@@ -891,7 +891,7 @@ static int iolatency_print_limit(struct seq_file *sf, void *v)
        return 0;
 }
 
-static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
+static void iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
 {
        struct latency_stat stat;
        int cpu;
@@ -914,17 +914,16 @@ static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
                        (unsigned long long)stat.ps.missed,
                        (unsigned long long)stat.ps.total,
                        iolat->rq_depth.max_depth);
-       return true;
 }
 
-static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
 {
        struct iolatency_grp *iolat = pd_to_lat(pd);
        unsigned long long avg_lat;
        unsigned long long cur_win;
 
        if (!blkcg_debug_stats)
-               return false;
+               return;
 
        if (iolat->ssd)
                return iolatency_ssd_stat(iolat, s);
@@ -937,7 +936,6 @@ static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
        else
                seq_printf(s, " depth=%u avg_lat=%llu win=%llu",
                        iolat->rq_depth.max_depth, avg_lat, cur_win);
-       return true;
 }
 
 static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
index 237d60d8b585799916fcab8415e020796ed01cce..09b7e1200c0f40fb0d539ba889db9b8aa553b668 100644 (file)
@@ -10,30 +10,44 @@
 
 #include "blk.h"
 
+static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
+{
+       unsigned int discard_granularity = bdev_discard_granularity(bdev);
+       sector_t granularity_aligned_sector;
+
+       if (bdev_is_partition(bdev))
+               sector += bdev->bd_start_sect;
+
+       granularity_aligned_sector =
+               round_up(sector, discard_granularity >> SECTOR_SHIFT);
+
+       /*
+        * Make sure subsequent bios start aligned to the discard granularity if
+        * it needs to be split.
+        */
+       if (granularity_aligned_sector != sector)
+               return granularity_aligned_sector - sector;
+
+       /*
+        * Align the bio size to the discard granularity to make splitting the bio
+        * at discard granularity boundaries easier in the driver if needed.
+        */
+       return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
+}
+
 int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-               sector_t nr_sects, gfp_t gfp_mask, int flags,
-               struct bio **biop)
+               sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
 {
-       struct request_queue *q = bdev_get_queue(bdev);
        struct bio *bio = *biop;
-       unsigned int op;
-       sector_t bs_mask, part_offset = 0;
+       sector_t bs_mask;
 
        if (bdev_read_only(bdev))
                return -EPERM;
-
-       if (flags & BLKDEV_DISCARD_SECURE) {
-               if (!blk_queue_secure_erase(q))
-                       return -EOPNOTSUPP;
-               op = REQ_OP_SECURE_ERASE;
-       } else {
-               if (!blk_queue_discard(q))
-                       return -EOPNOTSUPP;
-               op = REQ_OP_DISCARD;
-       }
+       if (!bdev_max_discard_sectors(bdev))
+               return -EOPNOTSUPP;
 
        /* In case the discard granularity isn't set by buggy device driver */
-       if (WARN_ON_ONCE(!q->limits.discard_granularity)) {
+       if (WARN_ON_ONCE(!bdev_discard_granularity(bdev))) {
                char dev_name[BDEVNAME_SIZE];
 
                bdevname(bdev, dev_name);
@@ -48,38 +62,11 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
        if (!nr_sects)
                return -EINVAL;
 
-       /* In case the discard request is in a partition */
-       if (bdev_is_partition(bdev))
-               part_offset = bdev->bd_start_sect;
-
        while (nr_sects) {
-               sector_t granularity_aligned_lba, req_sects;
-               sector_t sector_mapped = sector + part_offset;
-
-               granularity_aligned_lba = round_up(sector_mapped,
-                               q->limits.discard_granularity >> SECTOR_SHIFT);
-
-               /*
-                * Check whether the discard bio starts at a discard_granularity
-                * aligned LBA,
-                * - If no: set (granularity_aligned_lba - sector_mapped) to
-                *   bi_size of the first split bio, then the second bio will
-                *   start at a discard_granularity aligned LBA on the device.
-                * - If yes: use bio_aligned_discard_max_sectors() as the max
-                *   possible bi_size of the first split bio. Then when this bio
-                *   is split in device drive, the split ones are very probably
-                *   to be aligned to discard_granularity of the device's queue.
-                */
-               if (granularity_aligned_lba == sector_mapped)
-                       req_sects = min_t(sector_t, nr_sects,
-                                         bio_aligned_discard_max_sectors(q));
-               else
-                       req_sects = min_t(sector_t, nr_sects,
-                                         granularity_aligned_lba - sector_mapped);
-
-               WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
+               sector_t req_sects =
+                       min(nr_sects, bio_discard_limit(bdev, sector));
 
-               bio = blk_next_bio(bio, bdev, 0, op, gfp_mask);
+               bio = blk_next_bio(bio, bdev, 0, REQ_OP_DISCARD, gfp_mask);
                bio->bi_iter.bi_sector = sector;
                bio->bi_iter.bi_size = req_sects << 9;
                sector += req_sects;
@@ -105,21 +92,19 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
  * @sector:    start sector
  * @nr_sects:  number of sectors to discard
  * @gfp_mask:  memory allocation flags (for bio_alloc)
- * @flags:     BLKDEV_DISCARD_* flags to control behaviour
  *
  * Description:
  *    Issue a discard request for the sectors in question.
  */
 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-               sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+               sector_t nr_sects, gfp_t gfp_mask)
 {
        struct bio *bio = NULL;
        struct blk_plug plug;
        int ret;
 
        blk_start_plug(&plug);
-       ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
-                       &bio);
+       ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio);
        if (!ret && bio) {
                ret = submit_bio_wait(bio);
                if (ret == -EOPNOTSUPP)
@@ -316,3 +301,42 @@ retry:
        return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_zeroout);
+
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+               sector_t nr_sects, gfp_t gfp)
+{
+       sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+       unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev);
+       struct bio *bio = NULL;
+       struct blk_plug plug;
+       int ret = 0;
+
+       if (max_sectors == 0)
+               return -EOPNOTSUPP;
+       if ((sector | nr_sects) & bs_mask)
+               return -EINVAL;
+       if (bdev_read_only(bdev))
+               return -EPERM;
+
+       blk_start_plug(&plug);
+       for (;;) {
+               unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+
+               bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
+               bio->bi_iter.bi_sector = sector;
+               bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+
+               sector += len;
+               nr_sects -= len;
+               if (!nr_sects) {
+                       ret = submit_bio_wait(bio);
+                       bio_put(bio);
+                       break;
+               }
+               cond_resched();
+       }
+       blk_finish_plug(&plug);
+
+       return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_secure_erase);
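
The discard-splitting helper earlier in this file can be sanity-checked with made-up numbers; only bio_discard_limit() and __blkdev_issue_discard() are from the patch:

        /*
         * Hypothetical example: discard_granularity = 1 MiB (2048 sectors)
         * and a discard starting at device sector 1000. round_up(1000, 2048)
         * is 2048, so bio_discard_limit() returns 2048 - 1000 = 1048 and
         * __blkdev_issue_discard() caps the first bio at 1048 sectors; the
         * next bio then starts granularity-aligned at sector 2048. Once
         * aligned, the limit becomes round_down(UINT_MAX, 1 MiB) in bytes,
         * converted to sectors, so driver-level splits stay aligned too.
         */
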
index c7f71d83eff18924898fb75c41326f98065d7971..df8b066cd548913ca6455d81fee594d2feeed69c 100644 (file)
@@ -152,10 +152,10 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
        nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));
 
        ret = -ENOMEM;
-       bio = bio_kmalloc(gfp_mask, nr_pages);
+       bio = bio_kmalloc(nr_pages, gfp_mask);
        if (!bio)
                goto out_bmd;
-       bio->bi_opf |= req_op(rq);
+       bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, req_op(rq));
 
        if (map_data) {
                nr_pages = 1 << map_data->page_order;
@@ -224,7 +224,8 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
 cleanup:
        if (!map_data)
                bio_free_pages(bio);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
 out_bmd:
        kfree(bmd);
        return ret;
@@ -234,6 +235,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
                gfp_t gfp_mask)
 {
        unsigned int max_sectors = queue_max_hw_sectors(rq->q);
+       unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
        struct bio *bio;
        int ret;
        int j;
@@ -241,10 +243,10 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
        if (!iov_iter_count(iter))
                return -EINVAL;
 
-       bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_VECS));
+       bio = bio_kmalloc(nr_vecs, gfp_mask);
        if (!bio)
                return -ENOMEM;
-       bio->bi_opf |= req_op(rq);
+       bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));
 
        while (iov_iter_count(iter)) {
                struct page **pages;
@@ -260,10 +262,9 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 
                npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
 
-               if (unlikely(offs & queue_dma_alignment(rq->q))) {
-                       ret = -EINVAL;
+               if (unlikely(offs & queue_dma_alignment(rq->q)))
                        j = 0;
-               } else {
+               else {
                        for (j = 0; j < npages; j++) {
                                struct page *page = pages[j];
                                unsigned int n = PAGE_SIZE - offs;
@@ -303,7 +304,8 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 
  out_unmap:
        bio_release_pages(bio, false);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
        return ret;
 }
 
@@ -323,7 +325,8 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio)
 static void bio_map_kern_endio(struct bio *bio)
 {
        bio_invalidate_vmalloc_pages(bio);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
 }
 
 /**
@@ -348,9 +351,10 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
        int offset, i;
        struct bio *bio;
 
-       bio = bio_kmalloc(gfp_mask, nr_pages);
+       bio = bio_kmalloc(nr_pages, gfp_mask);
        if (!bio)
                return ERR_PTR(-ENOMEM);
+       bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);
 
        if (is_vmalloc) {
                flush_kernel_vmap_range(data, len);
@@ -374,7 +378,8 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
                if (bio_add_pc_page(q, bio, page, bytes,
                                    offset) < bytes) {
                        /* we don't support partial mappings */
-                       bio_put(bio);
+                       bio_uninit(bio);
+                       kfree(bio);
                        return ERR_PTR(-EINVAL);
                }
 
@@ -390,7 +395,8 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
 static void bio_copy_kern_endio(struct bio *bio)
 {
        bio_free_pages(bio);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
 }
 
 static void bio_copy_kern_endio_read(struct bio *bio)
@@ -435,9 +441,10 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
                return ERR_PTR(-EINVAL);
 
        nr_pages = end - start;
-       bio = bio_kmalloc(gfp_mask, nr_pages);
+       bio = bio_kmalloc(nr_pages, gfp_mask);
        if (!bio)
                return ERR_PTR(-ENOMEM);
+       bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);
 
        while (len) {
                struct page *page;
@@ -471,7 +478,8 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
 
 cleanup:
        bio_free_pages(bio);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
        return ERR_PTR(-ENOMEM);
 }
 
@@ -602,7 +610,8 @@ int blk_rq_unmap_user(struct bio *bio)
 
                next_bio = bio;
                bio = bio->bi_next;
-               bio_put(next_bio);
+               bio_uninit(next_bio);
+               kfree(next_bio);
        }
 
        return ret;
@@ -648,8 +657,10 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
        bio->bi_opf |= req_op(rq);
 
        ret = blk_rq_append_bio(rq, bio);
-       if (unlikely(ret))
-               bio_put(bio);
+       if (unlikely(ret)) {
+               bio_uninit(bio);
+               kfree(bio);
+       }
        return ret;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
index aa0349e9f083b9495fd3ec17973e42636810146d..7e4136a60e1cc69d1fddf9fb6f9b8342c5d329f7 100644 (file)
@@ -113,10 +113,8 @@ static const char *const blk_queue_flag_name[] = {
        QUEUE_FLAG_NAME(FAIL_IO),
        QUEUE_FLAG_NAME(NONROT),
        QUEUE_FLAG_NAME(IO_STAT),
-       QUEUE_FLAG_NAME(DISCARD),
        QUEUE_FLAG_NAME(NOXMERGES),
        QUEUE_FLAG_NAME(ADD_RANDOM),
-       QUEUE_FLAG_NAME(SECERASE),
        QUEUE_FLAG_NAME(SAME_FORCE),
        QUEUE_FLAG_NAME(DEAD),
        QUEUE_FLAG_NAME(INIT_DONE),
index ed3ed86f7dd2426ed8d31ccaa6660f1189d352c3..ae116b7556482a43de920fcee24955b4fdb24bf9 100644 (file)
@@ -794,7 +794,8 @@ bool blk_update_request(struct request *req, blk_status_t error,
 #endif
 
        if (unlikely(error && !blk_rq_is_passthrough(req) &&
-                    !(req->rq_flags & RQF_QUIET))) {
+                    !(req->rq_flags & RQF_QUIET)) &&
+                    !test_bit(GD_DEAD, &req->q->disk->state)) {
                blk_print_req_error(req, error);
                trace_block_rq_error(req, error, nr_bytes);
        }
@@ -1082,7 +1083,7 @@ bool blk_mq_complete_request_remote(struct request *rq)
        WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
 
        /*
-        * For a polled request, always complete locallly, it's pointless
+        * For a polled request, always complete locally, it's pointless
         * to redirect the completion.
         */
        if (rq->cmd_flags & REQ_POLLED)
@@ -1130,14 +1131,7 @@ void blk_mq_start_request(struct request *rq)
        trace_block_rq_issue(rq);
 
        if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
-               u64 start_time;
-#ifdef CONFIG_BLK_CGROUP
-               if (rq->bio)
-                       start_time = bio_issue_time(&rq->bio->bi_issue);
-               else
-#endif
-                       start_time = ktime_get_ns();
-               rq->io_start_time_ns = start_time;
+               rq->io_start_time_ns = ktime_get_ns();
                rq->stats_sectors = blk_rq_sectors(rq);
                rq->rq_flags |= RQF_STATS;
                rq_qos_issue(q, rq);
@@ -1175,6 +1169,62 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
        complete(waiting);
 }
 
+/*
+ * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
+ * queues. This is important for md arrays to benefit from merging
+ * requests.
+ */
+static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
+{
+       if (plug->multiple_queues)
+               return BLK_MAX_REQUEST_COUNT * 2;
+       return BLK_MAX_REQUEST_COUNT;
+}
+
+static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
+{
+       struct request *last = rq_list_peek(&plug->mq_list);
+
+       if (!plug->rq_count) {
+               trace_block_plug(rq->q);
+       } else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
+                  (!blk_queue_nomerges(rq->q) &&
+                   blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
+               blk_mq_flush_plug_list(plug, false);
+               trace_block_plug(rq->q);
+       }
+
+       if (!plug->multiple_queues && last && last->q != rq->q)
+               plug->multiple_queues = true;
+       if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
+               plug->has_elevator = true;
+       rq->rq_next = NULL;
+       rq_list_add(&plug->mq_list, rq);
+       plug->rq_count++;
+}
+
+static void __blk_execute_rq_nowait(struct request *rq, bool at_head,
+               rq_end_io_fn *done, bool use_plug)
+{
+       WARN_ON(irqs_disabled());
+       WARN_ON(!blk_rq_is_passthrough(rq));
+
+       rq->end_io = done;
+
+       blk_account_io_start(rq);
+
+       if (use_plug && current->plug) {
+               blk_add_rq_to_plug(current->plug, rq);
+               return;
+       }
+       /*
+        * don't check dying flag for MQ because the request won't
+        * be reused after dying flag is set
+        */
+       blk_mq_sched_insert_request(rq, at_head, true, false);
+}
+
+
 /**
  * blk_execute_rq_nowait - insert a request to I/O scheduler for execution
  * @rq:                request to insert
@@ -1190,18 +1240,8 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
  */
 void blk_execute_rq_nowait(struct request *rq, bool at_head, rq_end_io_fn *done)
 {
-       WARN_ON(irqs_disabled());
-       WARN_ON(!blk_rq_is_passthrough(rq));
-
-       rq->end_io = done;
+       __blk_execute_rq_nowait(rq, at_head, done, true);
 
-       blk_account_io_start(rq);
-
-       /*
-        * don't check dying flag for MQ because the request won't
-        * be reused after dying flag is set
-        */
-       blk_mq_sched_insert_request(rq, at_head, true, false);
 }
 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
 
@@ -1239,8 +1279,13 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
        DECLARE_COMPLETION_ONSTACK(wait);
        unsigned long hang_check;
 
+       /*
+        * iopoll requires request to be submitted to driver, so can't
+        * use plug
+        */
        rq->end_io_data = &wait;
-       blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq);
+       __blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq,
+                       !blk_rq_is_poll(rq));
 
        /* Prevent hang_check timer from firing at us during very long I/O */
        hang_check = sysctl_hung_task_timeout_secs;
@@ -2682,40 +2727,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
                hctx->queue->mq_ops->commit_rqs(hctx);
 }
 
-/*
- * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
- * queues. This is important for md arrays to benefit from merging
- * requests.
- */
-static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
-{
-       if (plug->multiple_queues)
-               return BLK_MAX_REQUEST_COUNT * 2;
-       return BLK_MAX_REQUEST_COUNT;
-}
-
-static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
-{
-       struct request *last = rq_list_peek(&plug->mq_list);
-
-       if (!plug->rq_count) {
-               trace_block_plug(rq->q);
-       } else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
-                  (!blk_queue_nomerges(rq->q) &&
-                   blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
-               blk_mq_flush_plug_list(plug, false);
-               trace_block_plug(rq->q);
-       }
-
-       if (!plug->multiple_queues && last && last->q != rq->q)
-               plug->multiple_queues = true;
-       if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
-               plug->has_elevator = true;
-       rq->rq_next = NULL;
-       rq_list_add(&plug->mq_list, rq);
-       plug->rq_count++;
-}
-
 static bool blk_mq_attempt_bio_merge(struct request_queue *q,
                                     struct bio *bio, unsigned int nr_segs)
 {
index b83df3d2eebcaac76dccdbc6427c9f172012f28c..6ccceb421ed2f7503b96c5e9cf58633400510d3f 100644 (file)
@@ -46,6 +46,7 @@ void blk_set_default_limits(struct queue_limits *lim)
        lim->max_zone_append_sectors = 0;
        lim->max_discard_sectors = 0;
        lim->max_hw_discard_sectors = 0;
+       lim->max_secure_erase_sectors = 0;
        lim->discard_granularity = 0;
        lim->discard_alignment = 0;
        lim->discard_misaligned = 0;
@@ -176,6 +177,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_queue_max_discard_sectors);
 
+/**
+ * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
+ * @q:  the request queue for the device
+ * @max_sectors: maximum number of sectors to secure_erase
+ **/
+void blk_queue_max_secure_erase_sectors(struct request_queue *q,
+               unsigned int max_sectors)
+{
+       q->limits.max_secure_erase_sectors = max_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_secure_erase_sectors);
+
 /**
  * blk_queue_max_write_zeroes_sectors - set max sectors for a single
  *                                      write zeroes
@@ -468,6 +481,40 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
 }
 EXPORT_SYMBOL(blk_queue_io_opt);
 
+static int queue_limit_alignment_offset(struct queue_limits *lim,
+               sector_t sector)
+{
+       unsigned int granularity = max(lim->physical_block_size, lim->io_min);
+       unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
+               << SECTOR_SHIFT;
+
+       return (granularity + lim->alignment_offset - alignment) % granularity;
+}
+
+static unsigned int queue_limit_discard_alignment(struct queue_limits *lim,
+               sector_t sector)
+{
+       unsigned int alignment, granularity, offset;
+
+       if (!lim->max_discard_sectors)
+               return 0;
+
+       /* Why are these in bytes, not sectors? */
+       alignment = lim->discard_alignment >> SECTOR_SHIFT;
+       granularity = lim->discard_granularity >> SECTOR_SHIFT;
+       if (!granularity)
+               return 0;
+
+       /* Offset of the partition start in 'granularity' sectors */
+       offset = sector_div(sector, granularity);
+
+       /* And why do we do this modulus *again* in blkdev_issue_discard()? */
+       offset = (granularity + alignment - offset) % granularity;
+
+       /* Turn it back into bytes, gaah */
+       return offset << SECTOR_SHIFT;
+}
+
 static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs)
 {
        sectors = round_down(sectors, lbs >> SECTOR_SHIFT);
@@ -627,7 +674,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
                        t->discard_granularity;
        }
-
+       t->max_secure_erase_sectors = min_not_zero(t->max_secure_erase_sectors,
+                                                  b->max_secure_erase_sectors);
        t->zone_write_granularity = max(t->zone_write_granularity,
                                        b->zone_write_granularity);
        t->zoned = max(t->zoned, b->zoned);
@@ -901,3 +949,27 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
        }
 }
 EXPORT_SYMBOL_GPL(blk_queue_set_zoned);
+
+int bdev_alignment_offset(struct block_device *bdev)
+{
+       struct request_queue *q = bdev_get_queue(bdev);
+
+       if (q->limits.misaligned)
+               return -1;
+       if (bdev_is_partition(bdev))
+               return queue_limit_alignment_offset(&q->limits,
+                               bdev->bd_start_sect);
+       return q->limits.alignment_offset;
+}
+EXPORT_SYMBOL_GPL(bdev_alignment_offset);
+
+unsigned int bdev_discard_alignment(struct block_device *bdev)
+{
+       struct request_queue *q = bdev_get_queue(bdev);
+
+       if (bdev_is_partition(bdev))
+               return queue_limit_discard_alignment(&q->limits,
+                               bdev->bd_start_sect);
+       return q->limits.discard_alignment;
+}
+EXPORT_SYMBOL_GPL(bdev_discard_alignment);
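
As with the discard helper in blk-lib.c, a quick numeric check of queue_limit_discard_alignment() above (hypothetical values):

        /*
         * discard_granularity = 512 KiB (1024 sectors), discard_alignment
         * = 0, partition start at sector 2500. sector_div(2500, 1024)
         * leaves offset = 452, and (1024 + 0 - 452) % 1024 = 572, returned
         * as 572 << SECTOR_SHIFT bytes: the first discard-aligned boundary
         * inside the partition sits 572 sectors past its start (device
         * sector 3072).
         */
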
index 469c483719bea8309a92a14a6c2c39cb9cc5bd15..139b2d7a99e2fae6a4d40fd1e23783d096064330 100644 (file)
@@ -227,7 +227,7 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
                break;                                                  \
        if ((__tg)) {                                                   \
                blk_add_cgroup_trace_msg(__td->queue,                   \
-                       tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\
+                       &tg_to_blkg(__tg)->blkcg->css, "throtl " fmt, ##args);\
        } else {                                                        \
                blk_add_trace_msg(__td->queue, "throtl " fmt, ##args);  \
        }                                                               \
@@ -2189,13 +2189,14 @@ again:
        }
 
 out_unlock:
-       spin_unlock_irq(&q->queue_lock);
        bio_set_flag(bio, BIO_THROTTLED);
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
        if (throttled || !td->track_bio_latency)
                bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
 #endif
+       spin_unlock_irq(&q->queue_lock);
+
        rcu_read_unlock();
        return throttled;
 }
index 8ccbc6e076369b753f21f56b62f8baf4e4e4ef61..434017701403fb699668ec4db2a6a467b2b6ae9f 100644 (file)
@@ -346,20 +346,6 @@ static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
        return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9;
 }
 
-/*
- * The max bio size which is aligned to q->limits.discard_granularity. This
- * is a hint to split large discard bio in generic block layer, then if device
- * driver needs to split the discard bio into smaller ones, their bi_size can
- * be very probably and easily aligned to discard_granularity of the device's
- * queue.
- */
-static inline unsigned int bio_aligned_discard_max_sectors(
-                                       struct request_queue *q)
-{
-       return round_down(UINT_MAX, q->limits.discard_granularity) >>
-                       SECTOR_SHIFT;
-}
-
 /*
  * Internal io_context interface
  */
@@ -450,13 +436,6 @@ extern struct device_attribute dev_attr_events;
 extern struct device_attribute dev_attr_events_async;
 extern struct device_attribute dev_attr_events_poll_msecs;
 
-static inline void bio_clear_polled(struct bio *bio)
-{
-       /* can't support alloc cache if we turn off polling */
-       bio_clear_flag(bio, BIO_PERCPU_CACHE);
-       bio->bi_opf &= ~REQ_POLLED;
-}
-
 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 
index 467be46d0e65620c08067579ff4fd30f1ce666a6..8f7b6fe3b4db5f621e5efce566ab3e9697b44fd9 100644 (file)
@@ -191,7 +191,6 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
                goto err_put;
 
        bio_clone_blkg_association(bio, bio_src);
-       blkcg_bio_issue_init(bio);
 
        return bio;
 
index 9f2ecec406b04e51f305e997c090176db70ce90b..b9b83030e0dfa4a77ef9df1f05bcb07737ef0aa2 100644 (file)
@@ -44,14 +44,6 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
 
 #define DIO_INLINE_BIO_VECS 4
 
-static void blkdev_bio_end_io_simple(struct bio *bio)
-{
-       struct task_struct *waiter = bio->bi_private;
-
-       WRITE_ONCE(bio->bi_private, NULL);
-       blk_wake_io_task(waiter);
-}
-
 static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
                struct iov_iter *iter, unsigned int nr_pages)
 {
@@ -83,8 +75,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
                bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
        }
        bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
-       bio.bi_private = current;
-       bio.bi_end_io = blkdev_bio_end_io_simple;
        bio.bi_ioprio = iocb->ki_ioprio;
 
        ret = bio_iov_iter_get_pages(&bio, iter);
@@ -97,18 +87,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 
        if (iocb->ki_flags & IOCB_NOWAIT)
                bio.bi_opf |= REQ_NOWAIT;
-       if (iocb->ki_flags & IOCB_HIPRI)
-               bio_set_polled(&bio, iocb);
 
-       submit_bio(&bio);
-       for (;;) {
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               if (!READ_ONCE(bio.bi_private))
-                       break;
-               if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0))
-                       blk_io_schedule();
-       }
-       __set_current_state(TASK_RUNNING);
+       submit_bio_wait(&bio);
 
        bio_release_pages(&bio, should_dirty);
        if (unlikely(bio.bi_status))
@@ -197,8 +177,10 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
            (bdev_logical_block_size(bdev) - 1))
                return -EINVAL;
 
-       bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
-
+       if (iocb->ki_flags & IOCB_ALLOC_CACHE)
+               opf |= REQ_ALLOC_CACHE;
+       bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
+                              &blkdev_dio_pool);
        dio = container_of(bio, struct blkdev_dio, bio);
        atomic_set(&dio->ref, 1);
        /*
@@ -320,7 +302,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
            (bdev_logical_block_size(bdev) - 1))
                return -EINVAL;
 
-       bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
+       if (iocb->ki_flags & IOCB_ALLOC_CACHE)
+               opf |= REQ_ALLOC_CACHE;
+       bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
+                              &blkdev_dio_pool);
        dio = container_of(bio, struct blkdev_dio, bio);
        dio->flags = 0;
        dio->iocb = iocb;
@@ -672,7 +657,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
                break;
        case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
                error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
-                                            len >> SECTOR_SHIFT, GFP_KERNEL, 0);
+                                            len >> SECTOR_SHIFT, GFP_KERNEL);
                break;
        default:
                error = -EOPNOTSUPP;
index b8b6759d670f01dc3584b8c4fda150a0c03579f1..36532b93184191e08f5ab8048ecb6ce99cfc33ee 100644 (file)
@@ -1010,7 +1010,7 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
 {
        struct gendisk *disk = dev_to_disk(dev);
 
-       return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
+       return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
 }
 
 static ssize_t disk_discard_alignment_show(struct device *dev,
@@ -1019,7 +1019,7 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
 {
        struct gendisk *disk = dev_to_disk(dev);
 
-       return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
+       return sprintf(buf, "%d\n", bdev_discard_alignment(disk->part0));
 }
 
 static ssize_t diskseq_show(struct device *dev,
index 4a86340133e46b2465ed0de12fbf81b836ee81c3..46949f1b0dba50b99ca5924a34b2b891ada2233d 100644 (file)
@@ -83,18 +83,17 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
 #endif
 
 static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
-               unsigned long arg, unsigned long flags)
+               unsigned long arg)
 {
        uint64_t range[2];
        uint64_t start, len;
-       struct request_queue *q = bdev_get_queue(bdev);
        struct inode *inode = bdev->bd_inode;
        int err;
 
        if (!(mode & FMODE_WRITE))
                return -EBADF;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(bdev))
                return -EOPNOTSUPP;
 
        if (copy_from_user(range, (void __user *)arg, sizeof(range)))
@@ -115,15 +114,43 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
        err = truncate_bdev_range(bdev, mode, start, start + len - 1);
        if (err)
                goto fail;
-
-       err = blkdev_issue_discard(bdev, start >> 9, len >> 9,
-                                  GFP_KERNEL, flags);
-
+       err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
 fail:
        filemap_invalidate_unlock(inode->i_mapping);
        return err;
 }
 
+static int blk_ioctl_secure_erase(struct block_device *bdev, fmode_t mode,
+               void __user *argp)
+{
+       uint64_t start, len;
+       uint64_t range[2];
+       int err;
+
+       if (!(mode & FMODE_WRITE))
+               return -EBADF;
+       if (!bdev_max_secure_erase_sectors(bdev))
+               return -EOPNOTSUPP;
+       if (copy_from_user(range, argp, sizeof(range)))
+               return -EFAULT;
+
+       start = range[0];
+       len = range[1];
+       if ((start & 511) || (len & 511))
+               return -EINVAL;
+       if (start + len > bdev_nr_bytes(bdev))
+               return -EINVAL;
+
+       filemap_invalidate_lock(bdev->bd_inode->i_mapping);
+       err = truncate_bdev_range(bdev, mode, start, start + len - 1);
+       if (!err)
+               err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
+                                               GFP_KERNEL);
+       filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
+       return err;
+}
+
+
 static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
                unsigned long arg)
 {
@@ -451,10 +478,9 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
        case BLKROSET:
                return blkdev_roset(bdev, mode, cmd, arg);
        case BLKDISCARD:
-               return blk_ioctl_discard(bdev, mode, arg, 0);
+               return blk_ioctl_discard(bdev, mode, arg);
        case BLKSECDISCARD:
-               return blk_ioctl_discard(bdev, mode, arg,
-                               BLKDEV_DISCARD_SECURE);
+               return blk_ioctl_secure_erase(bdev, mode, argp);
        case BLKZEROOUT:
                return blk_ioctl_zeroout(bdev, mode, arg);
        case BLKGETDISKSEQ:
@@ -489,7 +515,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
                                    queue_max_sectors(bdev_get_queue(bdev)));
                return put_ushort(argp, max_sectors);
        case BLKROTATIONAL:
-               return put_ushort(argp, !blk_queue_nonrot(bdev_get_queue(bdev)));
+               return put_ushort(argp, !bdev_nonrot(bdev));
        case BLKRASET:
        case BLKFRASET:
                if(!capable(CAP_SYS_ADMIN))
@@ -629,7 +655,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
                return compat_put_long(argp,
                        (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512);
        case BLKGETSIZE:
-               if (bdev_nr_sectors(bdev) > ~0UL)
+               if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0)
                        return -EFBIG;
                return compat_put_ulong(argp, bdev_nr_sectors(bdev));
 
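
From userspace the new handler is reached through the existing BLKSECDISCARD ioctl; a hedged sketch (error handling trimmed), with start and len given in bytes and 512-byte aligned as the handler requires:

        #include <fcntl.h>
        #include <stdint.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include <linux/fs.h>

        static int secure_erase(const char *dev, uint64_t start, uint64_t len)
        {
                uint64_t range[2] = { start, len };     /* bytes, 512-byte aligned */
                int fd = open(dev, O_WRONLY);           /* handler wants FMODE_WRITE */
                int ret;

                if (fd < 0)
                        return -1;
                /* fails with EOPNOTSUPP if bdev_max_secure_erase_sectors() is 0 */
                ret = ioctl(fd, BLKSECDISCARD, &range);
                close(fd);
                return ret;
        }
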
index 3ed5eaf3446a2791f18df91820827409a3e28fe7..6ed602b2f80a5904892717bd2bbe19203045260a 100644 (file)
@@ -742,6 +742,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
        if (at_head) {
                list_add(&rq->queuelist, &per_prio->dispatch);
+               rq->fifo_time = jiffies;
        } else {
                deadline_add_rq_rb(per_prio, rq);
 
index 2c381c694c574c0b6875f52ee40eef6ab782e389..d2fc122d74262de1e9bd29cbe6d96e57c239fbec 100644 (file)
@@ -282,13 +282,13 @@ int adfspart_check_ADFS(struct parsed_partitions *state)
 #ifdef CONFIG_ACORN_PARTITION_RISCIX
                case PARTITION_RISCIX_SCSI:
                case PARTITION_RISCIX_MFM:
-                       slot = riscix_partition(state, start_sect, slot,
+                       riscix_partition(state, start_sect, slot,
                                                nr_sects);
                        break;
 #endif
 
                case PARTITION_LINUX:
-                       slot = linux_partition(state, start_sect, slot,
+                       linux_partition(state, start_sect, slot,
                                               nr_sects);
                        break;
                }
index da59941754163dcaa72796ded13cb21e942a3f13..9655c728262a4d5a639b38a891790c491bc08db6 100644 (file)
@@ -140,7 +140,6 @@ int atari_partition(struct parsed_partitions *state)
                                /* accept only GEM,BGM,RAW,LNX,SWP partitions */
                                if (!((pi->flg & 1) && OK_id(pi->id)))
                                        continue;
-                               part_fmt = 2;
                                put_partition (state, slot,
                                                be32_to_cpu(pi->st),
                                                be32_to_cpu(pi->siz));
index 2ef8dfa1e5c85f35d4310a2cf7c7a441bf7225ee..8a0ec929023bcd2ded0579194d99a1044bc7cb6d 100644 (file)
@@ -200,21 +200,13 @@ static ssize_t part_ro_show(struct device *dev,
 static ssize_t part_alignment_offset_show(struct device *dev,
                                          struct device_attribute *attr, char *buf)
 {
-       struct block_device *bdev = dev_to_bdev(dev);
-
-       return sprintf(buf, "%u\n",
-               queue_limit_alignment_offset(&bdev_get_queue(bdev)->limits,
-                               bdev->bd_start_sect));
+       return sprintf(buf, "%u\n", bdev_alignment_offset(dev_to_bdev(dev)));
 }
 
 static ssize_t part_discard_alignment_show(struct device *dev,
                                           struct device_attribute *attr, char *buf)
 {
-       struct block_device *bdev = dev_to_bdev(dev);
-
-       return sprintf(buf, "%u\n",
-               queue_limit_discard_alignment(&bdev_get_queue(bdev)->limits,
-                               bdev->bd_start_sect));
+       return sprintf(buf, "%u\n", bdev_discard_alignment(dev_to_bdev(dev)));
 }
 
 static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
@@ -486,7 +478,7 @@ int bdev_del_partition(struct gendisk *disk, int partno)
                goto out_unlock;
 
        ret = -EBUSY;
-       if (part->bd_openers)
+       if (atomic_read(&part->bd_openers))
                goto out_unlock;
 
        delete_partition(part);
index 27f6c7d9c776de7ae53f85b962aa327b7f3f5e75..38e58960ae036a4e4df2fdaefca2c7842340db42 100644 (file)
@@ -736,7 +736,6 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
                len = r_cols;
        } else {
                r_stripe = 0;
-               r_cols   = 0;
                len = r_parent;
        }
        if (len < 0)
@@ -783,11 +782,8 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
                r_id1 = ldm_relative (buffer, buflen, 0x24, r_diskid);
                r_id2 = ldm_relative (buffer, buflen, 0x24, r_id1);
                len = r_id2;
-       } else {
-               r_id1 = 0;
-               r_id2 = 0;
+       } else
                len = r_diskid;
-       }
        if (len < 0)
                return false;
 
@@ -826,11 +822,8 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
                r_id1 = ldm_relative (buffer, buflen, 0x44, r_name);
                r_id2 = ldm_relative (buffer, buflen, 0x44, r_id1);
                len = r_id2;
-       } else {
-               r_id1 = 0;
-               r_id2 = 0;
+       } else
                len = r_name;
-       }
        if (len < 0)
                return false;
 
@@ -963,10 +956,8 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
                        return false;
                }
                len = r_index;
-       } else {
-               r_index = 0;
+       } else
                len = r_diskid;
-       }
        if (len < 0) {
                ldm_error("len %d < 0", len);
                return false;
index 32b20efff5f83c9fab837057872002a20bed841a..eb95e188d62bc27a764c3adf813f5a990b93dff4 100644 (file)
@@ -96,11 +96,6 @@ static const struct dmi_system_id processor_power_dmi_table[] = {
          DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
          DMI_MATCH(DMI_PRODUCT_NAME,"L8400B series Notebook PC")},
         (void *)1},
-       /* T40 can not handle C3 idle state */
-       { set_max_cstate, "IBM ThinkPad T40", {
-         DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
-         DMI_MATCH(DMI_PRODUCT_NAME, "23737CU")},
-        (void *)2},
        {},
 };
 
@@ -570,8 +565,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
 {
        struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);
 
-       if (cx->type == ACPI_STATE_C3)
-               ACPI_FLUSH_CPU_CACHE();
+       ACPI_FLUSH_CPU_CACHE();
 
        while (1) {
 
@@ -796,7 +790,8 @@ static int acpi_processor_setup_cstates(struct acpi_processor *pr)
                if (cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 ||
                    cx->type == ACPI_STATE_C3) {
                        state->enter_dead = acpi_idle_play_dead;
-                       drv->safe_state_index = count;
+                       if (cx->type != ACPI_STATE_C3)
+                               drv->safe_state_index = count;
                }
                /*
                 * Halt-induced C1 is not good for ->enter_s2idle, because it
index 9efbfe087de761264f8f0fc8743f9182a77d5cc6..762b61f67e6c6db32ff1edcad58bec823e0aa4ff 100644 (file)
@@ -588,19 +588,6 @@ static struct acpi_device *handle_to_device(acpi_handle handle,
        return adev;
 }
 
-int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device)
-{
-       if (!device)
-               return -EINVAL;
-
-       *device = handle_to_device(handle, NULL);
-       if (!*device)
-               return -ENODEV;
-
-       return 0;
-}
-EXPORT_SYMBOL(acpi_bus_get_device);
-
 /**
  * acpi_fetch_acpi_dev - Retrieve ACPI device object.
  * @handle: ACPI handle associated with the requested ACPI device object.
index 8351c5638880bb21e1281f6c6a42846b6a169c6d..f3b639e89dd884a4e875f55c92ee0394ac972f4b 100644 (file)
@@ -2295,6 +2295,7 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc,
 {
        int ret = 0;
        struct binder_sg_copy *sgc, *tmpsgc;
+       struct binder_ptr_fixup *tmppf;
        struct binder_ptr_fixup *pf =
                list_first_entry_or_null(pf_head, struct binder_ptr_fixup,
                                         node);
@@ -2349,7 +2350,11 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc,
                list_del(&sgc->node);
                kfree(sgc);
        }
-       BUG_ON(!list_empty(pf_head));
+       list_for_each_entry_safe(pf, tmppf, pf_head, node) {
+               BUG_ON(pf->skip_size == 0);
+               list_del(&pf->node);
+               kfree(pf);
+       }
        BUG_ON(!list_empty(sgc_head));
 
        return ret > 0 ? -EINVAL : ret;
@@ -2486,6 +2491,9 @@ static int binder_translate_fd_array(struct list_head *pf_head,
        struct binder_proc *proc = thread->proc;
        int ret;
 
+       if (fda->num_fds == 0)
+               return 0;
+
        fd_buf_size = sizeof(u32) * fda->num_fds;
        if (fda->num_fds >= SIZE_MAX / sizeof(u32)) {
                binder_user_error("%d:%d got transaction with invalid number of fds (%lld)\n",
index e5641e6c52ee27dab1b8ffa3a89a2f1afe4f6713..bb45a9c00514475b994813155045880c6ac219af 100644 (file)
@@ -115,14 +115,16 @@ config SATA_AHCI
 
          If unsure, say N.
 
-config SATA_LPM_POLICY
+config SATA_MOBILE_LPM_POLICY
        int "Default SATA Link Power Management policy for low power chipsets"
        range 0 4
        default 0
        depends on SATA_AHCI
        help
          Select the Default SATA Link Power Management (LPM) policy to use
-         for chipsets / "South Bridges" designated as supporting low power.
+         for chipsets / "South Bridges" supporting low-power modes. Such
+         chipsets are typically found on most laptops, but desktops and
+         servers now also widely use chipsets supporting low-power modes.
 
          The value set has the following meanings:
                0 => Keep firmware settings
index 84456c05e84525c6b301689a7cd39c0403dda0c7..c1eca72b4575df73f32dc24ddc23d460cf5de581 100644 (file)
@@ -324,7 +324,6 @@ static const struct pci_device_id ahci_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */
        { PCI_VDEVICE(INTEL, 0x1d04), board_ahci }, /* PBG RAID */
        { PCI_VDEVICE(INTEL, 0x1d06), board_ahci }, /* PBG RAID */
-       { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* PBG/Lewisburg RAID*/
        { PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */
        { PCI_VDEVICE(INTEL, 0x1e02), board_ahci }, /* Panther Point AHCI */
        { PCI_VDEVICE(INTEL, 0x1e03), board_ahci_low_power }, /* Panther M AHCI */
@@ -367,7 +366,9 @@ static const struct pci_device_id ahci_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, 0x1f3e), board_ahci_avn }, /* Avoton RAID */
        { PCI_VDEVICE(INTEL, 0x1f3f), board_ahci_avn }, /* Avoton RAID */
        { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Wellsburg/Lewisburg AHCI*/
-       { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Wellsburg/Lewisburg RAID*/
+       { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* *burg SATA0 'RAID' */
+       { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* *burg SATA1 'RAID' */
+       { PCI_VDEVICE(INTEL, 0x282f), board_ahci }, /* *burg SATA2 'RAID' */
        { PCI_VDEVICE(INTEL, 0x43d4), board_ahci }, /* Rocket Lake PCH-H RAID */
        { PCI_VDEVICE(INTEL, 0x43d5), board_ahci }, /* Rocket Lake PCH-H RAID */
        { PCI_VDEVICE(INTEL, 0x43d6), board_ahci }, /* Rocket Lake PCH-H RAID */
@@ -1595,7 +1596,7 @@ static int ahci_init_msi(struct pci_dev *pdev, unsigned int n_ports,
 static void ahci_update_initial_lpm_policy(struct ata_port *ap,
                                           struct ahci_host_priv *hpriv)
 {
-       int policy = CONFIG_SATA_LPM_POLICY;
+       int policy = CONFIG_SATA_MOBILE_LPM_POLICY;
 
 
        /* Ignore processing for chipsets that don't use policy */
index 6ead58c1b6e5293a805b041bdf56df0be42a7f5a..ad11a4c52fbeb9120010b872799c97c0fceb26e3 100644 (file)
@@ -236,7 +236,7 @@ enum {
        AHCI_HFLAG_NO_WRITE_TO_RO       = (1 << 24), /* don't write to read
                                                        only registers */
        AHCI_HFLAG_USE_LPM_POLICY       = (1 << 25), /* chipset that should use
-                                                       SATA_LPM_POLICY
+                                                       SATA_MOBILE_LPM_POLICY
                                                        as default lpm_policy */
        AHCI_HFLAG_SUSPEND_PHYS         = (1 << 26), /* handle PHYs during
                                                        suspend/resume */
index ab8552b1ff2a14ad548ba066b93bf3b0bc0f7d83..f61795c546cf128deb6dbb84dedcf4e7ab896a09 100644 (file)
@@ -549,15 +549,10 @@ static int brcm_ahci_remove(struct platform_device *pdev)
        struct ata_host *host = dev_get_drvdata(&pdev->dev);
        struct ahci_host_priv *hpriv = host->private_data;
        struct brcm_ahci_priv *priv = hpriv->plat_data;
-       int ret;
 
        brcm_sata_phys_disable(priv);
 
-       ret = ata_platform_remove_one(pdev);
-       if (ret)
-               return ret;
-
-       return 0;
+       return ata_platform_remove_one(pdev);
 }
 
 static void brcm_ahci_shutdown(struct platform_device *pdev)
index cceedde5112690f1cbaa873418d3463078f0ae08..40e816419f48c75d57930abccc015948c1c2551b 100644 (file)
@@ -96,7 +96,8 @@ struct ata_force_param {
        unsigned long   xfer_mask;
        unsigned int    horkage_on;
        unsigned int    horkage_off;
-       u16             lflags;
+       u16             lflags_on;
+       u16             lflags_off;
 };
 
 struct ata_force_ent {
@@ -386,11 +387,17 @@ static void ata_force_link_limits(struct ata_link *link)
                }
 
                /* let lflags stack */
-               if (fe->param.lflags) {
-                       link->flags |= fe->param.lflags;
+               if (fe->param.lflags_on) {
+                       link->flags |= fe->param.lflags_on;
                        ata_link_notice(link,
                                        "FORCE: link flag 0x%x forced -> 0x%x\n",
-                                       fe->param.lflags, link->flags);
+                                       fe->param.lflags_on, link->flags);
+               }
+               if (fe->param.lflags_off) {
+                       link->flags &= ~fe->param.lflags_off;
+                       ata_link_notice(link,
+                               "FORCE: link flag 0x%x cleared -> 0x%x\n",
+                               fe->param.lflags_off, link->flags);
                }
        }
 }
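
Editor's note: splitting the single lflags field into lflags_on/lflags_off lets one libata.force entry set link flags and a later one clear them again, with later entries winning. A sketch of the stacking rule on plain bitmasks (the helper name is illustrative, not part of libata):

    /* Apply one force entry: set the "on" bits, then strip the "off" bits. */
    static unsigned int apply_force(unsigned int flags,
                                    unsigned int on, unsigned int off)
    {
            flags |= on;
            flags &= ~off;
            return flags;
    }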
@@ -898,7 +905,7 @@ EXPORT_SYMBOL_GPL(ata_xfer_mode2mask);
  *     RETURNS:
  *     Matching xfer_shift, -1 if no match found.
  */
-int ata_xfer_mode2shift(unsigned long xfer_mode)
+int ata_xfer_mode2shift(u8 xfer_mode)
 {
        const struct ata_xfer_ent *ent;
 
@@ -1398,7 +1405,7 @@ unsigned long ata_id_xfermask(const u16 *id)
 
                /* But wait.. there's more. Design your standards by
                 * committee and you too can get a free iordy field to
-                * process. However its the speeds not the modes that
+                * process. However, it is the speeds, not the modes, that
                 * are supported... Note drivers using the timing API
                 * will get this right anyway
                 */
@@ -3898,7 +3905,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        /* Devices where NCQ should be avoided */
        /* NCQ is slow */
        { "WDC WD740ADFD-00",   NULL,           ATA_HORKAGE_NONCQ },
-       { "WDC WD740ADFD-00NLR1", NULL,         ATA_HORKAGE_NONCQ, },
+       { "WDC WD740ADFD-00NLR1", NULL,         ATA_HORKAGE_NONCQ },
        /* http://thread.gmane.org/gmane.linux.ide/14907 */
        { "FUJITSU MHT2060BH",  NULL,           ATA_HORKAGE_NONCQ },
        /* NCQ is broken */
@@ -3924,23 +3931,23 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        /* drives which fail FPDMA_AA activation (some may freeze afterwards)
           the ST disks also have LPM issues */
        { "ST1000LM024 HN-M101MBB", NULL,       ATA_HORKAGE_BROKEN_FPDMA_AA |
-                                               ATA_HORKAGE_NOLPM, },
+                                               ATA_HORKAGE_NOLPM },
        { "VB0250EAVER",        "HPG7",         ATA_HORKAGE_BROKEN_FPDMA_AA },
 
        /* Blacklist entries taken from Silicon Image 3124/3132
           Windows driver .inf file - also several Linux problem reports */
-       { "HTS541060G9SA00",    "MB3OC60D",     ATA_HORKAGE_NONCQ, },
-       { "HTS541080G9SA00",    "MB4OC60D",     ATA_HORKAGE_NONCQ, },
-       { "HTS541010G9SA00",    "MBZOC60D",     ATA_HORKAGE_NONCQ, },
+       { "HTS541060G9SA00",    "MB3OC60D",     ATA_HORKAGE_NONCQ },
+       { "HTS541080G9SA00",    "MB4OC60D",     ATA_HORKAGE_NONCQ },
+       { "HTS541010G9SA00",    "MBZOC60D",     ATA_HORKAGE_NONCQ },
 
        /* https://bugzilla.kernel.org/show_bug.cgi?id=15573 */
-       { "C300-CTFDDAC128MAG", "0001",         ATA_HORKAGE_NONCQ, },
+       { "C300-CTFDDAC128MAG", "0001",         ATA_HORKAGE_NONCQ },
 
        /* Sandisk SD7/8/9s lock up hard on large trims */
-       { "SanDisk SD[789]*",   NULL,           ATA_HORKAGE_MAX_TRIM_128M, },
+       { "SanDisk SD[789]*",   NULL,           ATA_HORKAGE_MAX_TRIM_128M },
 
        /* devices which puke on READ_NATIVE_MAX */
-       { "HDS724040KLSA80",    "KFAOA20N",     ATA_HORKAGE_BROKEN_HPA, },
+       { "HDS724040KLSA80",    "KFAOA20N",     ATA_HORKAGE_BROKEN_HPA },
        { "WDC WD3200JD-00KLB0", "WD-WCAMR1130137", ATA_HORKAGE_BROKEN_HPA },
        { "WDC WD2500JD-00HBB0", "WD-WMAL71490727", ATA_HORKAGE_BROKEN_HPA },
        { "MAXTOR 6L080L4",     "A93.0500",     ATA_HORKAGE_BROKEN_HPA },
@@ -3949,22 +3956,22 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        { "OCZ-VERTEX",             "1.30",     ATA_HORKAGE_BROKEN_HPA },
 
        /* Devices which report 1 sector over size HPA */
-       { "ST340823A",          NULL,           ATA_HORKAGE_HPA_SIZE, },
-       { "ST320413A",          NULL,           ATA_HORKAGE_HPA_SIZE, },
-       { "ST310211A",          NULL,           ATA_HORKAGE_HPA_SIZE, },
+       { "ST340823A",          NULL,           ATA_HORKAGE_HPA_SIZE },
+       { "ST320413A",          NULL,           ATA_HORKAGE_HPA_SIZE },
+       { "ST310211A",          NULL,           ATA_HORKAGE_HPA_SIZE },
 
        /* Devices which get the IVB wrong */
-       { "QUANTUM FIREBALLlct10 05", "A03.0900", ATA_HORKAGE_IVB, },
+       { "QUANTUM FIREBALLlct10 05", "A03.0900", ATA_HORKAGE_IVB },
        /* Maybe we should just blacklist TSSTcorp... */
-       { "TSSTcorp CDDVDW SH-S202[HJN]", "SB0[01]",  ATA_HORKAGE_IVB, },
+       { "TSSTcorp CDDVDW SH-S202[HJN]", "SB0[01]",  ATA_HORKAGE_IVB },
 
        /* Devices that do not need bridging limits applied */
-       { "MTRON MSP-SATA*",            NULL,   ATA_HORKAGE_BRIDGE_OK, },
-       { "BUFFALO HD-QSU2/R5",         NULL,   ATA_HORKAGE_BRIDGE_OK, },
+       { "MTRON MSP-SATA*",            NULL,   ATA_HORKAGE_BRIDGE_OK },
+       { "BUFFALO HD-QSU2/R5",         NULL,   ATA_HORKAGE_BRIDGE_OK },
 
        /* Devices which aren't very happy with higher link speeds */
-       { "WD My Book",                 NULL,   ATA_HORKAGE_1_5_GBPS, },
-       { "Seagate FreeAgent GoFlex",   NULL,   ATA_HORKAGE_1_5_GBPS, },
+       { "WD My Book",                 NULL,   ATA_HORKAGE_1_5_GBPS },
+       { "Seagate FreeAgent GoFlex",   NULL,   ATA_HORKAGE_1_5_GBPS },
 
        /*
         * Devices which choke on SETXFER.  Applies only if both the
@@ -3982,54 +3989,57 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        /* 512GB MX100 with MU01 firmware has both queued TRIM and LPM issues */
        { "Crucial_CT512MX100*",        "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM |
-                                               ATA_HORKAGE_NOLPM, },
+                                               ATA_HORKAGE_NOLPM },
        /* 512GB MX100 with newer firmware has only LPM issues */
        { "Crucial_CT512MX100*",        NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM |
-                                               ATA_HORKAGE_NOLPM, },
+                                               ATA_HORKAGE_NOLPM },
 
        /* 480GB+ M500 SSDs have both queued TRIM and LPM issues */
        { "Crucial_CT480M500*",         NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM |
-                                               ATA_HORKAGE_NOLPM, },
+                                               ATA_HORKAGE_NOLPM },
        { "Crucial_CT960M500*",         NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM |
-                                               ATA_HORKAGE_NOLPM, },
+                                               ATA_HORKAGE_NOLPM },
 
        /* These specific Samsung models/firmware-revs do not handle LPM well */
-       { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, },
-       { "SAMSUNG SSD PM830 mSATA *",  "CXM13D1Q", ATA_HORKAGE_NOLPM, },
-       { "SAMSUNG MZ7TD256HAFV-000L9", NULL,       ATA_HORKAGE_NOLPM, },
-       { "SAMSUNG MZ7TE512HMHP-000L1", "EXT06L0Q", ATA_HORKAGE_NOLPM, },
+       { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM },
+       { "SAMSUNG SSD PM830 mSATA *",  "CXM13D1Q", ATA_HORKAGE_NOLPM },
+       { "SAMSUNG MZ7TD256HAFV-000L9", NULL,       ATA_HORKAGE_NOLPM },
+       { "SAMSUNG MZ7TE512HMHP-000L1", "EXT06L0Q", ATA_HORKAGE_NOLPM },
 
        /* devices that don't properly handle queued TRIM commands */
        { "Micron_M500IT_*",            "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Micron_M500_*",              NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Crucial_CT*M500*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Micron_M5[15]0_*",           "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Crucial_CT*M550*",           "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Crucial_CT*MX100*",          "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "Samsung SSD 840 EVO*",       NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_NO_DMA_LOG |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Samsung SSD 840*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Samsung SSD 850*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Samsung SSD 860*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM |
-                                               ATA_HORKAGE_NO_NCQ_ON_ATI, },
+                                               ATA_HORKAGE_NO_NCQ_ON_ATI },
        { "Samsung SSD 870*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM |
-                                               ATA_HORKAGE_NO_NCQ_ON_ATI, },
+                                               ATA_HORKAGE_NO_NCQ_ON_ATI },
        { "FCCT*M500*",                 NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
 
        /* devices that don't properly handle TRIM commands */
-       { "SuperSSpeed S238*",          NULL,   ATA_HORKAGE_NOTRIM, },
-       { "M88V29*",                    NULL,   ATA_HORKAGE_NOTRIM, },
+       { "SuperSSpeed S238*",          NULL,   ATA_HORKAGE_NOTRIM },
+       { "M88V29*",                    NULL,   ATA_HORKAGE_NOTRIM },
 
        /*
         * As defined, the DRAT (Deterministic Read After Trim) and RZAT
@@ -4047,16 +4057,16 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
         * The intel 510 drive has buggy DRAT/RZAT. Explicitly exclude
         * that model before whitelisting all other intel SSDs.
         */
-       { "INTEL*SSDSC2MH*",            NULL,   0, },
+       { "INTEL*SSDSC2MH*",            NULL,   0 },
 
-       { "Micron*",                    NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "Crucial*",                   NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "INTEL*SSD*",                 NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "SSD*INTEL*",                 NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "Samsung*SSD*",               NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "SAMSUNG*SSD*",               NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "SAMSUNG*MZ7KM*",             NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "ST[1248][0248]0[FH]*",       NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "Micron*",                    NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "Crucial*",                   NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "INTEL*SSD*",                 NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "SSD*INTEL*",                 NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "Samsung*SSD*",               NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "SAMSUNG*SSD*",               NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "SAMSUNG*MZ7KM*",             NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "ST[1248][0248]0[FH]*",       NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM },
 
        /*
         * Some WD SATA-I drives spin up and down erratically when the link
@@ -4563,42 +4573,6 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
 #endif /* __BIG_ENDIAN */
 }
 
-/**
- *     ata_qc_new_init - Request an available ATA command, and initialize it
- *     @dev: Device from whom we request an available command structure
- *     @tag: tag
- *
- *     LOCKING:
- *     None.
- */
-
-struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
-{
-       struct ata_port *ap = dev->link->ap;
-       struct ata_queued_cmd *qc;
-
-       /* no command while frozen */
-       if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
-               return NULL;
-
-       /* libsas case */
-       if (ap->flags & ATA_FLAG_SAS_HOST) {
-               tag = ata_sas_allocate_tag(ap);
-               if (tag < 0)
-                       return NULL;
-       }
-
-       qc = __ata_qc_from_tag(ap, tag);
-       qc->tag = qc->hw_tag = tag;
-       qc->scsicmd = NULL;
-       qc->ap = ap;
-       qc->dev = dev;
-
-       ata_qc_reinit(qc);
-
-       return qc;
-}
-
 /**
  *     ata_qc_free - free unused ata_queued_cmd
  *     @qc: Command to complete
@@ -4611,19 +4585,9 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
  */
 void ata_qc_free(struct ata_queued_cmd *qc)
 {
-       struct ata_port *ap;
-       unsigned int tag;
-
-       WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */
-       ap = qc->ap;
-
        qc->flags = 0;
-       tag = qc->tag;
-       if (ata_tag_valid(tag)) {
+       if (ata_tag_valid(qc->tag))
                qc->tag = ATA_TAG_POISON;
-               if (ap->flags & ATA_FLAG_SAS_HOST)
-                       ata_sas_free_tag(tag, ap);
-       }
 }
 
 void __ata_qc_complete(struct ata_queued_cmd *qc)
@@ -5602,7 +5566,7 @@ static void ata_finalize_port_ops(struct ata_port_operations *ops)
  *     Start and then freeze ports of @host.  Started status is
  *     recorded in host->flags, so this function can be called
  *     multiple times.  Ports are guaranteed to get started only
- *     once.  If host->ops isn't initialized yet, its set to the
+ *     once.  If host->ops is not initialized yet, it is set to the
  *     first non-dummy port ops.
  *
  *     LOCKING:
@@ -6143,67 +6107,113 @@ int ata_platform_remove_one(struct platform_device *pdev)
 EXPORT_SYMBOL_GPL(ata_platform_remove_one);
 
 #ifdef CONFIG_ATA_FORCE
+
+#define force_cbl(name, flag)                          \
+       { #name,        .cbl            = (flag) }
+
+#define force_spd_limit(spd, val)                      \
+       { #spd, .spd_limit              = (val) }
+
+#define force_xfer(mode, shift)                                \
+       { #mode,        .xfer_mask      = (1UL << (shift)) }
+
+#define force_lflag_on(name, flags)                    \
+       { #name,        .lflags_on      = (flags) }
+
+#define force_lflag_onoff(name, flags)                 \
+       { "no" #name,   .lflags_on      = (flags) },    \
+       { #name,        .lflags_off     = (flags) }
+
+#define force_horkage_on(name, flag)                   \
+       { #name,        .horkage_on     = (flag) }
+
+#define force_horkage_onoff(name, flag)                        \
+       { "no" #name,   .horkage_on     = (flag) },     \
+       { #name,        .horkage_off    = (flag) }
+
+static const struct ata_force_param force_tbl[] __initconst = {
+       force_cbl(40c,                  ATA_CBL_PATA40),
+       force_cbl(80c,                  ATA_CBL_PATA80),
+       force_cbl(short40c,             ATA_CBL_PATA40_SHORT),
+       force_cbl(unk,                  ATA_CBL_PATA_UNK),
+       force_cbl(ign,                  ATA_CBL_PATA_IGN),
+       force_cbl(sata,                 ATA_CBL_SATA),
+
+       force_spd_limit(1.5Gbps,        1),
+       force_spd_limit(3.0Gbps,        2),
+
+       force_xfer(pio0,                ATA_SHIFT_PIO + 0),
+       force_xfer(pio1,                ATA_SHIFT_PIO + 1),
+       force_xfer(pio2,                ATA_SHIFT_PIO + 2),
+       force_xfer(pio3,                ATA_SHIFT_PIO + 3),
+       force_xfer(pio4,                ATA_SHIFT_PIO + 4),
+       force_xfer(pio5,                ATA_SHIFT_PIO + 5),
+       force_xfer(pio6,                ATA_SHIFT_PIO + 6),
+       force_xfer(mwdma0,              ATA_SHIFT_MWDMA + 0),
+       force_xfer(mwdma1,              ATA_SHIFT_MWDMA + 1),
+       force_xfer(mwdma2,              ATA_SHIFT_MWDMA + 2),
+       force_xfer(mwdma3,              ATA_SHIFT_MWDMA + 3),
+       force_xfer(mwdma4,              ATA_SHIFT_MWDMA + 4),
+       force_xfer(udma0,               ATA_SHIFT_UDMA + 0),
+       force_xfer(udma16,              ATA_SHIFT_UDMA + 0),
+       force_xfer(udma/16,             ATA_SHIFT_UDMA + 0),
+       force_xfer(udma1,               ATA_SHIFT_UDMA + 1),
+       force_xfer(udma25,              ATA_SHIFT_UDMA + 1),
+       force_xfer(udma/25,             ATA_SHIFT_UDMA + 1),
+       force_xfer(udma2,               ATA_SHIFT_UDMA + 2),
+       force_xfer(udma33,              ATA_SHIFT_UDMA + 2),
+       force_xfer(udma/33,             ATA_SHIFT_UDMA + 2),
+       force_xfer(udma3,               ATA_SHIFT_UDMA + 3),
+       force_xfer(udma44,              ATA_SHIFT_UDMA + 3),
+       force_xfer(udma/44,             ATA_SHIFT_UDMA + 3),
+       force_xfer(udma4,               ATA_SHIFT_UDMA + 4),
+       force_xfer(udma66,              ATA_SHIFT_UDMA + 4),
+       force_xfer(udma/66,             ATA_SHIFT_UDMA + 4),
+       force_xfer(udma5,               ATA_SHIFT_UDMA + 5),
+       force_xfer(udma100,             ATA_SHIFT_UDMA + 5),
+       force_xfer(udma/100,            ATA_SHIFT_UDMA + 5),
+       force_xfer(udma6,               ATA_SHIFT_UDMA + 6),
+       force_xfer(udma133,             ATA_SHIFT_UDMA + 6),
+       force_xfer(udma/133,            ATA_SHIFT_UDMA + 6),
+       force_xfer(udma7,               ATA_SHIFT_UDMA + 7),
+
+       force_lflag_on(nohrst,          ATA_LFLAG_NO_HRST),
+       force_lflag_on(nosrst,          ATA_LFLAG_NO_SRST),
+       force_lflag_on(norst,           ATA_LFLAG_NO_HRST | ATA_LFLAG_NO_SRST),
+       force_lflag_on(rstonce,         ATA_LFLAG_RST_ONCE),
+       force_lflag_onoff(dbdelay,      ATA_LFLAG_NO_DEBOUNCE_DELAY),
+
+       force_horkage_onoff(ncq,        ATA_HORKAGE_NONCQ),
+       force_horkage_onoff(ncqtrim,    ATA_HORKAGE_NO_NCQ_TRIM),
+       force_horkage_onoff(ncqati,     ATA_HORKAGE_NO_NCQ_ON_ATI),
+
+       force_horkage_onoff(trim,       ATA_HORKAGE_NOTRIM),
+       force_horkage_on(trim_zero,     ATA_HORKAGE_ZERO_AFTER_TRIM),
+       force_horkage_on(max_trim_128m, ATA_HORKAGE_MAX_TRIM_128M),
+
+       force_horkage_onoff(dma,        ATA_HORKAGE_NODMA),
+       force_horkage_on(atapi_dmadir,  ATA_HORKAGE_ATAPI_DMADIR),
+       force_horkage_on(atapi_mod16_dma, ATA_HORKAGE_ATAPI_MOD16_DMA),
+
+       force_horkage_onoff(dmalog,     ATA_HORKAGE_NO_DMA_LOG),
+       force_horkage_onoff(iddevlog,   ATA_HORKAGE_NO_ID_DEV_LOG),
+       force_horkage_onoff(logdir,     ATA_HORKAGE_NO_LOG_DIR),
+
+       force_horkage_on(max_sec_128,   ATA_HORKAGE_MAX_SEC_128),
+       force_horkage_on(max_sec_1024,  ATA_HORKAGE_MAX_SEC_1024),
+       force_horkage_on(max_sec_lba48, ATA_HORKAGE_MAX_SEC_LBA48),
+
+       force_horkage_onoff(lpm,        ATA_HORKAGE_NOLPM),
+       force_horkage_onoff(setxfer,    ATA_HORKAGE_NOSETXFER),
+       force_horkage_on(dump_id,       ATA_HORKAGE_DUMP_ID),
+
+       force_horkage_on(disable,       ATA_HORKAGE_DISABLE),
+};
+
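
Editor's note: each *_onoff macro above emits a matched pair of table entries — a "no"-prefixed name that sets the bit and a bare name that clears it. For example, force_horkage_onoff(ncq, ATA_HORKAGE_NONCQ) expands to:

    { "noncq",      .horkage_on     = (ATA_HORKAGE_NONCQ) },
    { "ncq",        .horkage_off    = (ATA_HORKAGE_NONCQ) },

which reproduces the hand-written noncq/ncq pair removed from the old in-function table below.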
 static int __init ata_parse_force_one(char **cur,
                                      struct ata_force_ent *force_ent,
                                      const char **reason)
 {
-       static const struct ata_force_param force_tbl[] __initconst = {
-               { "40c",        .cbl            = ATA_CBL_PATA40 },
-               { "80c",        .cbl            = ATA_CBL_PATA80 },
-               { "short40c",   .cbl            = ATA_CBL_PATA40_SHORT },
-               { "unk",        .cbl            = ATA_CBL_PATA_UNK },
-               { "ign",        .cbl            = ATA_CBL_PATA_IGN },
-               { "sata",       .cbl            = ATA_CBL_SATA },
-               { "1.5Gbps",    .spd_limit      = 1 },
-               { "3.0Gbps",    .spd_limit      = 2 },
-               { "noncq",      .horkage_on     = ATA_HORKAGE_NONCQ },
-               { "ncq",        .horkage_off    = ATA_HORKAGE_NONCQ },
-               { "noncqtrim",  .horkage_on     = ATA_HORKAGE_NO_NCQ_TRIM },
-               { "ncqtrim",    .horkage_off    = ATA_HORKAGE_NO_NCQ_TRIM },
-               { "noncqati",   .horkage_on     = ATA_HORKAGE_NO_NCQ_ON_ATI },
-               { "ncqati",     .horkage_off    = ATA_HORKAGE_NO_NCQ_ON_ATI },
-               { "dump_id",    .horkage_on     = ATA_HORKAGE_DUMP_ID },
-               { "pio0",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 0) },
-               { "pio1",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 1) },
-               { "pio2",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 2) },
-               { "pio3",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 3) },
-               { "pio4",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 4) },
-               { "pio5",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 5) },
-               { "pio6",       .xfer_mask      = 1 << (ATA_SHIFT_PIO + 6) },
-               { "mwdma0",     .xfer_mask      = 1 << (ATA_SHIFT_MWDMA + 0) },
-               { "mwdma1",     .xfer_mask      = 1 << (ATA_SHIFT_MWDMA + 1) },
-               { "mwdma2",     .xfer_mask      = 1 << (ATA_SHIFT_MWDMA + 2) },
-               { "mwdma3",     .xfer_mask      = 1 << (ATA_SHIFT_MWDMA + 3) },
-               { "mwdma4",     .xfer_mask      = 1 << (ATA_SHIFT_MWDMA + 4) },
-               { "udma0",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 0) },
-               { "udma16",     .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 0) },
-               { "udma/16",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 0) },
-               { "udma1",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 1) },
-               { "udma25",     .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 1) },
-               { "udma/25",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 1) },
-               { "udma2",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 2) },
-               { "udma33",     .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 2) },
-               { "udma/33",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 2) },
-               { "udma3",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 3) },
-               { "udma44",     .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 3) },
-               { "udma/44",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 3) },
-               { "udma4",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 4) },
-               { "udma66",     .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 4) },
-               { "udma/66",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 4) },
-               { "udma5",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 5) },
-               { "udma100",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 5) },
-               { "udma/100",   .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 5) },
-               { "udma6",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 6) },
-               { "udma133",    .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 6) },
-               { "udma/133",   .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 6) },
-               { "udma7",      .xfer_mask      = 1 << (ATA_SHIFT_UDMA + 7) },
-               { "nohrst",     .lflags         = ATA_LFLAG_NO_HRST },
-               { "nosrst",     .lflags         = ATA_LFLAG_NO_SRST },
-               { "norst",      .lflags         = ATA_LFLAG_NO_HRST | ATA_LFLAG_NO_SRST },
-               { "rstonce",    .lflags         = ATA_LFLAG_RST_ONCE },
-               { "atapi_dmadir", .horkage_on   = ATA_HORKAGE_ATAPI_DMADIR },
-               { "disable",    .horkage_on     = ATA_HORKAGE_DISABLE },
-       };
        char *start = *cur, *p = *cur;
        char *id, *val, *endp;
        const struct ata_force_param *match_fp = NULL;
@@ -6285,7 +6295,7 @@ static void __init ata_parse_force_param(void)
        int last_port = -1, last_device = -1;
        char *p, *cur, *next;
 
-       /* calculate maximum number of params and allocate force_tbl */
+       /* Calculate maximum number of params and allocate ata_force_tbl */
        for (p = ata_force_param_buf; *p; p++)
                if (*p == ',')
                        size++;
index 044a16daa2d42e040a936b34ab46251b0b02fb0e..7a5fe41aa5ae10eda99cf10c3f8d895de4995f3c 100644 (file)
@@ -1268,31 +1268,6 @@ int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap)
 }
 EXPORT_SYMBOL_GPL(ata_sas_queuecmd);
 
-int ata_sas_allocate_tag(struct ata_port *ap)
-{
-       unsigned int max_queue = ap->host->n_tags;
-       unsigned int i, tag;
-
-       for (i = 0, tag = ap->sas_last_tag + 1; i < max_queue; i++, tag++) {
-               tag = tag < max_queue ? tag : 0;
-
-               /* the last tag is reserved for internal command. */
-               if (ata_tag_internal(tag))
-                       continue;
-
-               if (!test_and_set_bit(tag, &ap->sas_tag_allocated)) {
-                       ap->sas_last_tag = tag;
-                       return tag;
-               }
-       }
-       return -1;
-}
-
-void ata_sas_free_tag(unsigned int tag, struct ata_port *ap)
-{
-       clear_bit(tag, &ap->sas_tag_allocated);
-}
-
 /**
  *     sata_async_notification - SATA async notification handler
  *     @ap: ATA port where async notification is received
index 06c9d90238d9e217a8b6bec8a14307ba8211056b..42cecf95a4e589bdd21444bd547aeb13a080a411 100644 (file)
@@ -638,24 +638,48 @@ EXPORT_SYMBOL_GPL(ata_scsi_ioctl);
 static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev,
                                              struct scsi_cmnd *cmd)
 {
+       struct ata_port *ap = dev->link->ap;
        struct ata_queued_cmd *qc;
+       int tag;
 
-       qc = ata_qc_new_init(dev, scsi_cmd_to_rq(cmd)->tag);
-       if (qc) {
-               qc->scsicmd = cmd;
-               qc->scsidone = scsi_done;
-
-               qc->sg = scsi_sglist(cmd);
-               qc->n_elem = scsi_sg_count(cmd);
+       if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
+               goto fail;
 
-               if (scsi_cmd_to_rq(cmd)->rq_flags & RQF_QUIET)
-                       qc->flags |= ATA_QCFLAG_QUIET;
+       if (ap->flags & ATA_FLAG_SAS_HOST) {
+               /*
+                * SAS hosts may queue > ATA_MAX_QUEUE commands, so use the
+                * unique per-device budget token as a tag.
+                */
+               if (WARN_ON_ONCE(cmd->budget_token >= ATA_MAX_QUEUE))
+                       goto fail;
+               tag = cmd->budget_token;
        } else {
-               cmd->result = (DID_OK << 16) | SAM_STAT_TASK_SET_FULL;
-               scsi_done(cmd);
+               tag = scsi_cmd_to_rq(cmd)->tag;
        }
 
+       qc = __ata_qc_from_tag(ap, tag);
+       qc->tag = qc->hw_tag = tag;
+       qc->ap = ap;
+       qc->dev = dev;
+
+       ata_qc_reinit(qc);
+
+       qc->scsicmd = cmd;
+       qc->scsidone = scsi_done;
+
+       qc->sg = scsi_sglist(cmd);
+       qc->n_elem = scsi_sg_count(cmd);
+
+       if (scsi_cmd_to_rq(cmd)->rq_flags & RQF_QUIET)
+               qc->flags |= ATA_QCFLAG_QUIET;
+
        return qc;
+
+fail:
+       set_host_byte(cmd, DID_OK);
+       set_status_byte(cmd, SAM_STAT_TASK_SET_FULL);
+       scsi_done(cmd);
+       return NULL;
 }
 
 static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc)
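
Editor's note: the rewritten ata_scsi_qc_new() absorbs the old ata_qc_new_init() and no longer needs the bitmap-based SAS tag allocator removed from libata-sata.c above — normal hosts keep using the block layer request tag, while SAS hosts reuse the per-device budget token, which is already unique and below ATA_MAX_QUEUE. Condensed to just the tag-selection rule (a hypothetical helper, not a libata function):

    /* Illustrative only: choose a qc tag under the rules above. */
    static int pick_qc_tag(bool sas_host, int budget_token, int rq_tag)
    {
            if (sas_host)
                    return budget_token;    /* unique per device, < ATA_MAX_QUEUE */
            return rq_tag;                  /* block layer tag for this port */
    }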
index b3be7a8f5bea6079e5e5d80e0a55b32494483c7f..b1666adc1c3a39160fa4b4f133fc8719990999dc 100644 (file)
@@ -1634,7 +1634,7 @@ EXPORT_SYMBOL_GPL(ata_sff_interrupt);
 
 void ata_sff_lost_interrupt(struct ata_port *ap)
 {
-       u8 status;
+       u8 status = 0;
        struct ata_queued_cmd *qc;
 
        /* Only one outstanding command per SFF channel */
index c9c2496d91ea474a8f02655bceedf1e71e3c0c45..926a7f41303dc534435a9bb72fd79c2cda04bc36 100644 (file)
@@ -44,7 +44,6 @@ static inline void ata_force_cbl(struct ata_port *ap) { }
 #endif
 extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
 extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
-extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag);
 extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
                           u64 block, u32 n_block, unsigned int tf_flags,
                           unsigned int tag, int class);
@@ -91,18 +90,6 @@ extern unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
 
 #define to_ata_port(d) container_of(d, struct ata_port, tdev)
 
-/* libata-sata.c */
-#ifdef CONFIG_SATA_HOST
-int ata_sas_allocate_tag(struct ata_port *ap);
-void ata_sas_free_tag(unsigned int tag, struct ata_port *ap);
-#else
-static inline int ata_sas_allocate_tag(struct ata_port *ap)
-{
-       return -EOPNOTSUPP;
-}
-static inline void ata_sas_free_tag(unsigned int tag, struct ata_port *ap) { }
-#endif
-
 /* libata-acpi.c */
 #ifdef CONFIG_ATA_ACPI
 extern unsigned int ata_acpi_gtf_filter;
index 2e35505b683c764999684dda38936f5a7b917e70..0117df0fe3c5932060a82a6a4dd9f975cb8e1960 100644 (file)
@@ -536,8 +536,8 @@ static int pata_ftide010_probe(struct platform_device *pdev)
        return 0;
 
 err_dis_clk:
-       if (!IS_ERR(ftide->pclk))
-               clk_disable_unprepare(ftide->pclk);
+       clk_disable_unprepare(ftide->pclk);
+
        return ret;
 }
 
@@ -547,8 +547,7 @@ static int pata_ftide010_remove(struct platform_device *pdev)
        struct ftide010 *ftide = host->private_data;
 
        ata_host_detach(ftide->host);
-       if (!IS_ERR(ftide->pclk))
-               clk_disable_unprepare(ftide->pclk);
+       clk_disable_unprepare(ftide->pclk);
 
        return 0;
 }
index 0c5a51970fbf54994b1536cb5c04dbbe9ec1b95c..014ccb0f45dc4628f8422d079f31727759c3537c 100644 (file)
@@ -77,6 +77,8 @@ static int marvell_cable_detect(struct ata_port *ap)
        switch(ap->port_no)
        {
        case 0:
+               if (!ap->ioaddr.bmdma_addr)
+                       return ATA_CBL_PATA_UNK;
                if (ioread8(ap->ioaddr.bmdma_addr + 1) & 1)
                        return ATA_CBL_PATA40;
                return ATA_CBL_PATA80;
index 3250ef317df6bd2202b52e1298a8caca6dea7587..03b6ae37a57826729bb6806fc7ba749d4d68f8bf 100644 (file)
 #include <linux/gfp.h>
 #include <linux/delay.h>
 #include <linux/libata.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/types.h>
 
 #include <asm/cacheflush.h>
-#include <asm/prom.h>
 #include <asm/mpc52xx.h>
 
 #include <linux/fsl/bestcomm/bestcomm.h>
index 0da58ce20d82b5e00e741fb96d665646c418a256..67ef2e26d7df13d4246542caaf6b691122fc8295 100644 (file)
  *     critical.
  */
 
-static unsigned long sil680_selreg(struct ata_port *ap, int r)
+static int sil680_selreg(struct ata_port *ap, int r)
 {
-       unsigned long base = 0xA0 + r;
-       base += (ap->port_no << 4);
-       return base;
+       return 0xA0 + (ap->port_no << 4) + r;
 }
 
 /**
@@ -65,12 +63,9 @@ static unsigned long sil680_selreg(struct ata_port *ap, int r)
  *     the unit shift.
  */
 
-static unsigned long sil680_seldev(struct ata_port *ap, struct ata_device *adev, int r)
+static int sil680_seldev(struct ata_port *ap, struct ata_device *adev, int r)
 {
-       unsigned long base = 0xA0 + r;
-       base += (ap->port_no << 4);
-       base |= adev->devno ? 2 : 0;
-       return base;
+       return 0xA0 + (ap->port_no << 4) + r + (adev->devno << 1);
 }
 
 
@@ -85,8 +80,9 @@ static unsigned long sil680_seldev(struct ata_port *ap, struct ata_device *adev,
 static int sil680_cable_detect(struct ata_port *ap)
 {
        struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-       unsigned long addr = sil680_selreg(ap, 0);
+       int addr = sil680_selreg(ap, 0);
        u8 ata66;
+
        pci_read_config_byte(pdev, addr, &ata66);
        if (ata66 & 1)
                return ATA_CBL_PATA80;
@@ -113,9 +109,9 @@ static void sil680_set_piomode(struct ata_port *ap, struct ata_device *adev)
                0x328A, 0x2283, 0x1281, 0x10C3, 0x10C1
        };
 
-       unsigned long tfaddr = sil680_selreg(ap, 0x02);
-       unsigned long addr = sil680_seldev(ap, adev, 0x04);
-       unsigned long addr_mask = 0x80 + 4 * ap->port_no;
+       int tfaddr = sil680_selreg(ap, 0x02);
+       int addr = sil680_seldev(ap, adev, 0x04);
+       int addr_mask = 0x80 + 4 * ap->port_no;
        struct pci_dev *pdev = to_pci_dev(ap->host->dev);
        int pio = adev->pio_mode - XFER_PIO_0;
        int lowest_pio = pio;
@@ -165,9 +161,9 @@ static void sil680_set_dmamode(struct ata_port *ap, struct ata_device *adev)
        static const u16 dma_table[3] = { 0x2208, 0x10C2, 0x10C1 };
 
        struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-       unsigned long ma = sil680_seldev(ap, adev, 0x08);
-       unsigned long ua = sil680_seldev(ap, adev, 0x0C);
-       unsigned long addr_mask = 0x80 + 4 * ap->port_no;
+       int ma = sil680_seldev(ap, adev, 0x08);
+       int ua = sil680_seldev(ap, adev, 0x0C);
+       int addr_mask = 0x80 + 4 * ap->port_no;
        int port_shift = adev->devno * 4;
        u8 scsc, mode;
        u16 multi, ultra;
@@ -219,7 +215,7 @@ static void sil680_sff_exec_command(struct ata_port *ap,
 static bool sil680_sff_irq_check(struct ata_port *ap)
 {
        struct pci_dev *pdev    = to_pci_dev(ap->host->dev);
-       unsigned long addr      = sil680_selreg(ap, 1);
+       int addr                = sil680_selreg(ap, 1);
        u8 val;
 
        pci_read_config_byte(pdev, addr, &val);
index 439ca882f73c7fb5b45f5abb725898f72ed32fd0..215c02d4056a7f190f9aec30970be0b58620c6f4 100644 (file)
@@ -248,9 +248,9 @@ static void via_do_set_mode(struct ata_port *ap, struct ata_device *adev,
        struct pci_dev *pdev = to_pci_dev(ap->host->dev);
        struct ata_device *peer = ata_dev_pair(adev);
        struct ata_timing t, p;
-       static int via_clock = 33333;   /* Bus clock in kHZ */
-       unsigned long T =  1000000000 / via_clock;
-       unsigned long UT = T;
+       const int via_clock = 33333;    /* Bus clock in kHz */
+       const int T = 1000000000 / via_clock;
+       int UT = T;
        int ut;
        int offset = 3 - (2*ap->port_no) - adev->devno;
 
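
Editor's note: as a worked check of the constants above — at via_clock = 33333 kHz (a 33.333 MHz bus), T = 1000000000 / 33333 = 30000, i.e. the 30 ns cycle time expressed in picoseconds (period in ps = 10^9 / frequency in kHz). Making T const also lets the compiler fold the division at build time:

    const int via_clock = 33333;            /* bus clock in kHz */
    const int T = 1000000000 / via_clock;   /* 30000 ps == 30 ns per cycle */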
index bec33d781ae046f989742c0b381b06ca6bd92beb..e3263e961045ac342636156d40986a11a85ebe33 100644 (file)
@@ -137,7 +137,11 @@ struct sata_dwc_device {
 #endif
 };
 
-#define SATA_DWC_QCMD_MAX      32
+/*
+ * Reserve one extra command and DMA management slot to account
+ * for libata's internal commands.
+ */
+#define SATA_DWC_QCMD_MAX      (ATA_MAX_QUEUE + 1)
 
 struct sata_dwc_device_port {
        struct sata_dwc_device  *hsdev;
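
Editor's note: assuming libata's ATA_MAX_QUEUE of 32 (its value at the time of this change; the constant is not shown in this diff), the new bound works out as:

    /* SATA_DWC_QCMD_MAX = ATA_MAX_QUEUE + 1 = 32 + 1 = 33:
     * 32 regular NCQ tags plus the slot used by libata's internal
     * command, which the old hard-coded 32 left no room for. */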
index 00e1c7941d0ea0dd4ae5841959b136789cf6c9f7..b729e9919bb0c7c03daffd1b63558906a75174f6 100644 (file)
@@ -318,7 +318,6 @@ static int gemini_sata_probe(struct platform_device *pdev)
        struct device_node *np = dev->of_node;
        struct sata_gemini *sg;
        struct regmap *map;
-       struct resource *res;
        enum gemini_muxmode muxmode;
        u32 gmode;
        u32 gmask;
@@ -329,11 +328,7 @@ static int gemini_sata_probe(struct platform_device *pdev)
                return -ENOMEM;
        sg->dev = dev;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
-
-       sg->base = devm_ioremap_resource(dev, res);
+       sg->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(sg->base))
                return PTR_ERR(sg->base);
 
index 1d6636ebaac5bbbf7d2844777395acdcc61cefaf..f73b836047cf5a4048e1124063fc7dcdb5e78654 100644 (file)
@@ -667,6 +667,15 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
                        core_mask = &cpu_topology[cpu].llc_sibling;
        }
 
+       /*
+        * For systems with no shared cpu-side LLC but with clusters defined,
+        * extend core_mask to cluster_siblings. The sched domain builder will
+        * then remove MC as redundant with CLS if SCHED_CLUSTER is enabled.
+        */
+       if (IS_ENABLED(CONFIG_SCHED_CLUSTER) &&
+           cpumask_subset(core_mask, &cpu_topology[cpu].cluster_sibling))
+               core_mask = &cpu_topology[cpu].cluster_sibling;
+
        return core_mask;
 }
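
Editor's note: cpumask_subset(a, b) is true when every CPU in a is also present in b; here it detects that the LLC-derived core mask does not reach beyond the cluster, in which case the wider cluster_sibling mask is the better MC-level grouping. A userspace sketch of the subset test on a single 64-bit mask word:

    #include <stdbool.h>
    #include <stdint.h>

    /* a is a subset of b iff no bit of a falls outside b. */
    static bool mask_subset(uint64_t a, uint64_t b)
    {
            return (a & ~b) == 0;
    }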
 
@@ -684,7 +693,7 @@ void update_siblings_masks(unsigned int cpuid)
        for_each_online_cpu(cpu) {
                cpu_topo = &cpu_topology[cpu];
 
-               if (cpuid_topo->llc_id == cpu_topo->llc_id) {
+               if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) {
                        cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
                        cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
                }
index af6bea56f4e25a9088499bef419ba0cdb87dbead..3fc3b5940bb31983d1fc0b1f65dede25c5b2c090 100644 (file)
@@ -296,6 +296,7 @@ int driver_deferred_probe_check_state(struct device *dev)
 
        return -EPROBE_DEFER;
 }
+EXPORT_SYMBOL_GPL(driver_deferred_probe_check_state);
 
 static void deferred_probe_timeout_work_func(struct work_struct *work)
 {
index 94d1789a233e0f436e51cc9d214c139bb8dceb80..406a907a4caec29344c367d7920e35e27c372cc4 100644 (file)
@@ -735,6 +735,8 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
                  size_t offset, u32 opt_flags)
 {
        struct firmware *fw = NULL;
+       struct cred *kern_cred = NULL;
+       const struct cred *old_cred;
        bool nondirect = false;
        int ret;
 
@@ -751,6 +753,18 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
        if (ret <= 0) /* error or already assigned */
                goto out;
 
+       /*
+        * We are about to try to access the firmware file. Because we may have been
+        * called by a driver when serving an unrelated request from userland, we use
+        * the kernel credentials to read the file.
+        */
+       kern_cred = prepare_kernel_cred(NULL);
+       if (!kern_cred) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       old_cred = override_creds(kern_cred);
+
        ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL);
 
        /* Only full reads can support decompression, platform, and sysfs. */
@@ -776,6 +790,9 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
        } else
                ret = assign_fw(fw, device);
 
+       revert_creds(old_cred);
+       put_cred(kern_cred);
+
  out:
        if (ret < 0) {
                fw_abort_batch_reqs(fw);
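
Editor's note: the credential override added above is the stock kernel pattern — build a kernel cred, install it around the filesystem access, then restore the caller's and drop the reference. Reduced to its skeleton (error paths elided):

    const struct cred *old;
    struct cred *kcred = prepare_kernel_cred(NULL);

    if (!kcred)
            return -ENOMEM;
    old = override_creds(kcred);    /* act with kernel, not caller, creds */
    /* ... read the firmware file ... */
    revert_creds(old);              /* restore the caller's credentials */
    put_cred(kcred);                /* drop our reference */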
index e9d1efcda89b3be1c4d8f12509c170b8957cb470..ac6ad9ab67f941bdb8f72c47b5f07453abe1654c 100644 (file)
@@ -152,9 +152,19 @@ static struct attribute *default_attrs[] = {
        NULL
 };
 
+static umode_t topology_is_visible(struct kobject *kobj,
+                                  struct attribute *attr, int unused)
+{
+       if (attr == &dev_attr_ppin.attr && !topology_ppin(kobj_to_dev(kobj)->id))
+               return 0;
+
+       return attr->mode;
+}
+
 static const struct attribute_group topology_attr_group = {
        .attrs = default_attrs,
        .bin_attrs = bin_attrs,
+       .is_visible = topology_is_visible,
        .name = "topology"
 };
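
Editor's note: the is_visible hook runs once per attribute as the group is registered, and its return value decides what lands in sysfs. A sketch of the general contract (semantics only, not new code):

    /* Return 0 to hide this attribute's sysfs file entirely,
     * attr->mode to expose it with its declared permissions, or
     * another mode (e.g. 0444) to override the permissions. */

Here it hides the ppin file on CPUs that report no Protected Processor Inventory Number.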
 
index 519b6d38d4df65859ac68abf485dd8d3dff32f44..fdb81f2794cde1dd8ed376e362258c887fae1b51 100644 (file)
@@ -33,6 +33,22 @@ config BLK_DEV_FD
          To compile this driver as a module, choose M here: the
          module will be called floppy.
 
+config BLK_DEV_FD_RAWCMD
+       bool "Support for raw floppy disk commands (DEPRECATED)"
+       depends on BLK_DEV_FD
+       help
+         If you want to use actual physical floppies and expect to do
+         special low-level hardware accesses to them (access and use
+         non-standard formats, for example), then enable this.
+
+         Note that the code enabled by this option is rarely used and
+         might be unstable or insecure, and distros should not enable it.
+
+         Note: FDRAWCMD is deprecated and will be removed from the kernel
+         in the near future.
+
+         If unsure, say N.
+
 config AMIGA_FLOPPY
        tristate "Amiga floppy support"
        depends on AMIGA
index 84d0fcebd6af5acf15230779755d752ece8510be..749ae1246f4cf894f8544248c1461d0d06720db0 100644 (file)
@@ -244,3 +244,5 @@ void aoenet_exit(void);
 void aoenet_xmit(struct sk_buff_head *);
 int is_aoe_netif(struct net_device *ifp);
 int set_aoe_iflist(const char __user *str, size_t size);
+
+extern struct workqueue_struct *aoe_wq;
index 8a91fcac6f829bdbf8ad1709006f7dbdfb2c43b3..348adf3352177d7644489bfe973ea94d22a2f96f 100644 (file)
@@ -435,7 +435,7 @@ err_mempool:
 err:
        spin_lock_irqsave(&d->lock, flags);
        d->flags &= ~DEVFL_GD_NOW;
-       schedule_work(&d->work);
+       queue_work(aoe_wq, &d->work);
        spin_unlock_irqrestore(&d->lock, flags);
 }
 
index 384073ef2323c9ee1c53bc1fd20b5346da159c25..d7317425be510d1c3d4bbac5a18fba2ea8e76c1d 100644 (file)
@@ -968,7 +968,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
                d->flags |= DEVFL_NEWSIZE;
        else
                d->flags |= DEVFL_GDALLOC;
-       schedule_work(&d->work);
+       queue_work(aoe_wq, &d->work);
 }
 
 static void
index c5753c6bfe8041213f772654b27da2d9938d2ad3..b381d1c3ef327502b9eece7bd37127d12152fd30 100644 (file)
@@ -321,7 +321,7 @@ flush(const char __user *str, size_t cnt, int exiting)
                        specified = 1;
        }
 
-       flush_scheduled_work();
+       flush_workqueue(aoe_wq);
        /* pass one: do aoedev_downdev, which might sleep */
 restart1:
        spin_lock_irqsave(&devlist_lock, flags);
@@ -520,7 +520,7 @@ freetgt(struct aoedev *d, struct aoetgt *t)
 void
 aoedev_exit(void)
 {
-       flush_scheduled_work();
+       flush_workqueue(aoe_wq);
        flush(NULL, 0, EXITING);
 }
 
index 1e4e2971171caf5c0cafd798fe93bc83c439e0e1..6238c4c87cfc73a58e81d3648457f2b95e1541db 100644 (file)
@@ -16,6 +16,7 @@ MODULE_DESCRIPTION("AoE block/char driver for 2.6.2 and newer 2.6 kernels");
 MODULE_VERSION(VERSION);
 
 static struct timer_list timer;
+struct workqueue_struct *aoe_wq;
 
 static void discover_timer(struct timer_list *t)
 {
@@ -35,6 +36,7 @@ aoe_exit(void)
        aoechr_exit();
        aoedev_exit();
        aoeblk_exit();          /* free cache after de-allocating bufs */
+       destroy_workqueue(aoe_wq);
 }
 
 static int __init
@@ -42,9 +44,13 @@ aoe_init(void)
 {
        int ret;
 
+       aoe_wq = alloc_workqueue("aoe_wq", 0, 0);
+       if (!aoe_wq)
+               return -ENOMEM;
+
        ret = aoedev_init();
        if (ret)
-               return ret;
+               goto dev_fail;
        ret = aoechr_init();
        if (ret)
                goto chr_fail;
@@ -77,6 +83,8 @@ aoe_init(void)
        aoechr_exit();
  chr_fail:
        aoedev_exit();
+ dev_fail:
+       destroy_workqueue(aoe_wq);
 
        printk(KERN_INFO "aoe: initialisation failure.\n");
        return ret;
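
Editor's note: taken together, the aoe changes replace the shared system workqueue with a driver-private one, so aoe's flushes no longer wait on (or get blocked behind) unrelated kernel-wide work. The lifecycle, condensed from the hunks above:

    aoe_wq = alloc_workqueue("aoe_wq", 0, 0);   /* module init; may fail */
    queue_work(aoe_wq, &d->work);               /* was schedule_work() */
    flush_workqueue(aoe_wq);                    /* was flush_scheduled_work() */
    destroy_workqueue(aoe_wq);                  /* module exit */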
index 5d819a466e2f3f84adf392a2a034c7a3270b1bda..e232cc4fd444bf5f9a0e6efae8d1116ddd0137aa 100644 (file)
@@ -303,6 +303,7 @@ static struct atari_floppy_struct {
        int ref;
        int type;
        struct blk_mq_tag_set tag_set;
+       int error_count;
 } unit[FD_MAX_UNITS];
 
 #define        UD      unit[drive]
@@ -705,14 +706,14 @@ static void fd_error( void )
        if (!fd_request)
                return;
 
-       fd_request->error_count++;
-       if (fd_request->error_count >= MAX_ERRORS) {
+       unit[SelectedDrive].error_count++;
+       if (unit[SelectedDrive].error_count >= MAX_ERRORS) {
                printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
                fd_end_request_cur(BLK_STS_IOERR);
                finish_fdc();
                return;
        }
-       else if (fd_request->error_count == RECALIBRATE_ERRORS) {
+       else if (unit[SelectedDrive].error_count == RECALIBRATE_ERRORS) {
                printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
                if (SelectedDrive != -1)
                        SUD.track = -1;
@@ -1491,7 +1492,7 @@ static void setup_req_params( int drive )
        ReqData = ReqBuffer + 512 * ReqCnt;
 
        if (UseTrackbuffer)
-               read_track = (ReqCmd == READ && fd_request->error_count == 0);
+               read_track = (ReqCmd == READ && unit[drive].error_count == 0);
        else
                read_track = 0;
 
@@ -1520,6 +1521,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
                return BLK_STS_RESOURCE;
        }
        fd_request = bd->rq;
+       unit[drive].error_count = 0;
        blk_mq_start_request(fd_request);
 
        atari_disable_irq( IRQ_MFP_FDC );
index df25eecf80af00d99e31519a7bef401035ec2074..9e060e49b3f8c7233b70b5c32e432c6de272e343 100644 (file)
@@ -683,7 +683,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
                }
        }
 
-       want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
+       want = PFN_UP(words*sizeof(long));
        have = b->bm_number_of_pages;
        if (want == have) {
                D_ASSERT(device, b->bm_pages != NULL);
index 4b55e864a0a348d2252a2fb2fffd57e5240d4bc3..4d3efaa20b7bffca3c9cae661a417d14bf2458f3 100644 (file)
@@ -1638,22 +1638,22 @@ struct sib_info {
 };
 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib);
 
-extern void notify_resource_state(struct sk_buff *,
+extern int notify_resource_state(struct sk_buff *,
                                  unsigned int,
                                  struct drbd_resource *,
                                  struct resource_info *,
                                  enum drbd_notification_type);
-extern void notify_device_state(struct sk_buff *,
+extern int notify_device_state(struct sk_buff *,
                                unsigned int,
                                struct drbd_device *,
                                struct device_info *,
                                enum drbd_notification_type);
-extern void notify_connection_state(struct sk_buff *,
+extern int notify_connection_state(struct sk_buff *,
                                    unsigned int,
                                    struct drbd_connection *,
                                    struct connection_info *,
                                    enum drbd_notification_type);
-extern void notify_peer_device_state(struct sk_buff *,
+extern int notify_peer_device_state(struct sk_buff *,
                                     unsigned int,
                                     struct drbd_peer_device *,
                                     struct peer_device_info *,
index 9676a1d214bc5d1d31b5c9fb4958c1cae406f1c3..2887350ae010a78c5c5949c2a61af2eb746ed1d2 100644 (file)
@@ -903,31 +903,6 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
        }
 }
 
-/* communicated if (agreed_features & DRBD_FF_WSAME) */
-static void
-assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p,
-                                       struct request_queue *q)
-{
-       if (q) {
-               p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
-               p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
-               p->qlim->alignment_offset = cpu_to_be32(queue_alignment_offset(q));
-               p->qlim->io_min = cpu_to_be32(queue_io_min(q));
-               p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
-               p->qlim->discard_enabled = blk_queue_discard(q);
-               p->qlim->write_same_capable = 0;
-       } else {
-               q = device->rq_queue;
-               p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
-               p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
-               p->qlim->alignment_offset = 0;
-               p->qlim->io_min = cpu_to_be32(queue_io_min(q));
-               p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
-               p->qlim->discard_enabled = 0;
-               p->qlim->write_same_capable = 0;
-       }
-}
-
 int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags)
 {
        struct drbd_device *device = peer_device->device;
@@ -949,7 +924,9 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
 
        memset(p, 0, packet_size);
        if (get_ldev_if_state(device, D_NEGOTIATING)) {
-               struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
+               struct block_device *bdev = device->ldev->backing_bdev;
+               struct request_queue *q = bdev_get_queue(bdev);
+
                d_size = drbd_get_max_capacity(device->ldev);
                rcu_read_lock();
                u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
@@ -957,14 +934,32 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
                q_order_type = drbd_queue_order_type(device);
                max_bio_size = queue_max_hw_sectors(q) << 9;
                max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
-               assign_p_sizes_qlim(device, p, q);
+               p->qlim->physical_block_size =
+                       cpu_to_be32(bdev_physical_block_size(bdev));
+               p->qlim->logical_block_size =
+                       cpu_to_be32(bdev_logical_block_size(bdev));
+               p->qlim->alignment_offset =
+                       cpu_to_be32(bdev_alignment_offset(bdev));
+               p->qlim->io_min = cpu_to_be32(bdev_io_min(bdev));
+               p->qlim->io_opt = cpu_to_be32(bdev_io_opt(bdev));
+               p->qlim->discard_enabled = !!bdev_max_discard_sectors(bdev);
                put_ldev(device);
        } else {
+               struct request_queue *q = device->rq_queue;
+
+               p->qlim->physical_block_size =
+                       cpu_to_be32(queue_physical_block_size(q));
+               p->qlim->logical_block_size =
+                       cpu_to_be32(queue_logical_block_size(q));
+               p->qlim->alignment_offset = 0;
+               p->qlim->io_min = cpu_to_be32(queue_io_min(q));
+               p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
+               p->qlim->discard_enabled = 0;
+
                d_size = 0;
                u_size = 0;
                q_order_type = QUEUE_ORDERED_NONE;
                max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
-               assign_p_sizes_qlim(device, p, NULL);
        }
 
        if (peer_device->connection->agreed_pro_version <= 94)
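
Editor's note: the drbd rewrite inlines the old assign_p_sizes_qlim() helper into its two call sites and, on the backing-device path, switches from request_queue accessors to their bdev_* counterparts, deriving the discard flag from bdev_max_discard_sectors() instead of the removed blk_queue_discard() test. The bdev helpers are thin wrappers over the queue ones, e.g. (sketch of the equivalence relied on here):

    /* bdev_physical_block_size(bdev)
     *         == queue_physical_block_size(bdev_get_queue(bdev)) */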
@@ -2719,6 +2714,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
        sprintf(disk->disk_name, "drbd%d", minor);
        disk->private_data = device;
 
+       blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
        blk_queue_write_cache(disk->queue, true, true);
        /* Setting the max_hw_sectors to an odd value of 8kibyte here
           This triggers a max_bio_size message upon first attach or connect */
@@ -2773,12 +2769,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
 
        if (init_submitter(device)) {
                err = ERR_NOMEM;
-               goto out_idr_remove_vol;
+               goto out_idr_remove_from_resource;
        }
 
        err = add_disk(disk);
        if (err)
-               goto out_idr_remove_vol;
+               goto out_idr_remove_from_resource;
 
        /* inherit the connection state */
        device->state.conn = first_connection(resource)->cstate;
@@ -2792,8 +2788,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
        drbd_debugfs_device_add(device);
        return NO_ERROR;
 
-out_idr_remove_vol:
-       idr_remove(&connection->peer_devices, vnr);
 out_idr_remove_from_resource:
        for_each_connection(connection, resource) {
                peer_device = idr_remove(&connection->peer_devices, vnr);
@@ -3587,9 +3581,8 @@ const char *cmdname(enum drbd_packet cmd)
         * when we want to support more than
         * one PRO_VERSION */
        static const char *cmdnames[] = {
+
                [P_DATA]                = "Data",
-               [P_WSAME]               = "WriteSame",
-               [P_TRIM]                = "Trim",
                [P_DATA_REPLY]          = "DataReply",
                [P_RS_DATA_REPLY]       = "RSDataReply",
                [P_BARRIER]             = "Barrier",
@@ -3600,7 +3593,6 @@ const char *cmdname(enum drbd_packet cmd)
                [P_DATA_REQUEST]        = "DataRequest",
                [P_RS_DATA_REQUEST]     = "RSDataRequest",
                [P_SYNC_PARAM]          = "SyncParam",
-               [P_SYNC_PARAM89]        = "SyncParam89",
                [P_PROTOCOL]            = "ReportProtocol",
                [P_UUIDS]               = "ReportUUIDs",
                [P_SIZES]               = "ReportSizes",
@@ -3608,6 +3600,7 @@ const char *cmdname(enum drbd_packet cmd)
                [P_SYNC_UUID]           = "ReportSyncUUID",
                [P_AUTH_CHALLENGE]      = "AuthChallenge",
                [P_AUTH_RESPONSE]       = "AuthResponse",
+               [P_STATE_CHG_REQ]       = "StateChgRequest",
                [P_PING]                = "Ping",
                [P_PING_ACK]            = "PingAck",
                [P_RECV_ACK]            = "RecvAck",
@@ -3618,23 +3611,25 @@ const char *cmdname(enum drbd_packet cmd)
                [P_NEG_DREPLY]          = "NegDReply",
                [P_NEG_RS_DREPLY]       = "NegRSDReply",
                [P_BARRIER_ACK]         = "BarrierAck",
-               [P_STATE_CHG_REQ]       = "StateChgRequest",
                [P_STATE_CHG_REPLY]     = "StateChgReply",
                [P_OV_REQUEST]          = "OVRequest",
                [P_OV_REPLY]            = "OVReply",
                [P_OV_RESULT]           = "OVResult",
                [P_CSUM_RS_REQUEST]     = "CsumRSRequest",
                [P_RS_IS_IN_SYNC]       = "CsumRSIsInSync",
+               [P_SYNC_PARAM89]        = "SyncParam89",
                [P_COMPRESSED_BITMAP]   = "CBitmap",
                [P_DELAY_PROBE]         = "DelayProbe",
                [P_OUT_OF_SYNC]         = "OutOfSync",
-               [P_RETRY_WRITE]         = "RetryWrite",
                [P_RS_CANCEL]           = "RSCancel",
                [P_CONN_ST_CHG_REQ]     = "conn_st_chg_req",
                [P_CONN_ST_CHG_REPLY]   = "conn_st_chg_reply",
                [P_PROTOCOL_UPDATE]     = "protocol_update",
+               [P_TRIM]                = "Trim",
                [P_RS_THIN_REQ]         = "rs_thin_req",
                [P_RS_DEALLOCATED]      = "rs_deallocated",
+               [P_WSAME]               = "WriteSame",
+               [P_ZEROES]              = "Zeroes",
 
                /* enum drbd_packet, but not commands - obsoleted flags:
                 *      P_MAY_IGNORE
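
Because the table uses designated initializers keyed by the enum value, reordering entries (as above) changes nothing at runtime; a lookup only needs a bounds check plus a NULL check for holes. A hedged sketch of such a lookup (hypothetical name, modeled on cmdname()):

        static const char *sketch_cmdname(enum drbd_packet cmd)
        {
                /* designated initializers leave unlisted slots NULL */
                if (cmd >= ARRAY_SIZE(cmdnames) || !cmdnames[cmd])
                        return "Unknown";
                return cmdnames[cmd];
        }
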
index 02030c9c4d3b16d55e0fc857dfe80b53472ac021..013d355a2033bd61f7e20650bf4e64e301ccd479 100644 (file)
@@ -770,6 +770,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
        struct set_role_parms parms;
        int err;
        enum drbd_ret_code retcode;
+       enum drbd_state_rv rv;
 
        retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
        if (!adm_ctx.reply_skb)
@@ -790,14 +791,14 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
        mutex_lock(&adm_ctx.resource->adm_mutex);
 
        if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
-               retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
-                                               R_PRIMARY, parms.assume_uptodate);
+               rv = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
        else
-               retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
-                                               R_SECONDARY, 0);
+               rv = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
 
        mutex_unlock(&adm_ctx.resource->adm_mutex);
        genl_lock();
+       drbd_adm_finish(&adm_ctx, info, rv);
+       return 0;
 out:
        drbd_adm_finish(&adm_ctx, info, retcode);
        return 0;
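
drbd_set_role() returns an enum drbd_state_rv (SS_* values), while retcode holds an enum drbd_ret_code (ERR_* values); the old cast silently mixed the two value spaces. Keeping the state result in its own variable makes the types line up. A minimal sketch of the corrected flow (assuming drbd_adm_finish() takes a plain integer status):

        enum drbd_state_rv rv;

        rv = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
        drbd_adm_finish(&adm_ctx, info, rv);   /* forwarded without a cast */
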
@@ -1204,50 +1205,40 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
 }
 
 static void decide_on_discard_support(struct drbd_device *device,
-                       struct request_queue *q,
-                       struct request_queue *b,
-                       bool discard_zeroes_if_aligned)
+               struct drbd_backing_dev *bdev)
 {
-       /* q = drbd device queue (device->rq_queue)
-        * b = backing device queue (device->ldev->backing_bdev->bd_disk->queue),
-        *     or NULL if diskless
-        */
-       struct drbd_connection *connection = first_peer_device(device)->connection;
-       bool can_do = b ? blk_queue_discard(b) : true;
-
-       if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) {
-               can_do = false;
-               drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n");
-       }
-       if (can_do) {
-               /* We don't care for the granularity, really.
-                * Stacking limits below should fix it for the local
-                * device.  Whether or not it is a suitable granularity
-                * on the remote device is not our problem, really. If
-                * you care, you need to use devices with similar
-                * topology on all peers. */
-               blk_queue_discard_granularity(q, 512);
-               q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
-               q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection);
-       } else {
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
-               blk_queue_discard_granularity(q, 0);
-               q->limits.max_discard_sectors = 0;
-               q->limits.max_write_zeroes_sectors = 0;
-       }
-}
+       struct drbd_connection *connection =
+               first_peer_device(device)->connection;
+       struct request_queue *q = device->rq_queue;
 
-static void fixup_discard_if_not_supported(struct request_queue *q)
-{
-       /* To avoid confusion, if this queue does not support discard, clear
-        * max_discard_sectors, which is what lsblk -D reports to the user.
-        * Older kernels got this wrong in "stack limits".
-        * */
-       if (!blk_queue_discard(q)) {
-               blk_queue_max_discard_sectors(q, 0);
-               blk_queue_discard_granularity(q, 0);
+       if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
+               goto not_supported;
+
+       if (connection->cstate >= C_CONNECTED &&
+           !(connection->agreed_features & DRBD_FF_TRIM)) {
+               drbd_info(connection,
+                       "peer DRBD too old, does not support TRIM: disabling discards\n");
+               goto not_supported;
        }
+
+       /*
+        * We don't care for the granularity, really.
+        *
+        * Stacking limits below should fix it for the local device.  Whether or
+        * not it is a suitable granularity on the remote device is not our
+        * problem, really. If you care, you need to use devices with similar
+        * topology on all peers.
+        */
+       blk_queue_discard_granularity(q, 512);
+       q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
+       q->limits.max_write_zeroes_sectors =
+               drbd_max_discard_sectors(connection);
+       return;
+
+not_supported:
+       blk_queue_discard_granularity(q, 0);
+       q->limits.max_discard_sectors = 0;
+       q->limits.max_write_zeroes_sectors = 0;
 }
 
 static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
@@ -1273,7 +1264,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
        unsigned int max_segments = 0;
        struct request_queue *b = NULL;
        struct disk_conf *dc;
-       bool discard_zeroes_if_aligned = true;
 
        if (bdev) {
                b = bdev->backing_bdev->bd_disk->queue;
@@ -1282,7 +1272,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
                rcu_read_lock();
                dc = rcu_dereference(device->ldev->disk_conf);
                max_segments = dc->max_bio_bvecs;
-               discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned;
                rcu_read_unlock();
 
                blk_set_stacking_limits(&q->limits);
@@ -1292,13 +1281,12 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
        /* This is the workaround for "bio would need to, but cannot, be split" */
        blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
        blk_queue_segment_boundary(q, PAGE_SIZE-1);
-       decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
+       decide_on_discard_support(device, bdev);
 
        if (b) {
                blk_stack_limits(&q->limits, &b->limits, 0);
                disk_update_readahead(device->vdisk);
        }
-       fixup_discard_if_not_supported(q);
        fixup_write_zeroes(device, q);
 }
 
@@ -1437,14 +1425,14 @@ static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
 static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf,
                               struct drbd_backing_dev *nbc)
 {
-       struct request_queue * const q = nbc->backing_bdev->bd_disk->queue;
+       struct block_device *bdev = nbc->backing_bdev;
 
        if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
                disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
        if (disk_conf->al_extents > drbd_al_extents_max(nbc))
                disk_conf->al_extents = drbd_al_extents_max(nbc);
 
-       if (!blk_queue_discard(q)) {
+       if (!bdev_max_discard_sectors(bdev)) {
                if (disk_conf->rs_discard_granularity) {
                        disk_conf->rs_discard_granularity = 0; /* disable feature */
                        drbd_info(device, "rs_discard_granularity feature disabled\n");
@@ -1453,16 +1441,19 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis
 
        if (disk_conf->rs_discard_granularity) {
                int orig_value = disk_conf->rs_discard_granularity;
+               sector_t discard_size = bdev_max_discard_sectors(bdev) << 9;
+               unsigned int discard_granularity = bdev_discard_granularity(bdev);
                int remainder;
 
-               if (q->limits.discard_granularity > disk_conf->rs_discard_granularity)
-                       disk_conf->rs_discard_granularity = q->limits.discard_granularity;
+               if (discard_granularity > disk_conf->rs_discard_granularity)
+                       disk_conf->rs_discard_granularity = discard_granularity;
 
-               remainder = disk_conf->rs_discard_granularity % q->limits.discard_granularity;
+               remainder = disk_conf->rs_discard_granularity %
+                               discard_granularity;
                disk_conf->rs_discard_granularity += remainder;
 
-               if (disk_conf->rs_discard_granularity > q->limits.max_discard_sectors << 9)
-                       disk_conf->rs_discard_granularity = q->limits.max_discard_sectors << 9;
+               if (disk_conf->rs_discard_granularity > discard_size)
+                       disk_conf->rs_discard_granularity = discard_size;
 
                if (disk_conf->rs_discard_granularity != orig_value)
                        drbd_info(device, "rs_discard_granularity changed to %d\n",
@@ -1611,8 +1602,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
                        drbd_send_sync_param(peer_device);
        }
 
-       synchronize_rcu();
-       kfree(old_disk_conf);
+       kvfree_rcu(old_disk_conf);
        kfree(old_plan);
        mod_timer(&device->request_timer, jiffies + HZ);
        goto success;
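
kvfree_rcu() replaces the synchronize_rcu()-then-kfree() pair throughout this series: instead of blocking the caller for a full grace period, the object is queued and freed once a grace period has elapsed (the one-argument form can still sleep under memory pressure, so it is only valid in sleepable context). A hedged sketch of the update side this pattern assumes, with illustrative field names:

        /* old_conf was published with rcu_assign_pointer(); readers access
         * it only under rcu_read_lock() */
        old_conf = rcu_replace_pointer(device->conf, new_conf,
                                       lockdep_is_held(&device->conf_update));
        kvfree_rcu(old_conf);   /* freed after a grace period elapses */
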
@@ -2443,8 +2433,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 
        mutex_unlock(&connection->resource->conf_update);
        mutex_unlock(&connection->data.mutex);
-       synchronize_rcu();
-       kfree(old_net_conf);
+       kvfree_rcu(old_net_conf);
 
        if (connection->cstate >= C_WF_REPORT_PARAMS) {
                struct drbd_peer_device *peer_device;
@@ -2502,6 +2491,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
        struct drbd_resource *resource;
        struct drbd_connection *connection;
        enum drbd_ret_code retcode;
+       enum drbd_state_rv rv;
        int i;
        int err;
 
@@ -2621,12 +2611,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
        }
        rcu_read_unlock();
 
-       retcode = (enum drbd_ret_code)conn_request_state(connection,
-                                       NS(conn, C_UNCONNECTED), CS_VERBOSE);
+       rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
 
        conn_reconfig_done(connection);
        mutex_unlock(&adm_ctx.resource->adm_mutex);
-       drbd_adm_finish(&adm_ctx, info, retcode);
+       drbd_adm_finish(&adm_ctx, info, rv);
        return 0;
 
 fail:
@@ -2734,11 +2723,12 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 
        mutex_lock(&adm_ctx.resource->adm_mutex);
        rv = conn_try_disconnect(connection, parms.force_disconnect);
-       if (rv < SS_SUCCESS)
-               retcode = (enum drbd_ret_code)rv;
-       else
-               retcode = NO_ERROR;
        mutex_unlock(&adm_ctx.resource->adm_mutex);
+       if (rv < SS_SUCCESS) {
+               drbd_adm_finish(&adm_ctx, info, rv);
+               return 0;
+       }
+       retcode = NO_ERROR;
  fail:
        drbd_adm_finish(&adm_ctx, info, retcode);
        return 0;
@@ -2857,8 +2847,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
                new_disk_conf->disk_size = (sector_t)rs.resize_size;
                rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
                mutex_unlock(&device->resource->conf_update);
-               synchronize_rcu();
-               kfree(old_disk_conf);
+               kvfree_rcu(old_disk_conf);
                new_disk_conf = NULL;
        }
 
@@ -4549,7 +4538,7 @@ static int nla_put_notification_header(struct sk_buff *msg,
        return drbd_notification_header_to_skb(msg, &nh, true);
 }
 
-void notify_resource_state(struct sk_buff *skb,
+int notify_resource_state(struct sk_buff *skb,
                           unsigned int seq,
                           struct drbd_resource *resource,
                           struct resource_info *resource_info,
@@ -4591,16 +4580,17 @@ void notify_resource_state(struct sk_buff *skb,
                if (err && err != -ESRCH)
                        goto failed;
        }
-       return;
+       return 0;
 
 nla_put_failure:
        nlmsg_free(skb);
 failed:
        drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
                        err, seq);
+       return err;
 }
 
-void notify_device_state(struct sk_buff *skb,
+int notify_device_state(struct sk_buff *skb,
                         unsigned int seq,
                         struct drbd_device *device,
                         struct device_info *device_info,
@@ -4640,16 +4630,17 @@ void notify_device_state(struct sk_buff *skb,
                if (err && err != -ESRCH)
                        goto failed;
        }
-       return;
+       return 0;
 
 nla_put_failure:
        nlmsg_free(skb);
 failed:
        drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
                 err, seq);
+       return err;
 }
 
-void notify_connection_state(struct sk_buff *skb,
+int notify_connection_state(struct sk_buff *skb,
                             unsigned int seq,
                             struct drbd_connection *connection,
                             struct connection_info *connection_info,
@@ -4689,16 +4680,17 @@ void notify_connection_state(struct sk_buff *skb,
                if (err && err != -ESRCH)
                        goto failed;
        }
-       return;
+       return 0;
 
 nla_put_failure:
        nlmsg_free(skb);
 failed:
        drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
                 err, seq);
+       return err;
 }
 
-void notify_peer_device_state(struct sk_buff *skb,
+int notify_peer_device_state(struct sk_buff *skb,
                              unsigned int seq,
                              struct drbd_peer_device *peer_device,
                              struct peer_device_info *peer_device_info,
@@ -4739,13 +4731,14 @@ void notify_peer_device_state(struct sk_buff *skb,
                if (err && err != -ESRCH)
                        goto failed;
        }
-       return;
+       return 0;
 
 nla_put_failure:
        nlmsg_free(skb);
 failed:
        drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
                 err, seq);
+       return err;
 }
 
 void notify_helper(enum drbd_notification_type type,
@@ -4796,7 +4789,7 @@ fail:
                 err, seq);
 }
 
-static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
+static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
 {
        struct drbd_genlmsghdr *dh;
        int err;
@@ -4810,11 +4803,12 @@ static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
        if (nla_put_notification_header(skb, NOTIFY_EXISTS))
                goto nla_put_failure;
        genlmsg_end(skb, dh);
-       return;
+       return 0;
 
 nla_put_failure:
        nlmsg_free(skb);
        pr_err("Error %d sending event. Event seq:%u\n", err, seq);
+       return err;
 }
 
 static void free_state_changes(struct list_head *list)
@@ -4841,6 +4835,7 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
        unsigned int seq = cb->args[2];
        unsigned int n;
        enum drbd_notification_type flags = 0;
+       int err = 0;
 
        /* There is no need for taking notification_mutex here: it doesn't
           matter if the initial state events mix with later state change
@@ -4849,32 +4844,32 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
 
        cb->args[5]--;
        if (cb->args[5] == 1) {
-               notify_initial_state_done(skb, seq);
+               err = notify_initial_state_done(skb, seq);
                goto out;
        }
        n = cb->args[4]++;
        if (cb->args[4] < cb->args[3])
                flags |= NOTIFY_CONTINUES;
        if (n < 1) {
-               notify_resource_state_change(skb, seq, state_change->resource,
+               err = notify_resource_state_change(skb, seq, state_change->resource,
                                             NOTIFY_EXISTS | flags);
                goto next;
        }
        n--;
        if (n < state_change->n_connections) {
-               notify_connection_state_change(skb, seq, &state_change->connections[n],
+               err = notify_connection_state_change(skb, seq, &state_change->connections[n],
                                               NOTIFY_EXISTS | flags);
                goto next;
        }
        n -= state_change->n_connections;
        if (n < state_change->n_devices) {
-               notify_device_state_change(skb, seq, &state_change->devices[n],
+               err = notify_device_state_change(skb, seq, &state_change->devices[n],
                                           NOTIFY_EXISTS | flags);
                goto next;
        }
        n -= state_change->n_devices;
        if (n < state_change->n_devices * state_change->n_connections) {
-               notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
+               err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
                                                NOTIFY_EXISTS | flags);
                goto next;
        }
@@ -4889,7 +4884,10 @@ next:
                cb->args[4] = 0;
        }
 out:
-       return skb->len;
+       if (err)
+               return err;
+       else
+               return skb->len;
 }
 
 int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
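
The error plumbing above follows the netlink dump contract: a dump callback that returns a positive byte count is invoked again for the next batch, zero ends the dump, and a negative value aborts it and surfaces the error to userspace. A hedged sketch of that contract (emit_next_record() is a hypothetical helper):

        static int sketch_dump(struct sk_buff *skb, struct netlink_callback *cb)
        {
                int err = emit_next_record(skb, cb);    /* hypothetical */

                if (err)
                        return err;     /* abort the dump, report the error */
                return skb->len;        /* more data: the core calls back again */
        }
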
index 08da922f81d1d75c69552dd07207108a8cd225e8..6762be53f40937b60e95f86dca9c904a47c0a7ba 100644 (file)
@@ -364,7 +364,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
        struct drbd_device *device = peer_device->device;
        struct drbd_peer_request *peer_req;
        struct page *page = NULL;
-       unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT;
+       unsigned int nr_pages = PFN_UP(payload_size);
 
        if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
                return NULL;
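
PFN_UP() is the canonical round-up-to-pages helper from <linux/pfn.h> and is exactly equivalent to the open-coded expression it replaces:

        /* #define PFN_UP(x)    (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
         * e.g. PFN_UP(1) == 1, PFN_UP(PAGE_SIZE) == 1,
         *      PFN_UP(PAGE_SIZE + 1) == 2 */
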
@@ -1511,7 +1511,6 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
 int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
 {
        struct block_device *bdev = device->ldev->backing_bdev;
-       struct request_queue *q = bdev_get_queue(bdev);
        sector_t tmp, nr;
        unsigned int max_discard_sectors, granularity;
        int alignment;
@@ -1521,10 +1520,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
                goto zero_out;
 
        /* Zero-sector (unknown) and one-sector granularities are the same.  */
-       granularity = max(q->limits.discard_granularity >> 9, 1U);
+       granularity = max(bdev_discard_granularity(bdev) >> 9, 1U);
        alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
 
-       max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
+       max_discard_sectors = min(bdev_max_discard_sectors(bdev), (1U << 22));
        max_discard_sectors -= max_discard_sectors % granularity;
        if (unlikely(!max_discard_sectors))
                goto zero_out;
@@ -1548,7 +1547,8 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
                start = tmp;
        }
        while (nr_sectors >= max_discard_sectors) {
-               err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0);
+               err |= blkdev_issue_discard(bdev, start, max_discard_sectors,
+                                           GFP_NOIO);
                nr_sectors -= max_discard_sectors;
                start += max_discard_sectors;
        }
@@ -1560,7 +1560,7 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
                nr = nr_sectors;
                nr -= (unsigned int)nr % granularity;
                if (nr) {
-                       err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
+                       err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO);
                        nr_sectors -= nr;
                        start += nr;
                }
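
The dropped last argument reflects a 5.19 API change: blkdev_issue_discard() no longer takes flags, and the only flag it had (BLKDEV_DISCARD_SECURE) became a dedicated helper. For reference, the signatures this series builds on:

        int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                                 sector_t nr_sects, gfp_t gfp_mask);
        int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
                                      sector_t nr_sects, gfp_t gfp);
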
@@ -1575,11 +1575,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
 
 static bool can_do_reliable_discards(struct drbd_device *device)
 {
-       struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
        struct disk_conf *dc;
        bool can_do;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(device->ldev->backing_bdev))
                return false;
 
        rcu_read_lock();
@@ -1629,9 +1628,9 @@ int drbd_submit_peer_request(struct drbd_device *device,
        struct bio *bio;
        struct page *page = peer_req->pages;
        sector_t sector = peer_req->i.sector;
-       unsigned data_size = peer_req->i.size;
-       unsigned n_bios = 0;
-       unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
+       unsigned int data_size = peer_req->i.size;
+       unsigned int n_bios = 0;
+       unsigned int nr_pages = PFN_UP(data_size);
 
        /* TRIM/DISCARD: for now, always use the helper function
         * blkdev_issue_zeroout(..., discard=true).
@@ -3751,8 +3750,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
                drbd_info(connection, "peer data-integrity-alg: %s\n",
                          integrity_alg[0] ? integrity_alg : "(none)");
 
-       synchronize_rcu();
-       kfree(old_net_conf);
+       kvfree_rcu(old_net_conf);
        return 0;
 
 disconnect_rcu_unlock:
@@ -3903,7 +3901,6 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
                                drbd_err(device, "verify-alg of wrong size, "
                                        "peer wants %u, accepting only up to %u byte\n",
                                        data_size, SHARED_SECRET_MAX);
-                               err = -EIO;
                                goto reconnect;
                        }
 
@@ -4121,8 +4118,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
 
                        rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
                        mutex_unlock(&connection->resource->conf_update);
-                       synchronize_rcu();
-                       kfree(old_disk_conf);
+                       kvfree_rcu(old_disk_conf);
 
                        drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
                                 (unsigned long)p_usize, (unsigned long)my_usize);
index 75be0e16770a090f99e1573140520a30651b54ff..e64bcfba30ef371aa075ca94e05bda2a439e2ec8 100644 (file)
@@ -922,7 +922,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se
 
        switch (rbm) {
        case RB_CONGESTED_REMOTE:
-               return 0;
+               return false;
        case RB_LEAST_PENDING:
                return atomic_read(&device->local_cnt) >
                        atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
index b8a27818ab3f83ae1139a30009c0b5e299a3f0b0..3f7bf9f2d874c71ab771c4d0ff594a695df20815 100644 (file)
@@ -1537,7 +1537,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
        return rv;
 }
 
-void notify_resource_state_change(struct sk_buff *skb,
+int notify_resource_state_change(struct sk_buff *skb,
                                  unsigned int seq,
                                  struct drbd_resource_state_change *resource_state_change,
                                  enum drbd_notification_type type)
@@ -1550,10 +1550,10 @@ void notify_resource_state_change(struct sk_buff *skb,
                .res_susp_fen = resource_state_change->susp_fen[NEW],
        };
 
-       notify_resource_state(skb, seq, resource, &resource_info, type);
+       return notify_resource_state(skb, seq, resource, &resource_info, type);
 }
 
-void notify_connection_state_change(struct sk_buff *skb,
+int notify_connection_state_change(struct sk_buff *skb,
                                    unsigned int seq,
                                    struct drbd_connection_state_change *connection_state_change,
                                    enum drbd_notification_type type)
@@ -1564,10 +1564,10 @@ void notify_connection_state_change(struct sk_buff *skb,
                .conn_role = connection_state_change->peer_role[NEW],
        };
 
-       notify_connection_state(skb, seq, connection, &connection_info, type);
+       return notify_connection_state(skb, seq, connection, &connection_info, type);
 }
 
-void notify_device_state_change(struct sk_buff *skb,
+int notify_device_state_change(struct sk_buff *skb,
                                unsigned int seq,
                                struct drbd_device_state_change *device_state_change,
                                enum drbd_notification_type type)
@@ -1577,10 +1577,10 @@ void notify_device_state_change(struct sk_buff *skb,
                .dev_disk_state = device_state_change->disk_state[NEW],
        };
 
-       notify_device_state(skb, seq, device, &device_info, type);
+       return notify_device_state(skb, seq, device, &device_info, type);
 }
 
-void notify_peer_device_state_change(struct sk_buff *skb,
+int notify_peer_device_state_change(struct sk_buff *skb,
                                     unsigned int seq,
                                     struct drbd_peer_device_state_change *p,
                                     enum drbd_notification_type type)
@@ -1594,7 +1594,7 @@ void notify_peer_device_state_change(struct sk_buff *skb,
                .peer_resync_susp_dependency = p->resync_susp_dependency[NEW],
        };
 
-       notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
+       return notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
 }
 
 static void broadcast_state_change(struct drbd_state_change *state_change)
@@ -1602,7 +1602,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change)
        struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
        bool resource_state_has_changed;
        unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
-       void (*last_func)(struct sk_buff *, unsigned int, void *,
+       int (*last_func)(struct sk_buff *, unsigned int, void *,
                          enum drbd_notification_type) = NULL;
        void *last_arg = NULL;
 
@@ -2071,8 +2071,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
                conn_free_crypto(connection);
                mutex_unlock(&connection->resource->conf_update);
 
-               synchronize_rcu();
-               kfree(old_conf);
+               kvfree_rcu(old_conf);
        }
 
        if (ns_max.susp_fen) {
index ba80f612d6abbc185dc9a73ee5e72dbb86932b8f..d5b0479bc9a6649e6bd423d5793861e5fa0022f6 100644 (file)
@@ -44,19 +44,19 @@ extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_
 extern void copy_old_to_new_state_change(struct drbd_state_change *);
 extern void forget_state_change(struct drbd_state_change *);
 
-extern void notify_resource_state_change(struct sk_buff *,
+extern int notify_resource_state_change(struct sk_buff *,
                                         unsigned int,
                                         struct drbd_resource_state_change *,
                                         enum drbd_notification_type type);
-extern void notify_connection_state_change(struct sk_buff *,
+extern int notify_connection_state_change(struct sk_buff *,
                                           unsigned int,
                                           struct drbd_connection_state_change *,
                                           enum drbd_notification_type type);
-extern void notify_device_state_change(struct sk_buff *,
+extern int notify_device_state_change(struct sk_buff *,
                                       unsigned int,
                                       struct drbd_device_state_change *,
                                       enum drbd_notification_type type);
-extern void notify_peer_device_state_change(struct sk_buff *,
+extern int notify_peer_device_state_change(struct sk_buff *,
                                            unsigned int,
                                            struct drbd_peer_device_state_change *,
                                            enum drbd_notification_type type);
index 0f9956f4e9c4233cc82f3e87affba0e2b46e0d9a..af3051dd8912d02e8ed9e69da6a413fdd732be2d 100644 (file)
@@ -1030,7 +1030,7 @@ static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_
 {
        if (drbd_peer_req_has_active_page(peer_req)) {
                /* This might happen if sendpage() has not finished */
-               int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
+               int i = PFN_UP(peer_req->i.size);
                atomic_add(i, &device->pp_in_use_by_net);
                atomic_sub(i, &device->pp_in_use);
                spin_lock_irq(&device->resource->req_lock);
index 8c647532e3ce99fd285216792590dd95257f0f68..015841f50f4e994830836f41f02b106b25db18cc 100644 (file)
@@ -509,8 +509,8 @@ static unsigned long fdc_busy;
 static DECLARE_WAIT_QUEUE_HEAD(fdc_wait);
 static DECLARE_WAIT_QUEUE_HEAD(command_done);
 
-/* Errors during formatting are counted here. */
-static int format_errors;
+/* errors encountered on the current (or last) request */
+static int floppy_errors;
 
 /* Format request descriptor. */
 static struct format_descr format_req;
@@ -530,7 +530,6 @@ static struct format_descr format_req;
 static char *floppy_track_buffer;
 static int max_buffer_sectors;
 
-static int *errors;
 typedef void (*done_f)(int);
 static const struct cont_t {
        void (*interrupt)(void);
@@ -1455,7 +1454,7 @@ static int interpret_errors(void)
                        if (drive_params[current_drive].flags & FTD_MSG)
                                DPRINT("Over/Underrun - retrying\n");
                        bad = 0;
-               } else if (*errors >= drive_params[current_drive].max_errors.reporting) {
+               } else if (floppy_errors >= drive_params[current_drive].max_errors.reporting) {
                        print_errors();
                }
                if (reply_buffer[ST2] & ST2_WC || reply_buffer[ST2] & ST2_BC)
@@ -2095,7 +2094,7 @@ static void bad_flp_intr(void)
                if (!next_valid_format(current_drive))
                        return;
        }
-       err_count = ++(*errors);
+       err_count = ++floppy_errors;
        INFBOUND(write_errors[current_drive].badness, err_count);
        if (err_count > drive_params[current_drive].max_errors.abort)
                cont->done(0);
@@ -2241,9 +2240,8 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
                return -EINVAL;
        }
        format_req = *tmp_format_req;
-       format_errors = 0;
        cont = &format_cont;
-       errors = &format_errors;
+       floppy_errors = 0;
        ret = wait_til_done(redo_format, true);
        if (ret == -EINTR)
                return -EINTR;
@@ -2759,10 +2757,11 @@ static int set_next_request(void)
        current_req = list_first_entry_or_null(&floppy_reqs, struct request,
                                               queuelist);
        if (current_req) {
-               current_req->error_count = 0;
+               floppy_errors = 0;
                list_del_init(&current_req->queuelist);
+               return 1;
        }
-       return current_req != NULL;
+       return 0;
 }
 
 /* Starts or continues processing a request. Will automatically unlock the
@@ -2821,7 +2820,6 @@ do_request:
                _floppy = floppy_type + drive_params[current_drive].autodetect[drive_state[current_drive].probed_format];
        } else
                probing = 0;
-       errors = &(current_req->error_count);
        tmp = make_raw_rw_request();
        if (tmp < 2) {
                request_done(tmp);
@@ -2982,6 +2980,8 @@ static const char *drive_name(int type, int drive)
                return "(null)";
 }
 
+#ifdef CONFIG_BLK_DEV_FD_RAWCMD
+
 /* raw commands */
 static void raw_cmd_done(int flag)
 {
@@ -3181,6 +3181,35 @@ static int raw_cmd_ioctl(int cmd, void __user *param)
        return ret;
 }
 
+static int floppy_raw_cmd_ioctl(int type, int drive, int cmd,
+                               void __user *param)
+{
+       int ret;
+
+       pr_warn_once("Note: FDRAWCMD is deprecated and will be removed from the kernel in the near future.\n");
+
+       if (type)
+               return -EINVAL;
+       if (lock_fdc(drive))
+               return -EINTR;
+       set_floppy(drive);
+       ret = raw_cmd_ioctl(cmd, param);
+       if (ret == -EINTR)
+               return -EINTR;
+       process_fd_request();
+       return ret;
+}
+
+#else /* CONFIG_BLK_DEV_FD_RAWCMD */
+
+static int floppy_raw_cmd_ioctl(int type, int drive, int cmd,
+                               void __user *param)
+{
+       return -EOPNOTSUPP;
+}
+
+#endif
+
 static int invalidate_drive(struct block_device *bdev)
 {
        /* invalidate the buffer track to force a reread */
@@ -3369,7 +3398,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
 {
        int drive = (long)bdev->bd_disk->private_data;
        int type = ITYPE(drive_state[drive].fd_device);
-       int i;
        int ret;
        int size;
        union inparam {
@@ -3520,16 +3548,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
                outparam = &write_errors[drive];
                break;
        case FDRAWCMD:
-               if (type)
-                       return -EINVAL;
-               if (lock_fdc(drive))
-                       return -EINTR;
-               set_floppy(drive);
-               i = raw_cmd_ioctl(cmd, (void __user *)param);
-               if (i == -EINTR)
-                       return -EINTR;
-               process_fd_request();
-               return i;
+               return floppy_raw_cmd_ioctl(type, drive, cmd, (void __user *)param);
        case FDTWADDLE:
                if (lock_fdc(drive))
                        return -EINTR;
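
With the stub in place, the ioctl dispatcher stays identical whether or not CONFIG_BLK_DEV_FD_RAWCMD is set. An alternative sketch of the same gate using IS_ENABLED(); note this variant only works when the gated function still compiles in both configurations, which is why the driver uses #ifdef stubs instead (the whole raw-command implementation is compiled out):

        case FDRAWCMD:
                if (!IS_ENABLED(CONFIG_BLK_DEV_FD_RAWCMD))
                        return -EOPNOTSUPP;
                return floppy_raw_cmd_ioctl(type, drive, cmd,
                                            (void __user *)param);
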
index a58595f5ee2c8f450a03b651fd6650ddc79f5e0a..e2cb51810e89aa7455ff533d11fd32cf449e06e4 100644 (file)
@@ -1,54 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- *  linux/drivers/block/loop.c
- *
- *  Written by Theodore Ts'o, 3/29/93
- *
- * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
- * permitted under the GNU General Public License.
- *
- * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
- * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
- *
- * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
- * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
- *
- * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
- *
- * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
- *
- * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
- *
- * Loadable modules and other fixes by AK, 1998
- *
- * Make real block number available to downstream transfer functions, enables
- * CBC (and relatives) mode encryption requiring unique IVs per data block.
- * Reed H. Petty, rhp@draper.net
- *
- * Maximum number of loop devices now dynamic via max_loop module parameter.
- * Russell Kroll <rkroll@exploits.org> 19990701
- *
- * Maximum number of loop devices when compiled-in now selectable by passing
- * max_loop=<1-255> to the kernel on boot.
- * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
- *
- * Completely rewrite request handling to be make_request_fn style and
- * non blocking, pushing work to a helper thread. Lots of fixes from
- * Al Viro too.
- * Jens Axboe <axboe@suse.de>, Nov 2000
- *
- * Support up to 256 loop devices
- * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
- *
- * Support for falling back on the write file operation when the address space
- * operations write_begin is not available on the backing filesystem.
- * Anton Altaparmakov, 16 Feb 2005
- *
- * Still To Fix:
- * - Advisory locking is ignored here.
- * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
- *
+ * Copyright 1993 by Theodore Ts'o.
  */
-
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/sched.h>
@@ -59,7 +12,6 @@
 #include <linux/errno.h>
 #include <linux/major.h>
 #include <linux/wait.h>
-#include <linux/blkdev.h>
 #include <linux/blkpg.h>
 #include <linux/init.h>
 #include <linux/swap.h>
 #include <linux/blk-cgroup.h>
 #include <linux/sched/mm.h>
 #include <linux/statfs.h>
+#include <linux/uaccess.h>
+#include <linux/blk-mq.h>
+#include <linux/spinlock.h>
+#include <uapi/linux/loop.h>
+
+/* Possible states of device */
+enum {
+       Lo_unbound,
+       Lo_bound,
+       Lo_rundown,
+       Lo_deleting,
+};
 
-#include "loop.h"
+struct loop_func_table;
+
+struct loop_device {
+       int             lo_number;
+       loff_t          lo_offset;
+       loff_t          lo_sizelimit;
+       int             lo_flags;
+       char            lo_file_name[LO_NAME_SIZE];
+
+       struct file *   lo_backing_file;
+       struct block_device *lo_device;
+
+       gfp_t           old_gfp_mask;
+
+       spinlock_t              lo_lock;
+       int                     lo_state;
+       spinlock_t              lo_work_lock;
+       struct workqueue_struct *workqueue;
+       struct work_struct      rootcg_work;
+       struct list_head        rootcg_cmd_list;
+       struct list_head        idle_worker_list;
+       struct rb_root          worker_tree;
+       struct timer_list       timer;
+       bool                    use_dio;
+       bool                    sysfs_inited;
+
+       struct request_queue    *lo_queue;
+       struct blk_mq_tag_set   tag_set;
+       struct gendisk          *lo_disk;
+       struct mutex            lo_mutex;
+       bool                    idr_visible;
+};
 
-#include <linux/uaccess.h>
+struct loop_cmd {
+       struct list_head list_entry;
+       bool use_aio; /* use AIO interface to handle I/O */
+       atomic_t ref; /* only for aio */
+       long ret;
+       struct kiocb iocb;
+       struct bio_vec *bvec;
+       struct cgroup_subsys_state *blkcg_css;
+       struct cgroup_subsys_state *memcg_css;
+};
 
 #define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
 #define LOOP_DEFAULT_HW_Q_DEPTH (128)
@@ -314,15 +318,12 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
 
        mode |= FALLOC_FL_KEEP_SIZE;
 
-       if (!blk_queue_discard(lo->lo_queue)) {
-               ret = -EOPNOTSUPP;
-               goto out;
-       }
+       if (!bdev_max_discard_sectors(lo->lo_device))
+               return -EOPNOTSUPP;
 
        ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq));
        if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP))
-               ret = -EIO;
- out:
+               return -EIO;
        return ret;
 }
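
QUEUE_FLAG_DISCARD is removed in this series, so "does this device discard?" is now answered by whether the discard limit is non-zero. A hedged sketch of the resulting idiom (hypothetical helper name):

        static bool sketch_supports_discard(struct block_device *bdev)
        {
                /* a zero max_discard_sectors limit means no discard support */
                return bdev_max_discard_sectors(bdev) != 0;
        }
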
 
@@ -572,6 +573,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 
        if (!file)
                return -EBADF;
+
+       /* suppress uevents while reconfiguring the device */
+       dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+
        is_loop = is_loop_device(file);
        error = loop_global_lock_killable(lo, is_loop);
        if (error)
@@ -626,13 +631,18 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
        fput(old_file);
        if (partscan)
                loop_reread_partitions(lo);
-       return 0;
+
+       error = 0;
+done:
+       /* enable and uncork uevent now that we are done */
+       dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+       return error;
 
 out_err:
        loop_global_unlock(lo, is_loop);
 out_putf:
        fput(file);
-       return error;
+       goto done;
 }
 
 /* loop sysfs attributes */
@@ -762,7 +772,7 @@ static void loop_config_discard(struct loop_device *lo)
                struct request_queue *backingq = bdev_get_queue(I_BDEV(inode));
 
                max_discard_sectors = backingq->limits.max_write_zeroes_sectors;
-               granularity = backingq->limits.discard_granularity ?:
+               granularity = bdev_discard_granularity(I_BDEV(inode)) ?:
                        queue_physical_block_size(backingq);
 
        /*
@@ -787,14 +797,11 @@ static void loop_config_discard(struct loop_device *lo)
                q->limits.discard_granularity = granularity;
                blk_queue_max_discard_sectors(q, max_discard_sectors);
                blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
        } else {
                q->limits.discard_granularity = 0;
                blk_queue_max_discard_sectors(q, 0);
                blk_queue_max_write_zeroes_sectors(q, 0);
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
        }
-       q->limits.discard_alignment = 0;
 }
 
 struct loop_worker {
@@ -808,8 +815,6 @@ struct loop_worker {
 };
 
 static void loop_workfn(struct work_struct *work);
-static void loop_rootcg_workfn(struct work_struct *work);
-static void loop_free_idle_workers(struct timer_list *timer);
 
 #ifdef CONFIG_BLK_CGROUP
 static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
@@ -893,6 +898,39 @@ queue_work:
        spin_unlock_irq(&lo->lo_work_lock);
 }
 
+static void loop_set_timer(struct loop_device *lo)
+{
+       timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
+}
+
+static void loop_free_idle_workers(struct loop_device *lo, bool delete_all)
+{
+       struct loop_worker *pos, *worker;
+
+       spin_lock_irq(&lo->lo_work_lock);
+       list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
+                               idle_list) {
+               if (!delete_all &&
+                   time_is_after_jiffies(worker->last_ran_at +
+                                         LOOP_IDLE_WORKER_TIMEOUT))
+                       break;
+               list_del(&worker->idle_list);
+               rb_erase(&worker->rb_node, &lo->worker_tree);
+               css_put(worker->blkcg_css);
+               kfree(worker);
+       }
+       if (!list_empty(&lo->idle_worker_list))
+               loop_set_timer(lo);
+       spin_unlock_irq(&lo->lo_work_lock);
+}
+
+static void loop_free_idle_workers_timer(struct timer_list *timer)
+{
+       struct loop_device *lo = container_of(timer, struct loop_device, timer);
+
+       return loop_free_idle_workers(lo, false);
+}
+
 static void loop_update_rotational(struct loop_device *lo)
 {
        struct file *file = lo->lo_backing_file;
@@ -903,7 +941,7 @@ static void loop_update_rotational(struct loop_device *lo)
 
        /* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
        if (file_bdev)
-               nonrot = blk_queue_nonrot(bdev_get_queue(file_bdev));
+               nonrot = bdev_nonrot(file_bdev);
 
        if (nonrot)
                blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
@@ -967,6 +1005,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
        /* This is safe, since we have a reference from open(). */
        __module_get(THIS_MODULE);
 
+       /* suppress uevents while reconfiguring the device */
+       dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+
        /*
         * If we don't hold exclusive handle for the device, upgrade to it
         * here to avoid changing device under exclusive owner.
@@ -1011,24 +1052,19 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
            !file->f_op->write_iter)
                lo->lo_flags |= LO_FLAGS_READ_ONLY;
 
-       lo->workqueue = alloc_workqueue("loop%d",
-                                       WQ_UNBOUND | WQ_FREEZABLE,
-                                       0,
-                                       lo->lo_number);
        if (!lo->workqueue) {
-               error = -ENOMEM;
-               goto out_unlock;
+               lo->workqueue = alloc_workqueue("loop%d",
+                                               WQ_UNBOUND | WQ_FREEZABLE,
+                                               0, lo->lo_number);
+               if (!lo->workqueue) {
+                       error = -ENOMEM;
+                       goto out_unlock;
+               }
        }
 
        disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
        set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
 
-       INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
-       INIT_LIST_HEAD(&lo->rootcg_cmd_list);
-       INIT_LIST_HEAD(&lo->idle_worker_list);
-       lo->worker_tree = RB_ROOT;
-       timer_setup(&lo->timer, loop_free_idle_workers,
-               TIMER_DEFERRABLE);
        lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
        lo->lo_device = bdev;
        lo->lo_backing_file = file;
@@ -1073,7 +1109,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
                loop_reread_partitions(lo);
        if (!(mode & FMODE_EXCL))
                bd_abort_claiming(bdev, loop_configure);
-       return 0;
+
+       error = 0;
+done:
+       /* enable and uncork uevent now that we are done */
+       dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+       return error;
 
 out_unlock:
        loop_global_unlock(lo, is_loop);
@@ -1084,53 +1125,24 @@ out_putf:
        fput(file);
        /* This is safe: open() is still holding a reference. */
        module_put(THIS_MODULE);
-       return error;
+       goto done;
 }
 
 static void __loop_clr_fd(struct loop_device *lo, bool release)
 {
        struct file *filp;
        gfp_t gfp = lo->old_gfp_mask;
-       struct loop_worker *pos, *worker;
-
-       /*
-        * Flush loop_configure() and loop_change_fd(). It is acceptable for
-        * loop_validate_file() to succeed, for actual clear operation has not
-        * started yet.
-        */
-       mutex_lock(&loop_validate_mutex);
-       mutex_unlock(&loop_validate_mutex);
-       /*
-        * loop_validate_file() now fails because l->lo_state != Lo_bound
-        * became visible.
-        */
-
-       /*
-        * Since this function is called upon "ioctl(LOOP_CLR_FD)" xor "close()
-        * after ioctl(LOOP_CLR_FD)", it is a sign of something going wrong if
-        * lo->lo_state has changed while waiting for lo->lo_mutex.
-        */
-       mutex_lock(&lo->lo_mutex);
-       BUG_ON(lo->lo_state != Lo_rundown);
-       mutex_unlock(&lo->lo_mutex);
 
        if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
                blk_queue_write_cache(lo->lo_queue, false, false);
 
-       /* freeze request queue during the transition */
-       blk_mq_freeze_queue(lo->lo_queue);
-
-       destroy_workqueue(lo->workqueue);
-       spin_lock_irq(&lo->lo_work_lock);
-       list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
-                               idle_list) {
-               list_del(&worker->idle_list);
-               rb_erase(&worker->rb_node, &lo->worker_tree);
-               css_put(worker->blkcg_css);
-               kfree(worker);
-       }
-       spin_unlock_irq(&lo->lo_work_lock);
-       del_timer_sync(&lo->timer);
+       /*
+        * Freeze the request queue when unbinding on a live file descriptor and
+        * thus an open device.  When called from ->release we are guaranteed
+        * that there is no I/O in progress already.
+        */
+       if (!release)
+               blk_mq_freeze_queue(lo->lo_queue);
 
        spin_lock_irq(&lo->lo_lock);
        filp = lo->lo_backing_file;
@@ -1151,7 +1163,8 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
        mapping_set_gfp_mask(filp->f_mapping, gfp);
        /* This is safe: open() is still holding a reference. */
        module_put(THIS_MODULE);
-       blk_mq_unfreeze_queue(lo->lo_queue);
+       if (!release)
+               blk_mq_unfreeze_queue(lo->lo_queue);
 
        disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
 
@@ -1202,11 +1215,20 @@ static int loop_clr_fd(struct loop_device *lo)
 {
        int err;
 
-       err = mutex_lock_killable(&lo->lo_mutex);
+       /*
+        * Since lo_ioctl() is called without locks held, it is possible that
+        * loop_configure()/loop_change_fd() and loop_clr_fd() run in parallel.
+        *
+        * Therefore, use global lock when setting Lo_rundown state in order to
+        * make sure that loop_validate_file() will fail if the "struct file"
+        * which loop_configure()/loop_change_fd() found via fget() was this
+        * loop device.
+        */
+       err = loop_global_lock_killable(lo, true);
        if (err)
                return err;
        if (lo->lo_state != Lo_bound) {
-               mutex_unlock(&lo->lo_mutex);
+               loop_global_unlock(lo, true);
                return -ENXIO;
        }
        /*
@@ -1219,13 +1241,13 @@ static int loop_clr_fd(struct loop_device *lo)
         * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
         * command to fail with EBUSY.
         */
-       if (atomic_read(&lo->lo_refcnt) > 1) {
+       if (disk_openers(lo->lo_disk) > 1) {
                lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
-               mutex_unlock(&lo->lo_mutex);
+               loop_global_unlock(lo, true);
                return 0;
        }
        lo->lo_state = Lo_rundown;
-       mutex_unlock(&lo->lo_mutex);
+       loop_global_unlock(lo, true);
 
        __loop_clr_fd(lo, false);
        return 0;
@@ -1257,15 +1279,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
        /* I/O need to be drained during transfer transition */
        blk_mq_freeze_queue(lo->lo_queue);
 
-       if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
-               /* If any pages were dirtied after invalidate_bdev(), try again */
-               err = -EAGAIN;
-               pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
-                       __func__, lo->lo_number, lo->lo_file_name,
-                       lo->lo_device->bd_inode->i_mapping->nrpages);
-               goto out_unfreeze;
-       }
-
        prev_lo_flags = lo->lo_flags;
 
        err = loop_set_status_from_info(lo, info);
@@ -1476,21 +1489,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
        invalidate_bdev(lo->lo_device);
 
        blk_mq_freeze_queue(lo->lo_queue);
-
-       /* invalidate_bdev should have truncated all the pages */
-       if (lo->lo_device->bd_inode->i_mapping->nrpages) {
-               err = -EAGAIN;
-               pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
-                       __func__, lo->lo_number, lo->lo_file_name,
-                       lo->lo_device->bd_inode->i_mapping->nrpages);
-               goto out_unfreeze;
-       }
-
        blk_queue_logical_block_size(lo->lo_queue, arg);
        blk_queue_physical_block_size(lo->lo_queue, arg);
        blk_queue_io_min(lo->lo_queue, arg);
        loop_update_dio(lo);
-out_unfreeze:
        blk_mq_unfreeze_queue(lo->lo_queue);
 
        return err;
@@ -1720,33 +1722,15 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
 }
 #endif
 
-static int lo_open(struct block_device *bdev, fmode_t mode)
-{
-       struct loop_device *lo = bdev->bd_disk->private_data;
-       int err;
-
-       err = mutex_lock_killable(&lo->lo_mutex);
-       if (err)
-               return err;
-       if (lo->lo_state == Lo_deleting)
-               err = -ENXIO;
-       else
-               atomic_inc(&lo->lo_refcnt);
-       mutex_unlock(&lo->lo_mutex);
-       return err;
-}
-
 static void lo_release(struct gendisk *disk, fmode_t mode)
 {
        struct loop_device *lo = disk->private_data;
 
-       mutex_lock(&lo->lo_mutex);
-       if (atomic_dec_return(&lo->lo_refcnt))
-               goto out_unlock;
+       if (disk_openers(disk) > 0)
+               return;
 
-       if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
-               if (lo->lo_state != Lo_bound)
-                       goto out_unlock;
+       mutex_lock(&lo->lo_mutex);
+       if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
                lo->lo_state = Lo_rundown;
                mutex_unlock(&lo->lo_mutex);
                /*
@@ -1755,27 +1739,30 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
                 */
                __loop_clr_fd(lo, true);
                return;
-       } else if (lo->lo_state == Lo_bound) {
-               /*
-                * Otherwise keep thread (if running) and config,
-                * but flush possible ongoing bios in thread.
-                */
-               blk_mq_freeze_queue(lo->lo_queue);
-               blk_mq_unfreeze_queue(lo->lo_queue);
        }
-
-out_unlock:
        mutex_unlock(&lo->lo_mutex);
 }
 
+static void lo_free_disk(struct gendisk *disk)
+{
+       struct loop_device *lo = disk->private_data;
+
+       if (lo->workqueue)
+               destroy_workqueue(lo->workqueue);
+       loop_free_idle_workers(lo, true);
+       del_timer_sync(&lo->timer);
+       mutex_destroy(&lo->lo_mutex);
+       kfree(lo);
+}
+
 static const struct block_device_operations lo_fops = {
        .owner =        THIS_MODULE,
-       .open =         lo_open,
        .release =      lo_release,
        .ioctl =        lo_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl = lo_compat_ioctl,
 #endif
+       .free_disk =    lo_free_disk,
 };
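
The new ->free_disk hook runs when the last reference to the gendisk is dropped, after all openers are gone, which is why loop_remove() below can stop freeing the loop_device itself. A hedged sketch of the hook for a generic driver (my_dev is a hypothetical type):

        static void sketch_free_disk(struct gendisk *disk)
        {
                struct my_dev *dev = disk->private_data;

                kfree(dev);     /* safe: no openers or in-flight I/O remain */
        }

        static const struct block_device_operations sketch_fops = {
                .owner     = THIS_MODULE,
                .free_disk = sketch_free_disk,
        };
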
 
 /*
@@ -1834,12 +1821,14 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
        cmd->blkcg_css = NULL;
        cmd->memcg_css = NULL;
 #ifdef CONFIG_BLK_CGROUP
-       if (rq->bio && rq->bio->bi_blkg) {
-               cmd->blkcg_css = &bio_blkcg(rq->bio)->css;
+       if (rq->bio) {
+               cmd->blkcg_css = bio_blkcg_css(rq->bio);
 #ifdef CONFIG_MEMCG
-               cmd->memcg_css =
-                       cgroup_get_e_css(cmd->blkcg_css->cgroup,
-                                       &memory_cgrp_subsys);
+               if (cmd->blkcg_css) {
+                       cmd->memcg_css =
+                               cgroup_get_e_css(cmd->blkcg_css->cgroup,
+                                               &memory_cgrp_subsys);
+               }
 #endif
        }
 #endif
@@ -1888,11 +1877,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
        }
 }
 
-static void loop_set_timer(struct loop_device *lo)
-{
-       timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
-}
-
 static void loop_process_work(struct loop_worker *worker,
                        struct list_head *cmd_list, struct loop_device *lo)
 {
@@ -1941,27 +1925,6 @@ static void loop_rootcg_workfn(struct work_struct *work)
        loop_process_work(NULL, &lo->rootcg_cmd_list, lo);
 }
 
-static void loop_free_idle_workers(struct timer_list *timer)
-{
-       struct loop_device *lo = container_of(timer, struct loop_device, timer);
-       struct loop_worker *pos, *worker;
-
-       spin_lock_irq(&lo->lo_work_lock);
-       list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
-                               idle_list) {
-               if (time_is_after_jiffies(worker->last_ran_at +
-                                               LOOP_IDLE_WORKER_TIMEOUT))
-                       break;
-               list_del(&worker->idle_list);
-               rb_erase(&worker->rb_node, &lo->worker_tree);
-               css_put(worker->blkcg_css);
-               kfree(worker);
-       }
-       if (!list_empty(&lo->idle_worker_list))
-               loop_set_timer(lo);
-       spin_unlock_irq(&lo->lo_work_lock);
-}
-
 static const struct blk_mq_ops loop_mq_ops = {
        .queue_rq       = loop_queue_rq,
        .complete       = lo_complete_rq,
@@ -1977,6 +1940,9 @@ static int loop_add(int i)
        lo = kzalloc(sizeof(*lo), GFP_KERNEL);
        if (!lo)
                goto out;
+       lo->worker_tree = RB_ROOT;
+       INIT_LIST_HEAD(&lo->idle_worker_list);
+       timer_setup(&lo->timer, loop_free_idle_workers_timer, TIMER_DEFERRABLE);
        lo->lo_state = Lo_unbound;
 
        err = mutex_lock_killable(&loop_ctl_mutex);
@@ -2046,11 +2012,12 @@ static int loop_add(int i)
         */
        if (!part_shift)
                disk->flags |= GENHD_FL_NO_PART;
-       atomic_set(&lo->lo_refcnt, 0);
        mutex_init(&lo->lo_mutex);
        lo->lo_number           = i;
        spin_lock_init(&lo->lo_lock);
        spin_lock_init(&lo->lo_work_lock);
+       INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
+       INIT_LIST_HEAD(&lo->rootcg_cmd_list);
        disk->major             = LOOP_MAJOR;
        disk->first_minor       = i << part_shift;
        disk->minors            = 1 << part_shift;
@@ -2090,15 +2057,14 @@ static void loop_remove(struct loop_device *lo)
 {
        /* Make this loop device unreachable from pathname. */
        del_gendisk(lo->lo_disk);
-       blk_cleanup_disk(lo->lo_disk);
+       blk_cleanup_queue(lo->lo_disk->queue);
        blk_mq_free_tag_set(&lo->tag_set);
 
        mutex_lock(&loop_ctl_mutex);
        idr_remove(&loop_index_idr, lo->lo_number);
        mutex_unlock(&loop_ctl_mutex);
-       /* There is no route which can find this loop device. */
-       mutex_destroy(&lo->lo_mutex);
-       kfree(lo);
+
+       put_disk(lo->lo_disk);
 }
 
 static void loop_probe(dev_t dev)
@@ -2137,13 +2103,12 @@ static int loop_control_remove(int idx)
        ret = mutex_lock_killable(&lo->lo_mutex);
        if (ret)
                goto mark_visible;
-       if (lo->lo_state != Lo_unbound ||
-           atomic_read(&lo->lo_refcnt) > 0) {
+       if (lo->lo_state != Lo_unbound || disk_openers(lo->lo_disk) > 0) {
                mutex_unlock(&lo->lo_mutex);
                ret = -EBUSY;
                goto mark_visible;
        }
-       /* Mark this loop device no longer open()-able. */
+       /* Mark this loop device as no longer bound, but not fully unbound yet */
        lo->lo_state = Lo_deleting;
        mutex_unlock(&lo->lo_mutex);
 
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
deleted file mode 100644 (file)
index 082d4b6..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * loop.h
- *
- * Written by Theodore Ts'o, 3/29/93.
- *
- * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
- * permitted under the GNU General Public License.
- */
-#ifndef _LINUX_LOOP_H
-#define _LINUX_LOOP_H
-
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/blk-mq.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-#include <uapi/linux/loop.h>
-
-/* Possible states of device */
-enum {
-       Lo_unbound,
-       Lo_bound,
-       Lo_rundown,
-       Lo_deleting,
-};
-
-struct loop_func_table;
-
-struct loop_device {
-       int             lo_number;
-       atomic_t        lo_refcnt;
-       loff_t          lo_offset;
-       loff_t          lo_sizelimit;
-       int             lo_flags;
-       char            lo_file_name[LO_NAME_SIZE];
-
-       struct file *   lo_backing_file;
-       struct block_device *lo_device;
-
-       gfp_t           old_gfp_mask;
-
-       spinlock_t              lo_lock;
-       int                     lo_state;
-       spinlock_t              lo_work_lock;
-       struct workqueue_struct *workqueue;
-       struct work_struct      rootcg_work;
-       struct list_head        rootcg_cmd_list;
-       struct list_head        idle_worker_list;
-       struct rb_root          worker_tree;
-       struct timer_list       timer;
-       bool                    use_dio;
-       bool                    sysfs_inited;
-
-       struct request_queue    *lo_queue;
-       struct blk_mq_tag_set   tag_set;
-       struct gendisk          *lo_disk;
-       struct mutex            lo_mutex;
-       bool                    idr_visible;
-};
-
-struct loop_cmd {
-       struct list_head list_entry;
-       bool use_aio; /* use AIO interface to handle I/O */
-       atomic_t ref; /* only for aio */
-       long ret;
-       struct kiocb iocb;
-       struct bio_vec *bvec;
-       struct cgroup_subsys_state *blkcg_css;
-       struct cgroup_subsys_state *memcg_css;
-};
-
-#endif
index 4fbaf0b4958b7931ef27d0e6c2e2a16a7cbac827..27386a572ba490c3417489e1d3fb279543fbaed3 100644 (file)
@@ -2729,7 +2729,7 @@ static int mtip_dma_alloc(struct driver_data *dd)
 {
        struct mtip_port *port = dd->port;
 
-       /* Allocate dma memory for RX Fis, Identify, and Sector Bufffer */
+       /* Allocate dma memory for RX Fis, Identify, and Sector Buffer */
        port->block1 =
                dma_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
                                        &port->block1_dma, GFP_KERNEL);
index 5a1f98494dddf9e24cc5d92d6b8f0ae618c5c516..ac8b045c777c00b5889a650e7287274a1742b5b1 100644 (file)
@@ -333,7 +333,6 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
 
        if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
                nbd->disk->queue->limits.discard_granularity = blksize;
-               nbd->disk->queue->limits.discard_alignment = blksize;
                blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
        }
        blk_queue_logical_block_size(nbd->disk->queue, blksize);
@@ -947,11 +946,15 @@ static int wait_for_reconnect(struct nbd_device *nbd)
        struct nbd_config *config = nbd->config;
        if (!config->dead_conn_timeout)
                return 0;
-       if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
+
+       if (!wait_event_timeout(config->conn_wait,
+                               test_bit(NBD_RT_DISCONNECTED,
+                                        &config->runtime_flags) ||
+                               atomic_read(&config->live_connections) > 0,
+                               config->dead_conn_timeout))
                return 0;
-       return wait_event_timeout(config->conn_wait,
-                                 atomic_read(&config->live_connections) > 0,
-                                 config->dead_conn_timeout) > 0;
+
+       return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
 }
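
The rewrite folds the disconnected check into the wait condition itself, relying on the wait_event_timeout() contract: it returns 0 only when the timeout expires with the condition still false, and a positive remaining-jiffies count otherwise. In outline:

    long left = wait_event_timeout(wq, condition, timeout);

    if (!left)
            return 0;       /* timed out; condition never became true */
    /* condition is true; decide which half of it fired */
    return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);

The wake_up() added to nbd_disconnect_and_put() further down is the other half of this: it kicks the waiter as soon as the disconnect flag flips.
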
 
 static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
@@ -1217,11 +1220,11 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
        return -ENOSPC;
 }
 
-static void nbd_bdev_reset(struct block_device *bdev)
+static void nbd_bdev_reset(struct nbd_device *nbd)
 {
-       if (bdev->bd_openers > 1)
+       if (disk_openers(nbd->disk) > 1)
                return;
-       set_capacity(bdev->bd_disk, 0);
+       set_capacity(nbd->disk, 0);
 }
 
 static void nbd_parse_flags(struct nbd_device *nbd)
@@ -1231,8 +1234,6 @@ static void nbd_parse_flags(struct nbd_device *nbd)
                set_disk_ro(nbd->disk, true);
        else
                set_disk_ro(nbd->disk, false);
-       if (config->flags & NBD_FLAG_SEND_TRIM)
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
        if (config->flags & NBD_FLAG_SEND_FLUSH) {
                if (config->flags & NBD_FLAG_SEND_FUA)
                        blk_queue_write_cache(nbd->disk->queue, true, true);
@@ -1318,9 +1319,7 @@ static void nbd_config_put(struct nbd_device *nbd)
 
                nbd->tag_set.timeout = 0;
                nbd->disk->queue->limits.discard_granularity = 0;
-               nbd->disk->queue->limits.discard_alignment = 0;
-               blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
+               blk_queue_max_discard_sectors(nbd->disk->queue, 0);
 
                mutex_unlock(&nbd->config_lock);
                nbd_put(nbd);
@@ -1389,7 +1388,7 @@ static int nbd_start_device(struct nbd_device *nbd)
        return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
 }
 
-static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
+static int nbd_start_device_ioctl(struct nbd_device *nbd)
 {
        struct nbd_config *config = nbd->config;
        int ret;
@@ -1408,7 +1407,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
        flush_workqueue(nbd->recv_workq);
 
        mutex_lock(&nbd->config_lock);
-       nbd_bdev_reset(bdev);
+       nbd_bdev_reset(nbd);
        /* user requested, ignore socket errors */
        if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
                ret = 0;
@@ -1422,7 +1421,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
 {
        sock_shutdown(nbd);
        __invalidate_device(bdev, true);
-       nbd_bdev_reset(bdev);
+       nbd_bdev_reset(nbd);
        if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
                               &nbd->config->runtime_flags))
                nbd_config_put(nbd);
@@ -1468,7 +1467,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
                config->flags = arg;
                return 0;
        case NBD_DO_IT:
-               return nbd_start_device_ioctl(nbd, bdev);
+               return nbd_start_device_ioctl(nbd);
        case NBD_CLEAR_QUE:
                /*
                 * This is for compatibility only.  The queue is always cleared
@@ -1579,7 +1578,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
        struct nbd_device *nbd = disk->private_data;
 
        if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
-                       disk->part0->bd_openers == 0)
+                       disk_openers(disk) == 0)
                nbd_disconnect_and_put(nbd);
 
        nbd_config_put(nbd);
@@ -1784,7 +1783,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
        blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
        blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
        disk->queue->limits.discard_granularity = 0;
-       disk->queue->limits.discard_alignment = 0;
        blk_queue_max_discard_sectors(disk->queue, 0);
        blk_queue_max_segment_size(disk->queue, UINT_MAX);
        blk_queue_max_segments(disk->queue, USHRT_MAX);
@@ -2082,6 +2080,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
        mutex_lock(&nbd->config_lock);
        nbd_disconnect(nbd);
        sock_shutdown(nbd);
+       wake_up(&nbd->config->conn_wait);
        /*
         * Make sure recv thread has finished, we can safely call nbd_clear_que()
         * to cancel the inflight I/Os.
index 05b1120e66234d71ba18ba06428cd594c01ffe81..539cfeac263dbe1f49e0a7c1f9f77037554b65cc 100644 (file)
@@ -11,6 +11,9 @@
 #include <linux/init.h>
 #include "null_blk.h"
 
+#undef pr_fmt
+#define pr_fmt(fmt)    "null_blk: " fmt
+
 #define FREE_BATCH             16
 
 #define TICKS_PER_SEC          50ULL
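
The pr_fmt redefinition is what lets the messages below drop any hand-written "null_blk: " prefixes: every pr_*() macro passes its format string through pr_fmt() before it reaches printk(). Roughly:

    #define pr_fmt(fmt) "null_blk: " fmt

    /*
     * pr_err("invalid block size\n") now expands to
     * printk(KERN_ERR "null_blk: " "invalid block size\n")
     */
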
@@ -232,6 +235,7 @@ static struct nullb_device *null_alloc_dev(void);
 static void null_free_dev(struct nullb_device *dev);
 static void null_del_dev(struct nullb *nullb);
 static int null_add_dev(struct nullb_device *dev);
+static struct nullb *null_find_dev_by_name(const char *name);
 static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
 
 static inline struct nullb_device *to_nullb_device(struct config_item *item)
@@ -560,6 +564,9 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name)
 {
        struct nullb_device *dev;
 
+       if (null_find_dev_by_name(name))
+               return ERR_PTR(-EEXIST);
+
        dev = null_alloc_dev();
        if (!dev)
                return ERR_PTR(-ENOMEM);
@@ -1600,7 +1607,7 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
         * Only fake timeouts need to execute blk_mq_complete_request() here.
         */
        cmd->error = BLK_STS_TIMEOUT;
-       if (cmd->fake_timeout)
+       if (cmd->fake_timeout || hctx->type == HCTX_TYPE_POLL)
                blk_mq_complete_request(rq);
        return BLK_EH_DONE;
 }
@@ -1765,9 +1772,7 @@ static void null_config_discard(struct nullb *nullb)
        }
 
        nullb->q->limits.discard_granularity = nullb->dev->blocksize;
-       nullb->q->limits.discard_alignment = nullb->dev->blocksize;
        blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
-       blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
 }
 
 static const struct block_device_operations null_bio_ops = {
@@ -2061,7 +2066,13 @@ static int null_add_dev(struct nullb_device *dev)
 
        null_config_discard(nullb);
 
-       sprintf(nullb->disk_name, "nullb%d", nullb->index);
+       if (config_item_name(&dev->item)) {
+               /* Use configfs dir name as the device name */
+               snprintf(nullb->disk_name, sizeof(nullb->disk_name),
+                        "%s", config_item_name(&dev->item));
+       } else {
+               sprintf(nullb->disk_name, "nullb%d", nullb->index);
+       }
 
        rv = null_gendisk_register(nullb);
        if (rv)
@@ -2071,6 +2082,8 @@ static int null_add_dev(struct nullb_device *dev)
        list_add_tail(&nullb->list, &nullb_list);
        mutex_unlock(&lock);
 
+       pr_info("disk %s created\n", nullb->disk_name);
+
        return 0;
 out_cleanup_zone:
        null_free_zoned_dev(dev);
@@ -2088,12 +2101,53 @@ out:
        return rv;
 }
 
+static struct nullb *null_find_dev_by_name(const char *name)
+{
+       struct nullb *nullb = NULL, *nb;
+
+       mutex_lock(&lock);
+       list_for_each_entry(nb, &nullb_list, list) {
+               if (strcmp(nb->disk_name, name) == 0) {
+                       nullb = nb;
+                       break;
+               }
+       }
+       mutex_unlock(&lock);
+
+       return nullb;
+}
+
+static int null_create_dev(void)
+{
+       struct nullb_device *dev;
+       int ret;
+
+       dev = null_alloc_dev();
+       if (!dev)
+               return -ENOMEM;
+
+       ret = null_add_dev(dev);
+       if (ret) {
+               null_free_dev(dev);
+               return ret;
+       }
+
+       return 0;
+}
+
+static void null_destroy_dev(struct nullb *nullb)
+{
+       struct nullb_device *dev = nullb->dev;
+
+       null_del_dev(nullb);
+       null_free_dev(dev);
+}
+
 static int __init null_init(void)
 {
        int ret = 0;
        unsigned int i;
        struct nullb *nullb;
-       struct nullb_device *dev;
 
        if (g_bs > PAGE_SIZE) {
                pr_warn("invalid block size\n");
@@ -2113,19 +2167,21 @@ static int __init null_init(void)
        }
 
        if (g_queue_mode == NULL_Q_RQ) {
-               pr_err("legacy IO path no longer available\n");
+               pr_err("legacy IO path is no longer available\n");
                return -EINVAL;
        }
+
        if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
                if (g_submit_queues != nr_online_nodes) {
                        pr_warn("submit_queues param is set to %u.\n",
-                                                       nr_online_nodes);
+                               nr_online_nodes);
                        g_submit_queues = nr_online_nodes;
                }
-       } else if (g_submit_queues > nr_cpu_ids)
+       } else if (g_submit_queues > nr_cpu_ids) {
                g_submit_queues = nr_cpu_ids;
-       else if (g_submit_queues <= 0)
+       } else if (g_submit_queues <= 0) {
                g_submit_queues = 1;
+       }
 
        if (g_queue_mode == NULL_Q_MQ && shared_tags) {
                ret = null_init_tag_set(NULL, &tag_set);
@@ -2149,16 +2205,9 @@ static int __init null_init(void)
        }
 
        for (i = 0; i < nr_devices; i++) {
-               dev = null_alloc_dev();
-               if (!dev) {
-                       ret = -ENOMEM;
-                       goto err_dev;
-               }
-               ret = null_add_dev(dev);
-               if (ret) {
-                       null_free_dev(dev);
+               ret = null_create_dev();
+               if (ret)
                        goto err_dev;
-               }
        }
 
        pr_info("module loaded\n");
@@ -2167,9 +2216,7 @@ static int __init null_init(void)
 err_dev:
        while (!list_empty(&nullb_list)) {
                nullb = list_entry(nullb_list.next, struct nullb, list);
-               dev = nullb->dev;
-               null_del_dev(nullb);
-               null_free_dev(dev);
+               null_destroy_dev(nullb);
        }
        unregister_blkdev(null_major, "nullb");
 err_conf:
@@ -2190,12 +2237,8 @@ static void __exit null_exit(void)
 
        mutex_lock(&lock);
        while (!list_empty(&nullb_list)) {
-               struct nullb_device *dev;
-
                nullb = list_entry(nullb_list.next, struct nullb, list);
-               dev = nullb->dev;
-               null_del_dev(nullb);
-               null_free_dev(dev);
+               null_destroy_dev(nullb);
        }
        mutex_unlock(&lock);
 
index 78eb56b0ca55f5b0dd6917983ec0acbaa54f1255..4525a65e1b23d91bd6a756901c0d6d5064b99558 100644 (file)
 #include <linux/mutex.h>
 
 struct nullb_cmd {
-       struct request *rq;
-       struct bio *bio;
+       union {
+               struct request *rq;
+               struct bio *bio;
+       };
        unsigned int tag;
        blk_status_t error;
+       bool fake_timeout;
        struct nullb_queue *nq;
        struct hrtimer timer;
-       bool fake_timeout;
 };
 
 struct nullb_queue {
index dae54dd1aeac31df0a4757a1c52fd1df70faeadf..ed158ea4fdd1adf5f5abde17d5e7f216e9e0ca20 100644 (file)
@@ -6,6 +6,9 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
+#undef pr_fmt
+#define pr_fmt(fmt)    "null_blk: " fmt
+
 static inline sector_t mb_to_sects(unsigned long mb)
 {
        return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT;
@@ -75,8 +78,8 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
                dev->zone_capacity = dev->zone_size;
 
        if (dev->zone_capacity > dev->zone_size) {
-               pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n",
-                                       dev->zone_capacity, dev->zone_size);
+               pr_err("zone capacity (%lu MB) larger than zone size (%lu MB)\n",
+                      dev->zone_capacity, dev->zone_size);
                return -EINVAL;
        }
 
index 86c8794ede415c0476f6eaddf32c63c2c946fb0e..789093375344310e882c4314cdecd0bac1637d7a 100644 (file)
@@ -12,7 +12,7 @@
  * Theory of operation:
  *
  * At the lowest level, there is the standard driver for the CD/DVD device,
- * typically ide-cd.c or sr.c. This driver can handle read and write requests,
+ * such as drivers/scsi/sr.c. This driver can handle read and write requests,
  * but it doesn't know anything about the special restrictions that apply to
  * packet writing. One restriction is that write requests must be aligned to
  * packet boundaries on the physical media, and the size of a write request
@@ -522,7 +522,7 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
                goto no_pkt;
 
        pkt->frames = frames;
-       pkt->w_bio = bio_kmalloc(GFP_KERNEL, frames);
+       pkt->w_bio = bio_kmalloc(frames, GFP_KERNEL);
        if (!pkt->w_bio)
                goto no_bio;
 
@@ -536,27 +536,21 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
        bio_list_init(&pkt->orig_bios);
 
        for (i = 0; i < frames; i++) {
-               struct bio *bio = bio_kmalloc(GFP_KERNEL, 1);
-               if (!bio)
+               pkt->r_bios[i] = bio_kmalloc(1, GFP_KERNEL);
+               if (!pkt->r_bios[i])
                        goto no_rd_bio;
-
-               pkt->r_bios[i] = bio;
        }
 
        return pkt;
 
 no_rd_bio:
-       for (i = 0; i < frames; i++) {
-               struct bio *bio = pkt->r_bios[i];
-               if (bio)
-                       bio_put(bio);
-       }
-
+       for (i = 0; i < frames; i++)
+               kfree(pkt->r_bios[i]);
 no_page:
        for (i = 0; i < frames / FRAMES_PER_PAGE; i++)
                if (pkt->pages[i])
                        __free_page(pkt->pages[i]);
-       bio_put(pkt->w_bio);
+       kfree(pkt->w_bio);
 no_bio:
        kfree(pkt);
 no_pkt:
@@ -570,14 +564,11 @@ static void pkt_free_packet_data(struct packet_data *pkt)
 {
        int i;
 
-       for (i = 0; i < pkt->frames; i++) {
-               struct bio *bio = pkt->r_bios[i];
-               if (bio)
-                       bio_put(bio);
-       }
+       for (i = 0; i < pkt->frames; i++)
+               kfree(pkt->r_bios[i]);
        for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++)
                __free_page(pkt->pages[i]);
-       bio_put(pkt->w_bio);
+       kfree(pkt->w_bio);
        kfree(pkt);
 }
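
The pktcdvd conversion tracks the 5.19 bio_kmalloc() rework: the helper now takes (nr_vecs, gfp) and hands back an uninitialized bio that must be armed with bio_init() before each use, torn down with bio_uninit() in the completion path, and released with plain kfree() rather than bio_put(). One round trip, as a sketch:

    struct bio *bio;

    bio = bio_kmalloc(nr_vecs, GFP_KERNEL);         /* bare allocation */
    if (!bio)
            return -ENOMEM;
    bio_init(bio, bdev, bio->bi_inline_vecs, nr_vecs, REQ_OP_READ);
    /* ... submit and wait for completion ... */
    bio_uninit(bio);        /* drop any per-use state */
    kfree(bio);             /* not bio_put(): no refcounted release */
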
 
@@ -951,6 +942,7 @@ static void pkt_end_io_read(struct bio *bio)
 
        if (bio->bi_status)
                atomic_inc(&pkt->io_errors);
+       bio_uninit(bio);
        if (atomic_dec_and_test(&pkt->io_wait)) {
                atomic_inc(&pkt->run_sm);
                wake_up(&pd->wqueue);
@@ -968,6 +960,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
 
        pd->stats.pkt_ended++;
 
+       bio_uninit(bio);
        pkt_bio_finished(pd);
        atomic_dec(&pkt->io_wait);
        atomic_inc(&pkt->run_sm);
@@ -1022,7 +1015,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
                        continue;
 
                bio = pkt->r_bios[f];
-               bio_reset(bio, pd->bdev, REQ_OP_READ);
+               bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ);
                bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
                bio->bi_end_io = pkt_end_io_read;
                bio->bi_private = pkt;
@@ -1235,7 +1228,8 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 {
        int f;
 
-       bio_reset(pkt->w_bio, pd->bdev, REQ_OP_WRITE);
+       bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames,
+                REQ_OP_WRITE);
        pkt->w_bio->bi_iter.bi_sector = pkt->sector;
        pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
        pkt->w_bio->bi_private = pkt;
index b844432bad20b4b07b7fdc89424ee0095428ec28..2b21f717cce1a71d6a2f0fc2dd912b8958fae5d4 100644 (file)
@@ -4942,7 +4942,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
        blk_queue_io_opt(q, rbd_dev->opts->alloc_size);
 
        if (rbd_dev->opts->trim) {
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
                q->limits.discard_granularity = rbd_dev->opts->alloc_size;
                blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
                blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
index b66e8840b94b88400e0cba0ea1af01b815a390e3..409c76b81aed4613f57cb3f5cb14d4db3a331ae3 100644 (file)
@@ -25,6 +25,7 @@ static int rnbd_client_major;
 static DEFINE_IDA(index_ida);
 static DEFINE_MUTEX(sess_lock);
 static LIST_HEAD(sess_list);
+static struct workqueue_struct *rnbd_clt_wq;
 
 /*
  * Maximum number of partitions an instance can have.
@@ -1364,11 +1365,9 @@ static void setup_request_queue(struct rnbd_clt_dev *dev)
        blk_queue_max_discard_sectors(dev->queue, dev->max_discard_sectors);
        dev->queue->limits.discard_granularity  = dev->discard_granularity;
        dev->queue->limits.discard_alignment    = dev->discard_alignment;
-       if (dev->max_discard_sectors)
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, dev->queue);
        if (dev->secure_discard)
-               blk_queue_flag_set(QUEUE_FLAG_SECERASE, dev->queue);
-
+               blk_queue_max_secure_erase_sectors(dev->queue,
+                               dev->max_discard_sectors);
        blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
        blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
        blk_queue_max_segments(dev->queue, dev->max_segments);
@@ -1761,12 +1760,12 @@ static void rnbd_destroy_sessions(void)
                         * procedure takes minutes.
                         */
                        INIT_WORK(&dev->unmap_on_rmmod_work, unmap_device_work);
-                       queue_work(system_long_wq, &dev->unmap_on_rmmod_work);
+                       queue_work(rnbd_clt_wq, &dev->unmap_on_rmmod_work);
                }
                rnbd_clt_put_sess(sess);
        }
        /* Wait for all scheduled unmap works */
-       flush_workqueue(system_long_wq);
+       flush_workqueue(rnbd_clt_wq);
        WARN_ON(!list_empty(&sess_list));
 }
 
@@ -1791,6 +1790,14 @@ static int __init rnbd_client_init(void)
                pr_err("Failed to load module, creating sysfs device files failed, err: %d\n",
                       err);
                unregister_blkdev(rnbd_client_major, "rnbd");
+               return err;
+       }
+       rnbd_clt_wq = alloc_workqueue("rnbd_clt_wq", 0, 0);
+       if (!rnbd_clt_wq) {
+               pr_err("Failed to load module, alloc_workqueue failed.\n");
+               rnbd_clt_destroy_sysfs_files();
+               unregister_blkdev(rnbd_client_major, "rnbd");
+               err = -ENOMEM;
        }
 
        return err;
@@ -1801,6 +1808,7 @@ static void __exit rnbd_client_exit(void)
        rnbd_destroy_sessions();
        unregister_blkdev(rnbd_client_major, "rnbd");
        ida_destroy(&index_ida);
+       destroy_workqueue(rnbd_clt_wq);
 }
 
 module_init(rnbd_client_init);
index 2c3df02b5e8ec3aa96bd34f32aa29e9a94570607..4309e52524691b9df4df0399d99a0d98060f53fc 100644 (file)
@@ -44,16 +44,12 @@ static inline int rnbd_dev_get_max_hw_sects(const struct rnbd_dev *dev)
 
 static inline int rnbd_dev_get_secure_discard(const struct rnbd_dev *dev)
 {
-       return blk_queue_secure_erase(bdev_get_queue(dev->bdev));
+       return bdev_max_secure_erase_sectors(dev->bdev);
 }
 
 static inline int rnbd_dev_get_max_discard_sects(const struct rnbd_dev *dev)
 {
-       if (!blk_queue_discard(bdev_get_queue(dev->bdev)))
-               return 0;
-
-       return blk_queue_get_max_sectors(bdev_get_queue(dev->bdev),
-                                        REQ_OP_DISCARD);
+       return bdev_max_discard_sectors(dev->bdev);
 }
 
 static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev)
@@ -63,7 +59,7 @@ static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev)
 
 static inline int rnbd_dev_get_discard_alignment(const struct rnbd_dev *dev)
 {
-       return bdev_get_queue(dev->bdev)->limits.discard_alignment;
+       return bdev_discard_alignment(dev->bdev);
 }
 
 #endif /* RNBD_SRV_DEV_H */
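
All of these one-liners are the consumer side of QUEUE_FLAG_DISCARD's removal this cycle: instead of testing a queue flag, capability probes ask the bdev helpers, where a zero limit now means "unsupported". For instance:

    /* Sketch: feature discovery after the flag removal. */
    static bool dev_supports_discard(struct block_device *bdev)
    {
            return bdev_max_discard_sectors(bdev) != 0;
    }

    static bool dev_supports_secure_erase(struct block_device *bdev)
    {
            return bdev_max_secure_erase_sectors(bdev) != 0;
    }
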
index f04df6294650b6f31b4009eb790b2ace72427b6d..beaef43a67b9deee8a6bbd7303de602f8be6939f 100644 (file)
@@ -533,7 +533,6 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
                                        struct rnbd_srv_sess_dev *sess_dev)
 {
        struct rnbd_dev *rnbd_dev = sess_dev->rnbd_dev;
-       struct request_queue *q = bdev_get_queue(rnbd_dev->bdev);
 
        rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP);
        rsp->device_id =
@@ -558,9 +557,9 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
        rsp->secure_discard =
                cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev));
        rsp->cache_policy = 0;
-       if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+       if (bdev_write_cache(rnbd_dev->bdev))
                rsp->cache_policy |= RNBD_WRITEBACK;
-       if (blk_queue_fua(q))
+       if (bdev_fua(rnbd_dev->bdev))
                rsp->cache_policy |= RNBD_FUA;
 }
 
index a8bcf3f664af1526be4e5cb2914d0d1771a9c1dd..d624cc8eddc3c766eca2546aaff883a6f6cdf5d2 100644 (file)
@@ -867,11 +867,12 @@ static int virtblk_probe(struct virtio_device *vdev)
                blk_queue_io_opt(q, blk_size * opt_io_size);
 
        if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
-               q->limits.discard_granularity = blk_size;
-
                virtio_cread(vdev, struct virtio_blk_config,
                             discard_sector_alignment, &v);
-               q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
+               if (v)
+                       q->limits.discard_granularity = v << SECTOR_SHIFT;
+               else
+                       q->limits.discard_granularity = blk_size;
 
                virtio_cread(vdev, struct virtio_blk_config,
                             max_discard_sectors, &v);
@@ -888,8 +889,6 @@ static int virtblk_probe(struct virtio_device *vdev)
                        v = sg_elems;
                blk_queue_max_discard_segments(q,
                                               min(v, MAX_DISCARD_SEGMENTS));
-
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
        }
 
        if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
index de42458195bc1c8990faf8477168ea3b0d604e20..a97f2bf5b01b96febefd0c5637897fd744eb2918 100644 (file)
@@ -970,7 +970,6 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
        int status = BLKIF_RSP_OKAY;
        struct xen_blkif *blkif = ring->blkif;
        struct block_device *bdev = blkif->vbd.bdev;
-       unsigned long secure;
        struct phys_req preq;
 
        xen_blkif_get(blkif);
@@ -987,13 +986,15 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
        }
        ring->st_ds_req++;
 
-       secure = (blkif->vbd.discard_secure &&
-                (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
-                BLKDEV_DISCARD_SECURE : 0;
+       if (blkif->vbd.discard_secure &&
+           (req->u.discard.flag & BLKIF_DISCARD_SECURE))
+               err = blkdev_issue_secure_erase(bdev,
+                               req->u.discard.sector_number,
+                               req->u.discard.nr_sectors, GFP_KERNEL);
+       else
+               err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
+                               req->u.discard.nr_sectors, GFP_KERNEL);
 
-       err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
-                                  req->u.discard.nr_sectors,
-                                  GFP_KERNEL, secure);
 fail_response:
        if (err == -EOPNOTSUPP) {
                pr_debug("discard op failed, not supported\n");
index f09040435e2e541730e05748e08622ad57d0cbc1..97de13b14175eb8ced14d1649a3be461a464ee8a 100644 (file)
@@ -484,7 +484,6 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
 {
        struct xen_vbd *vbd;
        struct block_device *bdev;
-       struct request_queue *q;
 
        vbd = &blkif->vbd;
        vbd->handle   = handle;
@@ -516,11 +515,9 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
        if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
                vbd->type |= VDISK_REMOVABLE;
 
-       q = bdev_get_queue(bdev);
-       if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+       if (bdev_write_cache(bdev))
                vbd->flush_support = true;
-
-       if (q && blk_queue_secure_erase(q))
+       if (bdev_max_secure_erase_sectors(bdev))
                vbd->discard_secure = true;
 
        vbd->feature_gnt_persistent = feature_persistent;
@@ -578,22 +575,21 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
        int err;
        int state = 0;
        struct block_device *bdev = be->blkif->vbd.bdev;
-       struct request_queue *q = bdev_get_queue(bdev);
 
        if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
                return;
 
-       if (blk_queue_discard(q)) {
+       if (bdev_max_discard_sectors(bdev)) {
                err = xenbus_printf(xbt, dev->nodename,
                        "discard-granularity", "%u",
-                       q->limits.discard_granularity);
+                       bdev_discard_granularity(bdev));
                if (err) {
                        dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
                        return;
                }
                err = xenbus_printf(xbt, dev->nodename,
                        "discard-alignment", "%u",
-                       q->limits.discard_alignment);
+                       bdev_discard_alignment(bdev));
                if (err) {
                        dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
                        return;
index 003056d4f7f5f078b9dfda5c48833cc2a125a9a8..0f3f5238f7bce76066d310d29df3379ab0dded44 100644 (file)
@@ -944,13 +944,13 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
        blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
 
        if (info->feature_discard) {
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq);
                blk_queue_max_discard_sectors(rq, get_capacity(gd));
                rq->limits.discard_granularity = info->discard_granularity ?:
                                                 info->physical_sector_size;
                rq->limits.discard_alignment = info->discard_alignment;
                if (info->feature_secdiscard)
-                       blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq);
+                       blk_queue_max_secure_erase_sectors(rq,
+                                                          get_capacity(gd));
        }
 
        /* Hard sector size and max sectors impersonate the equiv. hardware. */
@@ -1606,8 +1606,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                                blkif_req(req)->error = BLK_STS_NOTSUPP;
                                info->feature_discard = 0;
                                info->feature_secdiscard = 0;
-                               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
-                               blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
+                               blk_queue_max_discard_sectors(rq, 0);
+                               blk_queue_max_secure_erase_sectors(rq, 0);
                        }
                        break;
                case BLKIF_OP_FLUSH_DISKCACHE:
index e9474b02012deb758f55df23859a986072a8dd98..6853dd3c7d3a2e4c2d51d75baf1f688ccdca155c 100644 (file)
@@ -1675,9 +1675,10 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
        bv.bv_len = PAGE_SIZE;
        bv.bv_offset = 0;
 
-       start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
+       start_time = bdev_start_io_acct(bdev->bd_disk->part0,
+                       SECTORS_PER_PAGE, op, jiffies);
        ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
-       disk_end_io_acct(bdev->bd_disk, op, start_time);
+       bdev_end_io_acct(bdev->bd_disk->part0, op, start_time);
 out:
        /*
         * If I/O fails, just return error(ie, non-zero) without
@@ -1786,7 +1787,7 @@ static ssize_t reset_store(struct device *dev,
        int ret;
        unsigned short do_reset;
        struct zram *zram;
-       struct block_device *bdev;
+       struct gendisk *disk;
 
        ret = kstrtou16(buf, 10, &do_reset);
        if (ret)
@@ -1796,26 +1797,26 @@ static ssize_t reset_store(struct device *dev,
                return -EINVAL;
 
        zram = dev_to_zram(dev);
-       bdev = zram->disk->part0;
+       disk = zram->disk;
 
-       mutex_lock(&bdev->bd_disk->open_mutex);
+       mutex_lock(&disk->open_mutex);
        /* Do not reset an active device or claimed device */
-       if (bdev->bd_openers || zram->claim) {
-               mutex_unlock(&bdev->bd_disk->open_mutex);
+       if (disk_openers(disk) || zram->claim) {
+               mutex_unlock(&disk->open_mutex);
                return -EBUSY;
        }
 
        /* From now on, anyone can't open /dev/zram[0-9] */
        zram->claim = true;
-       mutex_unlock(&bdev->bd_disk->open_mutex);
+       mutex_unlock(&disk->open_mutex);
 
        /* Make sure all the pending I/O are finished */
-       sync_blockdev(bdev);
+       sync_blockdev(disk->part0);
        zram_reset_device(zram);
 
-       mutex_lock(&bdev->bd_disk->open_mutex);
+       mutex_lock(&disk->open_mutex);
        zram->claim = false;
-       mutex_unlock(&bdev->bd_disk->open_mutex);
+       mutex_unlock(&disk->open_mutex);
 
        return len;
 }
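
The claim flag only works as a fence because the open path honours it under the same lock; a sketch of that counterpart (paraphrased, not part of this hunk, and the real zram_open() takes a block_device rather than a gendisk):

    static int zram_open_sketch(struct gendisk *disk, fmode_t mode)
    {
            struct zram *zram = disk->private_data;

            /*
             * open() and reset_store() both serialize on
             * disk->open_mutex, so a claimed device reliably
             * refuses new openers.
             */
            if (zram->claim)
                    return -EBUSY;
            return 0;
    }
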
@@ -1952,7 +1953,6 @@ static int zram_add(void)
        blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
        zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
        blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
-       blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
 
        /*
         * zram_bio_discard() will clear all logical blocks if logical block
@@ -1987,19 +1987,18 @@ out_free_dev:
 
 static int zram_remove(struct zram *zram)
 {
-       struct block_device *bdev = zram->disk->part0;
        bool claimed;
 
-       mutex_lock(&bdev->bd_disk->open_mutex);
-       if (bdev->bd_openers) {
-               mutex_unlock(&bdev->bd_disk->open_mutex);
+       mutex_lock(&zram->disk->open_mutex);
+       if (disk_openers(zram->disk)) {
+               mutex_unlock(&zram->disk->open_mutex);
                return -EBUSY;
        }
 
        claimed = zram->claim;
        if (!claimed)
                zram->claim = true;
-       mutex_unlock(&bdev->bd_disk->open_mutex);
+       mutex_unlock(&zram->disk->open_mutex);
 
        zram_debugfs_unregister(zram);
 
@@ -2011,7 +2010,7 @@ static int zram_remove(struct zram *zram)
                ;
        } else {
                /* Make sure all the pending I/O are finished */
-               sync_blockdev(bdev);
+               sync_blockdev(zram->disk->part0);
                zram_reset_device(zram);
        }
 
index 5e0e4393ce4d4620af4d8278a74429cbf939ea59..0cfe859a4ac4d02cfadb76d14f72f097808d233e 100644 (file)
@@ -224,8 +224,12 @@ int fsl_mc_msi_domain_alloc_irqs(struct device *dev,  unsigned int irq_count)
        if (error)
                return error;
 
+       msi_lock_descs(dev);
        if (msi_first_desc(dev, MSI_DESC_ALL))
-               return -EINVAL;
+               error = -EINVAL;
+       msi_unlock_descs(dev);
+       if (error)
+               return error;
 
        /*
         * NOTE: Calling this function will trigger the invocation of the
index 60fbd42041dd32a01385e41114bc60182438e25a..828c66bbaa6765b07919ed61802e3a111b128493 100644 (file)
@@ -352,8 +352,7 @@ static int of_weim_notify(struct notifier_block *nb, unsigned long action,
 
                pdev = of_find_device_by_node(rd->dn);
                if (!pdev) {
-                       dev_err(&pdev->dev,
-                               "Could not find platform device for '%pOF'\n",
+                       pr_err("Could not find platform device for '%pOF'\n",
                                rd->dn);
 
                        ret = notifier_from_errno(-EINVAL);
@@ -370,7 +369,7 @@ static int of_weim_notify(struct notifier_block *nb, unsigned long action,
        return ret;
 }
 
-struct notifier_block weim_of_notifier = {
+static struct notifier_block weim_of_notifier = {
        .notifier_call = of_weim_notify,
 };
 #endif /* IS_ENABLED(CONFIG_OF_DYNAMIC) */
index 9527b7d638401458a5f3a26895674825e4cbc921..541ced27d9412f8ef38885bc5e9b0171dbaea577 100644 (file)
@@ -1060,6 +1060,7 @@ static int __maybe_unused mhi_pci_freeze(struct device *dev)
         * the intermediate restore kernel reinitializes MHI device with new
         * context.
         */
+       flush_work(&mhi_pdev->recovery_work);
        if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
                mhi_power_down(mhi_cntrl, true);
                mhi_unprepare_after_power_down(mhi_cntrl);
@@ -1085,6 +1086,7 @@ static const struct dev_pm_ops mhi_pci_pm_ops = {
        .resume = mhi_pci_resume,
        .freeze = mhi_pci_freeze,
        .thaw = mhi_pci_restore,
+       .poweroff = mhi_pci_freeze,
        .restore = mhi_pci_restore,
 #endif
 };
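
Wiring .poweroff to the freeze handler covers the second leg of hibernation: ->freeze runs before the snapshot is taken, while ->poweroff runs just before the machine powers down after the image is written, and without it that second path would leave the device running. The resulting shape, with hypothetical foo_* handlers:

    static const struct dev_pm_ops foo_pm_ops = {
            .freeze   = foo_freeze,         /* pre-snapshot quiesce */
            .poweroff = foo_freeze,         /* pre-power-down quiesce */
            .thaw     = foo_restore,
            .restore  = foo_restore,
    };
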
index 4566e730ef2b81c6dc88f623644ebf657819acde..60b082fe2ed0212c99ef8a6c2bcb489cca95bb99 100644 (file)
@@ -227,6 +227,8 @@ static struct sunxi_rsb_device *sunxi_rsb_device_create(struct sunxi_rsb *rsb,
 
        dev_dbg(&rdev->dev, "device %s registered\n", dev_name(&rdev->dev));
 
+       return rdev;
+
 err_device_add:
        put_device(&rdev->dev);
 
index 54c0ee6dda3021e80943543662e8628743d75df5..7a1b1f9e49333761a25a40618a76eeab96c07147 100644 (file)
@@ -3232,13 +3232,27 @@ static int sysc_check_disabled_devices(struct sysc *ddata)
  */
 static int sysc_check_active_timer(struct sysc *ddata)
 {
+       int error;
+
        if (ddata->cap->type != TI_SYSC_OMAP2_TIMER &&
            ddata->cap->type != TI_SYSC_OMAP4_TIMER)
                return 0;
 
+       /*
+        * Quirk for omap3 beagleboard revision A to B4 to use gpt12.
+        * Revision C and later are fixed with commit 23885389dbbb ("ARM:
+        * dts: Fix timer regression for beagleboard revision c"). This all
+        * can be dropped if we stop supporting old beagleboard revisions
+        * A to B4 at some point.
+        */
+       if (sysc_soc->soc == SOC_3430)
+               error = -ENXIO;
+       else
+               error = -EBUSY;
+
        if ((ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT) &&
            (ddata->cfg.quirks & SYSC_QUIRK_NO_IDLE))
-               return -ENXIO;
+               return error;
 
        return 0;
 }
index 7bd10d63ddbe5904c1e32c4cb8ec6d1647b2eb14..416f723a2dbb337508222aac17f29177029c60d1 100644 (file)
    actually talk to the hardware. Suggestions are welcome.
    Patches that work are more welcome though.  ;-)
 
- To Do List:
- ----------------------------------
-
- -- Modify sysctl/proc interface. I plan on having one directory per
- drive, with entries for outputing general drive information, and sysctl
- based tunable parameters such as whether the tray should auto-close for
- that drive. Suggestions (or patches) for this welcome!
-
-
  Revision History
  ----------------------------------
  1.00  Date Unknown -- David van Leeuwen <david@tm.tno.nl>
@@ -648,6 +639,7 @@ int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi)
        mutex_unlock(&cdrom_mutex);
        return 0;
 }
+EXPORT_SYMBOL(register_cdrom);
 #undef ENSURE
 
 void unregister_cdrom(struct cdrom_device_info *cdi)
@@ -663,6 +655,7 @@ void unregister_cdrom(struct cdrom_device_info *cdi)
 
        cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name);
 }
+EXPORT_SYMBOL(unregister_cdrom);
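
These relocations follow the kernel coding-style rule that EXPORT_SYMBOL() sits immediately after the closing brace of the function it exports, instead of being collected at the end of the file; the block of exports removed further down is the old collection. The shape, with a hypothetical symbol:

    int cdrom_frob(struct cdrom_device_info *cdi)   /* hypothetical */
    {
            return 0;
    }
    EXPORT_SYMBOL(cdrom_frob);
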
 
 int cdrom_get_media_event(struct cdrom_device_info *cdi,
                          struct media_event_desc *med)
@@ -690,6 +683,7 @@ int cdrom_get_media_event(struct cdrom_device_info *cdi,
        memcpy(med, &buffer[sizeof(*eh)], sizeof(*med));
        return 0;
 }
+EXPORT_SYMBOL(cdrom_get_media_event);
 
 static int cdrom_get_random_writable(struct cdrom_device_info *cdi,
                              struct rwrt_feature_desc *rfd)
@@ -1206,6 +1200,7 @@ err:
        cdi->use_count--;
        return ret;
 }
+EXPORT_SYMBOL(cdrom_open);
 
 /* This code is similar to that in open_for_data. The routine is called
    whenever an audio play operation is requested.
@@ -1301,6 +1296,7 @@ void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode)
                        cdo->tray_move(cdi, 1);
        }
 }
+EXPORT_SYMBOL(cdrom_release);
 
 static int cdrom_read_mech_status(struct cdrom_device_info *cdi, 
                                  struct cdrom_changer_info *buf)
@@ -1365,7 +1361,6 @@ out_free:
  */
 int cdrom_number_of_slots(struct cdrom_device_info *cdi) 
 {
-       int status;
        int nslots = 1;
        struct cdrom_changer_info *info;
 
@@ -1377,12 +1372,13 @@ int cdrom_number_of_slots(struct cdrom_device_info *cdi)
        if (!info)
                return -ENOMEM;
 
-       if ((status = cdrom_read_mech_status(cdi, info)) == 0)
+       if (cdrom_read_mech_status(cdi, info) == 0)
                nslots = info->hdr.nslots;
 
        kfree(info);
        return nslots;
 }
+EXPORT_SYMBOL(cdrom_number_of_slots);
 
 
 /* If SLOT < 0, unload the current slot.  Otherwise, try to load SLOT. */
@@ -1582,6 +1578,7 @@ void init_cdrom_command(struct packet_command *cgc, void *buf, int len,
        cgc->data_direction = type;
        cgc->timeout = CDROM_DEF_TIMEOUT;
 }
+EXPORT_SYMBOL(init_cdrom_command);
 
 /* DVD handling */
 
@@ -2000,6 +1997,7 @@ int cdrom_mode_sense(struct cdrom_device_info *cdi,
        cgc->data_direction = CGC_DATA_READ;
        return cdo->generic_packet(cdi, cgc);
 }
+EXPORT_SYMBOL(cdrom_mode_sense);
 
 int cdrom_mode_select(struct cdrom_device_info *cdi,
                      struct packet_command *cgc)
@@ -2015,6 +2013,7 @@ int cdrom_mode_select(struct cdrom_device_info *cdi,
        cgc->data_direction = CGC_DATA_WRITE;
        return cdo->generic_packet(cdi, cgc);
 }
+EXPORT_SYMBOL(cdrom_mode_select);
 
 static int cdrom_read_subchannel(struct cdrom_device_info *cdi,
                                 struct cdrom_subchnl *subchnl, int mcn)
@@ -2444,14 +2443,6 @@ static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi,
                        return -EINVAL;
        }
 
-       /*
-        * ->select_disc is a hook to allow a driver-specific way of
-        * seleting disc.  However, since there is no equivalent hook for
-        * cdrom_slot_status this may not actually be useful...
-        */
-       if (cdi->ops->select_disc)
-               return cdi->ops->select_disc(cdi, arg);
-
        cd_dbg(CD_CHANGER, "Using generic cdrom_select_disc()\n");
        return cdrom_select_disc(cdi, arg);
 }
@@ -2893,6 +2884,7 @@ use_toc:
        *last_written = toc.cdte_addr.lba;
        return 0;
 }
+EXPORT_SYMBOL(cdrom_get_last_written);
 
 /* return the next writable block. also for udf file system. */
 static int cdrom_get_next_writable(struct cdrom_device_info *cdi,
@@ -3430,18 +3422,7 @@ int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
 
        return -ENOSYS;
 }
-
-EXPORT_SYMBOL(cdrom_get_last_written);
-EXPORT_SYMBOL(register_cdrom);
-EXPORT_SYMBOL(unregister_cdrom);
-EXPORT_SYMBOL(cdrom_open);
-EXPORT_SYMBOL(cdrom_release);
 EXPORT_SYMBOL(cdrom_ioctl);
-EXPORT_SYMBOL(cdrom_number_of_slots);
-EXPORT_SYMBOL(cdrom_mode_select);
-EXPORT_SYMBOL(cdrom_mode_sense);
-EXPORT_SYMBOL(init_cdrom_command);
-EXPORT_SYMBOL(cdrom_get_media_event);
 
 #ifdef CONFIG_SYSCTL
 
index c59265146e9c87b0bbd8e222fc557515f4b48d6a..f1827257ef0e08ecca1d62f26562b7e80c3f5f08 100644 (file)
@@ -3677,8 +3677,11 @@ static void cleanup_smi_msgs(struct ipmi_smi *intf)
 void ipmi_unregister_smi(struct ipmi_smi *intf)
 {
        struct ipmi_smi_watcher *w;
-       int intf_num = intf->intf_num, index;
+       int intf_num, index;
 
+       if (!intf)
+               return;
+       intf_num = intf->intf_num;
        mutex_lock(&ipmi_interfaces_mutex);
        intf->intf_num = -1;
        intf->in_shutdown = true;
@@ -4518,6 +4521,8 @@ return_unspecified:
                } else
                        /* The message was sent, start the timer. */
                        intf_start_seq_timer(intf, msg->msgid);
+               requeue = 0;
+               goto out;
        } else if (((msg->rsp[0] >> 2) != ((msg->data[0] >> 2) | 1))
                   || (msg->rsp[1] != msg->data[1])) {
                /*
index 64dedb3ef8ec43acf059a943e0e8cf7846450423..5604a810fb3d2d7d40d463bb2a3e4d52940bae64 100644 (file)
@@ -2220,10 +2220,7 @@ static void cleanup_one_si(struct smi_info *smi_info)
                return;
 
        list_del(&smi_info->link);
-
-       if (smi_info->intf)
-               ipmi_unregister_smi(smi_info->intf);
-
+       ipmi_unregister_smi(smi_info->intf);
        kfree(smi_info);
 }
 
index 1d82429697512a68f38514f4bc2331eed583f97b..4c9adb4f3d5d73c98bd5c1ff800b01bfde61709c 100644 (file)
@@ -318,6 +318,13 @@ static void crng_reseed(bool force)
  * the resultant ChaCha state to the user, along with the second
  * half of the block containing 32 bytes of random data that may
  * be used; random_data_len may not be greater than 32.
+ *
+ * The returned ChaCha state contains within it a copy of the old
+ * key value, at index 4, so the state should always be zeroed out
+ * immediately after use in order to maintain forward secrecy.
+ * If the state cannot be erased in a timely manner, then it is
+ * safer to set the random_data parameter to &chacha_state[4] so
+ * that this function overwrites it before returning.
  */
 static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
                                  u32 chacha_state[CHACHA_STATE_WORDS],
@@ -437,11 +444,8 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
  * This shouldn't be set by functions like add_device_randomness(),
  * where we can't trust the buffer passed to it is guaranteed to be
  * unpredictable (so it might not have any entropy at all).
- *
- * Returns the number of bytes processed from input, which is bounded
- * by CRNG_INIT_CNT_THRESH if account is true.
  */
-static size_t crng_pre_init_inject(const void *input, size_t len, bool account)
+static void crng_pre_init_inject(const void *input, size_t len, bool account)
 {
        static int crng_init_cnt = 0;
        struct blake2s_state hash;
@@ -452,18 +456,15 @@ static size_t crng_pre_init_inject(const void *input, size_t len, bool account)
        spin_lock_irqsave(&base_crng.lock, flags);
        if (crng_init != 0) {
                spin_unlock_irqrestore(&base_crng.lock, flags);
-               return 0;
+               return;
        }
 
-       if (account)
-               len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt);
-
        blake2s_update(&hash, base_crng.key, sizeof(base_crng.key));
        blake2s_update(&hash, input, len);
        blake2s_final(&hash, base_crng.key);
 
        if (account) {
-               crng_init_cnt += len;
+               crng_init_cnt += min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt);
                if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
                        ++base_crng.generation;
                        crng_init = 1;
@@ -474,8 +475,6 @@ static size_t crng_pre_init_inject(const void *input, size_t len, bool account)
 
        if (crng_init == 1)
                pr_notice("fast init done\n");
-
-       return len;
 }
 
 static void _get_random_bytes(void *buf, size_t nbytes)
@@ -531,49 +530,59 @@ EXPORT_SYMBOL(get_random_bytes);
 
 static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes)
 {
-       bool large_request = nbytes > 256;
-       ssize_t ret = 0;
-       size_t len;
+       size_t len, left, ret = 0;
        u32 chacha_state[CHACHA_STATE_WORDS];
        u8 output[CHACHA_BLOCK_SIZE];
 
        if (!nbytes)
                return 0;
 
-       len = min_t(size_t, 32, nbytes);
-       crng_make_state(chacha_state, output, len);
-
-       if (copy_to_user(buf, output, len))
-               return -EFAULT;
-       nbytes -= len;
-       buf += len;
-       ret += len;
-
-       while (nbytes) {
-               if (large_request && need_resched()) {
-                       if (signal_pending(current))
-                               break;
-                       schedule();
-               }
+       /*
+        * Immediately overwrite the ChaCha key at index 4 with random
+        * bytes, in case userspace causes copy_to_user() below to sleep
+        * forever, so that we still retain forward secrecy in that case.
+        */
+       crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE);
+       /*
+        * However, if we're doing a read of len <= 32, we don't need to
+        * use chacha_state after, so we can simply return those bytes to
+        * the user directly.
+        */
+       if (nbytes <= CHACHA_KEY_SIZE) {
+               ret = nbytes - copy_to_user(buf, &chacha_state[4], nbytes);
+               goto out_zero_chacha;
+       }
 
+       for (;;) {
                chacha20_block(chacha_state, output);
                if (unlikely(chacha_state[12] == 0))
                        ++chacha_state[13];
 
                len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE);
-               if (copy_to_user(buf, output, len)) {
-                       ret = -EFAULT;
+               left = copy_to_user(buf, output, len);
+               if (left) {
+                       ret += len - left;
                        break;
                }
 
-               nbytes -= len;
                buf += len;
                ret += len;
+               nbytes -= len;
+               if (!nbytes)
+                       break;
+
+               BUILD_BUG_ON(PAGE_SIZE % CHACHA_BLOCK_SIZE != 0);
+               if (ret % PAGE_SIZE == 0) {
+                       if (signal_pending(current))
+                               break;
+                       cond_resched();
+               }
        }
 
-       memzero_explicit(chacha_state, sizeof(chacha_state));
        memzero_explicit(output, sizeof(output));
-       return ret;
+out_zero_chacha:
+       memzero_explicit(chacha_state, sizeof(chacha_state));
+       return ret ? ret : -EFAULT;
 }
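
Two conventions interact in the rewritten reader. First, the key half of the ChaCha state (index 4 onward) is overwritten with fresh output up front, so even a copy_to_user() that sleeps forever cannot expose a reusable key. Second, short reads follow the usual read(2) rule: report the bytes that did land in userspace, and return -EFAULT only when none did. The fault handling in miniature:

    size_t left = copy_to_user(buf, output, len);

    ret += len - left;      /* count only what actually arrived */
    if (left)
            break;          /* fault: stop, report the partial count */

    /* ... after the loop ... */
    memzero_explicit(chacha_state, sizeof(chacha_state));
    return ret ? ret : -EFAULT;     /* partial success beats -EFAULT */
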
 
 /*
@@ -1016,7 +1025,7 @@ int __init rand_initialize(void)
  */
 void add_device_randomness(const void *buf, size_t size)
 {
-       cycles_t cycles = random_get_entropy();
+       unsigned long cycles = random_get_entropy();
        unsigned long flags, now = jiffies;
 
        if (crng_init == 0 && size)
@@ -1047,8 +1056,7 @@ struct timer_rand_state {
  */
 static void add_timer_randomness(struct timer_rand_state *state, unsigned int num)
 {
-       cycles_t cycles = random_get_entropy();
-       unsigned long flags, now = jiffies;
+       unsigned long cycles = random_get_entropy(), now = jiffies, flags;
        long delta, delta2, delta3;
 
        spin_lock_irqsave(&input_pool.lock, flags);
@@ -1141,12 +1149,9 @@ void add_hwgenerator_randomness(const void *buffer, size_t count,
                                size_t entropy)
 {
        if (unlikely(crng_init == 0 && entropy < POOL_MIN_BITS)) {
-               size_t ret = crng_pre_init_inject(buffer, count, true);
-               mix_pool_bytes(buffer, ret);
-               count -= ret;
-               buffer += ret;
-               if (!count || crng_init == 0)
-                       return;
+               crng_pre_init_inject(buffer, count, true);
+               mix_pool_bytes(buffer, count);
+               return;
        }
 
        /*
@@ -1340,8 +1345,7 @@ static void mix_interrupt_randomness(struct work_struct *work)
 void add_interrupt_randomness(int irq)
 {
        enum { MIX_INFLIGHT = 1U << 31 };
-       cycles_t cycles = random_get_entropy();
-       unsigned long now = jiffies;
+       unsigned long cycles = random_get_entropy(), now = jiffies;
        struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
        struct pt_regs *regs = get_irq_regs();
        unsigned int new_count;
@@ -1354,16 +1358,12 @@ void add_interrupt_randomness(int irq)
        if (cycles == 0)
                cycles = get_reg(fast_pool, regs);
 
-       if (sizeof(cycles) == 8)
+       if (sizeof(unsigned long) == 8) {
                irq_data.u64[0] = cycles ^ rol64(now, 32) ^ irq;
-       else {
+               irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_;
+       } else {
                irq_data.u32[0] = cycles ^ irq;
                irq_data.u32[1] = now;
-       }
-
-       if (sizeof(unsigned long) == 8)
-               irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_;
-       else {
                irq_data.u32[2] = regs ? instruction_pointer(regs) : _RET_IP_;
                irq_data.u32[3] = get_reg(fast_pool, regs);
        }
@@ -1410,7 +1410,7 @@ static void entropy_timer(struct timer_list *t)
 static void try_to_generate_entropy(void)
 {
        struct {
-               cycles_t cycles;
+               unsigned long cycles;
                struct timer_list timer;
        } stack;
 
@@ -1545,6 +1545,13 @@ static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
 {
        static int maxwarn = 10;
 
+       /*
+        * Opportunistically attempt to initialize the RNG on platforms that
+        * have fast cycle counters, but don't (for now) require it to succeed.
+        */
+       if (!crng_ready())
+               try_to_generate_entropy();
+
        if (!crng_ready() && maxwarn > 0) {
                maxwarn--;
                if (__ratelimit(&urandom_warning))
index 23cc8297ec4c071bd6dd952e5966526a29df5677..d429ba52a71908b584e9ccca3dd8cf8f3a576798 100644 (file)
@@ -117,6 +117,10 @@ static void clk_generated_best_diff(struct clk_rate_request *req,
                tmp_rate = parent_rate;
        else
                tmp_rate = parent_rate / div;
+
+       if (tmp_rate < req->min_rate || tmp_rate > req->max_rate)
+               return;
+
        tmp_diff = abs(req->rate - tmp_rate);
 
        if (*best_diff < 0 || *best_diff >= tmp_diff) {
index 3ad20e75fd23f6b7e60de0b59b87f52285f66736..48a1eb9f2d551cc17b45c45925232dedbf577f49 100644 (file)
@@ -941,6 +941,7 @@ static u32 bcm2835_clock_choose_div(struct clk_hw *hw,
        u64 temp = (u64)parent_rate << CM_DIV_FRAC_BITS;
        u32 div, mindiv, maxdiv;
 
+       do_div(temp, rate);
        div = temp;
        div &= ~unused_frac_mask;
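/*
 * Editorial worked example (hedged, hypothetical rates): the restored
 * do_div() turns the shifted parent rate into a 12-bit fixed-point
 * divider. For parent_rate = 500 MHz and rate = 48 MHz:
 *
 *	temp = (500000000 << 12) / 48000000 ~= 42667
 *	     = 10 * 4096 + 1707, i.e. divide-by-10.417
 */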
 
index aa1561b773d62922424ec8c8f1fd2c5daa0f7331..070c3b89655906e5960e666f30e25f53f0f5f6b0 100644 (file)
 #include <dt-bindings/clock/microchip,mpfs-clock.h>
 
 /* address offset of control registers */
+#define REG_MSSPLL_REF_CR      0x08u
+#define REG_MSSPLL_POSTDIV_CR  0x10u
+#define REG_MSSPLL_SSCG_2_CR   0x2Cu
 #define REG_CLOCK_CONFIG_CR    0x08u
+#define REG_RTC_CLOCK_CR       0x0Cu
 #define REG_SUBBLK_CLOCK_CR    0x84u
 #define REG_SUBBLK_RESET_CR    0x88u
 
+#define MSSPLL_FBDIV_SHIFT     0x00u
+#define MSSPLL_FBDIV_WIDTH     0x0Cu
+#define MSSPLL_REFDIV_SHIFT    0x08u
+#define MSSPLL_REFDIV_WIDTH    0x06u
+#define MSSPLL_POSTDIV_SHIFT   0x08u
+#define MSSPLL_POSTDIV_WIDTH   0x07u
+#define MSSPLL_FIXED_DIV       4u
+
 struct mpfs_clock_data {
        void __iomem *base;
+       void __iomem *msspll_base;
        struct clk_hw_onecell_data hw_data;
 };
 
+struct mpfs_msspll_hw_clock {
+       void __iomem *base;
+       unsigned int id;
+       u32 reg_offset;
+       u32 shift;
+       u32 width;
+       u32 flags;
+       struct clk_hw hw;
+       struct clk_init_data init;
+};
+
+#define to_mpfs_msspll_clk(_hw) container_of(_hw, struct mpfs_msspll_hw_clock, hw)
+
 struct mpfs_cfg_clock {
        const struct clk_div_table *table;
        unsigned int id;
+       u32 reg_offset;
        u8 shift;
        u8 width;
+       u8 flags;
 };
 
 struct mpfs_cfg_hw_clock {
@@ -55,7 +83,7 @@ struct mpfs_periph_hw_clock {
  */
 static DEFINE_SPINLOCK(mpfs_clk_lock);
 
-static const struct clk_parent_data mpfs_cfg_parent[] = {
+static const struct clk_parent_data mpfs_ext_ref[] = {
        { .index = 0 },
 };
 
@@ -69,6 +97,86 @@ static const struct clk_div_table mpfs_div_ahb_table[] = {
        { 0, 0 }
 };
 
+/*
+ * The only two supported reference clock frequencies for the PolarFire SoC are
+ * 100 and 125 MHz, as the rtc reference is required to be 1 MHz.
+ * The divider table therefore only needs entries for divide-by-100 and
+ * divide-by-125.
+ */
+static const struct clk_div_table mpfs_div_rtcref_table[] = {
+       { 100, 100 }, { 125, 125 },
+       { 0, 0 }
+};
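/*
 * Editorial worked example (frequencies taken from the comment above):
 * with a 125 MHz external reference and the one-based entry { 125, 125 },
 * clk_rtcref = 125 MHz / 125 = 1 MHz; the { 100, 100 } entry yields the
 * same 1 MHz from a 100 MHz reference.
 */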
+
+static unsigned long mpfs_clk_msspll_recalc_rate(struct clk_hw *hw, unsigned long prate)
+{
+       struct mpfs_msspll_hw_clock *msspll_hw = to_mpfs_msspll_clk(hw);
+       void __iomem *mult_addr = msspll_hw->base + msspll_hw->reg_offset;
+       void __iomem *ref_div_addr = msspll_hw->base + REG_MSSPLL_REF_CR;
+       void __iomem *postdiv_addr = msspll_hw->base + REG_MSSPLL_POSTDIV_CR;
+       u32 mult, ref_div, postdiv;
+
+       mult = readl_relaxed(mult_addr) >> MSSPLL_FBDIV_SHIFT;
+       mult &= clk_div_mask(MSSPLL_FBDIV_WIDTH);
+       ref_div = readl_relaxed(ref_div_addr) >> MSSPLL_REFDIV_SHIFT;
+       ref_div &= clk_div_mask(MSSPLL_REFDIV_WIDTH);
+       postdiv = readl_relaxed(postdiv_addr) >> MSSPLL_POSTDIV_SHIFT;
+       postdiv &= clk_div_mask(MSSPLL_POSTDIV_WIDTH);
+
+       return prate * mult / (ref_div * MSSPLL_FIXED_DIV * postdiv);
+}
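/*
 * Editorial worked example (hypothetical register values, hedged): with
 * prate = 125 MHz, mult = 48, ref_div = 2 and postdiv = 2, the function
 * above computes
 *
 *	125 MHz * 48 / (2 * 4 * 2) = 375 MHz
 *
 * where the constant 4 is MSSPLL_FIXED_DIV.
 */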
+
+static const struct clk_ops mpfs_clk_msspll_ops = {
+       .recalc_rate = mpfs_clk_msspll_recalc_rate,
+};
+
+#define CLK_PLL(_id, _name, _parent, _shift, _width, _flags, _offset) {                        \
+       .id = _id,                                                                      \
+       .shift = _shift,                                                                \
+       .width = _width,                                                                \
+       .reg_offset = _offset,                                                          \
+       .flags = _flags,                                                                \
+       .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, _parent, &mpfs_clk_msspll_ops, 0),   \
+}
+
+static struct mpfs_msspll_hw_clock mpfs_msspll_clks[] = {
+       CLK_PLL(CLK_MSSPLL, "clk_msspll", mpfs_ext_ref, MSSPLL_FBDIV_SHIFT,
+               MSSPLL_FBDIV_WIDTH, 0, REG_MSSPLL_SSCG_2_CR),
+};
+
+static int mpfs_clk_register_msspll(struct device *dev, struct mpfs_msspll_hw_clock *msspll_hw,
+                                   void __iomem *base)
+{
+       msspll_hw->base = base;
+
+       return devm_clk_hw_register(dev, &msspll_hw->hw);
+}
+
+static int mpfs_clk_register_mssplls(struct device *dev, struct mpfs_msspll_hw_clock *msspll_hws,
+                                    unsigned int num_clks, struct mpfs_clock_data *data)
+{
+       void __iomem *base = data->msspll_base;
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < num_clks; i++) {
+               struct mpfs_msspll_hw_clock *msspll_hw = &msspll_hws[i];
+
+               ret = mpfs_clk_register_msspll(dev, msspll_hw, base);
+               if (ret)
+                       return dev_err_probe(dev, ret, "failed to register msspll id: %d\n",
+                                            CLK_MSSPLL);
+
+               data->hw_data.hws[msspll_hw->id] = &msspll_hw->hw;
+       }
+
+       return 0;
+}
+
+/*
+ * "CFG" clocks
+ */
+
 static unsigned long mpfs_cfg_clk_recalc_rate(struct clk_hw *hw, unsigned long prate)
 {
        struct mpfs_cfg_hw_clock *cfg_hw = to_mpfs_cfg_clk(hw);
@@ -76,10 +184,10 @@ static unsigned long mpfs_cfg_clk_recalc_rate(struct clk_hw *hw, unsigned long p
        void __iomem *base_addr = cfg_hw->sys_base;
        u32 val;
 
-       val = readl_relaxed(base_addr + REG_CLOCK_CONFIG_CR) >> cfg->shift;
+       val = readl_relaxed(base_addr + cfg->reg_offset) >> cfg->shift;
        val &= clk_div_mask(cfg->width);
 
-       return prate / (1u << val);
+       return divider_recalc_rate(hw, prate, val, cfg->table, cfg->flags, cfg->width);
 }
 
 static long mpfs_cfg_clk_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *prate)
@@ -105,11 +213,10 @@ static int mpfs_cfg_clk_set_rate(struct clk_hw *hw, unsigned long rate, unsigned
                return divider_setting;
 
        spin_lock_irqsave(&mpfs_clk_lock, flags);
-
-       val = readl_relaxed(base_addr + REG_CLOCK_CONFIG_CR);
+       val = readl_relaxed(base_addr + cfg->reg_offset);
        val &= ~(clk_div_mask(cfg->width) << cfg_hw->cfg.shift);
        val |= divider_setting << cfg->shift;
-       writel_relaxed(val, base_addr + REG_CLOCK_CONFIG_CR);
+       writel_relaxed(val, base_addr + cfg->reg_offset);
 
        spin_unlock_irqrestore(&mpfs_clk_lock, flags);
 
@@ -122,19 +229,33 @@ static const struct clk_ops mpfs_clk_cfg_ops = {
        .set_rate = mpfs_cfg_clk_set_rate,
 };
 
-#define CLK_CFG(_id, _name, _parent, _shift, _width, _table, _flags) {         \
-       .cfg.id = _id,                                                          \
-       .cfg.shift = _shift,                                                    \
-       .cfg.width = _width,                                                    \
-       .cfg.table = _table,                                                    \
-       .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, _parent, &mpfs_clk_cfg_ops,  \
-                                           _flags),                            \
+#define CLK_CFG(_id, _name, _parent, _shift, _width, _table, _flags, _offset) {                \
+       .cfg.id = _id,                                                                  \
+       .cfg.shift = _shift,                                                            \
+       .cfg.width = _width,                                                            \
+       .cfg.table = _table,                                                            \
+       .cfg.reg_offset = _offset,                                                      \
+       .cfg.flags = _flags,                                                            \
+       .hw.init = CLK_HW_INIT(_name, _parent, &mpfs_clk_cfg_ops, 0),                   \
 }
 
 static struct mpfs_cfg_hw_clock mpfs_cfg_clks[] = {
-       CLK_CFG(CLK_CPU, "clk_cpu", mpfs_cfg_parent, 0, 2, mpfs_div_cpu_axi_table, 0),
-       CLK_CFG(CLK_AXI, "clk_axi", mpfs_cfg_parent, 2, 2, mpfs_div_cpu_axi_table, 0),
-       CLK_CFG(CLK_AHB, "clk_ahb", mpfs_cfg_parent, 4, 2, mpfs_div_ahb_table, 0),
+       CLK_CFG(CLK_CPU, "clk_cpu", "clk_msspll", 0, 2, mpfs_div_cpu_axi_table, 0,
+               REG_CLOCK_CONFIG_CR),
+       CLK_CFG(CLK_AXI, "clk_axi", "clk_msspll", 2, 2, mpfs_div_cpu_axi_table, 0,
+               REG_CLOCK_CONFIG_CR),
+       CLK_CFG(CLK_AHB, "clk_ahb", "clk_msspll", 4, 2, mpfs_div_ahb_table, 0,
+               REG_CLOCK_CONFIG_CR),
+       {
+               .cfg.id = CLK_RTCREF,
+               .cfg.shift = 0,
+               .cfg.width = 12,
+               .cfg.table = mpfs_div_rtcref_table,
+               .cfg.reg_offset = REG_RTC_CLOCK_CR,
+               .cfg.flags = CLK_DIVIDER_ONE_BASED,
+               .hw.init =
+                       CLK_HW_INIT_PARENTS_DATA("clk_rtcref", mpfs_ext_ref, &mpfs_clk_cfg_ops, 0),
+       }
 };
 
 static int mpfs_clk_register_cfg(struct device *dev, struct mpfs_cfg_hw_clock *cfg_hw,
@@ -160,13 +281,17 @@ static int mpfs_clk_register_cfgs(struct device *dev, struct mpfs_cfg_hw_clock *
                        return dev_err_probe(dev, ret, "failed to register clock id: %d\n",
                                             cfg_hw->cfg.id);
 
-               id = cfg_hws[i].cfg.id;
+               id = cfg_hw->cfg.id;
                data->hw_data.hws[id] = &cfg_hw->hw;
        }
 
        return 0;
 }
 
+/*
+ * peripheral clocks - devices connected to axi or ahb buses.
+ */
+
 static int mpfs_periph_clk_enable(struct clk_hw *hw)
 {
        struct mpfs_periph_hw_clock *periph_hw = to_mpfs_periph_clk(hw);
@@ -200,10 +325,6 @@ static void mpfs_periph_clk_disable(struct clk_hw *hw)
 
        spin_lock_irqsave(&mpfs_clk_lock, flags);
 
-       reg = readl_relaxed(base_addr + REG_SUBBLK_RESET_CR);
-       val = reg | (1u << periph->shift);
-       writel_relaxed(val, base_addr + REG_SUBBLK_RESET_CR);
-
        reg = readl_relaxed(base_addr + REG_SUBBLK_CLOCK_CR);
        val = reg & ~(1u << periph->shift);
        writel_relaxed(val, base_addr + REG_SUBBLK_CLOCK_CR);
@@ -249,8 +370,10 @@ static const struct clk_ops mpfs_periph_clk_ops = {
  *   trap handler
  * - CLK_MMUART0: reserved by the hss
  * - CLK_DDRC: provides clock to the ddr subsystem
- * - CLK_FICx: these provide clocks for sections of the fpga fabric, disabling them would
- *   cause the fabric to go into reset
+ * - CLK_FICx: these provide the processor side clocks to the "FIC" (Fabric InterConnect)
+ *   clock domain crossers which provide the interface to the FPGA fabric. Disabling them
+ *   causes the FPGA fabric to go into reset.
+ * - CLK_ATHENA: The athena clock is FIC4, which is reserved for the Athena TeraFire.
  */
 
 static struct mpfs_periph_hw_clock mpfs_periph_clks[] = {
@@ -258,7 +381,7 @@ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = {
        CLK_PERIPH(CLK_MAC0, "clk_periph_mac0", PARENT_CLK(AHB), 1, 0),
        CLK_PERIPH(CLK_MAC1, "clk_periph_mac1", PARENT_CLK(AHB), 2, 0),
        CLK_PERIPH(CLK_MMC, "clk_periph_mmc", PARENT_CLK(AHB), 3, 0),
-       CLK_PERIPH(CLK_TIMER, "clk_periph_timer", PARENT_CLK(AHB), 4, 0),
+       CLK_PERIPH(CLK_TIMER, "clk_periph_timer", PARENT_CLK(RTCREF), 4, 0),
        CLK_PERIPH(CLK_MMUART0, "clk_periph_mmuart0", PARENT_CLK(AHB), 5, CLK_IS_CRITICAL),
        CLK_PERIPH(CLK_MMUART1, "clk_periph_mmuart1", PARENT_CLK(AHB), 6, 0),
        CLK_PERIPH(CLK_MMUART2, "clk_periph_mmuart2", PARENT_CLK(AHB), 7, 0),
@@ -277,11 +400,11 @@ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = {
        CLK_PERIPH(CLK_GPIO1, "clk_periph_gpio1", PARENT_CLK(AHB), 21, 0),
        CLK_PERIPH(CLK_GPIO2, "clk_periph_gpio2", PARENT_CLK(AHB), 22, 0),
        CLK_PERIPH(CLK_DDRC, "clk_periph_ddrc", PARENT_CLK(AHB), 23, CLK_IS_CRITICAL),
-       CLK_PERIPH(CLK_FIC0, "clk_periph_fic0", PARENT_CLK(AHB), 24, CLK_IS_CRITICAL),
-       CLK_PERIPH(CLK_FIC1, "clk_periph_fic1", PARENT_CLK(AHB), 25, CLK_IS_CRITICAL),
-       CLK_PERIPH(CLK_FIC2, "clk_periph_fic2", PARENT_CLK(AHB), 26, CLK_IS_CRITICAL),
-       CLK_PERIPH(CLK_FIC3, "clk_periph_fic3", PARENT_CLK(AHB), 27, CLK_IS_CRITICAL),
-       CLK_PERIPH(CLK_ATHENA, "clk_periph_athena", PARENT_CLK(AHB), 28, 0),
+       CLK_PERIPH(CLK_FIC0, "clk_periph_fic0", PARENT_CLK(AXI), 24, CLK_IS_CRITICAL),
+       CLK_PERIPH(CLK_FIC1, "clk_periph_fic1", PARENT_CLK(AXI), 25, CLK_IS_CRITICAL),
+       CLK_PERIPH(CLK_FIC2, "clk_periph_fic2", PARENT_CLK(AXI), 26, CLK_IS_CRITICAL),
+       CLK_PERIPH(CLK_FIC3, "clk_periph_fic3", PARENT_CLK(AXI), 27, CLK_IS_CRITICAL),
+       CLK_PERIPH(CLK_ATHENA, "clk_periph_athena", PARENT_CLK(AXI), 28, CLK_IS_CRITICAL),
        CLK_PERIPH(CLK_CFM, "clk_periph_cfm", PARENT_CLK(AHB), 29, 0),
 };
 
@@ -322,8 +445,9 @@ static int mpfs_clk_probe(struct platform_device *pdev)
        unsigned int num_clks;
        int ret;
 
-       /* CLK_RESERVED is not part of cfg_clks nor periph_clks, so add 1 */
-       num_clks = ARRAY_SIZE(mpfs_cfg_clks) + ARRAY_SIZE(mpfs_periph_clks) + 1;
+       /* CLK_RESERVED is not part of the clock arrays, so add 1 */
+       num_clks = ARRAY_SIZE(mpfs_msspll_clks) + ARRAY_SIZE(mpfs_cfg_clks)
+                  + ARRAY_SIZE(mpfs_periph_clks) + 1;
 
        clk_data = devm_kzalloc(dev, struct_size(clk_data, hw_data.hws, num_clks), GFP_KERNEL);
        if (!clk_data)
@@ -333,8 +457,17 @@ static int mpfs_clk_probe(struct platform_device *pdev)
        if (IS_ERR(clk_data->base))
                return PTR_ERR(clk_data->base);
 
+       clk_data->msspll_base = devm_platform_ioremap_resource(pdev, 1);
+       if (IS_ERR(clk_data->msspll_base))
+               return PTR_ERR(clk_data->msspll_base);
+
        clk_data->hw_data.num = num_clks;
 
+       ret = mpfs_clk_register_mssplls(dev, mpfs_msspll_clks, ARRAY_SIZE(mpfs_msspll_clks),
+                                       clk_data);
+       if (ret)
+               return ret;
+
        ret = mpfs_clk_register_cfgs(dev, mpfs_cfg_clks, ARRAY_SIZE(mpfs_cfg_clks), clk_data);
        if (ret)
                return ret;
index f675fd969c4de2feff91525c7898c398c3f5756b..e9c357309fd9f75321403d95f11f4e4b58f1a40e 100644 (file)
@@ -818,7 +818,7 @@ EXPORT_SYMBOL_GPL(clk_pixel_ops);
 static int clk_gfx3d_determine_rate(struct clk_hw *hw,
                                    struct clk_rate_request *req)
 {
-       struct clk_rate_request parent_req = { };
+       struct clk_rate_request parent_req = { .min_rate = 0, .max_rate = ULONG_MAX };
        struct clk_rcg2_gfx3d *cgfx = to_clk_rcg2_gfx3d(hw);
        struct clk_hw *xo, *p0, *p1, *p2;
        unsigned long p0_rate;
index 8a10bade7e0dd459a0608890158949659a1645dd..d65398497d5f64b98565066fdfdc16a2be58d15b 100644 (file)
@@ -241,6 +241,7 @@ static struct clk_init_data rtc_32k_init_data = {
        .ops            = &ccu_mux_ops,
        .parent_hws     = rtc_32k_parents,
        .num_parents    = ARRAY_SIZE(rtc_32k_parents), /* updated during probe */
+       .flags          = CLK_IS_CRITICAL,
 };
 
 static struct ccu_mux rtc_32k_clk = {
@@ -297,10 +298,6 @@ static const struct sunxi_ccu_desc sun6i_rtc_ccu_desc = {
        .hw_clks        = &sun6i_rtc_ccu_hw_clks,
 };
 
-static const struct clk_parent_data sun50i_h6_osc32k_fanout_parents[] = {
-       { .hw = &osc32k_clk.common.hw },
-};
-
 static const struct clk_parent_data sun50i_h616_osc32k_fanout_parents[] = {
        { .hw = &osc32k_clk.common.hw },
        { .fw_name = "pll-32k" },
@@ -313,13 +310,6 @@ static const struct clk_parent_data sun50i_r329_osc32k_fanout_parents[] = {
        { .hw = &osc24M_32k_clk.common.hw }
 };
 
-static const struct sun6i_rtc_match_data sun50i_h6_rtc_ccu_data = {
-       .have_ext_osc32k        = true,
-       .have_iosc_calibration  = true,
-       .osc32k_fanout_parents  = sun50i_h6_osc32k_fanout_parents,
-       .osc32k_fanout_nparents = ARRAY_SIZE(sun50i_h6_osc32k_fanout_parents),
-};
-
 static const struct sun6i_rtc_match_data sun50i_h616_rtc_ccu_data = {
        .have_iosc_calibration  = true,
        .rtc_32k_single_parent  = true,
@@ -334,10 +324,6 @@ static const struct sun6i_rtc_match_data sun50i_r329_rtc_ccu_data = {
 };
 
 static const struct of_device_id sun6i_rtc_ccu_match[] = {
-       {
-               .compatible     = "allwinner,sun50i-h6-rtc",
-               .data           = &sun50i_h6_rtc_ccu_data,
-       },
        {
                .compatible     = "allwinner,sun50i-h616-rtc",
                .data           = &sun50i_h616_rtc_ccu_data,
@@ -346,6 +332,7 @@ static const struct of_device_id sun6i_rtc_ccu_match[] = {
                .compatible     = "allwinner,sun50i-r329-rtc",
                .data           = &sun50i_r329_rtc_ccu_data,
        },
+       {},
 };
 
 int sun6i_rtc_ccu_probe(struct device *dev, void __iomem *reg)
index 542b31d6e96dddaa5907a9602d4193231f648f0c..636bcf2439ef264c2164977c6c9655d7d069c957 100644 (file)
@@ -109,6 +109,8 @@ static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
        spin_lock_init(&data->lock);
 
        r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!r)
+               return -EINVAL;
        /* one clock/reset pair per word */
        count = DIV_ROUND_UP((resource_size(r)), SUN9I_MMC_WIDTH);
        data->membase = devm_ioremap_resource(&pdev->dev, r);
index f9d593ff4718300e0cca74e2f51623cf9134023d..0253731d6d25d20ad132eb4756bb634fda561781 100644 (file)
 #define CLK_HW_DIV                     2
 #define LUT_TURBO_IND                  1
 
+#define GT_IRQ_STATUS                  BIT(2)
+
 #define HZ_PER_KHZ                     1000
 
 struct qcom_cpufreq_soc_data {
        u32 reg_enable;
+       u32 reg_domain_state;
        u32 reg_dcvs_ctrl;
        u32 reg_freq_lut;
        u32 reg_volt_lut;
+       u32 reg_intr_clr;
        u32 reg_current_vote;
        u32 reg_perf_state;
        u8 lut_row_size;
@@ -280,37 +284,46 @@ static void qcom_get_related_cpus(int index, struct cpumask *m)
        }
 }
 
-static unsigned int qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data)
+static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data)
 {
-       unsigned int val = readl_relaxed(data->base + data->soc_data->reg_current_vote);
+       unsigned int lval;
+
+       if (data->soc_data->reg_current_vote)
+               lval = readl_relaxed(data->base + data->soc_data->reg_current_vote) & 0x3ff;
+       else
+               lval = readl_relaxed(data->base + data->soc_data->reg_domain_state) & 0xff;
 
-       return (val & 0x3FF) * 19200;
+       return lval * xo_rate;
 }
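/*
 * Editorial worked example (hedged, hypothetical lval): with the usual
 * 19.2 MHz XO and a domain-state read of lval = 80, the function above
 * returns 80 * 19200000 Hz = 1.536 GHz - i.e. it now reports Hz directly
 * rather than the old "(val & 0x3FF) * 19200" kHz value.
 */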
 
 static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
 {
        struct cpufreq_policy *policy = data->policy;
-       int cpu = cpumask_first(policy->cpus);
+       int cpu = cpumask_first(policy->related_cpus);
        struct device *dev = get_cpu_device(cpu);
        unsigned long freq_hz, throttled_freq;
        struct dev_pm_opp *opp;
-       unsigned int freq;
 
        /*
         * Get the h/w throttled frequency, normalize it using the
         * registered opp table and use it to calculate thermal pressure.
         */
-       freq = qcom_lmh_get_throttle_freq(data);
-       freq_hz = freq * HZ_PER_KHZ;
+       freq_hz = qcom_lmh_get_throttle_freq(data);
 
        opp = dev_pm_opp_find_freq_floor(dev, &freq_hz);
        if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE)
-               dev_pm_opp_find_freq_ceil(dev, &freq_hz);
+               opp = dev_pm_opp_find_freq_ceil(dev, &freq_hz);
+
+       if (IS_ERR(opp)) {
+               dev_warn(dev, "Can't find the OPP for throttling: %pe!\n", opp);
+       } else {
+               throttled_freq = freq_hz / HZ_PER_KHZ;
 
-       throttled_freq = freq_hz / HZ_PER_KHZ;
+               /* Update thermal pressure (the boost frequencies are accepted) */
+               arch_update_thermal_pressure(policy->related_cpus, throttled_freq);
 
-       /* Update thermal pressure (the boost frequencies are accepted) */
-       arch_update_thermal_pressure(policy->related_cpus, throttled_freq);
+               dev_pm_opp_put(opp);
+       }
 
        /*
         * In the unlikely case policy is unregistered do not enable
@@ -350,6 +363,10 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data)
        disable_irq_nosync(c_data->throttle_irq);
        schedule_delayed_work(&c_data->throttle_work, 0);
 
+       if (c_data->soc_data->reg_intr_clr)
+               writel_relaxed(GT_IRQ_STATUS,
+                              c_data->base + c_data->soc_data->reg_intr_clr);
+
        return IRQ_HANDLED;
 }
 
@@ -365,9 +382,11 @@ static const struct qcom_cpufreq_soc_data qcom_soc_data = {
 
 static const struct qcom_cpufreq_soc_data epss_soc_data = {
        .reg_enable = 0x0,
+       .reg_domain_state = 0x20,
        .reg_dcvs_ctrl = 0xb0,
        .reg_freq_lut = 0x100,
        .reg_volt_lut = 0x200,
+       .reg_intr_clr = 0x308,
        .reg_perf_state = 0x320,
        .lut_row_size = 4,
 };
@@ -417,16 +436,39 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index)
        return 0;
 }
 
-static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data)
+static int qcom_cpufreq_hw_cpu_online(struct cpufreq_policy *policy)
+{
+       struct qcom_cpufreq_data *data = policy->driver_data;
+       struct platform_device *pdev = cpufreq_get_driver_data();
+       int ret;
+
+       ret = irq_set_affinity_hint(data->throttle_irq, policy->cpus);
+       if (ret)
+               dev_err(&pdev->dev, "Failed to set CPU affinity of %s[%d]\n",
+                       data->irq_name, data->throttle_irq);
+
+       return ret;
+}
+
+static int qcom_cpufreq_hw_cpu_offline(struct cpufreq_policy *policy)
 {
+       struct qcom_cpufreq_data *data = policy->driver_data;
+
        if (data->throttle_irq <= 0)
-               return;
+               return 0;
 
        mutex_lock(&data->throttle_lock);
        data->cancel_throttle = true;
        mutex_unlock(&data->throttle_lock);
 
        cancel_delayed_work_sync(&data->throttle_work);
+       irq_set_affinity_hint(data->throttle_irq, NULL);
+
+       return 0;
+}
+
+static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data)
+{
        free_irq(data->throttle_irq, data);
 }
 
@@ -583,6 +625,8 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = {
        .get            = qcom_cpufreq_hw_get,
        .init           = qcom_cpufreq_hw_cpu_init,
        .exit           = qcom_cpufreq_hw_cpu_exit,
+       .online         = qcom_cpufreq_hw_cpu_online,
+       .offline        = qcom_cpufreq_hw_cpu_offline,
        .register_em    = cpufreq_register_em_with_opp,
        .fast_switch    = qcom_cpufreq_hw_fast_switch,
        .name           = "qcom-cpufreq-hw",
index 2deed8d8773fa6c3ff37d0ed958dca7803aab6d8..75e1bf3a08f7cff384b6e6cc0d209600ca94859b 100644 (file)
@@ -98,8 +98,10 @@ static int sun50i_cpufreq_nvmem_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        ret = sun50i_cpufreq_get_efuse(&speed);
-       if (ret)
+       if (ret) {
+               kfree(opp_tables);
                return ret;
+       }
 
        snprintf(name, MAX_NAME_LEN, "speed%d", speed);
 
index b459eda2cd375f7f49cced69f6be831ac73c3de7..5c852e6719924bd74847c75ad6ab15e4496b17f4 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/pm_runtime.h>
 #include <asm/cpuidle.h>
 #include <asm/sbi.h>
+#include <asm/smp.h>
 #include <asm/suspend.h>
 
 #include "dt_idle_states.h"
index 11f30fd48c1414780006ec57b7fca020ce1e891d..031b5f701a0a35b40316be71ae41eeb02f06dcff 100644 (file)
@@ -65,6 +65,7 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max)
                } else {
                        /* copy only remaining bytes */
                        memcpy(data, &val, max - currsize);
+                       break;
                }
        } while (currsize < max);
 
index 8a7267d116b7574262a54c7542605d1b2120bcdd..3f2182d6682929681b27adb2cbbcec313d7e6f21 100644 (file)
@@ -436,7 +436,6 @@ static int wait_for_media_ready(struct cxl_dev_state *cxlds)
 
        for (i = mbox_ready_timeout; i; i--) {
                u32 temp;
-               int rc;
 
                rc = pci_read_config_dword(
                        pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
index 511805dbeb75f785d153b07fe37c73b6ec26ed8e..4c9eb53ba3f896f2b3b7dee06b938e9c04f09053 100644 (file)
@@ -12,6 +12,7 @@ dmabuf_selftests-y := \
        selftest.o \
        st-dma-fence.o \
        st-dma-fence-chain.o \
+       st-dma-fence-unwrap.o \
        st-dma-resv.o
 
 obj-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o
index df23239b04fc219afb77041172ab84788dcb7365..53297a0d9c5735aafc75a1e69f0bf0674e0e9de1 100644 (file)
@@ -407,6 +407,7 @@ static inline int is_dma_buf_file(struct file *file)
 
 static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
 {
+       static atomic64_t dmabuf_inode = ATOMIC64_INIT(0);
        struct file *file;
        struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb);
 
@@ -416,6 +417,13 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
        inode->i_size = dmabuf->size;
        inode_set_bytes(inode, dmabuf->size);
 
+       /*
+        * The ->i_ino acquired from get_next_ino() is not unique and is
+        * therefore not suitable for use as the dentry name by dmabuf
+        * stats. Override ->i_ino with a unique, dmabuffs-specific value.
+        */
+       inode->i_ino = atomic64_add_return(1, &dmabuf_inode);
        file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf",
                                 flags, &dma_buf_fops);
        if (IS_ERR(file))
@@ -543,10 +551,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
        file->f_mode |= FMODE_LSEEK;
        dmabuf->file = file;
 
-       ret = dma_buf_stats_setup(dmabuf);
-       if (ret)
-               goto err_sysfs;
-
        mutex_init(&dmabuf->lock);
        INIT_LIST_HEAD(&dmabuf->attachments);
 
@@ -554,6 +558,10 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
        list_add(&dmabuf->list_node, &db_list.head);
        mutex_unlock(&db_list.lock);
 
+       ret = dma_buf_stats_setup(dmabuf);
+       if (ret)
+               goto err_sysfs;
+
        return dmabuf;
 
 err_sysfs:
index cb1bacb5a42b543599b35a670f023e7b777c7e41..5c8a7084577b547799890b8b75905d27e8622f96 100644 (file)
@@ -159,6 +159,8 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
        struct dma_fence_array *array;
        size_t size = sizeof(*array);
 
+       WARN_ON(!num_fences || !fences);
+
        /* Allocate the callback structures behind the array. */
        size += num_fences * sizeof(struct dma_fence_array_cb);
        array = kzalloc(size, GFP_KERNEL);
@@ -219,3 +221,33 @@ bool dma_fence_match_context(struct dma_fence *fence, u64 context)
        return true;
 }
 EXPORT_SYMBOL(dma_fence_match_context);
+
+struct dma_fence *dma_fence_array_first(struct dma_fence *head)
+{
+       struct dma_fence_array *array;
+
+       if (!head)
+               return NULL;
+
+       array = to_dma_fence_array(head);
+       if (!array)
+               return head;
+
+       if (!array->num_fences)
+               return NULL;
+
+       return array->fences[0];
+}
+EXPORT_SYMBOL(dma_fence_array_first);
+
+struct dma_fence *dma_fence_array_next(struct dma_fence *head,
+                                      unsigned int index)
+{
+       struct dma_fence_array *array = to_dma_fence_array(head);
+
+       if (!array || index >= array->num_fences)
+               return NULL;
+
+       return array->fences[index];
+}
+EXPORT_SYMBOL(dma_fence_array_next);
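/*
 * Editorial usage sketch, not part of the patch (assumes
 * <linux/dma-fence-array.h>): walking every fence behind a head that may
 * or may not be a dma_fence_array, using the two helpers added above. A
 * plain fence is visited once; an array is visited element by element.
 */
static void example_walk(struct dma_fence *head)
{
	struct dma_fence *fence;
	unsigned int index;

	for (index = 0, fence = dma_fence_array_first(head);
	     fence;
	     fence = dma_fence_array_next(head, ++index))
		pr_info("fence %llu#%llu\n", fence->context, fence->seqno);
}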
index 97d73aaa31daa1d72a96222cac7c7a5d177453b0..851965867d9c7f2251f901d499133f531981a92b 100644 (file)
@@ -12,4 +12,5 @@
 selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */
 selftest(dma_fence, dma_fence)
 selftest(dma_fence_chain, dma_fence_chain)
+selftest(dma_fence_unwrap, dma_fence_unwrap)
 selftest(dma_resv, dma_resv)
diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c
new file mode 100644 (file)
index 0000000..039f016
--- /dev/null
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ */
+
+#include <linux/dma-fence-unwrap.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "selftest.h"
+
+static inline struct mock_fence {
+       struct dma_fence base;
+       spinlock_t lock;
+} *to_mock_fence(struct dma_fence *f) {
+       return container_of(f, struct mock_fence, base);
+}
+
+static const char *mock_name(struct dma_fence *f)
+{
+       return "mock";
+}
+
+static const struct dma_fence_ops mock_ops = {
+       .get_driver_name = mock_name,
+       .get_timeline_name = mock_name,
+};
+
+static struct dma_fence *mock_fence(void)
+{
+       struct mock_fence *f;
+
+       f = kmalloc(sizeof(*f), GFP_KERNEL);
+       if (!f)
+               return NULL;
+
+       spin_lock_init(&f->lock);
+       dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0);
+
+       return &f->base;
+}
+
+static struct dma_fence *mock_array(unsigned int num_fences, ...)
+{
+       struct dma_fence_array *array;
+       struct dma_fence **fences;
+       va_list valist;
+       int i;
+
+       fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL);
+       if (!fences)
+               return NULL;
+
+       va_start(valist, num_fences);
+       for (i = 0; i < num_fences; ++i)
+               fences[i] = va_arg(valist, typeof(*fences));
+       va_end(valist);
+
+       array = dma_fence_array_create(num_fences, fences,
+                                      dma_fence_context_alloc(1),
+                                      1, false);
+       if (!array)
+               goto cleanup;
+       return &array->base;
+
+cleanup:
+       for (i = 0; i < num_fences; ++i)
+               dma_fence_put(fences[i]);
+       kfree(fences);
+       return NULL;
+}
+
+static struct dma_fence *mock_chain(struct dma_fence *prev,
+                                   struct dma_fence *fence)
+{
+       struct dma_fence_chain *f;
+
+       f = dma_fence_chain_alloc();
+       if (!f) {
+               dma_fence_put(prev);
+               dma_fence_put(fence);
+               return NULL;
+       }
+
+       dma_fence_chain_init(f, prev, fence, 1);
+       return &f->base;
+}
+
+static int sanitycheck(void *arg)
+{
+       struct dma_fence *f, *chain, *array;
+       int err = 0;
+
+       f = mock_fence();
+       if (!f)
+               return -ENOMEM;
+
+       array = mock_array(1, f);
+       if (!array)
+               return -ENOMEM;
+
+       chain = mock_chain(NULL, array);
+       if (!chain)
+               return -ENOMEM;
+
+       dma_fence_signal(f);
+       dma_fence_put(chain);
+       return err;
+}
+
+static int unwrap_array(void *arg)
+{
+       struct dma_fence *fence, *f1, *f2, *array;
+       struct dma_fence_unwrap iter;
+       int err = 0;
+
+       f1 = mock_fence();
+       if (!f1)
+               return -ENOMEM;
+
+       f2 = mock_fence();
+       if (!f2) {
+               dma_fence_put(f1);
+               return -ENOMEM;
+       }
+
+       array = mock_array(2, f1, f2);
+       if (!array)
+               return -ENOMEM;
+
+       dma_fence_unwrap_for_each(fence, &iter, array) {
+               if (fence == f1) {
+                       f1 = NULL;
+               } else if (fence == f2) {
+                       f2 = NULL;
+               } else {
+                       pr_err("Unexpected fence!\n");
+                       err = -EINVAL;
+               }
+       }
+
+       if (f1 || f2) {
+               pr_err("Not all fences seen!\n");
+               err = -EINVAL;
+       }
+
+       dma_fence_signal(f1);
+       dma_fence_signal(f2);
+       dma_fence_put(array);
+       return err;
+}
+
+static int unwrap_chain(void *arg)
+{
+       struct dma_fence *fence, *f1, *f2, *chain;
+       struct dma_fence_unwrap iter;
+       int err = 0;
+
+       f1 = mock_fence();
+       if (!f1)
+               return -ENOMEM;
+
+       f2 = mock_fence();
+       if (!f2) {
+               dma_fence_put(f1);
+               return -ENOMEM;
+       }
+
+       chain = mock_chain(f1, f2);
+       if (!chain)
+               return -ENOMEM;
+
+       dma_fence_unwrap_for_each(fence, &iter, chain) {
+               if (fence == f1) {
+                       f1 = NULL;
+               } else if (fence == f2) {
+                       f2 = NULL;
+               } else {
+                       pr_err("Unexpected fence!\n");
+                       err = -EINVAL;
+               }
+       }
+
+       if (f1 || f2) {
+               pr_err("Not all fences seen!\n");
+               err = -EINVAL;
+       }
+
+       dma_fence_signal(f1);
+       dma_fence_signal(f2);
+       dma_fence_put(chain);
+       return err;
+}
+
+static int unwrap_chain_array(void *arg)
+{
+       struct dma_fence *fence, *f1, *f2, *array, *chain;
+       struct dma_fence_unwrap iter;
+       int err = 0;
+
+       f1 = mock_fence();
+       if (!f1)
+               return -ENOMEM;
+
+       f2 = mock_fence();
+       if (!f2) {
+               dma_fence_put(f1);
+               return -ENOMEM;
+       }
+
+       array = mock_array(2, f1, f2);
+       if (!array)
+               return -ENOMEM;
+
+       chain = mock_chain(NULL, array);
+       if (!chain)
+               return -ENOMEM;
+
+       dma_fence_unwrap_for_each(fence, &iter, chain) {
+               if (fence == f1) {
+                       f1 = NULL;
+               } else if (fence == f2) {
+                       f2 = NULL;
+               } else {
+                       pr_err("Unexpected fence!\n");
+                       err = -EINVAL;
+               }
+       }
+
+       if (f1 || f2) {
+               pr_err("Not all fences seen!\n");
+               err = -EINVAL;
+       }
+
+       dma_fence_signal(f1);
+       dma_fence_signal(f2);
+       dma_fence_put(chain);
+       return err;
+}
+
+int dma_fence_unwrap(void)
+{
+       static const struct subtest tests[] = {
+               SUBTEST(sanitycheck),
+               SUBTEST(unwrap_array),
+               SUBTEST(unwrap_chain),
+               SUBTEST(unwrap_chain_array),
+       };
+
+       return subtests(tests, NULL);
+}
index 394e6e1e968604801d0468442fd4ba6e871dc447..514d213261df3d8f579dedec1269c353b84a7f0a 100644 (file)
@@ -5,6 +5,7 @@
  * Copyright (C) 2012 Google, Inc.
  */
 
+#include <linux/dma-fence-unwrap.h>
 #include <linux/export.h>
 #include <linux/file.h>
 #include <linux/fs.h>
@@ -172,20 +173,6 @@ static int sync_file_set_fence(struct sync_file *sync_file,
        return 0;
 }
 
-static struct dma_fence **get_fences(struct sync_file *sync_file,
-                                    int *num_fences)
-{
-       if (dma_fence_is_array(sync_file->fence)) {
-               struct dma_fence_array *array = to_dma_fence_array(sync_file->fence);
-
-               *num_fences = array->num_fences;
-               return array->fences;
-       }
-
-       *num_fences = 1;
-       return &sync_file->fence;
-}
-
 static void add_fence(struct dma_fence **fences,
                      int *i, struct dma_fence *fence)
 {
@@ -210,86 +197,97 @@ static void add_fence(struct dma_fence **fences,
 static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
                                         struct sync_file *b)
 {
+       struct dma_fence *a_fence, *b_fence, **fences;
+       struct dma_fence_unwrap a_iter, b_iter;
+       unsigned int index, num_fences;
        struct sync_file *sync_file;
-       struct dma_fence **fences = NULL, **nfences, **a_fences, **b_fences;
-       int i = 0, i_a, i_b, num_fences, a_num_fences, b_num_fences;
 
        sync_file = sync_file_alloc();
        if (!sync_file)
                return NULL;
 
-       a_fences = get_fences(a, &a_num_fences);
-       b_fences = get_fences(b, &b_num_fences);
-       if (a_num_fences > INT_MAX - b_num_fences)
-               goto err;
+       num_fences = 0;
+       dma_fence_unwrap_for_each(a_fence, &a_iter, a->fence)
+               ++num_fences;
+       dma_fence_unwrap_for_each(b_fence, &b_iter, b->fence)
+               ++num_fences;
 
-       num_fences = a_num_fences + b_num_fences;
+       if (num_fences > INT_MAX)
+               goto err_free_sync_file;
 
        fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL);
        if (!fences)
-               goto err;
+               goto err_free_sync_file;
 
        /*
-        * Assume sync_file a and b are both ordered and have no
-        * duplicates with the same context.
+        * We can't guarantee that the fences in a and b are ordered, but in
+        * practice they usually are.
         *
-        * If a sync_file can only be created with sync_file_merge
-        * and sync_file_create, this is a reasonable assumption.
+        * So attempt to order the fences as we pass over them and merge fences
+        * with the same context.
         */
-       for (i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) {
-               struct dma_fence *pt_a = a_fences[i_a];
-               struct dma_fence *pt_b = b_fences[i_b];
 
-               if (pt_a->context < pt_b->context) {
-                       add_fence(fences, &i, pt_a);
+       index = 0;
+       for (a_fence = dma_fence_unwrap_first(a->fence, &a_iter),
+            b_fence = dma_fence_unwrap_first(b->fence, &b_iter);
+            a_fence || b_fence; ) {
+
+               if (!b_fence) {
+                       add_fence(fences, &index, a_fence);
+                       a_fence = dma_fence_unwrap_next(&a_iter);
+
+               } else if (!a_fence) {
+                       add_fence(fences, &index, b_fence);
+                       b_fence = dma_fence_unwrap_next(&b_iter);
+
+               } else if (a_fence->context < b_fence->context) {
+                       add_fence(fences, &index, a_fence);
+                       a_fence = dma_fence_unwrap_next(&a_iter);
 
-                       i_a++;
-               } else if (pt_a->context > pt_b->context) {
-                       add_fence(fences, &i, pt_b);
+               } else if (b_fence->context < a_fence->context) {
+                       add_fence(fences, &index, b_fence);
+                       b_fence = dma_fence_unwrap_next(&b_iter);
+
+               } else if (__dma_fence_is_later(a_fence->seqno, b_fence->seqno,
+                                               a_fence->ops)) {
+                       add_fence(fences, &index, a_fence);
+                       a_fence = dma_fence_unwrap_next(&a_iter);
+                       b_fence = dma_fence_unwrap_next(&b_iter);
 
-                       i_b++;
                } else {
-                       if (__dma_fence_is_later(pt_a->seqno, pt_b->seqno,
-                                                pt_a->ops))
-                               add_fence(fences, &i, pt_a);
-                       else
-                               add_fence(fences, &i, pt_b);
-
-                       i_a++;
-                       i_b++;
+                       add_fence(fences, &index, b_fence);
+                       a_fence = dma_fence_unwrap_next(&a_iter);
+                       b_fence = dma_fence_unwrap_next(&b_iter);
                }
        }
 
-       for (; i_a < a_num_fences; i_a++)
-               add_fence(fences, &i, a_fences[i_a]);
-
-       for (; i_b < b_num_fences; i_b++)
-               add_fence(fences, &i, b_fences[i_b]);
-
-       if (i == 0)
-               fences[i++] = dma_fence_get(a_fences[0]);
+       if (index == 0)
+               fences[index++] = dma_fence_get_stub();
 
-       if (num_fences > i) {
-               nfences = krealloc_array(fences, i, sizeof(*fences), GFP_KERNEL);
-               if (!nfences)
-                       goto err;
+       if (num_fences > index) {
+               struct dma_fence **tmp;
 
-               fences = nfences;
+               /* Keep going even if shrinking the array fails */
+               tmp = krealloc_array(fences, index, sizeof(*fences),
+                                    GFP_KERNEL);
+               if (tmp)
+                       fences = tmp;
        }
 
-       if (sync_file_set_fence(sync_file, fences, i) < 0)
-               goto err;
+       if (sync_file_set_fence(sync_file, fences, index) < 0)
+               goto err_put_fences;
 
        strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name));
        return sync_file;
 
-err:
-       while (i)
-               dma_fence_put(fences[--i]);
+err_put_fences:
+       while (index)
+               dma_fence_put(fences[--index]);
        kfree(fences);
+
+err_free_sync_file:
        fput(sync_file->file);
        return NULL;
-
 }
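/*
 * Editorial sketch, hedged: the counting pass that sync_file_merge() now
 * performs for each input, written out on its own (hypothetical helper
 * name, assumes <linux/dma-fence-unwrap.h>). dma_fence_unwrap_for_each()
 * flattens chains and arrays, so every leaf fence is visited exactly once.
 */
static unsigned int example_count_fences(struct dma_fence *head)
{
	struct dma_fence_unwrap iter;
	struct dma_fence *fence;
	unsigned int count = 0;

	dma_fence_unwrap_for_each(fence, &iter, head)
		++count;

	return count;
}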
 
 static int sync_file_release(struct inode *inode, struct file *file)
@@ -398,11 +396,13 @@ static int sync_fill_fence_info(struct dma_fence *fence,
 static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
                                       unsigned long arg)
 {
-       struct sync_file_info info;
        struct sync_fence_info *fence_info = NULL;
-       struct dma_fence **fences;
+       struct dma_fence_unwrap iter;
+       struct sync_file_info info;
+       unsigned int num_fences;
+       struct dma_fence *fence;
+       int ret;
        __u32 size;
-       int num_fences, ret, i;
 
        if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
                return -EFAULT;
@@ -410,7 +410,9 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
        if (info.flags || info.pad)
                return -EINVAL;
 
-       fences = get_fences(sync_file, &num_fences);
+       num_fences = 0;
+       dma_fence_unwrap_for_each(fence, &iter, sync_file->fence)
+               ++num_fences;
 
        /*
         * Passing num_fences = 0 means that userspace doesn't want to
@@ -433,8 +435,11 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
        if (!fence_info)
                return -ENOMEM;
 
-       for (i = 0; i < num_fences; i++) {
-               int status = sync_fill_fence_info(fences[i], &fence_info[i]);
+       num_fences = 0;
+       dma_fence_unwrap_for_each(fence, &iter, sync_file->fence) {
+               int status;
+
+               status = sync_fill_fence_info(fence, &fence_info[num_fences++]);
                info.status = info.status <= 0 ? info.status : status;
        }
 
index 1476156af74b44a0c6d3985d7c01ceff8c960d0c..def564d1e8faf714c167f25df26e109658117b29 100644 (file)
@@ -1453,7 +1453,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 {
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
-       struct at_xdmac_desc    *desc, *_desc;
+       struct at_xdmac_desc    *desc, *_desc, *iter;
        struct list_head        *descs_list;
        enum dma_status         ret;
        int                     residue, retry;
@@ -1568,11 +1568,13 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
         * microblock.
         */
        descs_list = &desc->descs_list;
-       list_for_each_entry_safe(desc, _desc, descs_list, desc_node) {
-               dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg);
-               residue -= (desc->lld.mbr_ubc & 0xffffff) << dwidth;
-               if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda)
+       list_for_each_entry_safe(iter, _desc, descs_list, desc_node) {
+               dwidth = at_xdmac_get_dwidth(iter->lld.mbr_cfg);
+               residue -= (iter->lld.mbr_ubc & 0xffffff) << dwidth;
+               if ((iter->lld.mbr_nda & 0xfffffffc) == cur_nda) {
+                       desc = iter;
                        break;
+               }
        }
        residue += cur_ubc << dwidth;
 
index 329fc2e57b703630387a3e71d32e9ae052ef8c2c..33bc1e6c4cf2e74db9fbc54d4cfa8daadaff2ea8 100644 (file)
@@ -414,14 +414,18 @@ void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
                SET_CH_32(dw, chan->dir, chan->id, ch_control1,
                          (DW_EDMA_V0_CCS | DW_EDMA_V0_LLE));
                /* Linked list */
+
                #ifdef CONFIG_64BIT
-                       SET_CH_64(dw, chan->dir, chan->id, llp.reg,
-                                 chunk->ll_region.paddr);
+               /* llp is not aligned on 64bit -> keep 32bit accesses */
+               SET_CH_32(dw, chan->dir, chan->id, llp.lsb,
+                         lower_32_bits(chunk->ll_region.paddr));
+               SET_CH_32(dw, chan->dir, chan->id, llp.msb,
+                         upper_32_bits(chunk->ll_region.paddr));
                #else /* CONFIG_64BIT */
-                       SET_CH_32(dw, chan->dir, chan->id, llp.lsb,
-                                 lower_32_bits(chunk->ll_region.paddr));
-                       SET_CH_32(dw, chan->dir, chan->id, llp.msb,
-                                 upper_32_bits(chunk->ll_region.paddr));
+               SET_CH_32(dw, chan->dir, chan->id, llp.lsb,
+                         lower_32_bits(chunk->ll_region.paddr));
+               SET_CH_32(dw, chan->dir, chan->id, llp.msb,
+                         upper_32_bits(chunk->ll_region.paddr));
                #endif /* CONFIG_64BIT */
        }
        /* Doorbell */
index 3061fe857d69f53870b91ded79c81bac1d5e8d58..f652da6ab47df1b29147136fd17c390d353c9a79 100644 (file)
@@ -373,7 +373,6 @@ static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq)
 {
        lockdep_assert_held(&wq->wq_lock);
 
-       idxd_wq_disable_cleanup(wq);
        wq->size = 0;
        wq->group = NULL;
 }
@@ -701,14 +700,17 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd)
 
                if (wq->state == IDXD_WQ_ENABLED) {
                        idxd_wq_disable_cleanup(wq);
-                       idxd_wq_device_reset_cleanup(wq);
                        wq->state = IDXD_WQ_DISABLED;
                }
+               idxd_wq_device_reset_cleanup(wq);
        }
 }
 
 void idxd_device_clear_state(struct idxd_device *idxd)
 {
+       if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+               return;
+
        idxd_groups_clear_state(idxd);
        idxd_engines_clear_state(idxd);
        idxd_device_wqs_clear_state(idxd);
index e289fd48711adae53f3e6d8460b7a8ddcde917f2..c01db23e3333f70316ea1bfbca99930fabfdc1cf 100644 (file)
@@ -150,14 +150,15 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
  */
 int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
 {
-       int rc, retries = 0;
+       unsigned int retries = wq->enqcmds_retries;
+       int rc;
 
        do {
                rc = enqcmds(portal, desc);
                if (rc == 0)
                        break;
                cpu_relax();
-       } while (retries++ < wq->enqcmds_retries);
+       } while (retries--);
 
        return rc;
 }
index 7e19ab92b61a879bddb19759d4a61b5a36513a63..dfd549685c467ac796c76873b4dca559f33e622c 100644 (file)
@@ -905,6 +905,9 @@ static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attr
        u64 xfer_size;
        int rc;
 
+       if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+               return -EPERM;
+
        if (wq->state != IDXD_WQ_DISABLED)
                return -EPERM;
 
@@ -939,6 +942,9 @@ static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribu
        u64 batch_size;
        int rc;
 
+       if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+               return -EPERM;
+
        if (wq->state != IDXD_WQ_DISABLED)
                return -EPERM;
 
index 70c0aa931ddf4a97406dcd74c7206c2f8ce0c8b2..6196a7b3956b1612ba5e15aafcbdf824038ba5aa 100644 (file)
@@ -198,12 +198,12 @@ struct sdma_script_start_addrs {
        s32 per_2_firi_addr;
        s32 mcu_2_firi_addr;
        s32 uart_2_per_addr;
-       s32 uart_2_mcu_ram_addr;
+       s32 uart_2_mcu_addr;
        s32 per_2_app_addr;
        s32 mcu_2_app_addr;
        s32 per_2_per_addr;
        s32 uartsh_2_per_addr;
-       s32 uartsh_2_mcu_ram_addr;
+       s32 uartsh_2_mcu_addr;
        s32 per_2_shp_addr;
        s32 mcu_2_shp_addr;
        s32 ata_2_mcu_addr;
@@ -232,8 +232,8 @@ struct sdma_script_start_addrs {
        s32 mcu_2_ecspi_addr;
        s32 mcu_2_sai_addr;
        s32 sai_2_mcu_addr;
-       s32 uart_2_mcu_addr;
-       s32 uartsh_2_mcu_addr;
+       s32 uart_2_mcu_rom_addr;
+       s32 uartsh_2_mcu_rom_addr;
        /* End of v3 array */
        s32 mcu_2_zqspi_addr;
        /* End of v4 array */
@@ -1796,17 +1796,17 @@ static void sdma_add_scripts(struct sdma_engine *sdma,
                        saddr_arr[i] = addr_arr[i];
 
        /*
-        * get uart_2_mcu_addr/uartsh_2_mcu_addr rom script specially because
-        * they are now replaced by uart_2_mcu_ram_addr/uartsh_2_mcu_ram_addr
-        * to be compatible with legacy freescale/nxp sdma firmware, and they
-        * are located in the bottom part of sdma_script_start_addrs which are
-        * beyond the SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1.
+        * Newer sdma firmware carries both the uart ram and the uart rom
+        * scripts, for compatibility with NXP internal legacy kernels before
+        * 4.19 (which use the uart ram script) and with mainline (which uses
+        * the uart rom script). Use the rom versions if present (V3 or newer).
         */
-       if (addr->uart_2_mcu_addr)
-               sdma->script_addrs->uart_2_mcu_addr = addr->uart_2_mcu_addr;
-       if (addr->uartsh_2_mcu_addr)
-               sdma->script_addrs->uartsh_2_mcu_addr = addr->uartsh_2_mcu_addr;
-
+       if (sdma->script_number >= SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3) {
+               if (addr->uart_2_mcu_rom_addr)
+                       sdma->script_addrs->uart_2_mcu_addr = addr->uart_2_mcu_rom_addr;
+               if (addr->uartsh_2_mcu_rom_addr)
+                       sdma->script_addrs->uartsh_2_mcu_addr = addr->uartsh_2_mcu_rom_addr;
+       }
 }
 
 static void sdma_load_firmware(const struct firmware *fw, void *context)
@@ -1885,7 +1885,7 @@ static int sdma_event_remap(struct sdma_engine *sdma)
        u32 reg, val, shift, num_map, i;
        int ret = 0;
 
-       if (IS_ERR(np) || IS_ERR(gpr_np))
+       if (IS_ERR(np) || !gpr_np)
                goto out;
 
        event_remap = of_find_property(np, propname, NULL);
@@ -1933,7 +1933,7 @@ static int sdma_event_remap(struct sdma_engine *sdma)
        }
 
 out:
-       if (!IS_ERR(gpr_np))
+       if (gpr_np)
                of_node_put(gpr_np);
 
        return ret;
index 375e7e647df6b5093b156c2cd88e7f7f0e6798d3..a1517ef1f4a0185700343797ef05d8ef6810ed0a 100644 (file)
@@ -274,7 +274,7 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan)
        unsigned int status;
        int ret;
 
-       ret = pm_runtime_get_sync(mtkd->ddev.dev);
+       ret = pm_runtime_resume_and_get(mtkd->ddev.dev);
        if (ret < 0) {
                pm_runtime_put_noidle(chan->device->dev);
                return ret;
@@ -288,18 +288,21 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan)
        ret = readx_poll_timeout(readl, c->base + VFF_EN,
                          status, !status, 10, 100);
        if (ret)
-               return ret;
+               goto err_pm;
 
        ret = request_irq(c->irq, mtk_uart_apdma_irq_handler,
                          IRQF_TRIGGER_NONE, KBUILD_MODNAME, chan);
        if (ret < 0) {
                dev_err(chan->device->dev, "Can't request dma IRQ\n");
-               return -EINVAL;
+               ret = -EINVAL;
+               goto err_pm;
        }
 
        if (mtkd->support_33bits)
                mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_SUPPORT_CLR_B);
 
+err_pm:
+       pm_runtime_put_noidle(mtkd->ddev.dev);
        return ret;
 }
 
index 58ab63642e72c22c39b904f85620eb2da2a54edf..d3e2477948c87b2b67d1fc6ec3d54f7a6480d6f0 100644 (file)
@@ -55,6 +55,7 @@ config EDAC_DECODE_MCE
 config EDAC_GHES
        bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
        depends on ACPI_APEI_GHES && (EDAC=y)
+       select UEFI_CPER
        help
          Not all machines support hardware-driven error report. Some of those
          provide a BIOS-driven error report mechanism via ACPI, using the
@@ -484,7 +485,7 @@ config EDAC_ARMADA_XP
 
 config EDAC_SYNOPSYS
        tristate "Synopsys DDR Memory Controller"
-       depends on ARCH_ZYNQ || ARCH_ZYNQMP || ARCH_INTEL_SOCFPGA
+       depends on ARCH_ZYNQ || ARCH_ZYNQMP || ARCH_INTEL_SOCFPGA || ARCH_MXC
        help
          Support for error detection and correction on the Synopsys DDR
          memory controller.
index b1f46a974b9e0001b5333ae8c00fa49b4eafb271..038abbb83f4bce8f37b49f2f20e94bc690aa45da 100644 (file)
@@ -286,17 +286,10 @@ static int axp_mc_probe(struct platform_device *pdev)
        struct edac_mc_layer layers[1];
        const struct of_device_id *id;
        struct mem_ctl_info *mci;
-       struct resource *r;
        void __iomem *base;
        uint32_t config;
 
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!r) {
-               dev_err(&pdev->dev, "Unable to get mem resource\n");
-               return -ENODEV;
-       }
-
-       base = devm_ioremap_resource(&pdev->dev, r);
+       base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(base)) {
                dev_err(&pdev->dev, "Unable to map regs\n");
                return PTR_ERR(base);
@@ -516,15 +509,8 @@ static int aurora_l2_probe(struct platform_device *pdev)
        const struct of_device_id *id;
        uint32_t l2x0_aux_ctrl;
        void __iomem *base;
-       struct resource *r;
-
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!r) {
-               dev_err(&pdev->dev, "Unable to get mem resource\n");
-               return -ENODEV;
-       }
 
-       base = devm_ioremap_resource(&pdev->dev, r);
+       base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(base)) {
                dev_err(&pdev->dev, "Unable to map regs\n");
                return PTR_ERR(base);
index b8a7d9594afd42babdb0e93e6cfd94b41f562f6c..1fa5ca57e9ec196cace56662a564ae679e37aea9 100644 (file)
@@ -489,7 +489,7 @@ static int dmc520_edac_probe(struct platform_device *pdev)
        dev = &pdev->dev;
 
        for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
-               irq = platform_get_irq_byname(pdev, dmc520_irq_configs[idx].name);
+               irq = platform_get_irq_byname_optional(pdev, dmc520_irq_configs[idx].name);
                irqs[idx] = irq;
                masks[idx] = dmc520_irq_configs[idx].mask;
                if (irq >= 0) {
index 8c4d947fb8486bab37ab82b2a4937c97d52e4043..19522c568aa5dea572f5d2c34eac44905ed1f142 100644 (file)
@@ -47,99 +47,67 @@ static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev)
 }
 #endif                         /* CONFIG_EDAC_DEBUG */
 
-struct edac_device_ctl_info *edac_device_alloc_ctl_info(
-       unsigned sz_private,
-       char *edac_device_name, unsigned nr_instances,
-       char *edac_block_name, unsigned nr_blocks,
-       unsigned offset_value,          /* zero, 1, or other based offset */
-       struct edac_dev_sysfs_block_attribute *attrib_spec, unsigned nr_attrib,
-       int device_index)
+/*
+ * @off_val: base offset for block numbering: zero, 1, or other
+ */
+struct edac_device_ctl_info *
+edac_device_alloc_ctl_info(unsigned pvt_sz, char *dev_name, unsigned nr_instances,
+                          char *blk_name, unsigned nr_blocks, unsigned off_val,
+                          struct edac_dev_sysfs_block_attribute *attrib_spec,
+                          unsigned nr_attrib, int device_index)
 {
-       struct edac_device_ctl_info *dev_ctl;
-       struct edac_device_instance *dev_inst, *inst;
-       struct edac_device_block *dev_blk, *blk_p, *blk;
        struct edac_dev_sysfs_block_attribute *dev_attrib, *attrib_p, *attrib;
-       unsigned total_size;
-       unsigned count;
+       struct edac_device_block *dev_blk, *blk_p, *blk;
+       struct edac_device_instance *dev_inst, *inst;
+       struct edac_device_ctl_info *dev_ctl;
        unsigned instance, block, attr;
-       void *pvt, *p;
+       void *pvt;
        int err;
 
        edac_dbg(4, "instances=%d blocks=%d\n", nr_instances, nr_blocks);
 
-       /* Calculate the size of memory we need to allocate AND
-        * determine the offsets of the various item arrays
-        * (instance,block,attrib) from the start of an  allocated structure.
-        * We want the alignment of each item  (instance,block,attrib)
-        * to be at least as stringent as what the compiler would
-        * provide if we could simply hardcode everything into a single struct.
-        */
-       p = NULL;
-       dev_ctl = edac_align_ptr(&p, sizeof(*dev_ctl), 1);
+       dev_ctl = kzalloc(sizeof(struct edac_device_ctl_info), GFP_KERNEL);
+       if (!dev_ctl)
+               return NULL;
 
-       /* Calc the 'end' offset past end of ONE ctl_info structure
-        * which will become the start of the 'instance' array
-        */
-       dev_inst = edac_align_ptr(&p, sizeof(*dev_inst), nr_instances);
+       dev_inst = kcalloc(nr_instances, sizeof(struct edac_device_instance), GFP_KERNEL);
+       if (!dev_inst)
+               goto free;
 
-       /* Calc the 'end' offset past the instance array within the ctl_info
-        * which will become the start of the block array
-        */
-       count = nr_instances * nr_blocks;
-       dev_blk = edac_align_ptr(&p, sizeof(*dev_blk), count);
+       dev_ctl->instances = dev_inst;
 
-       /* Calc the 'end' offset past the dev_blk array
-        * which will become the start of the attrib array, if any.
-        */
-       /* calc how many nr_attrib we need */
-       if (nr_attrib > 0)
-               count *= nr_attrib;
-       dev_attrib = edac_align_ptr(&p, sizeof(*dev_attrib), count);
+       dev_blk = kcalloc(nr_instances * nr_blocks, sizeof(struct edac_device_block), GFP_KERNEL);
+       if (!dev_blk)
+               goto free;
 
-       /* Calc the 'end' offset past the attributes array */
-       pvt = edac_align_ptr(&p, sz_private, 1);
+       dev_ctl->blocks = dev_blk;
 
-       /* 'pvt' now points to where the private data area is.
-        * At this point 'pvt' (like dev_inst,dev_blk and dev_attrib)
-        * is baselined at ZERO
-        */
-       total_size = ((unsigned long)pvt) + sz_private;
+       if (nr_attrib) {
+               dev_attrib = kcalloc(nr_attrib, sizeof(struct edac_dev_sysfs_block_attribute),
+                                    GFP_KERNEL);
+               if (!dev_attrib)
+                       goto free;
 
-       /* Allocate the amount of memory for the set of control structures */
-       dev_ctl = kzalloc(total_size, GFP_KERNEL);
-       if (dev_ctl == NULL)
-               return NULL;
+               dev_ctl->attribs = dev_attrib;
+       }
 
-       /* Adjust pointers so they point within the actual memory we
-        * just allocated rather than an imaginary chunk of memory
-        * located at address 0.
-        * 'dev_ctl' points to REAL memory, while the others are
-        * ZERO based and thus need to be adjusted to point within
-        * the allocated memory.
-        */
-       dev_inst = (struct edac_device_instance *)
-               (((char *)dev_ctl) + ((unsigned long)dev_inst));
-       dev_blk = (struct edac_device_block *)
-               (((char *)dev_ctl) + ((unsigned long)dev_blk));
-       dev_attrib = (struct edac_dev_sysfs_block_attribute *)
-               (((char *)dev_ctl) + ((unsigned long)dev_attrib));
-       pvt = sz_private ? (((char *)dev_ctl) + ((unsigned long)pvt)) : NULL;
-
-       /* Begin storing the information into the control info structure */
-       dev_ctl->dev_idx = device_index;
-       dev_ctl->nr_instances = nr_instances;
-       dev_ctl->instances = dev_inst;
-       dev_ctl->pvt_info = pvt;
+       if (pvt_sz) {
+               pvt = kzalloc(pvt_sz, GFP_KERNEL);
+               if (!pvt)
+                       goto free;
+
+               dev_ctl->pvt_info = pvt;
+       }
+
+       dev_ctl->dev_idx        = device_index;
+       dev_ctl->nr_instances   = nr_instances;
 
        /* Default logging of CEs and UEs */
        dev_ctl->log_ce = 1;
        dev_ctl->log_ue = 1;
 
        /* Name of this edac device */
-       snprintf(dev_ctl->name,sizeof(dev_ctl->name),"%s",edac_device_name);
-
-       edac_dbg(4, "edac_dev=%p next after end=%p\n",
-                dev_ctl, pvt + sz_private);
+       snprintf(dev_ctl->name, sizeof(dev_ctl->name), "%s", dev_name);
 
        /* Initialize every Instance */
        for (instance = 0; instance < nr_instances; instance++) {
@@ -150,15 +118,14 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
                inst->blocks = blk_p;
 
                /* name of this instance */
-               snprintf(inst->name, sizeof(inst->name),
-                        "%s%u", edac_device_name, instance);
+               snprintf(inst->name, sizeof(inst->name), "%s%u", dev_name, instance);
 
                /* Initialize every block in each instance */
                for (block = 0; block < nr_blocks; block++) {
                        blk = &blk_p[block];
                        blk->instance = inst;
                        snprintf(blk->name, sizeof(blk->name),
-                                "%s%d", edac_block_name, block+offset_value);
+                                "%s%d", blk_name, block + off_val);
 
                        edac_dbg(4, "instance=%d inst_p=%p block=#%d block_p=%p name='%s'\n",
                                 instance, inst, block, blk, blk->name);
@@ -210,10 +177,8 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
         * Initialize the 'root' kobj for the edac_device controller
         */
        err = edac_device_register_sysfs_main_kobj(dev_ctl);
-       if (err) {
-               kfree(dev_ctl);
-               return NULL;
-       }
+       if (err)
+               goto free;
 
        /* at this point, the root kobj is valid, and in order to
         * 'free' the object, then the function:
@@ -223,6 +188,11 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
         */
 
        return dev_ctl;
+
+free:
+       __edac_device_free_ctl_info(dev_ctl);
+
+       return NULL;
 }
 EXPORT_SYMBOL_GPL(edac_device_alloc_ctl_info);
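
Reduced to its essentials, the conversion trades the single-shot buffer for one allocation
per array plus a single unwind label; because kfree(NULL) is a no-op, the label can free
whatever subset was set up before the failure. A minimal sketch of the shape (local names,
assuming <linux/slab.h>; not the driver code itself):

    struct inst { int id; };
    struct blk  { int id; };

    struct ctl {
            struct inst *instances;
            struct blk  *blocks;
            void        *pvt_info;
    };

    static struct ctl *ctl_alloc(unsigned int nr_inst, unsigned int nr_blk,
                                 size_t pvt_sz)
    {
            struct ctl *c = kzalloc(sizeof(*c), GFP_KERNEL);

            if (!c)
                    return NULL;

            c->instances = kcalloc(nr_inst, sizeof(*c->instances), GFP_KERNEL);
            if (!c->instances)
                    goto free;

            c->blocks = kcalloc(nr_inst * nr_blk, sizeof(*c->blocks), GFP_KERNEL);
            if (!c->blocks)
                    goto free;

            if (pvt_sz) {
                    c->pvt_info = kzalloc(pvt_sz, GFP_KERNEL);
                    if (!c->pvt_info)
                            goto free;
            }

            return c;

    free:
            kfree(c->pvt_info);     /* kfree(NULL) is a no-op */
            kfree(c->blocks);
            kfree(c->instances);
            kfree(c);
            return NULL;
    }

The same property is what keeps __edac_device_free_ctl_info(), added below in
edac_device.h, safe to call on a partially constructed control structure.
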
 
index fc2d2c2180649d239ad3befdb75a27782249e769..3f44e6b9d387f961d615db4a435edcfdc2faef11 100644 (file)
@@ -216,6 +216,8 @@ struct edac_device_ctl_info {
         */
        u32 nr_instances;
        struct edac_device_instance *instances;
+       struct edac_device_block *blocks;
+       struct edac_dev_sysfs_block_attribute *attribs;
 
        /* Event counters for the this whole EDAC Device */
        struct edac_device_counter counters;
@@ -348,4 +350,16 @@ edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr,
  */
 extern int edac_device_alloc_index(void);
 extern const char *edac_layer_name[];
+
+/* Free the actual struct */
+static inline void __edac_device_free_ctl_info(struct edac_device_ctl_info *ci)
+{
+       if (ci) {
+               kfree(ci->pvt_info);
+               kfree(ci->attribs);
+               kfree(ci->blocks);
+               kfree(ci->instances);
+               kfree(ci);
+       }
+}
 #endif
index 9a61d92bdf42045e6d02d62ee10b4febc1864e45..ac678b4a21fcbfd4d933d6be602594a80be81151 100644 (file)
@@ -208,10 +208,7 @@ static void edac_device_ctrl_master_release(struct kobject *kobj)
        /* decrement the EDAC CORE module ref count */
        module_put(edac_dev->owner);
 
-       /* free the control struct containing the 'main' kobj
-        * passed in to this routine
-        */
-       kfree(edac_dev);
+       __edac_device_free_ctl_info(edac_dev);
 }
 
 /* ktype for the main (master) kobject */
index d2715774af6fb3a0b07ff2c6ce6bb510091c0a68..eb58644bb0190630af007f2ad10363b3bb5cde6c 100644 (file)
@@ -170,61 +170,6 @@ const char * const edac_mem_types[] = {
 };
 EXPORT_SYMBOL_GPL(edac_mem_types);
 
-/**
- * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
- * @p:         pointer to a pointer with the memory offset to be used. At
- *             return, this will be incremented to point to the next offset
- * @size:      Size of the data structure to be reserved
- * @n_elems:   Number of elements that should be reserved
- *
- * If 'size' is a constant, the compiler will optimize this whole function
- * down to either a no-op or the addition of a constant to the value of '*p'.
- *
- * The 'p' pointer is absolutely needed to keep the proper advancing
- * further in memory to the proper offsets when allocating the struct along
- * with its embedded structs, as edac_device_alloc_ctl_info() does it
- * above, for example.
- *
- * At return, the pointer 'p' will be incremented to be used on a next call
- * to this function.
- */
-void *edac_align_ptr(void **p, unsigned int size, int n_elems)
-{
-       unsigned int align, r;
-       void *ptr = *p;
-
-       *p += size * n_elems;
-
-       /*
-        * 'p' can possibly be an unaligned item X such that sizeof(X) is
-        * 'size'.  Adjust 'p' so that its alignment is at least as
-        * stringent as what the compiler would provide for X and return
-        * the aligned result.
-        * Here we assume that the alignment of a "long long" is the most
-        * stringent alignment that the compiler will ever provide by default.
-        * As far as I know, this is a reasonable assumption.
-        */
-       if (size > sizeof(long))
-               align = sizeof(long long);
-       else if (size > sizeof(int))
-               align = sizeof(long);
-       else if (size > sizeof(short))
-               align = sizeof(int);
-       else if (size > sizeof(char))
-               align = sizeof(short);
-       else
-               return ptr;
-
-       r = (unsigned long)ptr % align;
-
-       if (r == 0)
-               return ptr;
-
-       *p += align - r;
-
-       return (void *)(((unsigned long)ptr) + align - r);
-}
-
 static void _edac_mc_free(struct mem_ctl_info *mci)
 {
        put_device(&mci->dev);
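
For reference, the removed helper implemented a two-pass scheme: walk the layout once with
the cursor baselined at NULL to compute aligned offsets and the total size, allocate once,
then rebase each offset onto the real buffer. A small user-space illustration of that
arithmetic (simplified stand-in, not the kernel code):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Simplified stand-in for edac_align_ptr(): return the aligned offset
     * for the next item and advance the cursor past it. */
    static void *align_ptr(void **p, size_t size, size_t n_elems)
    {
            uintptr_t ptr = (uintptr_t)*p;
            size_t align, pad;

            if (size > sizeof(long))
                    align = sizeof(long long);
            else if (size > sizeof(int))
                    align = sizeof(long);
            else if (size > sizeof(short))
                    align = sizeof(int);
            else if (size > sizeof(char))
                    align = sizeof(short);
            else
                    align = 1;

            pad = ptr % align ? align - ptr % align : 0;
            *p = (void *)(ptr + pad + size * n_elems);
            return (void *)(ptr + pad);
    }

    int main(void)
    {
            void *cursor = NULL;    /* offsets are baselined at zero */
            uintptr_t hdr  = (uintptr_t)align_ptr(&cursor, 40, 1);  /* ctl info    */
            uintptr_t inst = (uintptr_t)align_ptr(&cursor, 24, 4);  /* 4 instances */
            size_t total   = (uintptr_t)cursor;

            char *base = malloc(total);     /* ONE allocation for everything */
            if (!base)
                    return 1;

            printf("hdr@+%zu inst@+%zu total=%zu\n",
                   (size_t)hdr, (size_t)inst, total);
            /* real pointers were then base + offset; the patches above replace
             * all of this with one kzalloc()/kcalloc() per array */
            free(base);
            return 0;
    }
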
@@ -257,6 +202,8 @@ static void mci_release(struct device *dev)
                }
                kfree(mci->csrows);
        }
+       kfree(mci->pvt_info);
+       kfree(mci->layers);
        kfree(mci);
 }
 
@@ -392,9 +339,8 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
 {
        struct mem_ctl_info *mci;
        struct edac_mc_layer *layer;
-       unsigned int idx, size, tot_dimms = 1;
+       unsigned int idx, tot_dimms = 1;
        unsigned int tot_csrows = 1, tot_channels = 1;
-       void *pvt, *ptr = NULL;
        bool per_rank = false;
 
        if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
@@ -416,41 +362,25 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
                        per_rank = true;
        }
 
-       /* Figure out the offsets of the various items from the start of an mc
-        * structure.  We want the alignment of each item to be at least as
-        * stringent as what the compiler would provide if we could simply
-        * hardcode everything into a single struct.
-        */
-       mci     = edac_align_ptr(&ptr, sizeof(*mci), 1);
-       layer   = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
-       pvt     = edac_align_ptr(&ptr, sz_pvt, 1);
-       size    = ((unsigned long)pvt) + sz_pvt;
-
-       edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
-                size,
-                tot_dimms,
-                per_rank ? "ranks" : "dimms",
-                tot_csrows * tot_channels);
-
-       mci = kzalloc(size, GFP_KERNEL);
-       if (mci == NULL)
+       mci = kzalloc(sizeof(struct mem_ctl_info), GFP_KERNEL);
+       if (!mci)
                return NULL;
 
+       mci->layers = kcalloc(n_layers, sizeof(struct edac_mc_layer), GFP_KERNEL);
+       if (!mci->layers)
+               goto error;
+
+       mci->pvt_info = kzalloc(sz_pvt, GFP_KERNEL);
+       if (!mci->pvt_info)
+               goto error;
+
        mci->dev.release = mci_release;
        device_initialize(&mci->dev);
 
-       /* Adjust pointers so they point within the memory we just allocated
-        * rather than an imaginary chunk of memory located at address 0.
-        */
-       layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
-       pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
-
        /* setup index and various internal pointers */
        mci->mc_idx = mc_num;
        mci->tot_dimms = tot_dimms;
-       mci->pvt_info = pvt;
        mci->n_layers = n_layers;
-       mci->layers = layer;
        memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
        mci->nr_csrows = tot_csrows;
        mci->num_cschannel = tot_channels;
index aa1f91688eb8e0dacd482eda115881252ae5ad0e..96f6de0c8ff6f221bbdbdbc47d8e40b8fb58c9c2 100644 (file)
@@ -59,8 +59,6 @@ extern void edac_device_reset_delay_period(struct edac_device_ctl_info
                                           *edac_dev, unsigned long value);
 extern void edac_mc_reset_delay_period(unsigned long value);
 
-extern void *edac_align_ptr(void **p, unsigned size, int n_elems);
-
 /*
  * EDAC debugfs functions
  */
index 48c844a72a278bc13e72e0af0674b53f3fe06e04..2205d7e731dbaec4f9998c3f0fa06fe6c6160a1e 100644 (file)
@@ -29,32 +29,31 @@ static LIST_HEAD(edac_pci_list);
 static atomic_t pci_indexes = ATOMIC_INIT(0);
 
 struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
-                                               const char *edac_pci_name)
+                                                 const char *edac_pci_name)
 {
        struct edac_pci_ctl_info *pci;
-       void *p = NULL, *pvt;
-       unsigned int size;
 
        edac_dbg(1, "\n");
 
-       pci = edac_align_ptr(&p, sizeof(*pci), 1);
-       pvt = edac_align_ptr(&p, 1, sz_pvt);
-       size = ((unsigned long)pvt) + sz_pvt;
-
-       /* Alloc the needed control struct memory */
-       pci = kzalloc(size, GFP_KERNEL);
-       if (pci  == NULL)
+       pci = kzalloc(sizeof(struct edac_pci_ctl_info), GFP_KERNEL);
+       if (!pci)
                return NULL;
 
-       /* Now much private space */
-       pvt = sz_pvt ? ((char *)pci) + ((unsigned long)pvt) : NULL;
+       if (sz_pvt) {
+               pci->pvt_info = kzalloc(sz_pvt, GFP_KERNEL);
+               if (!pci->pvt_info)
+                       goto free;
+       }
 
-       pci->pvt_info = pvt;
        pci->op_state = OP_ALLOC;
 
        snprintf(pci->name, strlen(edac_pci_name) + 1, "%s", edac_pci_name);
 
        return pci;
+
+free:
+       kfree(pci);
+       return NULL;
 }
 EXPORT_SYMBOL_GPL(edac_pci_alloc_ctl_info);
 
index 6d1ddecbf0da3606c73e1139cd01c92af1ede5ba..59b0bedc9c242ab70cad0a21ad5700f72abaca76 100644 (file)
 #include "edac_module.h"
 #include <ras/ras_event.h>
 
+#define OTHER_DETAIL_LEN       400
+
 struct ghes_pvt {
        struct mem_ctl_info *mci;
 
        /* Buffers for the error handling routine */
-       char other_detail[400];
+       char other_detail[OTHER_DETAIL_LEN];
        char msg[80];
 };
 
@@ -36,7 +38,7 @@ static struct ghes_pvt *ghes_pvt;
  * This driver's representation of the system hardware, as collected
  * from DMI.
  */
-struct ghes_hw_desc {
+static struct ghes_hw_desc {
        int num_dimms;
        struct dimm_info *dimms;
 } ghes_hw;
@@ -235,8 +237,34 @@ static void ghes_scan_system(void)
        system_scanned = true;
 }
 
+static int print_mem_error_other_detail(const struct cper_sec_mem_err *mem, char *msg,
+                                       const char *location, unsigned int len)
+{
+       u32 n;
+
+       if (!msg)
+               return 0;
+
+       n = 0;
+       len -= 1;
+
+       n += scnprintf(msg + n, len - n, "APEI location: %s ", location);
+
+       if (!(mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS))
+               goto out;
+
+       n += scnprintf(msg + n, len - n, "status(0x%016llx): ", mem->error_status);
+       n += scnprintf(msg + n, len - n, "%s ", cper_mem_err_status_str(mem->error_status));
+
+out:
+       msg[n] = '\0';
+
+       return n;
+}
+
 void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 {
+       struct cper_mem_err_compact cmem;
        struct edac_raw_error_desc *e;
        struct mem_ctl_info *mci;
        struct ghes_pvt *pvt;
@@ -292,60 +320,10 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 
        /* Error type, mapped on e->msg */
        if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
+               u8 etype = mem_err->error_type;
+
                p = pvt->msg;
-               switch (mem_err->error_type) {
-               case 0:
-                       p += sprintf(p, "Unknown");
-                       break;
-               case 1:
-                       p += sprintf(p, "No error");
-                       break;
-               case 2:
-                       p += sprintf(p, "Single-bit ECC");
-                       break;
-               case 3:
-                       p += sprintf(p, "Multi-bit ECC");
-                       break;
-               case 4:
-                       p += sprintf(p, "Single-symbol ChipKill ECC");
-                       break;
-               case 5:
-                       p += sprintf(p, "Multi-symbol ChipKill ECC");
-                       break;
-               case 6:
-                       p += sprintf(p, "Master abort");
-                       break;
-               case 7:
-                       p += sprintf(p, "Target abort");
-                       break;
-               case 8:
-                       p += sprintf(p, "Parity Error");
-                       break;
-               case 9:
-                       p += sprintf(p, "Watchdog timeout");
-                       break;
-               case 10:
-                       p += sprintf(p, "Invalid address");
-                       break;
-               case 11:
-                       p += sprintf(p, "Mirror Broken");
-                       break;
-               case 12:
-                       p += sprintf(p, "Memory Sparing");
-                       break;
-               case 13:
-                       p += sprintf(p, "Scrub corrected error");
-                       break;
-               case 14:
-                       p += sprintf(p, "Scrub uncorrected error");
-                       break;
-               case 15:
-                       p += sprintf(p, "Physical Memory Map-out event");
-                       break;
-               default:
-                       p += sprintf(p, "reserved error (%d)",
-                                    mem_err->error_type);
-               }
+               p += snprintf(p, sizeof(pvt->msg), "%s", cper_mem_err_type_str(etype));
        } else {
                strcpy(pvt->msg, "unknown error");
        }
@@ -362,52 +340,19 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 
        /* Memory error location, mapped on e->location */
        p = e->location;
-       if (mem_err->validation_bits & CPER_MEM_VALID_NODE)
-               p += sprintf(p, "node:%d ", mem_err->node);
-       if (mem_err->validation_bits & CPER_MEM_VALID_CARD)
-               p += sprintf(p, "card:%d ", mem_err->card);
-       if (mem_err->validation_bits & CPER_MEM_VALID_MODULE)
-               p += sprintf(p, "module:%d ", mem_err->module);
-       if (mem_err->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
-               p += sprintf(p, "rank:%d ", mem_err->rank);
-       if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
-               p += sprintf(p, "bank:%d ", mem_err->bank);
-       if (mem_err->validation_bits & CPER_MEM_VALID_BANK_GROUP)
-               p += sprintf(p, "bank_group:%d ",
-                            mem_err->bank >> CPER_MEM_BANK_GROUP_SHIFT);
-       if (mem_err->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
-               p += sprintf(p, "bank_address:%d ",
-                            mem_err->bank & CPER_MEM_BANK_ADDRESS_MASK);
-       if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
-               u32 row = mem_err->row;
-
-               row |= cper_get_mem_extension(mem_err->validation_bits, mem_err->extended);
-               p += sprintf(p, "row:%d ", row);
-       }
-       if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
-               p += sprintf(p, "col:%d ", mem_err->column);
-       if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
-               p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
+       cper_mem_err_pack(mem_err, &cmem);
+       p += cper_mem_err_location(&cmem, p);
+
        if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
-               const char *bank = NULL, *device = NULL;
                struct dimm_info *dimm;
 
-               dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device);
-               if (bank != NULL && device != NULL)
-                       p += sprintf(p, "DIMM location:%s %s ", bank, device);
-               else
-                       p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
-                                    mem_err->mem_dev_handle);
-
+               p += cper_dimm_err_location(&cmem, p);
                dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle);
                if (dimm) {
                        e->top_layer = dimm->idx;
                        strcpy(e->label, dimm->label);
                }
        }
-       if (mem_err->validation_bits & CPER_MEM_VALID_CHIP_ID)
-               p += sprintf(p, "chipID: %d ",
-                            mem_err->extended >> CPER_MEM_CHIP_ID_SHIFT);
        if (p > e->location)
                *(p - 1) = '\0';
 
@@ -416,78 +361,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 
        /* All other fields are mapped on e->other_detail */
        p = pvt->other_detail;
-       p += snprintf(p, sizeof(pvt->other_detail),
-               "APEI location: %s ", e->location);
-       if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) {
-               u64 status = mem_err->error_status;
-
-               p += sprintf(p, "status(0x%016llx): ", (long long)status);
-               switch ((status >> 8) & 0xff) {
-               case 1:
-                       p += sprintf(p, "Error detected internal to the component ");
-                       break;
-               case 16:
-                       p += sprintf(p, "Error detected in the bus ");
-                       break;
-               case 4:
-                       p += sprintf(p, "Storage error in DRAM memory ");
-                       break;
-               case 5:
-                       p += sprintf(p, "Storage error in TLB ");
-                       break;
-               case 6:
-                       p += sprintf(p, "Storage error in cache ");
-                       break;
-               case 7:
-                       p += sprintf(p, "Error in one or more functional units ");
-                       break;
-               case 8:
-                       p += sprintf(p, "component failed self test ");
-                       break;
-               case 9:
-                       p += sprintf(p, "Overflow or undervalue of internal queue ");
-                       break;
-               case 17:
-                       p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR ");
-                       break;
-               case 18:
-                       p += sprintf(p, "Improper access error ");
-                       break;
-               case 19:
-                       p += sprintf(p, "Access to a memory address which is not mapped to any component ");
-                       break;
-               case 20:
-                       p += sprintf(p, "Loss of Lockstep ");
-                       break;
-               case 21:
-                       p += sprintf(p, "Response not associated with a request ");
-                       break;
-               case 22:
-                       p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits ");
-                       break;
-               case 23:
-                       p += sprintf(p, "Detection of a PATH_ERROR ");
-                       break;
-               case 25:
-                       p += sprintf(p, "Bus operation timeout ");
-                       break;
-               case 26:
-                       p += sprintf(p, "A read was issued to data that has been poisoned ");
-                       break;
-               default:
-                       p += sprintf(p, "reserved ");
-                       break;
-               }
-       }
-       if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
-               p += sprintf(p, "requestorID: 0x%016llx ",
-                            (long long)mem_err->requestor_id);
-       if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
-               p += sprintf(p, "responderID: 0x%016llx ",
-                            (long long)mem_err->responder_id);
-       if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID)
-               p += sprintf(p, "targetID: 0x%016llx ",
-                            (long long)mem_err->responder_id);
+       p += print_mem_error_other_detail(mem_err, p, e->location, OTHER_DETAIL_LEN);
        if (p > pvt->other_detail)
                *(p - 1) = '\0';
 
index 324a46b8479b0f96879ac2067b56a0c456621ebc..f5d82518c15e5b0138cef472b0de1349c5198171 100644 (file)
@@ -244,11 +244,6 @@ static inline u32 i5100_nrecmema_rank(u32 a)
        return a >>  8 & ((1 << 3) - 1);
 }
 
-static inline u32 i5100_nrecmema_dm_buf_id(u32 a)
-{
-       return a & ((1 << 8) - 1);
-}
-
 static inline u32 i5100_nrecmemb_cas(u32 a)
 {
        return a >> 16 & ((1 << 13) - 1);
index 67f7bc3fe5b3b623b108fddc8cb57ec6ba304ade..5bf92298554d6c2d51a44622d8bf33d320d77826 100644 (file)
@@ -609,13 +609,6 @@ static int mpc85xx_l2_err_remove(struct platform_device *op)
 }
 
 static const struct of_device_id mpc85xx_l2_err_of_match[] = {
-/* deprecate the fsl,85.. forms in the future, 2.6.30? */
-       { .compatible = "fsl,8540-l2-cache-controller", },
-       { .compatible = "fsl,8541-l2-cache-controller", },
-       { .compatible = "fsl,8544-l2-cache-controller", },
-       { .compatible = "fsl,8548-l2-cache-controller", },
-       { .compatible = "fsl,8555-l2-cache-controller", },
-       { .compatible = "fsl,8568-l2-cache-controller", },
        { .compatible = "fsl,mpc8536-l2-cache-controller", },
        { .compatible = "fsl,mpc8540-l2-cache-controller", },
        { .compatible = "fsl,mpc8541-l2-cache-controller", },
@@ -644,13 +637,6 @@ static struct platform_driver mpc85xx_l2_err_driver = {
 };
 
 static const struct of_device_id mpc85xx_mc_err_of_match[] = {
-/* deprecate the fsl,85.. forms in the future, 2.6.30? */
-       { .compatible = "fsl,8540-memory-controller", },
-       { .compatible = "fsl,8541-memory-controller", },
-       { .compatible = "fsl,8544-memory-controller", },
-       { .compatible = "fsl,8548-memory-controller", },
-       { .compatible = "fsl,8555-memory-controller", },
-       { .compatible = "fsl,8568-memory-controller", },
        { .compatible = "fsl,mpc8536-memory-controller", },
        { .compatible = "fsl,mpc8540-memory-controller", },
        { .compatible = "fsl,mpc8541-memory-controller", },
index f05ff02c0656ea3b785e2f70289c3fe05f63408d..1cee64b80a7e085ee0c3503aa71d30a7f3cf4f23 100644 (file)
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Synopsys DDR ECC Driver
  * This driver is based on ppc4xx_edac.c drivers
  *
  * Copyright (C) 2012 - 2014 Xilinx, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details
  */
 
 #include <linux/edac.h>
 #define ECC_STAT_CECNT_SHIFT           8
 #define ECC_STAT_BITNUM_MASK           0x7F
 
+/* ECC error count register definitions */
+#define ECC_ERRCNT_UECNT_MASK          0xFFFF0000
+#define ECC_ERRCNT_UECNT_SHIFT         16
+#define ECC_ERRCNT_CECNT_MASK          0xFFFF
+
 /* DDR QOS Interrupt register definitions */
 #define DDR_QOS_IRQ_STAT_OFST          0x20200
 #define DDR_QOSUE_MASK                 0x4
@@ -423,15 +415,16 @@ static int zynqmp_get_error_info(struct synps_edac_priv *priv)
        base = priv->baseaddr;
        p = &priv->stat;
 
+       regval = readl(base + ECC_ERRCNT_OFST);
+       p->ce_cnt = regval & ECC_ERRCNT_CECNT_MASK;
+       p->ue_cnt = (regval & ECC_ERRCNT_UECNT_MASK) >> ECC_ERRCNT_UECNT_SHIFT;
+       if (!p->ce_cnt)
+               goto ue_err;
+
        regval = readl(base + ECC_STAT_OFST);
        if (!regval)
                return 1;
 
-       p->ce_cnt = (regval & ECC_STAT_CECNT_MASK) >> ECC_STAT_CECNT_SHIFT;
-       p->ue_cnt = (regval & ECC_STAT_UECNT_MASK) >> ECC_STAT_UECNT_SHIFT;
-       if (!p->ce_cnt)
-               goto ue_err;
-
        p->ceinfo.bitpos = (regval & ECC_STAT_BITNUM_MASK);
 
        regval = readl(base + ECC_CEADDR0_OFST);
index 7197f9fa024572f72aec324af64cd382820ffac1..54081403db4fd20352202e77161704f725e3b434 100644 (file)
@@ -501,7 +501,7 @@ static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
 #define MEMERR_L2C_L2ESRA_PAGE_OFFSET          0x0804
 
 /*
- * Processor Module Domain (PMD) context - Context for a pair of processsors.
+ * Processor Module Domain (PMD) context - Context for a pair of processors.
  * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of
  * its own L1 cache.
  */
index 54be88167c60bd3be3f159cfba6fa19dc76fb77c..f3b3953cac834472bfb329ddd480e279d789ddec 100644 (file)
@@ -668,6 +668,7 @@ EXPORT_SYMBOL_GPL(fw_card_release);
 void fw_core_remove_card(struct fw_card *card)
 {
        struct fw_card_driver dummy_driver = dummy_driver_template;
+       unsigned long flags;
 
        card->driver->update_phy_reg(card, 4,
                                     PHY_LINK_ACTIVE | PHY_CONTENDER, 0);
@@ -682,7 +683,9 @@ void fw_core_remove_card(struct fw_card *card)
        dummy_driver.stop_iso           = card->driver->stop_iso;
        card->driver = &dummy_driver;
 
+       spin_lock_irqsave(&card->lock, flags);
        fw_destroy_nodes(card);
+       spin_unlock_irqrestore(&card->lock, flags);
 
        /* Wait for all users, especially device workqueue jobs, to finish. */
        fw_card_put(card);
index 9f89c17730b124efe5e20ffb7d3cc6bba061cc38..708e417200f4675c7e3c1f3cc0152c5695a6fda7 100644 (file)
@@ -1500,6 +1500,7 @@ static void outbound_phy_packet_callback(struct fw_packet *packet,
 {
        struct outbound_phy_packet_event *e =
                container_of(packet, struct outbound_phy_packet_event, p);
+       struct client *e_client;
 
        switch (status) {
        /* expected: */
@@ -1516,9 +1517,10 @@ static void outbound_phy_packet_callback(struct fw_packet *packet,
        }
        e->phy_packet.data[0] = packet->timestamp;
 
+       e_client = e->client;
        queue_event(e->client, &e->event, &e->phy_packet,
                    sizeof(e->phy_packet) + e->phy_packet.length, NULL, 0);
-       client_put(e->client);
+       client_put(e_client);
 }
 
 static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg)
index b63d55f5ebd336c37c1bd0f3de9643be5b891ad5..f40c81534381219b1d968a4351391eb27ca02758 100644 (file)
@@ -375,16 +375,13 @@ static void report_found_node(struct fw_card *card,
        card->bm_retries = 0;
 }
 
+/* Must be called with card->lock held */
 void fw_destroy_nodes(struct fw_card *card)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&card->lock, flags);
        card->color++;
        if (card->local_node != NULL)
                for_each_fw_node(card, card->local_node, report_lost_node);
        card->local_node = NULL;
-       spin_unlock_irqrestore(&card->lock, flags);
 }
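
With the locking moved to the callers, the comment is the only enforcement of the new
contract; a lockdep assertion is the usual way to make such a rule checkable at runtime.
A sketch of the idiom applied here (a suggestion, not part of this patch):

    void fw_destroy_nodes(struct fw_card *card)
    {
            lockdep_assert_held(&card->lock);  /* fires if the lock is not held
                                                * (active only with CONFIG_LOCKDEP) */
            card->color++;
            if (card->local_node != NULL)
                    for_each_fw_node(card, card->local_node, report_lost_node);
            card->local_node = NULL;
    }
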
 
 static void move_tree(struct fw_node *node0, struct fw_node *node1, int port)
@@ -510,6 +507,8 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation,
        struct fw_node *local_node;
        unsigned long flags;
 
+       spin_lock_irqsave(&card->lock, flags);
+
        /*
         * If the selfID buffer is not the immediate successor of the
         * previously processed one, we cannot reliably compare the
@@ -521,8 +520,6 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation,
                card->bm_retries = 0;
        }
 
-       spin_lock_irqsave(&card->lock, flags);
-
        card->broadcast_channel_allocated = card->broadcast_channel_auto_allocated;
        card->node_id = node_id;
        /*
index ac487c96bb717f9e76f2a6d092c6ff0af8515dd2..6c20815cc8d1689198ee9259f5db4d97bfae0c32 100644 (file)
@@ -73,24 +73,25 @@ static int try_cancel_split_timeout(struct fw_transaction *t)
 static int close_transaction(struct fw_transaction *transaction,
                             struct fw_card *card, int rcode)
 {
-       struct fw_transaction *t;
+       struct fw_transaction *t = NULL, *iter;
        unsigned long flags;
 
        spin_lock_irqsave(&card->lock, flags);
-       list_for_each_entry(t, &card->transaction_list, link) {
-               if (t == transaction) {
-                       if (!try_cancel_split_timeout(t)) {
+       list_for_each_entry(iter, &card->transaction_list, link) {
+               if (iter == transaction) {
+                       if (!try_cancel_split_timeout(iter)) {
                                spin_unlock_irqrestore(&card->lock, flags);
                                goto timed_out;
                        }
-                       list_del_init(&t->link);
-                       card->tlabel_mask &= ~(1ULL << t->tlabel);
+                       list_del_init(&iter->link);
+                       card->tlabel_mask &= ~(1ULL << iter->tlabel);
+                       t = iter;
                        break;
                }
        }
        spin_unlock_irqrestore(&card->lock, flags);
 
-       if (&t->link != &card->transaction_list) {
+       if (t) {
                t->callback(card, rcode, NULL, 0, t->callback_data);
                return 0;
        }
@@ -935,7 +936,7 @@ EXPORT_SYMBOL(fw_core_handle_request);
 
 void fw_core_handle_response(struct fw_card *card, struct fw_packet *p)
 {
-       struct fw_transaction *t;
+       struct fw_transaction *t = NULL, *iter;
        unsigned long flags;
        u32 *data;
        size_t data_length;
@@ -947,20 +948,21 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p)
        rcode   = HEADER_GET_RCODE(p->header[1]);
 
        spin_lock_irqsave(&card->lock, flags);
-       list_for_each_entry(t, &card->transaction_list, link) {
-               if (t->node_id == source && t->tlabel == tlabel) {
-                       if (!try_cancel_split_timeout(t)) {
+       list_for_each_entry(iter, &card->transaction_list, link) {
+               if (iter->node_id == source && iter->tlabel == tlabel) {
+                       if (!try_cancel_split_timeout(iter)) {
                                spin_unlock_irqrestore(&card->lock, flags);
                                goto timed_out;
                        }
-                       list_del_init(&t->link);
-                       card->tlabel_mask &= ~(1ULL << t->tlabel);
+                       list_del_init(&iter->link);
+                       card->tlabel_mask &= ~(1ULL << iter->tlabel);
+                       t = iter;
                        break;
                }
        }
        spin_unlock_irqrestore(&card->lock, flags);
 
-       if (&t->link == &card->transaction_list) {
+       if (!t) {
  timed_out:
                fw_notice(card, "unsolicited response (source %x, tlabel %x)\n",
                          source, tlabel);
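
Both hunks above, and the sbp2 one below, apply the same transformation: the old code
tested `&t->link == &card->transaction_list` after the loop, which depends on the iterator
variable holding an out-of-bounds container_of() of the list head when nothing matched, a
pattern the kernel is moving away from. The safer idiom, reduced to a sketch (illustrative
types, not the firewire code):

    #include <linux/list.h>

    struct xact {
            struct list_head link;
            int tlabel;
    };

    /* Find-and-remove, called under the list's lock: keep the match in a
     * separate variable so nothing touches the iterator after the loop. */
    static struct xact *xact_take(struct list_head *head, int tlabel)
    {
            struct xact *found = NULL, *iter;

            list_for_each_entry(iter, head, link) {
                    if (iter->tlabel == tlabel) {
                            list_del_init(&iter->link);
                            found = iter;
                            break;
                    }
            }

            return found;   /* NULL means no match; no list-head comparison needed */
    }
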
index 85cd379fd383883aeec4cbd58961282f5b71d1a4..60051c0cabeaa88063f505ed8fdc222e64d62912 100644 (file)
@@ -408,7 +408,7 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request,
                              void *payload, size_t length, void *callback_data)
 {
        struct sbp2_logical_unit *lu = callback_data;
-       struct sbp2_orb *orb;
+       struct sbp2_orb *orb = NULL, *iter;
        struct sbp2_status status;
        unsigned long flags;
 
@@ -433,17 +433,18 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request,
 
        /* Lookup the orb corresponding to this status write. */
        spin_lock_irqsave(&lu->tgt->lock, flags);
-       list_for_each_entry(orb, &lu->orb_list, link) {
+       list_for_each_entry(iter, &lu->orb_list, link) {
                if (STATUS_GET_ORB_HIGH(status) == 0 &&
-                   STATUS_GET_ORB_LOW(status) == orb->request_bus) {
-                       orb->rcode = RCODE_COMPLETE;
-                       list_del(&orb->link);
+                   STATUS_GET_ORB_LOW(status) == iter->request_bus) {
+                       iter->rcode = RCODE_COMPLETE;
+                       list_del(&iter->link);
+                       orb = iter;
                        break;
                }
        }
        spin_unlock_irqrestore(&lu->tgt->lock, flags);
 
-       if (&orb->link != &lu->orb_list) {
+       if (orb) {
                orb->callback(orb, &status);
                kref_put(&orb->kref, free_orb); /* orb callback reference */
        } else {
index cf6fed6dec773b74c810309fc7c564f50abed93c..45600acc0f455e55be81b75ec0cbebe3fc9a119e 100644 (file)
@@ -49,7 +49,7 @@ struct scmi_msg_resp_clock_describe_rates {
        struct {
                __le32 value_low;
                __le32 value_high;
-       } rate[0];
+       } rate[];
 #define RATE_TO_U64(X)         \
 ({                             \
        typeof(X) x = (X);      \
@@ -210,7 +210,8 @@ scmi_clock_describe_rates_get(const struct scmi_protocol_handle *ph, u32 clk_id,
 
        if (rate_discrete && rate) {
                clk->list.num_rates = tot_rate_cnt;
-               sort(rate, tot_rate_cnt, sizeof(*rate), rate_cmp_func, NULL);
+               sort(clk->list.rates, tot_rate_cnt, sizeof(*rate),
+                    rate_cmp_func, NULL);
        }
 
        clk->rate_discrete = rate_discrete;
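
Two independent fixes share this file: `rate[0]` becomes a C99 flexible array member, and
sort() is pointed at the long-lived clk->list.rates buffer instead of the scratch cursor
`rate`, which by the end of the enumeration need not point at the start of the array. For
the first, a minimal user-space sketch of the flexible-array idiom (hypothetical struct,
sized with a single allocation):

    #include <stdlib.h>

    struct rates_msg {
            unsigned int num_rates;
            struct {
                    unsigned int value_low;
                    unsigned int value_high;
            } rate[];               /* flexible array member, no [0] hack */
    };

    static struct rates_msg *rates_msg_alloc(unsigned int n)
    {
            /* header plus n trailing entries in one allocation;
             * sizeof(*m) excludes the flexible array member */
            struct rates_msg *m = calloc(1, sizeof(*m) + n * sizeof(m->rate[0]));

            if (m)
                    m->num_rates = n;
            return m;
    }
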
index 46118300a4d1f8be0f5dcedc8d769f571b44dafe..e17c6568344d5bed9717d978ed245a11dce509ba 100644 (file)
@@ -679,7 +679,8 @@ static void scmi_handle_response(struct scmi_chan_info *cinfo,
 
        xfer = scmi_xfer_command_acquire(cinfo, msg_hdr);
        if (IS_ERR(xfer)) {
-               scmi_clear_channel(info, cinfo);
+               if (MSG_XTRACT_TYPE(msg_hdr) == MSG_TYPE_DELAYED_RESP)
+                       scmi_clear_channel(info, cinfo);
                return;
        }
 
index 734f1eeee1617300b9f866ddf345d5f72478028b..8302a2b4aeeb175c10f0ec92ec78d71fb7c345e4 100644 (file)
@@ -405,8 +405,8 @@ static int scmi_optee_chan_free(int id, void *p, void *data)
        return 0;
 }
 
-static struct scmi_shared_mem *get_channel_shm(struct scmi_optee_channel *chan,
-                                              struct scmi_xfer *xfer)
+static struct scmi_shared_mem __iomem *
+get_channel_shm(struct scmi_optee_channel *chan, struct scmi_xfer *xfer)
 {
        if (!chan)
                return NULL;
@@ -419,7 +419,7 @@ static int scmi_optee_send_message(struct scmi_chan_info *cinfo,
                                   struct scmi_xfer *xfer)
 {
        struct scmi_optee_channel *channel = cinfo->transport_info;
-       struct scmi_shared_mem *shmem = get_channel_shm(channel, xfer);
+       struct scmi_shared_mem __iomem *shmem = get_channel_shm(channel, xfer);
        int ret;
 
        mutex_lock(&channel->mu);
@@ -436,7 +436,7 @@ static void scmi_optee_fetch_response(struct scmi_chan_info *cinfo,
                                      struct scmi_xfer *xfer)
 {
        struct scmi_optee_channel *channel = cinfo->transport_info;
-       struct scmi_shared_mem *shmem = get_channel_shm(channel, xfer);
+       struct scmi_shared_mem __iomem *shmem = get_channel_shm(channel, xfer);
 
        shmem_fetch_response(shmem, xfer);
 }
index e48108e694f8dcb08f5e91fcb2d89de9a78946a7..7dad6f57d97042cc1a13b48a58c58e3938465e0f 100644 (file)
@@ -955,8 +955,7 @@ static int cs_dsp_create_control(struct cs_dsp *dsp,
        ctl->alg_region = *alg_region;
        if (subname && dsp->fw_ver >= 2) {
                ctl->subname_len = subname_len;
-               ctl->subname = kmemdup(subname,
-                                      strlen(subname) + 1, GFP_KERNEL);
+               ctl->subname = kasprintf(GFP_KERNEL, "%.*s", subname_len, subname);
                if (!ctl->subname) {
                        ret = -ENOMEM;
                        goto err_ctl;
index 2c3dac5ecb36d577a7f6b0a5c2ac9fb8bbd09a37..4720ba98cec312e775c8bcb298855fd60ccb38c9 100644 (file)
@@ -91,6 +91,18 @@ config EFI_SOFT_RESERVE
 
          If unsure, say Y.
 
+config EFI_DXE_MEM_ATTRIBUTES
+       bool "Adjust memory attributes in EFISTUB"
+       depends on EFI && EFI_STUB && X86
+       default y
+       help
+         The UEFI specification does not guarantee that all memory is
+         mapped both writable and executable, as the kernel expects it
+         to be.
+         Use DXE services to check and alter memory protection
+         attributes during boot via EFISTUB to ensure that memory
+         ranges used by the kernel are writable and executable.
+
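
Combined with the efi_dxe_call() plumbing and the DXE services table declared further
down, the stub can query a region's GCD descriptor and clear the offending protection
bits, roughly like this (a sketch against the prototypes below, not the stub's actual
code):

    static efi_status_t make_range_rwx(efi_physical_addr_t start, u64 size)
    {
            efi_gcd_memory_space_desc_t desc;
            efi_status_t status;

            status = efi_dxe_call(get_memory_space_descriptor, start, &desc);
            if (status != EFI_SUCCESS)
                    return status;

            /* drop read-only and no-execute, keep everything else */
            if (desc.attributes & (EFI_MEMORY_RO | EFI_MEMORY_XP))
                    status = efi_dxe_call(set_memory_space_attributes, start, size,
                                          desc.attributes &
                                          ~(u64)(EFI_MEMORY_RO | EFI_MEMORY_XP));

            return status;
    }
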
 config EFI_PARAMS_FROM_FDT
        bool
        help
@@ -284,3 +296,34 @@ config EFI_CUSTOM_SSDT_OVERLAYS
 
          See Documentation/admin-guide/acpi/ssdt-overlays.rst for more
          information.
+
+config EFI_DISABLE_RUNTIME
+       bool "Disable EFI runtime services support by default"
+       default y if PREEMPT_RT
+       help
+         Allow disabling EFI runtime services support by default. This can
+         already be achieved with the efi=noruntime option, but it can be
+         useful to have this default without any kernel command line parameter.
+
+         The EFI runtime services are disabled by default when PREEMPT_RT is
+         enabled, because measurements have shown that some EFI function calls
+         might take too much time to complete, causing large latencies, which
+         is an issue for real-time kernels.
+
+         This default can be overridden by using the efi=runtime option.
+
+config EFI_COCO_SECRET
+       bool "EFI Confidential Computing Secret Area Support"
+       depends on EFI
+       help
+         Confidential Computing platforms (such as AMD SEV) allow the
+         Guest Owner to securely inject secrets during guest VM launch.
+         The secrets are placed in a designated EFI reserved memory area.
+
+         In order to use the secrets in the kernel, the location of the secret
+         area (as published in the EFI config table) must be kept.
+
+         If you say Y here, the address of the EFI secret area will be kept
+         for usage inside the kernel.  This will allow the
+         virt/coco/efi_secret module to access the secrets, which in turn
+         allows userspace programs to access the injected secrets.
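
The kept address is only the config-table pointer; a consumer still has to map the area
before reading it. A hedged sketch of the lookup side (the struct name is this cycle's
efi.h addition and should be treated as an assumption here):

    #include <linux/efi.h>
    #include <linux/io.h>

    static struct linux_efi_coco_secret_area *map_coco_secret(void)
    {
            if (!IS_ENABLED(CONFIG_EFI_COCO_SECRET) ||
                efi.coco_secret == EFI_INVALID_TABLE_ADDR)
                    return NULL;    /* no secret area published */

            /* map the small header that describes the reserved area */
            return memremap(efi.coco_secret,
                            sizeof(struct linux_efi_coco_secret_area),
                            MEMREMAP_WB);
    }
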
index 6ec8edec63296b435389531bbc567d8703125201..e4e5ea7ce910a53642a4f8cb9f9e9fe11fca96d8 100644 (file)
@@ -211,7 +211,33 @@ const char *cper_mem_err_type_str(unsigned int etype)
 }
 EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
 
-static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
+const char *cper_mem_err_status_str(u64 status)
+{
+       switch ((status >> 8) & 0xff) {
+       case  1:        return "Error detected internal to the component";
+       case  4:        return "Storage error in DRAM memory";
+       case  5:        return "Storage error in TLB";
+       case  6:        return "Storage error in cache";
+       case  7:        return "Error in one or more functional units";
+       case  8:        return "Component failed self test";
+       case  9:        return "Overflow or undervalue of internal queue";
+       case 16:        return "Error detected in the bus";
+       case 17:        return "Virtual address not found on IO-TLB or IO-PDIR";
+       case 18:        return "Improper access error";
+       case 19:        return "Access to a memory address which is not mapped to any component";
+       case 20:        return "Loss of Lockstep";
+       case 21:        return "Response not associated with a request";
+       case 22:        return "Bus parity error - must also set the A, C, or D Bits";
+       case 23:        return "Detection of a protocol error";
+       case 24:        return "Detection of a PATH_ERROR";
+       case 25:        return "Bus operation timeout";
+       case 26:        return "A read was issued to data that has been poisoned";
+       default:        return "Reserved";
+       }
+}
+EXPORT_SYMBOL_GPL(cper_mem_err_status_str);
+
+int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
 {
        u32 len, n;
 
@@ -221,51 +247,51 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
        n = 0;
        len = CPER_REC_LEN;
        if (mem->validation_bits & CPER_MEM_VALID_NODE)
-               n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
+               n += scnprintf(msg + n, len - n, "node:%d ", mem->node);
        if (mem->validation_bits & CPER_MEM_VALID_CARD)
-               n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
+               n += scnprintf(msg + n, len - n, "card:%d ", mem->card);
        if (mem->validation_bits & CPER_MEM_VALID_MODULE)
-               n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
+               n += scnprintf(msg + n, len - n, "module:%d ", mem->module);
        if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
-               n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
+               n += scnprintf(msg + n, len - n, "rank:%d ", mem->rank);
        if (mem->validation_bits & CPER_MEM_VALID_BANK)
-               n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
+               n += scnprintf(msg + n, len - n, "bank:%d ", mem->bank);
        if (mem->validation_bits & CPER_MEM_VALID_BANK_GROUP)
-               n += scnprintf(msg + n, len - n, "bank_group: %d ",
+               n += scnprintf(msg + n, len - n, "bank_group:%d ",
                               mem->bank >> CPER_MEM_BANK_GROUP_SHIFT);
        if (mem->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
-               n += scnprintf(msg + n, len - n, "bank_address: %d ",
+               n += scnprintf(msg + n, len - n, "bank_address:%d ",
                               mem->bank & CPER_MEM_BANK_ADDRESS_MASK);
        if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
-               n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
+               n += scnprintf(msg + n, len - n, "device:%d ", mem->device);
        if (mem->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
                u32 row = mem->row;
 
                row |= cper_get_mem_extension(mem->validation_bits, mem->extended);
-               n += scnprintf(msg + n, len - n, "row: %d ", row);
+               n += scnprintf(msg + n, len - n, "row:%d ", row);
        }
        if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
-               n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
+               n += scnprintf(msg + n, len - n, "column:%d ", mem->column);
        if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
-               n += scnprintf(msg + n, len - n, "bit_position: %d ",
+               n += scnprintf(msg + n, len - n, "bit_position:%d ",
                               mem->bit_pos);
        if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
-               n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
+               n += scnprintf(msg + n, len - n, "requestor_id:0x%016llx ",
                               mem->requestor_id);
        if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
-               n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
+               n += scnprintf(msg + n, len - n, "responder_id:0x%016llx ",
                               mem->responder_id);
        if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
-               n += scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
+               n += scnprintf(msg + n, len - n, "target_id:0x%016llx ",
                               mem->target_id);
        if (mem->validation_bits & CPER_MEM_VALID_CHIP_ID)
-               n += scnprintf(msg + n, len - n, "chip_id: %d ",
+               n += scnprintf(msg + n, len - n, "chip_id:%d ",
                               mem->extended >> CPER_MEM_CHIP_ID_SHIFT);
 
        return n;
 }
 
-static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
+int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
 {
        u32 len, n;
        const char *bank = NULL, *device = NULL;
@@ -334,7 +360,9 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
                return;
        }
        if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
-               printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
+               printk("%s error_status: %s (0x%016llx)\n",
+                      pfx, cper_mem_err_status_str(mem->error_status),
+                      mem->error_status);
        if (mem->validation_bits & CPER_MEM_VALID_PA)
                printk("%s""physical_address: 0x%016llx\n",
                       pfx, mem->physical_addr);
index 5502e176d51bee14deba8d37bc240244d3e9878b..860534bcfdac2aa989f5721ec045740e94748af7 100644 (file)
@@ -46,6 +46,9 @@ struct efi __read_mostly efi = {
 #ifdef CONFIG_LOAD_UEFI_KEYS
        .mokvar_table           = EFI_INVALID_TABLE_ADDR,
 #endif
+#ifdef CONFIG_EFI_COCO_SECRET
+       .coco_secret            = EFI_INVALID_TABLE_ADDR,
+#endif
 };
 EXPORT_SYMBOL(efi);
 
@@ -66,7 +69,7 @@ struct mm_struct efi_mm = {
 
 struct workqueue_struct *efi_rts_wq;
 
-static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT);
+static bool disable_runtime = IS_ENABLED(CONFIG_EFI_DISABLE_RUNTIME);
 static int __init setup_noefi(char *arg)
 {
        disable_runtime = true;
@@ -422,6 +425,11 @@ static int __init efisubsys_init(void)
        if (efi_enabled(EFI_DBG) && efi_enabled(EFI_PRESERVE_BS_REGIONS))
                efi_debugfs_init();
 
+#ifdef CONFIG_EFI_COCO_SECRET
+       if (efi.coco_secret != EFI_INVALID_TABLE_ADDR)
+               platform_device_register_simple("efi_secret", 0, NULL, 0);
+#endif
+
        return 0;
 
 err_remove_group:
@@ -528,6 +536,9 @@ static const efi_config_table_type_t common_tables[] __initconst = {
 #endif
 #ifdef CONFIG_LOAD_UEFI_KEYS
        {LINUX_EFI_MOK_VARIABLE_TABLE_GUID,     &efi.mokvar_table,      "MOKvar"        },
+#endif
+#ifdef CONFIG_EFI_COCO_SECRET
+       {LINUX_EFI_COCO_SECRET_AREA_GUID,       &efi.coco_secret,       "CocoSecret"    },
 #endif
        {},
 };
index 4b5b2403b3a07759269e7a1cacf8ceb1ecd49e8b..0131e3aaa6055317993267ca2d354c09cc95f310 100644 (file)
@@ -117,7 +117,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
                                 unsigned long *image_size,
                                 unsigned long *reserve_addr,
                                 unsigned long *reserve_size,
-                                efi_loaded_image_t *image)
+                                efi_loaded_image_t *image,
+                                efi_handle_t image_handle)
 {
        const int slack = TEXT_OFFSET - 5 * PAGE_SIZE;
        int alloc_size = MAX_UNCOMP_KERNEL_SIZE + EFI_PHYS_ALIGN;
index 9cc556013d085991a4825643de486547069e35e2..577173ee1f83d4eebca0100e2941cd47e3ae34b2 100644 (file)
@@ -83,7 +83,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
                                 unsigned long *image_size,
                                 unsigned long *reserve_addr,
                                 unsigned long *reserve_size,
-                                efi_loaded_image_t *image)
+                                efi_loaded_image_t *image,
+                                efi_handle_t image_handle)
 {
        efi_status_t status;
        unsigned long kernel_size, kernel_memsize = 0;
@@ -100,7 +101,15 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
        u64 min_kimg_align = efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN;
 
        if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
-               if (!efi_nokaslr) {
+               efi_guid_t li_fixed_proto = LINUX_EFI_LOADED_IMAGE_FIXED_GUID;
+               void *p;
+
+               if (efi_nokaslr) {
+                       efi_info("KASLR disabled on kernel command line\n");
+               } else if (efi_bs_call(handle_protocol, image_handle,
+                                      &li_fixed_proto, &p) == EFI_SUCCESS) {
+                       efi_info("Image placement fixed by loader\n");
+               } else {
                        status = efi_get_random_bytes(sizeof(phys_seed),
                                                      (u8 *)&phys_seed);
                        if (status == EFI_NOT_FOUND) {
@@ -111,8 +120,6 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
                                        status);
                                efi_nokaslr = true;
                        }
-               } else {
-                       efi_info("KASLR disabled on kernel command line\n");
                }
        }
 
index da93864d7abcd104495e653b7d3079f31c1b7ed0..f515394cce6e23295e93f018a57b4f1aa4c59a37 100644 (file)
@@ -198,7 +198,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
        status = handle_kernel_image(&image_addr, &image_size,
                                     &reserve_addr,
                                     &reserve_size,
-                                    image);
+                                    image, handle);
        if (status != EFI_SUCCESS) {
                efi_err("Failed to relocate kernel\n");
                goto fail_free_screeninfo;
index edb77b0621ea3f5da7a2f2a0f2bd066c1925210d..b0ae0a454404b83944ec75465fe0fd890b45f95e 100644 (file)
@@ -36,6 +36,9 @@ extern bool efi_novamap;
 
 extern const efi_system_table_t *efi_system_table;
 
+typedef union efi_dxe_services_table efi_dxe_services_table_t;
+extern const efi_dxe_services_table_t *efi_dxe_table;
+
 efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
                                   efi_system_table_t *sys_table_arg);
 
@@ -44,6 +47,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
 #define efi_is_native()                (true)
 #define efi_bs_call(func, ...) efi_system_table->boottime->func(__VA_ARGS__)
 #define efi_rt_call(func, ...) efi_system_table->runtime->func(__VA_ARGS__)
+#define efi_dxe_call(func, ...)        efi_dxe_table->func(__VA_ARGS__)
 #define efi_table_attr(inst, attr)     (inst->attr)
 #define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__)
 
@@ -329,6 +333,76 @@ union efi_boot_services {
        } mixed_mode;
 };
 
+typedef enum {
+       EfiGcdMemoryTypeNonExistent,
+       EfiGcdMemoryTypeReserved,
+       EfiGcdMemoryTypeSystemMemory,
+       EfiGcdMemoryTypeMemoryMappedIo,
+       EfiGcdMemoryTypePersistent,
+       EfiGcdMemoryTypeMoreReliable,
+       EfiGcdMemoryTypeMaximum
+} efi_gcd_memory_type_t;
+
+typedef struct {
+       efi_physical_addr_t base_address;
+       u64 length;
+       u64 capabilities;
+       u64 attributes;
+       efi_gcd_memory_type_t gcd_memory_type;
+       void *image_handle;
+       void *device_handle;
+} efi_gcd_memory_space_desc_t;
+
+/*
+ * EFI DXE Services table
+ */
+union efi_dxe_services_table {
+       struct {
+               efi_table_hdr_t hdr;
+               void *add_memory_space;
+               void *allocate_memory_space;
+               void *free_memory_space;
+               void *remove_memory_space;
+               efi_status_t (__efiapi *get_memory_space_descriptor)(efi_physical_addr_t,
+                                                                    efi_gcd_memory_space_desc_t *);
+               efi_status_t (__efiapi *set_memory_space_attributes)(efi_physical_addr_t,
+                                                                    u64, u64);
+               void *get_memory_space_map;
+               void *add_io_space;
+               void *allocate_io_space;
+               void *free_io_space;
+               void *remove_io_space;
+               void *get_io_space_descriptor;
+               void *get_io_space_map;
+               void *dispatch;
+               void *schedule;
+               void *trust;
+               void *process_firmware_volume;
+               void *set_memory_space_capabilities;
+       };
+       struct {
+               efi_table_hdr_t hdr;
+               u32 add_memory_space;
+               u32 allocate_memory_space;
+               u32 free_memory_space;
+               u32 remove_memory_space;
+               u32 get_memory_space_descriptor;
+               u32 set_memory_space_attributes;
+               u32 get_memory_space_map;
+               u32 add_io_space;
+               u32 allocate_io_space;
+               u32 free_io_space;
+               u32 remove_io_space;
+               u32 get_io_space_descriptor;
+               u32 get_io_space_map;
+               u32 dispatch;
+               u32 schedule;
+               u32 trust;
+               u32 process_firmware_volume;
+               u32 set_memory_space_capabilities;
+       } mixed_mode;
+};
+
 typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t;
 
 union efi_uga_draw_protocol {
@@ -720,6 +794,13 @@ union efi_tcg2_protocol {
        } mixed_mode;
 };
 
+struct riscv_efi_boot_protocol {
+       u64 revision;
+
+       efi_status_t (__efiapi *get_boot_hartid)(struct riscv_efi_boot_protocol *,
+                                                unsigned long *boot_hartid);
+};
+
 typedef union efi_load_file_protocol efi_load_file_protocol_t;
 typedef union efi_load_file_protocol efi_load_file2_protocol_t;
 
@@ -865,7 +946,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
                                 unsigned long *image_size,
                                 unsigned long *reserve_addr,
                                 unsigned long *reserve_size,
-                                efi_loaded_image_t *image);
+                                efi_loaded_image_t *image,
+                                efi_handle_t image_handle);
 
 asmlinkage void __noreturn efi_enter_kernel(unsigned long entrypoint,
                                            unsigned long fdt_addr,
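
A hedged aside (not part of the series): given the DXE services table and the
efi_dxe_call() macro added above, classifying an address via the GCD could
look like the following; the helper name is hypothetical.

	/* Sketch: report whether 'addr' lies in GCD system memory. */
	static bool efi_addr_is_system_memory(efi_physical_addr_t addr)
	{
		efi_gcd_memory_space_desc_t desc;

		if (!efi_dxe_table ||
		    efi_dxe_call(get_memory_space_descriptor, addr, &desc) != EFI_SUCCESS)
			return false;

		return desc.gcd_memory_type == EfiGcdMemoryTypeSystemMemory;
	}
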
index 724155b9e10dcf84a44a836efed036574daaa728..715f374791542bb477bdbb9e613862c6c5b861c7 100644 (file)
@@ -56,6 +56,7 @@ efi_status_t efi_random_alloc(unsigned long size,
                              unsigned long random_seed)
 {
        unsigned long map_size, desc_size, total_slots = 0, target_slot;
+       unsigned long total_mirrored_slots = 0;
        unsigned long buff_size;
        efi_status_t status;
        efi_memory_desc_t *memory_map;
@@ -86,8 +87,14 @@ efi_status_t efi_random_alloc(unsigned long size,
                slots = get_entry_num_slots(md, size, ilog2(align));
                MD_NUM_SLOTS(md) = slots;
                total_slots += slots;
+               if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
+                       total_mirrored_slots += slots;
        }
 
+       /* consider only mirrored slots for randomization if any exist */
+       if (total_mirrored_slots > 0)
+               total_slots = total_mirrored_slots;
+
        /* find a random number between 0 and total_slots */
        target_slot = (total_slots * (u64)(random_seed & U32_MAX)) >> 32;
 
@@ -107,6 +114,10 @@ efi_status_t efi_random_alloc(unsigned long size,
                efi_physical_addr_t target;
                unsigned long pages;
 
+               if (total_mirrored_slots > 0 &&
+                   !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
+                       continue;
+
                if (target_slot >= MD_NUM_SLOTS(md)) {
                        target_slot -= MD_NUM_SLOTS(md);
                        continue;
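
The target-slot computation above maps the 32-bit seed onto [0, total_slots)
with a 32.32 fixed-point multiply rather than a divide; in isolation (helper
name hypothetical, masking to 32 bits implied by the u32 parameter):

	/* Scale a 32-bit random value uniformly onto [0, nslots). */
	static unsigned long pick_slot(unsigned long nslots, u32 seed)
	{
		return ((u64)nslots * seed) >> 32;
	}
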
index 9c460843442f5ad56a8865a19088d7a2cb3dbcbd..9e85e58d1f27ff45c3a8c37599bd5ffe648e1a80 100644 (file)
@@ -21,9 +21,9 @@
 #define MIN_KIMG_ALIGN         SZ_4M
 #endif
 
-typedef void __noreturn (*jump_kernel_func)(unsigned int, unsigned long);
+typedef void __noreturn (*jump_kernel_func)(unsigned long, unsigned long);
 
-static u32 hartid;
+static unsigned long hartid;
 
 static int get_boot_hartid_from_fdt(void)
 {
@@ -47,14 +47,31 @@ static int get_boot_hartid_from_fdt(void)
        return 0;
 }
 
+static efi_status_t get_boot_hartid_from_efi(void)
+{
+       efi_guid_t boot_protocol_guid = RISCV_EFI_BOOT_PROTOCOL_GUID;
+       struct riscv_efi_boot_protocol *boot_protocol;
+       efi_status_t status;
+
+       status = efi_bs_call(locate_protocol, &boot_protocol_guid, NULL,
+                            (void **)&boot_protocol);
+       if (status != EFI_SUCCESS)
+               return status;
+       return efi_call_proto(boot_protocol, get_boot_hartid, &hartid);
+}
+
 efi_status_t check_platform_features(void)
 {
+       efi_status_t status;
        int ret;
 
-       ret = get_boot_hartid_from_fdt();
-       if (ret) {
-               efi_err("/chosen/boot-hartid missing or invalid!\n");
-               return EFI_UNSUPPORTED;
+       status = get_boot_hartid_from_efi();
+       if (status != EFI_SUCCESS) {
+               ret = get_boot_hartid_from_fdt();
+               if (ret) {
+                       efi_err("Failed to get boot hartid!\n");
+                       return EFI_UNSUPPORTED;
+               }
        }
        return EFI_SUCCESS;
 }
@@ -80,7 +97,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
                                 unsigned long *image_size,
                                 unsigned long *reserve_addr,
                                 unsigned long *reserve_size,
-                                efi_loaded_image_t *image)
+                                efi_loaded_image_t *image,
+                                efi_handle_t image_handle)
 {
        unsigned long kernel_size = 0;
        unsigned long preferred_addr;
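
Note how the hartid variable widens from u32 to unsigned long above: it can
then be passed directly as the protocol's unsigned long *boot_hartid
out-parameter, and the jump_kernel_func signature changes to match, so the
boot hart value survives intact on RV64.
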
index 01ddd4502e28a116f553ffb5b3383e76def9fded..b14e88ccefcab5cedeca8ad5d1ce68be98e7d4b0 100644 (file)
@@ -22,6 +22,7 @@
 #define MAXMEM_X86_64_4LEVEL (1ull << 46)
 
 const efi_system_table_t *efi_system_table;
+const efi_dxe_services_table_t *efi_dxe_table;
 extern u32 image_offset;
 static efi_loaded_image_t *image = NULL;
 
@@ -211,9 +212,110 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
        }
 }
 
+static void
+adjust_memory_range_protection(unsigned long start, unsigned long size)
+{
+       efi_status_t status;
+       efi_gcd_memory_space_desc_t desc;
+       unsigned long end, next;
+       unsigned long rounded_start, rounded_end;
+       unsigned long unprotect_start, unprotect_size;
+       int has_system_memory = 0;
+
+       if (efi_dxe_table == NULL)
+               return;
+
+       rounded_start = rounddown(start, EFI_PAGE_SIZE);
+       rounded_end = roundup(start + size, EFI_PAGE_SIZE);
+
+       /*
+        * Don't modify the attributes of memory regions that are
+        * already suitable, to lower the chance of running into
+        * firmware bugs.
+        */
+
+       for (end = start + size; start < end; start = next) {
+
+               status = efi_dxe_call(get_memory_space_descriptor, start, &desc);
+
+               if (status != EFI_SUCCESS)
+                       return;
+
+               next = desc.base_address + desc.length;
+
+               /*
+                * Only system memory is suitable for trampoline/kernel image placement,
+                * so only this type of memory needs its attributes to be modified.
+                */
+
+               if (desc.gcd_memory_type != EfiGcdMemoryTypeSystemMemory ||
+                   (desc.attributes & (EFI_MEMORY_RO | EFI_MEMORY_XP)) == 0)
+                       continue;
+
+               unprotect_start = max(rounded_start, (unsigned long)desc.base_address);
+               unprotect_size = min(rounded_end, next) - unprotect_start;
+
+               status = efi_dxe_call(set_memory_space_attributes,
+                                     unprotect_start, unprotect_size,
+                                     EFI_MEMORY_WB);
+
+               if (status != EFI_SUCCESS) {
+                       efi_warn("Unable to unprotect memory range [%08lx,%08lx]: %d\n",
+                                unprotect_start,
+                                unprotect_start + unprotect_size,
+                                (int)status);
+               }
+       }
+}
+
+/*
+ * The trampoline takes 2 pages and can be loaded into the first megabyte
+ * of memory, with its end placed between 128k and 640k, where the BIOS
+ * area might start. (see arch/x86/boot/compressed/pgtable_64.c)
+ *
+ * We cannot determine the exact trampoline placement since the memory map
+ * can be modified by UEFI, which can alter the computed address.
+ */
+
+#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024)
+#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024)
+
+void startup_32(struct boot_params *boot_params);
+
+static void
+               /* passthrough mode excludes SR-IOV mode */
+{
+       /*
+        * Allow execution of possible trampoline used
+        * for switching between 4- and 5-level page tables
+        * and relocated kernel image.
+        */
+
+       adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE,
+                                      TRAMPOLINE_PLACEMENT_SIZE);
+
+#ifdef CONFIG_64BIT
+       if (image_base != (unsigned long)startup_32)
+               adjust_memory_range_protection(image_base, image_size);
+#else
+       /*
+        * Clear protection flags on a whole range of possible
+        * addresses used for KASLR. We don't need to do that
+        * on x86_64, since KASLR/extraction is performed after
+        * dedicated identity page tables are built and we only
+        * need to remove possible protection on relocated image
+        * itself disregarding further relocations.
+        */
+       adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
+                                      KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
+#endif
+}
+
 static const efi_char16_t apple[] = L"Apple";
 
-static void setup_quirks(struct boot_params *boot_params)
+static void setup_quirks(struct boot_params *boot_params,
+                        unsigned long image_base,
+                        unsigned long image_size)
 {
        efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
                efi_table_attr(efi_system_table, fw_vendor);
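
For reference, the placement window defined above works out to
[120 KiB, 640 KiB): TRAMPOLINE_PLACEMENT_BASE is (128 - 8) * 1024 = 122880
bytes, i.e. two pages (8 KiB) below the lowest possible trampoline end at
128 KiB, and the size (640 - 120) * 1024 = 520 KiB extends the window up to
the 640 KiB line where BIOS-reserved memory may begin.
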
@@ -222,6 +324,9 @@ static void setup_quirks(struct boot_params *boot_params)
                if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
                        retrieve_apple_device_properties(boot_params);
        }
+
+       if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES))
+               setup_memory_protection(image_base, image_size);
 }
 
 /*
@@ -341,8 +446,6 @@ static void __noreturn efi_exit(efi_handle_t handle, efi_status_t status)
                asm("hlt");
 }
 
-void startup_32(struct boot_params *boot_params);
-
 void __noreturn efi_stub_entry(efi_handle_t handle,
                               efi_system_table_t *sys_table_arg,
                               struct boot_params *boot_params);
@@ -677,11 +780,17 @@ unsigned long efi_main(efi_handle_t handle,
        efi_status_t status;
 
        efi_system_table = sys_table_arg;
-
        /* Check if we were booted by the EFI firmware */
        if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
                efi_exit(handle, EFI_INVALID_PARAMETER);
 
+       efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID);
+       if (efi_dxe_table &&
+           efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) {
+               efi_warn("Ignoring DXE services table: invalid signature\n");
+               efi_dxe_table = NULL;
+       }
+
        /*
         * If the kernel isn't already loaded at a suitable address,
         * relocate it.
@@ -791,7 +900,7 @@ unsigned long efi_main(efi_handle_t handle,
 
        setup_efi_pci(boot_params);
 
-       setup_quirks(boot_params);
+       setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start);
 
        status = exit_boot(boot_params, handle);
        if (status != EFI_SUCCESS) {
index b8b1473a5b1e9670c0d19027965ac89e440c6c85..f87ff3fa8a531d27ac537d7f1a3eb4dd894dcad2 100644 (file)
@@ -178,3 +178,22 @@ discussed but the idea is to provide a low-level access point
 for debugging and hacking and to expose all lines without the
 need of any exporting. Also provide ample ammunition to shoot
 oneself in the foot, because this is debugfs after all.
+
+
+Moving over to immutable irq_chip structures
+
+Most of the gpio chips implementing interrupt support rely on gpiolib
+intercepting some of the irq_chip callbacks, preventing the structures
+from being made read-only and forcing duplication of structures that
+should otherwise be unique.
+
+The solution is to call into the gpiolib code when needed (resource
+management, enable/disable or unmask/mask callbacks), and to let the
+core code know about that by exposing a flag (IRQCHIP_IMMUTABLE) in
+the irq_chip structure. The irq_chip structure can then be made unique
+and const.
+
+A small number of drivers have been converted (pl061, tegra186, msm,
+amd, apple), and can be used as examples of how to proceed with this
+conversion. Note that drivers using the generic irqchip framework
+cannot be converted yet, but watch this space!
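
A rough template of the pattern (names here are placeholders; see the pl061
and tegra186 conversions later in this series for the real thing):

	static const struct irq_chip foo_irq_chip = {
		.irq_mask	= foo_irq_mask,		/* must call gpiochip_disable_irq() */
		.irq_unmask	= foo_irq_unmask,	/* must call gpiochip_enable_irq() */
		.flags		= IRQCHIP_IMMUTABLE,
		GPIOCHIP_IRQ_RESOURCE_HELPERS,
	};

	/* at probe time, instead of assigning girq->chip directly: */
	gpio_irq_chip_set_chip(girq, &foo_irq_chip);
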
index 4c1f9e1091b7f05d45c431406af083a7b3d1564e..2db19cd640a43a4265975e06466d0556dc8ebcaa 100644 (file)
@@ -707,6 +707,9 @@ static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        unsigned long flags;
        unsigned int on, off;
 
+       if (state->polarity != PWM_POLARITY_NORMAL)
+               return -EINVAL;
+
        val = (unsigned long long) mvpwm->clk_rate * state->duty_cycle;
        do_div(val, NSEC_PER_SEC);
        if (val > UINT_MAX + 1ULL)
@@ -871,13 +874,6 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
        mvpwm->chip.dev = dev;
        mvpwm->chip.ops = &mvebu_pwm_ops;
        mvpwm->chip.npwm = mvchip->chip.ngpio;
-       /*
-        * There may already be some PWM allocated, so we can't force
-        * mvpwm->chip.base to a fixed point like mvchip->chip.base.
-        * So, we let pwmchip_add() do the numbering and take the next free
-        * region.
-        */
-       mvpwm->chip.base = -1;
 
        spin_lock_init(&mvpwm->lock);
 
index d2fe76f3f34fd4930c62bc1b5213422bb04386b9..8726921a11294505157edac3764d2177b513a050 100644 (file)
@@ -762,11 +762,11 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pendin
        bitmap_xor(cur_stat, new_stat, old_stat, gc->ngpio);
        bitmap_and(trigger, cur_stat, chip->irq_mask, gc->ngpio);
 
+       bitmap_copy(chip->irq_stat, new_stat, gc->ngpio);
+
        if (bitmap_empty(trigger, gc->ngpio))
                return false;
 
-       bitmap_copy(chip->irq_stat, new_stat, gc->ngpio);
-
        bitmap_and(cur_stat, chip->irq_trig_fall, old_stat, gc->ngpio);
        bitmap_and(old_stat, chip->irq_trig_raise, new_stat, gc->ngpio);
        bitmap_or(new_stat, old_stat, cur_stat, gc->ngpio);
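
Moving the bitmap_copy() above the empty-trigger early return matters: the
cached irq_stat is now refreshed from the freshly read state even when every
changed line is masked, so the next pass compares against the true current
pin state rather than a stale snapshot.
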
index 4ecab700f23f6cc43c79a0bb05a0f1844e499faa..6464056cb6ae523a232e4c1f433110f5ebadaf1c 100644 (file)
@@ -52,7 +52,6 @@ struct pl061 {
 
        void __iomem            *base;
        struct gpio_chip        gc;
-       struct irq_chip         irq_chip;
        int                     parent_irq;
 
 #ifdef CONFIG_PM
@@ -241,6 +240,8 @@ static void pl061_irq_mask(struct irq_data *d)
        gpioie = readb(pl061->base + GPIOIE) & ~mask;
        writeb(gpioie, pl061->base + GPIOIE);
        raw_spin_unlock(&pl061->lock);
+
+       gpiochip_disable_irq(gc, d->hwirq);
 }
 
 static void pl061_irq_unmask(struct irq_data *d)
@@ -250,6 +251,8 @@ static void pl061_irq_unmask(struct irq_data *d)
        u8 mask = BIT(irqd_to_hwirq(d) % PL061_GPIO_NR);
        u8 gpioie;
 
+       gpiochip_enable_irq(gc, d->hwirq);
+
        raw_spin_lock(&pl061->lock);
        gpioie = readb(pl061->base + GPIOIE) | mask;
        writeb(gpioie, pl061->base + GPIOIE);
@@ -283,6 +286,24 @@ static int pl061_irq_set_wake(struct irq_data *d, unsigned int state)
        return irq_set_irq_wake(pl061->parent_irq, state);
 }
 
+static void pl061_irq_print_chip(struct irq_data *data, struct seq_file *p)
+{
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+
+       seq_puts(p, dev_name(gc->parent));
+}
+
+static const struct irq_chip pl061_irq_chip = {
+       .irq_ack                = pl061_irq_ack,
+       .irq_mask               = pl061_irq_mask,
+       .irq_unmask             = pl061_irq_unmask,
+       .irq_set_type           = pl061_irq_type,
+       .irq_set_wake           = pl061_irq_set_wake,
+       .irq_print_chip         = pl061_irq_print_chip,
+       .flags                  = IRQCHIP_IMMUTABLE,
+       GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
 static int pl061_probe(struct amba_device *adev, const struct amba_id *id)
 {
        struct device *dev = &adev->dev;
@@ -315,13 +336,6 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id)
        /*
         * irq_chip support
         */
-       pl061->irq_chip.name = dev_name(dev);
-       pl061->irq_chip.irq_ack = pl061_irq_ack;
-       pl061->irq_chip.irq_mask = pl061_irq_mask;
-       pl061->irq_chip.irq_unmask = pl061_irq_unmask;
-       pl061->irq_chip.irq_set_type = pl061_irq_type;
-       pl061->irq_chip.irq_set_wake = pl061_irq_set_wake;
-
        writeb(0, pl061->base + GPIOIE); /* disable irqs */
        irq = adev->irq[0];
        if (!irq)
@@ -329,7 +343,7 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id)
        pl061->parent_irq = irq;
 
        girq = &pl061->gc.irq;
-       girq->chip = &pl061->irq_chip;
+       gpio_irq_chip_set_chip(girq, &pl061_irq_chip);
        girq->parent_handler = pl061_irq_handler;
        girq->num_parents = 1;
        girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
index 8e5d87984a48957d9a7c4856511a490e0978f45a..41c31b10ae8488c5a897bdced43f99afa60bf985 100644 (file)
@@ -134,7 +134,7 @@ static int gpio_sim_get_multiple(struct gpio_chip *gc,
        struct gpio_sim_chip *chip = gpiochip_get_data(gc);
 
        mutex_lock(&chip->lock);
-       bitmap_copy(bits, chip->value_map, gc->ngpio);
+       bitmap_replace(bits, bits, chip->value_map, mask, gc->ngpio);
        mutex_unlock(&chip->lock);
 
        return 0;
@@ -146,7 +146,7 @@ static void gpio_sim_set_multiple(struct gpio_chip *gc,
        struct gpio_sim_chip *chip = gpiochip_get_data(gc);
 
        mutex_lock(&chip->lock);
-       bitmap_copy(chip->value_map, bits, gc->ngpio);
+       bitmap_replace(chip->value_map, chip->value_map, bits, mask, gc->ngpio);
        mutex_unlock(&chip->lock);
 }
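
Unlike bitmap_copy(), bitmap_replace() merges under the caller's mask, which
is exactly what get/set_multiple need; its effect is:

	/* dst = (old & ~mask) | (new & mask), over nbits bits */
	bitmap_replace(dst, old, new, mask, nbits);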
 
index 031fe105b58ed34a2d54f381eef189093fcfc078..84c4f1e9fb0c580a24c87ff320c6f5ccdceb3326 100644 (file)
@@ -80,7 +80,6 @@ struct tegra_gpio_soc {
 
 struct tegra_gpio {
        struct gpio_chip gpio;
-       struct irq_chip intc;
        unsigned int num_irq;
        unsigned int *irq;
 
@@ -372,6 +371,8 @@ static void tegra186_irq_mask(struct irq_data *data)
        value = readl(base + TEGRA186_GPIO_ENABLE_CONFIG);
        value &= ~TEGRA186_GPIO_ENABLE_CONFIG_INTERRUPT;
        writel(value, base + TEGRA186_GPIO_ENABLE_CONFIG);
+
+       gpiochip_disable_irq(&gpio->gpio, data->hwirq);
 }
 
 static void tegra186_irq_unmask(struct irq_data *data)
@@ -385,6 +386,8 @@ static void tegra186_irq_unmask(struct irq_data *data)
        if (WARN_ON(base == NULL))
                return;
 
+       gpiochip_enable_irq(&gpio->gpio, data->hwirq);
+
        value = readl(base + TEGRA186_GPIO_ENABLE_CONFIG);
        value |= TEGRA186_GPIO_ENABLE_CONFIG_INTERRUPT;
        writel(value, base + TEGRA186_GPIO_ENABLE_CONFIG);
@@ -456,6 +459,24 @@ static int tegra186_irq_set_wake(struct irq_data *data, unsigned int on)
        return 0;
 }
 
+static void tegra186_irq_print_chip(struct irq_data *data, struct seq_file *p)
+{
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+
+       seq_puts(p, dev_name(gc->parent));
+}
+
+static const struct irq_chip tegra186_gpio_irq_chip = {
+       .irq_ack                = tegra186_irq_ack,
+       .irq_mask               = tegra186_irq_mask,
+       .irq_unmask             = tegra186_irq_unmask,
+       .irq_set_type           = tegra186_irq_set_type,
+       .irq_set_wake           = tegra186_irq_set_wake,
+       .irq_print_chip         = tegra186_irq_print_chip,
+       .flags                  = IRQCHIP_IMMUTABLE,
+       GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
 static void tegra186_gpio_irq(struct irq_desc *desc)
 {
        struct tegra_gpio *gpio = irq_desc_get_handler_data(desc);
@@ -760,15 +781,8 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
        gpio->gpio.of_xlate = tegra186_gpio_of_xlate;
 #endif /* CONFIG_OF_GPIO */
 
-       gpio->intc.name = dev_name(&pdev->dev);
-       gpio->intc.irq_ack = tegra186_irq_ack;
-       gpio->intc.irq_mask = tegra186_irq_mask;
-       gpio->intc.irq_unmask = tegra186_irq_unmask;
-       gpio->intc.irq_set_type = tegra186_irq_set_type;
-       gpio->intc.irq_set_wake = tegra186_irq_set_wake;
-
        irq = &gpio->gpio.irq;
-       irq->chip = &gpio->intc;
+       gpio_irq_chip_set_chip(irq, &tegra186_gpio_irq_chip);
        irq->fwnode = of_node_to_fwnode(pdev->dev.of_node);
        irq->child_to_parent_hwirq = tegra186_gpio_child_to_parent_hwirq;
        irq->populate_parent_alloc_arg = tegra186_gpio_populate_parent_fwspec;
index 20780c35da1b4150fe0c94ad3901fcf1f8b0a8d7..23cddb265a0dc3b6d5b6ed9108496ab6449dc209 100644 (file)
@@ -125,9 +125,13 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio,
 {
        struct vf610_gpio_port *port = gpiochip_get_data(chip);
        unsigned long mask = BIT(gpio);
+       u32 val;
 
-       if (port->sdata && port->sdata->have_paddr)
-               vf610_gpio_writel(mask, port->gpio_base + GPIO_PDDR);
+       if (port->sdata && port->sdata->have_paddr) {
+               val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR);
+               val |= mask;
+               vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR);
+       }
 
        vf610_gpio_set(chip, gpio, value);
 
index 47455810bdb91ad35eaf50d2087f35f009732792..e6534ea1eaa7a21968d1793c172a0f54daad87ea 100644 (file)
@@ -130,7 +130,6 @@ static int visconti_gpio_probe(struct platform_device *pdev)
        struct gpio_irq_chip *girq;
        struct irq_domain *parent;
        struct device_node *irq_parent;
-       struct fwnode_handle *fwnode;
        int ret;
 
        priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -150,14 +149,12 @@ static int visconti_gpio_probe(struct platform_device *pdev)
        }
 
        parent = irq_find_host(irq_parent);
+       of_node_put(irq_parent);
        if (!parent) {
                dev_err(dev, "No IRQ parent domain\n");
                return -ENODEV;
        }
 
-       fwnode = of_node_to_fwnode(irq_parent);
-       of_node_put(irq_parent);
-
        ret = bgpio_init(&priv->gpio_chip, dev, 4,
                         priv->base + GPIO_IDATA,
                         priv->base + GPIO_OSET,
@@ -180,7 +177,7 @@ static int visconti_gpio_probe(struct platform_device *pdev)
 
        girq = &priv->gpio_chip.irq;
        girq->chip = irq_chip;
-       girq->fwnode = fwnode;
+       girq->fwnode = of_node_to_fwnode(dev->of_node);
        girq->parent_domain = parent;
        girq->child_to_parent_hwirq = visconti_gpio_child_to_parent_hwirq;
        girq->populate_parent_alloc_arg = visconti_gpio_populate_parent_fwspec;
index a5495ad31c9ce6ad8b523fadeba6b83a3e9403a1..c2523ac26facdd4a0dc69bb0a6fa6fbd2186c893 100644 (file)
@@ -108,7 +108,7 @@ static int acpi_gpiochip_find(struct gpio_chip *gc, void *data)
  * controller does not have GPIO chip registered at the moment. This is to
  * support probe deferral.
  */
-static struct gpio_desc *acpi_get_gpiod(char *path, int pin)
+static struct gpio_desc *acpi_get_gpiod(char *path, unsigned int pin)
 {
        struct gpio_chip *chip;
        acpi_handle handle;
@@ -136,7 +136,7 @@ static struct gpio_desc *acpi_get_gpiod(char *path, int pin)
  * as it is intended for use outside of the GPIO layer (in a similar fashion to
  * gpiod_get_index() for example) it also holds a reference to the GPIO device.
  */
-struct gpio_desc *acpi_get_and_request_gpiod(char *path, int pin, char *label)
+struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, char *label)
 {
        struct gpio_desc *gpio;
        int ret;
@@ -317,11 +317,12 @@ static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip,
        return desc;
 }
 
-static bool acpi_gpio_in_ignore_list(const char *controller_in, int pin_in)
+static bool acpi_gpio_in_ignore_list(const char *controller_in, unsigned int pin_in)
 {
        const char *controller, *pin_str;
-       int len, pin;
+       unsigned int pin;
        char *endp;
+       int len;
 
        controller = ignore_wake;
        while (controller) {
@@ -354,13 +355,13 @@ err:
 static bool acpi_gpio_irq_is_wake(struct device *parent,
                                  struct acpi_resource_gpio *agpio)
 {
-       int pin = agpio->pin_table[0];
+       unsigned int pin = agpio->pin_table[0];
 
        if (agpio->wake_capable != ACPI_WAKE_CAPABLE)
                return false;
 
        if (acpi_gpio_in_ignore_list(dev_name(parent), pin)) {
-               dev_info(parent, "Ignoring wakeup on pin %d\n", pin);
+               dev_info(parent, "Ignoring wakeup on pin %u\n", pin);
                return false;
        }
 
@@ -378,7 +379,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
        struct acpi_gpio_event *event;
        irq_handler_t handler = NULL;
        struct gpio_desc *desc;
-       int ret, pin, irq;
+       unsigned int pin;
+       int ret, irq;
 
        if (!acpi_gpio_get_irq_resource(ares, &agpio))
                return AE_OK;
@@ -387,8 +389,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
        pin = agpio->pin_table[0];
 
        if (pin <= 255) {
-               char ev_name[5];
-               sprintf(ev_name, "_%c%02hhX",
+               char ev_name[8];
+               sprintf(ev_name, "_%c%02X",
                        agpio->triggering == ACPI_EDGE_SENSITIVE ? 'E' : 'L',
                        pin);
                if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle)))
@@ -1098,7 +1100,7 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address,
 
        length = min_t(u16, agpio->pin_table_length, pin_index + bits);
        for (i = pin_index; i < length; ++i) {
-               int pin = agpio->pin_table[i];
+               unsigned int pin = agpio->pin_table[i];
                struct acpi_gpio_connection *conn;
                struct gpio_desc *desc;
                bool found;
index ae1ce319cd78e0ba3925002a4dc154296abca85d..7e5e51d49d09e0879fec1a01254cdf853aaa04ad 100644 (file)
@@ -910,7 +910,7 @@ static void of_gpiochip_init_valid_mask(struct gpio_chip *chip)
                                           i, &start);
                of_property_read_u32_index(np, "gpio-reserved-ranges",
                                           i + 1, &count);
-               if (start >= chip->ngpio || start + count >= chip->ngpio)
+               if (start >= chip->ngpio || start + count > chip->ngpio)
                        continue;
 
                bitmap_clear(chip->valid_mask, start, count);
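
Worked example of the off-by-one: with chip->ngpio = 32, a reserved range of
start = 24, count = 8 ends exactly at bit 32 and is legitimate; the corrected
start + count > chip->ngpio test accepts it and clears bits 24..31, where the
old >= comparison wrongly skipped the entry.
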
index e59884cc12a718f16df09981b9fda2fffd81e9c4..690035124faa3522fcee7a9d32ffa50166bcc767 100644 (file)
@@ -1404,6 +1404,16 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset)
 {
        struct irq_domain *domain = gc->irq.domain;
 
+#ifdef CONFIG_GPIOLIB_IRQCHIP
+       /*
+        * Avoid race condition with other code, which tries to lookup
+        * Avoid a race condition with other code, which tries to look up
+        * i.e. while gpiochip is still being brought up.
+        */
+       if (!gc->irq.initialized)
+               return -EPROBE_DEFER;
+#endif
+
        if (!gpiochip_irqchip_irq_valid(gc, offset))
                return -ENXIO;
 
@@ -1423,19 +1433,21 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset)
        return irq_create_mapping(domain, offset);
 }
 
-static int gpiochip_irq_reqres(struct irq_data *d)
+int gpiochip_irq_reqres(struct irq_data *d)
 {
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
 
        return gpiochip_reqres_irq(gc, d->hwirq);
 }
+EXPORT_SYMBOL(gpiochip_irq_reqres);
 
-static void gpiochip_irq_relres(struct irq_data *d)
+void gpiochip_irq_relres(struct irq_data *d)
 {
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
 
        gpiochip_relres_irq(gc, d->hwirq);
 }
+EXPORT_SYMBOL(gpiochip_irq_relres);
 
 static void gpiochip_irq_mask(struct irq_data *d)
 {
@@ -1475,6 +1487,11 @@ static void gpiochip_set_irq_hooks(struct gpio_chip *gc)
 {
        struct irq_chip *irqchip = gc->irq.chip;
 
+       if (irqchip->flags & IRQCHIP_IMMUTABLE)
+               return;
+
+       chip_warn(gc, "not an immutable chip, please consider fixing it!\n");
+
        if (!irqchip->irq_request_resources &&
            !irqchip->irq_release_resources) {
                irqchip->irq_request_resources = gpiochip_irq_reqres;
@@ -1591,6 +1608,15 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
 
        gpiochip_set_irq_hooks(gc);
 
+       /*
+        * Use barrier() here to prevent the compiler from reordering the
+        * store to gc->irq.initialized before the initialization of the
+        * GPIO chip irq members above.
+        */
+       barrier();
+
+       gc->irq.initialized = true;
+
        acpi_gpiochip_request_interrupts(gc);
 
        return 0;
@@ -1633,7 +1659,7 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gc)
                irq_domain_remove(gc->irq.domain);
        }
 
-       if (irqchip) {
+       if (irqchip && !(irqchip->flags & IRQCHIP_IMMUTABLE)) {
                if (irqchip->irq_request_resources == gpiochip_irq_reqres) {
                        irqchip->irq_request_resources = NULL;
                        irqchip->irq_release_resources = NULL;
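
On the consumer side, the new initialized check turns a too-early lookup into
an ordinary probe deferral; a hypothetical caller:

	int irq = gpiod_to_irq(desc);

	if (irq == -EPROBE_DEFER)
		return irq;	/* retried once the gpiochip finishes registration */
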
index 5b393622f59205700acf687179841e7e0b0b8f5b..a0f0a17e224fe554aeff766a5dc3dcc5b4b2144d 100644 (file)
 #define CONNECTOR_OBJECT_ID_eDP                   0x14
 #define CONNECTOR_OBJECT_ID_MXM                   0x15
 #define CONNECTOR_OBJECT_ID_LVDS_eDP              0x16
+#define CONNECTOR_OBJECT_ID_USBC                  0x17
 
 /* deleted */
 
index cdf0818088b3de86633cab519473213fc5c6310a..7606e3b6361eaa18ce76b17dbc625845f5d18ba2 100644 (file)
@@ -1342,9 +1342,11 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
 bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
 bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
 #else
 static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
+static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
 static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
 #endif
 
index 0e12315fa0cb87c643426bda0fa3a8f09feb3820..98ac53ee6bb55f0d9fdda1ceeeb7c0efc73a0567 100644 (file)
@@ -1045,6 +1045,20 @@ bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
                (pm_suspend_target_state == PM_SUSPEND_MEM);
 }
 
+/**
+ * amdgpu_acpi_should_gpu_reset
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns true if the GPU should be reset, false if not.
+ */
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
+{
+       if (adev->flags & AMD_IS_APU)
+               return false;
+       return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
+}
+
 /**
  * amdgpu_acpi_is_s0ix_active
  *
index 970b065e9a6b13929ed6f29bf1fc28e413e8b756..d0d0ea565e3df48b3cccd65425cf36968dce009c 100644 (file)
@@ -128,6 +128,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
                goto free_chunk;
        }
 
+       mutex_lock(&p->ctx->lock);
+
        /* skip guilty context job */
        if (atomic_read(&p->ctx->guilty) == 1) {
                ret = -ECANCELED;
@@ -709,6 +711,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
        dma_fence_put(parser->fence);
 
        if (parser->ctx) {
+               mutex_unlock(&parser->ctx->lock);
                amdgpu_ctx_put(parser->ctx);
        }
        if (parser->bo_list)
@@ -1157,6 +1160,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 {
        int i, r;
 
+       /* TODO: Investigate why we still need the context lock */
+       mutex_unlock(&p->ctx->lock);
+
        for (i = 0; i < p->nchunks; ++i) {
                struct amdgpu_cs_chunk *chunk;
 
@@ -1167,32 +1173,34 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
                        r = amdgpu_cs_process_fence_dep(p, chunk);
                        if (r)
-                               return r;
+                               goto out;
                        break;
                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
                        r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
                        if (r)
-                               return r;
+                               goto out;
                        break;
                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
                        r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
                        if (r)
-                               return r;
+                               goto out;
                        break;
                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
                        r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
                        if (r)
-                               return r;
+                               goto out;
                        break;
                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
                        r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
                        if (r)
-                               return r;
+                               goto out;
                        break;
                }
        }
 
-       return 0;
+out:
+       mutex_lock(&p->ctx->lock);
+       return r;
 }
 
 static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
@@ -1368,6 +1376,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                goto out;
 
        r = amdgpu_cs_submit(&parser, cs);
+
 out:
        amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
 
index 5981c7d9bd48fa616cc66b49a36bc7cc3cbac308..c317078d1afd0d3e7cab7e8c9251e94c6f21f483 100644 (file)
@@ -237,6 +237,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 
        kref_init(&ctx->refcount);
        spin_lock_init(&ctx->ring_lock);
+       mutex_init(&ctx->lock);
 
        ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
        ctx->reset_counter_query = ctx->reset_counter;
@@ -295,6 +296,7 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
 {
        struct amdgpu_device *adev = ctx->adev;
        enum amd_dpm_forced_level level;
+       u32 current_stable_pstate;
        int r;
 
        mutex_lock(&adev->pm.stable_pstate_ctx_lock);
@@ -303,6 +305,10 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
                goto done;
        }
 
+       r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+       if (r || (stable_pstate == current_stable_pstate))
+               goto done;
+
        switch (stable_pstate) {
        case AMDGPU_CTX_STABLE_PSTATE_NONE:
                level = AMD_DPM_FORCED_LEVEL_AUTO;
@@ -357,6 +363,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
                drm_dev_exit(idx);
        }
 
+       mutex_destroy(&ctx->lock);
        kfree(ctx);
 }
 
index d0cbfcea90f72abed197e97575d849eb727d2249..142f2f87d44cea617f01825b44d32beee162e5bc 100644 (file)
@@ -49,6 +49,7 @@ struct amdgpu_ctx {
        bool                            preamble_presented;
        int32_t                         init_priority;
        int32_t                         override_priority;
+       struct mutex                    lock;
        atomic_t                        guilty;
        unsigned long                   ras_counter_ce;
        unsigned long                   ras_counter_ue;
index 3987ecb24ef4fd50749c7944b4cec7d8f8346517..49f734137f158a6aecc6f362f9d62e1ef601fde1 100644 (file)
@@ -5733,7 +5733,7 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
                struct amdgpu_ring *ring)
 {
 #ifdef CONFIG_X86_64
-       if (adev->flags & AMD_IS_APU)
+       if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
                return;
 #endif
        if (adev->gmc.xgmi.connected_to_cpu)
@@ -5749,7 +5749,7 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
                struct amdgpu_ring *ring)
 {
 #ifdef CONFIG_X86_64
-       if (adev->flags & AMD_IS_APU)
+       if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
                return;
 #endif
        if (adev->gmc.xgmi.connected_to_cpu)
index bb1c025d90019a443d8f53cf596520b08988cf17..46ef57b07c151743d6cee9bab8325bfe75a5834f 100644 (file)
@@ -680,7 +680,7 @@ MODULE_PARM_DESC(sched_policy,
  * Maximum number of processes that HWS can schedule concurrently. The maximum is the
  * number of VMIDs assigned to the HWS, which is also the default.
  */
-int hws_max_conc_proc = 8;
+int hws_max_conc_proc = -1;
 module_param(hws_max_conc_proc, int, 0444);
 MODULE_PARM_DESC(hws_max_conc_proc,
        "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
@@ -2323,18 +2323,23 @@ static int amdgpu_pmops_suspend(struct device *dev)
 {
        struct drm_device *drm_dev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(drm_dev);
-       int r;
 
        if (amdgpu_acpi_is_s0ix_active(adev))
                adev->in_s0ix = true;
        else
                adev->in_s3 = true;
-       r = amdgpu_device_suspend(drm_dev, true);
-       if (r)
-               return r;
-       if (!adev->in_s0ix)
-               r = amdgpu_asic_reset(adev);
-       return r;
+       return amdgpu_device_suspend(drm_dev, true);
+}
+
+static int amdgpu_pmops_suspend_noirq(struct device *dev)
+{
+       struct drm_device *drm_dev = dev_get_drvdata(dev);
+       struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+       if (amdgpu_acpi_should_gpu_reset(adev))
+               return amdgpu_asic_reset(adev);
+
+       return 0;
 }
 
 static int amdgpu_pmops_resume(struct device *dev)
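
Moving the ASIC reset into the noirq phase, gated by
amdgpu_acpi_should_gpu_reset() introduced above, means APUs and
suspend-to-idle skip the reset entirely, while dGPUs entering S3 are still
reset late in suspend, after device interrupts have been disabled.
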
@@ -2390,6 +2395,71 @@ static int amdgpu_pmops_restore(struct device *dev)
        return amdgpu_device_resume(drm_dev, true);
 }
 
+static int amdgpu_runtime_idle_check_display(struct device *dev)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct drm_device *drm_dev = pci_get_drvdata(pdev);
+       struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+       if (adev->mode_info.num_crtc) {
+               struct drm_connector *list_connector;
+               struct drm_connector_list_iter iter;
+               int ret = 0;
+
+               /* XXX: Return busy if any displays are connected to avoid
+                * possible display wakeups after runtime resume due to
+                * hotplug events in case any displays were connected while
+                * the GPU was in suspend.  Remove this once that is fixed.
+                */
+               mutex_lock(&drm_dev->mode_config.mutex);
+               drm_connector_list_iter_begin(drm_dev, &iter);
+               drm_for_each_connector_iter(list_connector, &iter) {
+                       if (list_connector->status == connector_status_connected) {
+                               ret = -EBUSY;
+                               break;
+                       }
+               }
+               drm_connector_list_iter_end(&iter);
+               mutex_unlock(&drm_dev->mode_config.mutex);
+
+               if (ret)
+                       return ret;
+
+               if (amdgpu_device_has_dc_support(adev)) {
+                       struct drm_crtc *crtc;
+
+                       drm_for_each_crtc(crtc, drm_dev) {
+                               drm_modeset_lock(&crtc->mutex, NULL);
+                               if (crtc->state->active)
+                                       ret = -EBUSY;
+                               drm_modeset_unlock(&crtc->mutex);
+                               if (ret < 0)
+                                       break;
+                       }
+               } else {
+                       mutex_lock(&drm_dev->mode_config.mutex);
+                       drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
+
+                       drm_connector_list_iter_begin(drm_dev, &iter);
+                       drm_for_each_connector_iter(list_connector, &iter) {
+                               if (list_connector->dpms ==  DRM_MODE_DPMS_ON) {
+                                       ret = -EBUSY;
+                                       break;
+                               }
+                       }
+
+                       drm_connector_list_iter_end(&iter);
+
+                       drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
+                       mutex_unlock(&drm_dev->mode_config.mutex);
+               }
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
 static int amdgpu_pmops_runtime_suspend(struct device *dev)
 {
        struct pci_dev *pdev = to_pci_dev(dev);
@@ -2402,6 +2472,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
                return -EBUSY;
        }
 
+       ret = amdgpu_runtime_idle_check_display(dev);
+       if (ret)
+               return ret;
+
        /* wait for all rings to drain before suspending */
        for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
                struct amdgpu_ring *ring = adev->rings[i];
@@ -2511,41 +2585,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
                return -EBUSY;
        }
 
-       if (amdgpu_device_has_dc_support(adev)) {
-               struct drm_crtc *crtc;
-
-               drm_for_each_crtc(crtc, drm_dev) {
-                       drm_modeset_lock(&crtc->mutex, NULL);
-                       if (crtc->state->active)
-                               ret = -EBUSY;
-                       drm_modeset_unlock(&crtc->mutex);
-                       if (ret < 0)
-                               break;
-               }
-
-       } else {
-               struct drm_connector *list_connector;
-               struct drm_connector_list_iter iter;
-
-               mutex_lock(&drm_dev->mode_config.mutex);
-               drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
-
-               drm_connector_list_iter_begin(drm_dev, &iter);
-               drm_for_each_connector_iter(list_connector, &iter) {
-                       if (list_connector->dpms ==  DRM_MODE_DPMS_ON) {
-                               ret = -EBUSY;
-                               break;
-                       }
-               }
-
-               drm_connector_list_iter_end(&iter);
-
-               drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
-               mutex_unlock(&drm_dev->mode_config.mutex);
-       }
-
-       if (ret == -EBUSY)
-               DRM_DEBUG_DRIVER("failing to power off - crtc active\n");
+       ret = amdgpu_runtime_idle_check_display(dev);
 
        pm_runtime_mark_last_busy(dev);
        pm_runtime_autosuspend(dev);
@@ -2575,6 +2615,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = {
        .prepare = amdgpu_pmops_prepare,
        .complete = amdgpu_pmops_complete,
        .suspend = amdgpu_pmops_suspend,
+       .suspend_noirq = amdgpu_pmops_suspend_noirq,
        .resume = amdgpu_pmops_resume,
        .freeze = amdgpu_pmops_freeze,
        .thaw = amdgpu_pmops_thaw,
index 8fe9399762242185c0d1e6a597beff027507ed6e..28a736c507bb3f84e956203d2c115b8afdc160de 100644 (file)
@@ -266,7 +266,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
                    * adev->gfx.mec.num_pipe_per_mec
                    * adev->gfx.mec.num_queue_per_pipe;
 
-       while (queue_bit-- >= 0) {
+       while (--queue_bit >= 0) {
                if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
                        continue;
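
The post-decrement form tested the old value and then ran the body one step
too far: when queue_bit reached 0 the test still passed and the body saw
queue_bit == -1, handing test_bit() a negative index. The pre-decrement form
stops before that.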
 
index ca2cfb65f9763b640d8d90a9bd71d0cebc0bd620..a66a0881a934bb0ff3a8d2e551a262be02df739e 100644 (file)
@@ -561,9 +561,15 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
 
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(9, 0, 1):
+       case IP_VERSION(9, 3, 0):
        case IP_VERSION(9, 4, 0):
        case IP_VERSION(9, 4, 1):
        case IP_VERSION(9, 4, 2):
+       case IP_VERSION(10, 3, 3):
+       case IP_VERSION(10, 3, 4):
+       case IP_VERSION(10, 3, 5):
+       case IP_VERSION(10, 3, 6):
+       case IP_VERSION(10, 3, 7):
                /*
                 * noretry = 0 will cause kfd page fault tests fail
                 * for some ASICs, so set default to 1 for these ASICs.
index 25731719c627d6cd7f8ae8b85fb093e521596a5a..940752488330f64f8a3cdc267d399843d3158d5a 100644 (file)
@@ -1284,6 +1284,7 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,
  */
 void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
 {
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
        struct dma_fence *fence = NULL;
        struct amdgpu_bo *abo;
        int r;
@@ -1303,7 +1304,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
                amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
 
        if (bo->resource->mem_type != TTM_PL_VRAM ||
-           !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
+           !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) ||
+           adev->in_suspend || adev->shutdown)
                return;
 
        if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
index 5320bb0883d85f352867289911f67eaf64740926..317d80209e9581bb7d3f767306959b6a199ba763 100644 (file)
@@ -300,8 +300,8 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
-                    unsigned int ring_size, struct amdgpu_irq_src *irq_src,
-                    unsigned int irq_type, unsigned int prio,
+                    unsigned int max_dw, struct amdgpu_irq_src *irq_src,
+                    unsigned int irq_type, unsigned int hw_prio,
                     atomic_t *sched_score);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
 void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
index f99093f2ebc71ce12ee596c97688f33521711a71..a0ee828a4a97804cb7e4863fca2ef9ae339560b2 100644 (file)
@@ -52,7 +52,7 @@
 #define FIRMWARE_ALDEBARAN     "amdgpu/aldebaran_vcn.bin"
 #define FIRMWARE_BEIGE_GOBY    "amdgpu/beige_goby_vcn.bin"
 #define FIRMWARE_YELLOW_CARP   "amdgpu/yellow_carp_vcn.bin"
-#define FIRMWARE_VCN_3_1_2     "amdgpu/vcn_3_1_2_vcn.bin"
+#define FIRMWARE_VCN_3_1_2     "amdgpu/vcn_3_1_2.bin"
 
 MODULE_FIRMWARE(FIRMWARE_RAVEN);
 MODULE_FIRMWARE(FIRMWARE_PICASSO);
index e2fde88aaf5e3d1f5aed612ce3c813c4fecc9acb..f06fb7f882e2ec125dd4ef8316e5cf9e4cf382dd 100644 (file)
 #define AMDGPU_VCN_MULTI_QUEUE_FLAG    (1 << 8)
 #define AMDGPU_VCN_SW_RING_FLAG                (1 << 9)
 #define AMDGPU_VCN_FW_LOGGING_FLAG     (1 << 10)
+#define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11)
 
 #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER       0x00000001
 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER         0x00000001
@@ -279,6 +280,11 @@ struct amdgpu_fw_shared_fw_logging {
        uint32_t size;
 };
 
+struct amdgpu_fw_shared_smu_interface_info {
+       uint8_t smu_interface_type;
+       uint8_t padding[3];
+};
+
 struct amdgpu_fw_shared {
        uint32_t present_flag_0;
        uint8_t pad[44];
@@ -287,6 +293,7 @@ struct amdgpu_fw_shared {
        struct amdgpu_fw_shared_multi_queue multi_queue;
        struct amdgpu_fw_shared_sw_ring sw_ring;
        struct amdgpu_fw_shared_fw_logging fw_log;
+       struct amdgpu_fw_shared_smu_interface_info smu_interface_info;
 };
 
 struct amdgpu_vcn_fwlog {
index a025f080aa6a63df0b905082c7cf9059e4231d4d..5e3756643da3fb49c21b7bf1e615e49ce9347212 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 
 #include <drm/drm_drv.h>
+#include <xen/xen.h>
 
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
@@ -710,7 +711,8 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
                adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
 
        if (!reg) {
-               if (is_virtual_machine())       /* passthrough mode exclus sriov mod */
+               /* passthrough mode exclus sriov mod */
+               if (is_virtual_machine() && !xen_initial_domain())
                        adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
        }
 
index f4c6accd32263c537dabbd10fbdf6d8ea874778e..9426e252d8aa6c67618b8244d3791e3e37ce0e28 100644 (file)
@@ -3293,7 +3293,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_3[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
@@ -3429,7 +3429,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_6[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000042),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x00000044),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
@@ -3454,7 +3454,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_7[] = {
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000041),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
@@ -7689,6 +7689,7 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(10, 3, 1):
        case IP_VERSION(10, 3, 3):
+       case IP_VERSION(10, 3, 7):
                preempt_disable();
                clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
                clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
index 46d4bf27ebbbb5ab84b13ce4ba3c35d7ff0a113e..b8cfcc6b1125ccfe99b262fb03ae26dd295f19fa 100644 (file)
@@ -1205,6 +1205,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
        { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
        /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
        { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
+       /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
+       { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
        { 0, 0, 0, 0, 0 },
 };
 
index 3c1d440824a73c30db584d73857509a9134cbdda..7c956cf21bc7ceab131a1e1d56a5430294fd09a9 100644 (file)
@@ -814,7 +814,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
        adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
 
 #ifdef CONFIG_X86_64
-       if (adev->flags & AMD_IS_APU) {
+       if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
                adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
                adev->gmc.aper_size = adev->gmc.real_vram_size;
        }
@@ -1151,6 +1151,16 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       /*
+        * MMHUB failing to disconnect from DF when MMHUB clock gating is
+        * disabled is a new problem observed on DF 3.0.3; the same suspend
+        * sequence shows no such issue on DF 3.0.2 series platforms.
+        */
+       if (adev->in_s0ix && adev->ip_versions[DF_HWIP][0] > IP_VERSION(3, 0, 2)) {
+               dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n");
+               return 0;
+       }
+
        r = adev->mmhub.funcs->set_clockgating(adev, state);
        if (r)
                return r;
index 344d819b4c1b6e9b03d772adefee87ac2ac956ef..979da6f510e886ffba16b323d5c19c72035869c2 100644 (file)
@@ -381,8 +381,9 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
        adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
 
 #ifdef CONFIG_X86_64
-       if (adev->flags & AMD_IS_APU &&
-           adev->gmc.real_vram_size > adev->gmc.aper_size) {
+       if ((adev->flags & AMD_IS_APU) &&
+           adev->gmc.real_vram_size > adev->gmc.aper_size &&
+           !amdgpu_passthrough(adev)) {
                adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
                adev->gmc.aper_size = adev->gmc.real_vram_size;
        }
index ca9841d5669fb9829cf6471b51b918d43835a195..1932a3e4af7e2e75dd530e416459e85993470b04 100644 (file)
@@ -581,7 +581,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
        adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
 
 #ifdef CONFIG_X86_64
-       if (adev->flags & AMD_IS_APU) {
+       if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
                adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
                adev->gmc.aper_size = adev->gmc.real_vram_size;
        }
index 431742eb78110acd28507e85673cf4e94eda4a9f..6009fbfdcc198bcdf82aa5a3bc127163741bd27d 100644 (file)
@@ -1456,7 +1456,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
         */
 
        /* check whether both host-gpu and gpu-gpu xgmi links exist */
-       if ((adev->flags & AMD_IS_APU) ||
+       if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) ||
            (adev->gmc.xgmi.supported &&
             adev->gmc.xgmi.connected_to_cpu)) {
                adev->gmc.aper_base =
@@ -1721,7 +1721,7 @@ static int gmc_v9_0_sw_fini(void *handle)
        amdgpu_gem_force_release(adev);
        amdgpu_vm_manager_fini(adev);
        amdgpu_gart_table_vram_free(adev);
-       amdgpu_bo_unref(&adev->gmc.pdb0_bo);
+       amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
        amdgpu_bo_fini(adev);
 
        return 0;
index dff54190b96c7971463dc0bc479ce35db86f0dbc..f0fbcda76f5e398e40464b1a3b949160056d6d5e 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/firmware.h>
 
 #include "amdgpu.h"
+#include "amdgpu_cs.h"
 #include "amdgpu_vcn.h"
 #include "amdgpu_pm.h"
 #include "soc15.h"
@@ -1900,6 +1901,75 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
        .set_powergating_state = vcn_v1_0_set_powergating_state,
 };
 
+/*
+ * Due to a hardware issue, VCN cannot handle a GTT TMZ buffer on
+ * CHIP_RAVEN series ASICs. As a workaround, move such a buffer to
+ * the VRAM domain before command submission.
+ */
+static int vcn_v1_0_validate_bo(struct amdgpu_cs_parser *parser,
+                               struct amdgpu_job *job,
+                               uint64_t addr)
+{
+       struct ttm_operation_ctx ctx = { false, false };
+       struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+       struct amdgpu_vm *vm = &fpriv->vm;
+       struct amdgpu_bo_va_mapping *mapping;
+       struct amdgpu_bo *bo;
+       int r;
+
+       addr &= AMDGPU_GMC_HOLE_MASK;
+       if (addr & 0x7) {
+               DRM_ERROR("VCN messages must be 8-byte aligned!\n");
+               return -EINVAL;
+       }
+
+       mapping = amdgpu_vm_bo_lookup_mapping(vm, addr/AMDGPU_GPU_PAGE_SIZE);
+       if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
+               return -EINVAL;
+
+       bo = mapping->bo_va->base.bo;
+       if (!(bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED))
+               return 0;
+
+       amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+       r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+       if (r) {
+               DRM_ERROR("Failed to validate the VCN message BO (%d)!\n", r);
+               return r;
+       }
+
+       return r;
+}
+
+static int vcn_v1_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
+                                          struct amdgpu_job *job,
+                                          struct amdgpu_ib *ib)
+{
+       uint32_t msg_lo = 0, msg_hi = 0;
+       int i, r;
+
+       if (!(ib->flags & AMDGPU_IB_FLAGS_SECURE))
+               return 0;
+
+       for (i = 0; i < ib->length_dw; i += 2) {
+               uint32_t reg = amdgpu_ib_get_value(ib, i);
+               uint32_t val = amdgpu_ib_get_value(ib, i + 1);
+
+               if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
+                       msg_lo = val;
+               } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {
+                       msg_hi = val;
+               } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0)) {
+                       r = vcn_v1_0_validate_bo(p, job,
+                                                ((u64)msg_hi) << 32 | msg_lo);
+                       if (r)
+                               return r;
+               }
+       }
+
+       return 0;
+}
+
 static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCN_DEC,
        .align_mask = 0xf,
@@ -1910,6 +1980,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
        .get_rptr = vcn_v1_0_dec_ring_get_rptr,
        .get_wptr = vcn_v1_0_dec_ring_get_wptr,
        .set_wptr = vcn_v1_0_dec_ring_set_wptr,
+       .patch_cs_in_place = vcn_v1_0_ring_patch_cs_in_place,
        .emit_frame_size =
                6 + 6 + /* hdp invalidate / flush */
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
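The patch_cs_in_place hook registered above walks the indirect buffer as register/value pairs, latches the DATA0/DATA1 halves of the message address, and only validates once the CMD write completes the 64-bit value. A stripped-down userspace sketch of that state machine; the register numbers and the validate callback are placeholders, not the real VCN offsets:

#include <stdint.h>
#include <stdio.h>

#define REG_DATA0 0x10	/* placeholder for vcn.internal.data0 */
#define REG_DATA1 0x11	/* placeholder for vcn.internal.data1 */
#define REG_CMD   0x12	/* placeholder for vcn.internal.cmd */

/* placeholder stand-in for vcn_v1_0_validate_bo() */
static int validate_msg(uint64_t addr)
{
	if (addr & 0x7)
		return -1;	/* VCN messages must be 8-byte aligned */
	printf("validate msg at 0x%llx\n", (unsigned long long)addr);
	return 0;
}

static int patch_ib(const uint32_t *ib, int length_dw)
{
	uint32_t msg_lo = 0, msg_hi = 0;
	int i, r;

	for (i = 0; i + 1 < length_dw; i += 2) {
		uint32_t reg = ib[i], val = ib[i + 1];

		if (reg == REG_DATA0)
			msg_lo = val;		/* low 32 bits */
		else if (reg == REG_DATA1)
			msg_hi = val;		/* high 32 bits */
		else if (reg == REG_CMD) {
			r = validate_msg(((uint64_t)msg_hi << 32) | msg_lo);
			if (r)
				return r;
		}
	}
	return 0;
}

int main(void)
{
	uint32_t ib[] = { REG_DATA0, 0x1000, REG_DATA1, 0x1, REG_CMD, 0 };

	return patch_ib(ib, 6);
}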
index c87263ed20ecb8a17d3b8bfaa9ca1d352035ae43..cb5f0a12333f308d2ec358014ec5cf206f305d11 100644 (file)
@@ -219,6 +219,11 @@ static int vcn_v3_0_sw_init(void *handle)
                                             cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) |
                                             cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB);
                fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED);
+               fw_shared->present_flag_0 |= AMDGPU_VCN_SMU_VERSION_INFO_FLAG;
+               if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 2))
+                       fw_shared->smu_interface_info.smu_interface_type = 2;
+               else if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 1))
+                       fw_shared->smu_interface_info.smu_interface_type = 1;
 
                if (amdgpu_vcnfw_log)
                        amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
@@ -575,8 +580,8 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
                        AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
 
        /* VCN global tiling registers */
-       WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
-               UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+       WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+               UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
 }
 
 static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
@@ -1480,8 +1485,11 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
 
 static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
 {
+       struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
        uint32_t tmp;
 
+       vcn_v3_0_pause_dpg_mode(adev, inst_idx, &state);
+
        /* Wait for power status to be 1 */
        SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
                UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
index 039b90cdc3bcac9f22167d7d25489a3ffa113535..45f0188c42739b7d9a80803848af12e6f55929b2 100644 (file)
 #include "mxgpu_vi.h"
 #include "amdgpu_dm.h"
 
+#if IS_ENABLED(CONFIG_X86)
+#include <asm/intel-family.h>
+#endif
+
 #define ixPCIE_LC_L1_PM_SUBSTATE       0x100100C6
 #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK       0x00000001L
 #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK   0x00000002L
@@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
                WREG32_PCIE(ixPCIE_LC_CNTL, data);
 }
 
+static bool aspm_support_quirk_check(void)
+{
+#if IS_ENABLED(CONFIG_X86)
+       struct cpuinfo_x86 *c = &cpu_data(0);
+
+       return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
+#else
+       return true;
+#endif
+}
+
 static void vi_program_aspm(struct amdgpu_device *adev)
 {
        u32 data, data1, orig;
        bool bL1SS = false;
        bool bClkReqSupport = true;
 
-       if (!amdgpu_device_should_use_aspm(adev))
+       if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
                return;
 
        if (adev->flags & AMD_IS_APU ||
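aspm_support_quirk_check above compiles to "always supported" off x86; on x86 it keys off the boot CPU's family/model pair. A userspace sketch of the same test via CPUID leaf 1; the Alder Lake model number 0x97 is my reading of intel-family.h, so verify before relying on it:

#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

static bool is_alder_lake(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int family, model;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return false;

	family = (eax >> 8) & 0xf;
	model  = (eax >> 4) & 0xf;
	if (family == 0x6 || family == 0xf)
		model |= ((eax >> 16) & 0xf) << 4;	/* extended model */

	return family == 6 && model == 0x97;	/* INTEL_FAM6_ALDERLAKE (assumed value) */
}

int main(void)
{
	printf("ASPM allowed: %d\n", !is_alder_lake());
	return 0;
}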
index 339e12c94cfff496be946947397dcfc4a91a3f8e..62aa6c9d5123df2a36d65df29910dd6a13b0d14c 100644 (file)
@@ -483,15 +483,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
        }
 
        /* Verify module parameters regarding mapped process number */
-       if ((hws_max_conc_proc < 0)
-                       || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
-               dev_err(kfd_device,
-                       "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
-                       hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
-                       kfd->vm_info.vmid_num_kfd);
+       if (hws_max_conc_proc >= 0)
+               kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd);
+       else
                kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
-       } else
-               kfd->max_proc_per_quantum = hws_max_conc_proc;
 
        /* calculate max size of mqds needed for queues */
        size = max_num_of_queues_per_device *
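The hws_max_conc_proc change above replaces reject-and-warn validation with silent clamping: a non-negative module parameter is capped at the VMID count, and a negative one selects the hardware maximum. The policy in isolation, as a sketch:

#include <stdint.h>
#include <stdio.h>

static uint32_t resolve_max_proc(int param, uint32_t vmid_num_kfd)
{
	if (param >= 0)
		return (uint32_t)param < vmid_num_kfd ?
		       (uint32_t)param : vmid_num_kfd;	/* min() clamp */
	return vmid_num_kfd;	/* negative means "use hardware maximum" */
}

int main(void)
{
	printf("%u\n", resolve_max_proc(99, 8));	/* clamped to 8 */
	printf("%u\n", resolve_max_proc(-1, 8));	/* defaults to 8 */
	return 0;
}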
@@ -536,7 +531,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
                goto kfd_doorbell_error;
        }
 
-       kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
+       if (amdgpu_use_xgmi_p2p)
+               kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
 
        kfd->noretry = kfd->adev->gmc.noretry;
 
index acf4f79758501367b6b63733c392ca33e4e23010..198672264492891b4ac4b3dd968706a22bcf208b 100644 (file)
@@ -130,19 +130,33 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
 }
 
 static void increment_queue_count(struct device_queue_manager *dqm,
-                       enum kfd_queue_type type)
+                                 struct qcm_process_device *qpd,
+                                 struct queue *q)
 {
        dqm->active_queue_count++;
-       if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+       if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+           q->properties.type == KFD_QUEUE_TYPE_DIQ)
                dqm->active_cp_queue_count++;
+
+       if (q->properties.is_gws) {
+               dqm->gws_queue_count++;
+               qpd->mapped_gws_queue = true;
+       }
 }
 
 static void decrement_queue_count(struct device_queue_manager *dqm,
-                       enum kfd_queue_type type)
+                                 struct qcm_process_device *qpd,
+                                 struct queue *q)
 {
        dqm->active_queue_count--;
-       if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+       if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+           q->properties.type == KFD_QUEUE_TYPE_DIQ)
                dqm->active_cp_queue_count--;
+
+       if (q->properties.is_gws) {
+               dqm->gws_queue_count--;
+               qpd->mapped_gws_queue = false;
+       }
 }
 
 /*
@@ -412,7 +426,7 @@ add_queue_to_list:
        list_add(&q->list, &qpd->queues_list);
        qpd->queue_count++;
        if (q->properties.is_active)
-               increment_queue_count(dqm, q->properties.type);
+               increment_queue_count(dqm, qpd, q);
 
        /*
         * Unconditionally increment this counter, regardless of the queue's
@@ -601,13 +615,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
                deallocate_vmid(dqm, qpd, q);
        }
        qpd->queue_count--;
-       if (q->properties.is_active) {
-               decrement_queue_count(dqm, q->properties.type);
-               if (q->properties.is_gws) {
-                       dqm->gws_queue_count--;
-                       qpd->mapped_gws_queue = false;
-               }
-       }
+       if (q->properties.is_active)
+               decrement_queue_count(dqm, qpd, q);
 
        return retval;
 }
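The refactor above funnels every queue activation and deactivation through one pair of helpers so the active-queue, CP-queue, and GWS counters can never drift apart; the old code repeated the GWS bookkeeping at some call sites and missed it at others. The shape of the pattern, with hypothetical types:

#include <stdbool.h>
#include <stdio.h>

struct dqm { int active_queues, gws_queues; };
struct queue { bool is_gws; };

/* single choke point: no caller can update one counter and skip the other */
static void inc_queue_count(struct dqm *dqm, const struct queue *q)
{
	dqm->active_queues++;
	if (q->is_gws)
		dqm->gws_queues++;
}

static void dec_queue_count(struct dqm *dqm, const struct queue *q)
{
	dqm->active_queues--;
	if (q->is_gws)
		dqm->gws_queues--;
}

int main(void)
{
	struct dqm dqm = { 0, 0 };
	struct queue q = { .is_gws = true };

	inc_queue_count(&dqm, &q);
	dec_queue_count(&dqm, &q);
	printf("%d %d\n", dqm.active_queues, dqm.gws_queues);	/* 0 0 */
	return 0;
}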
@@ -700,12 +709,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
         * dqm->active_queue_count to determine whether a new runlist must be
         * uploaded.
         */
-       if (q->properties.is_active && !prev_active)
-               increment_queue_count(dqm, q->properties.type);
-       else if (!q->properties.is_active && prev_active)
-               decrement_queue_count(dqm, q->properties.type);
-
-       if (q->gws && !q->properties.is_gws) {
+       if (q->properties.is_active && !prev_active) {
+               increment_queue_count(dqm, &pdd->qpd, q);
+       } else if (!q->properties.is_active && prev_active) {
+               decrement_queue_count(dqm, &pdd->qpd, q);
+       } else if (q->gws && !q->properties.is_gws) {
                if (q->properties.is_active) {
                        dqm->gws_queue_count++;
                        pdd->qpd.mapped_gws_queue = true;
@@ -767,11 +775,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
                mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
                                q->properties.type)];
                q->properties.is_active = false;
-               decrement_queue_count(dqm, q->properties.type);
-               if (q->properties.is_gws) {
-                       dqm->gws_queue_count--;
-                       qpd->mapped_gws_queue = false;
-               }
+               decrement_queue_count(dqm, qpd, q);
 
                if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
                        continue;
@@ -817,7 +821,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
                        continue;
 
                q->properties.is_active = false;
-               decrement_queue_count(dqm, q->properties.type);
+               decrement_queue_count(dqm, qpd, q);
        }
        pdd->last_evict_timestamp = get_jiffies_64();
        retval = execute_queues_cpsch(dqm,
@@ -888,11 +892,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
                mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
                                q->properties.type)];
                q->properties.is_active = true;
-               increment_queue_count(dqm, q->properties.type);
-               if (q->properties.is_gws) {
-                       dqm->gws_queue_count++;
-                       qpd->mapped_gws_queue = true;
-               }
+               increment_queue_count(dqm, qpd, q);
 
                if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
                        continue;
@@ -950,7 +950,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
                        continue;
 
                q->properties.is_active = true;
-               increment_queue_count(dqm, q->properties.type);
+               increment_queue_count(dqm, &pdd->qpd, q);
        }
        retval = execute_queues_cpsch(dqm,
                                KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
@@ -1378,7 +1378,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
                        dqm->total_queue_count);
 
        list_add(&kq->list, &qpd->priv_queue_list);
-       increment_queue_count(dqm, kq->queue->properties.type);
+       increment_queue_count(dqm, qpd, kq->queue);
        qpd->is_debug = true;
        execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
        dqm_unlock(dqm);
@@ -1392,7 +1392,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
 {
        dqm_lock(dqm);
        list_del(&kq->list);
-       decrement_queue_count(dqm, kq->queue->properties.type);
+       decrement_queue_count(dqm, qpd, kq->queue);
        qpd->is_debug = false;
        execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
        /*
@@ -1467,7 +1467,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
        qpd->queue_count++;
 
        if (q->properties.is_active) {
-               increment_queue_count(dqm, q->properties.type);
+               increment_queue_count(dqm, qpd, q);
 
                execute_queues_cpsch(dqm,
                                KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
@@ -1683,15 +1683,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
        list_del(&q->list);
        qpd->queue_count--;
        if (q->properties.is_active) {
-               decrement_queue_count(dqm, q->properties.type);
+               decrement_queue_count(dqm, qpd, q);
                retval = execute_queues_cpsch(dqm,
                                KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
                if (retval == -ETIME)
                        qpd->reset_wavefronts = true;
-               if (q->properties.is_gws) {
-                       dqm->gws_queue_count--;
-                       qpd->mapped_gws_queue = false;
-               }
        }
 
        /*
@@ -1932,7 +1928,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
        /* Clean all kernel queues */
        list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
                list_del(&kq->list);
-               decrement_queue_count(dqm, kq->queue->properties.type);
+               decrement_queue_count(dqm, qpd, kq->queue);
                qpd->is_debug = false;
                dqm->total_queue_count--;
                filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
@@ -1945,13 +1941,8 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
                else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
                        deallocate_sdma_queue(dqm, q);
 
-               if (q->properties.is_active) {
-                       decrement_queue_count(dqm, q->properties.type);
-                       if (q->properties.is_gws) {
-                               dqm->gws_queue_count--;
-                               qpd->mapped_gws_queue = false;
-                       }
-               }
+               if (q->properties.is_active)
+                       decrement_queue_count(dqm, qpd, q);
 
                dqm->total_queue_count--;
        }
index deecccebe5b64ceb78f152aac6ad3fbfdff82766..64f4a51cc880e77f80268d2c947c9dc8c94cfa70 100644 (file)
@@ -749,6 +749,8 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
        event_waiters = kmalloc_array(num_events,
                                        sizeof(struct kfd_event_waiter),
                                        GFP_KERNEL);
+       if (!event_waiters)
+               return NULL;
 
        for (i = 0; (event_waiters) && (i < num_events) ; i++) {
                init_wait(&event_waiters[i].wait);
index 9967a73d5b0f2e41a402a178957b3d8ad5f957f6..8f58fc491b289cd7ccbdf6f227bd8bc8a7a3d81e 100644 (file)
@@ -1103,7 +1103,7 @@ struct kfd_criu_queue_priv_data {
        uint32_t priority;
        uint32_t q_percent;
        uint32_t doorbell_id;
-       uint32_t is_gws;
+       uint32_t gws;
        uint32_t sdma_id;
        uint32_t eop_ring_buffer_size;
        uint32_t ctx_save_restore_area_size;
index 6eca9509f2e38472bae67cb91bcac20696be1bb9..4f58e671d39b05c84be5685b3a305e4fc5a1719f 100644 (file)
@@ -636,6 +636,8 @@ static int criu_checkpoint_queue(struct kfd_process_device *pdd,
        q_data->ctx_save_restore_area_size =
                q->properties.ctx_save_restore_area_size;
 
+       q_data->gws = !!q->gws;
+
        ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
        if (ret) {
                pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
@@ -743,7 +745,6 @@ static void set_queue_properties_from_criu(struct queue_properties *qp,
                                          struct kfd_criu_queue_priv_data *q_data)
 {
        qp->is_interop = false;
-       qp->is_gws = q_data->is_gws;
        qp->queue_percent = q_data->q_percent;
        qp->priority = q_data->priority;
        qp->queue_address = q_data->q_address;
@@ -826,12 +827,15 @@ int kfd_criu_restore_queue(struct kfd_process *p,
                                NULL);
        if (ret) {
                pr_err("Failed to create new queue err:%d\n", ret);
-               ret = -EINVAL;
+               goto exit;
        }
 
+       if (q_data->gws)
+               ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);
+
 exit:
        if (ret)
-               pr_err("Failed to create queue (%d)\n", ret);
+               pr_err("Failed to restore queue (%d)\n", ret);
        else
                pr_debug("Queue id %d was restored successfully\n", queue_id);
 
index e4beebb1c80a21ed62b5c0abca0a373aed9bce35..f2e1d506ba211f04c35104e692d5bb197dc0df10 100644 (file)
@@ -247,15 +247,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
                return ret;
        }
 
-       ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
-                              O_RDWR);
-       if (ret < 0) {
-               kfifo_free(&client->fifo);
-               kfree(client);
-               return ret;
-       }
-       *fd = ret;
-
        init_waitqueue_head(&client->wait_queue);
        spin_lock_init(&client->lock);
        client->events = 0;
@@ -265,5 +256,20 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
        list_add_rcu(&client->list, &dev->smi_clients);
        spin_unlock(&dev->smi_lock);
 
+       ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
+                              O_RDWR);
+       if (ret < 0) {
+               spin_lock(&dev->smi_lock);
+               list_del_rcu(&client->list);
+               spin_unlock(&dev->smi_lock);
+
+               synchronize_rcu();
+
+               kfifo_free(&client->fifo);
+               kfree(client);
+               return ret;
+       }
+       *fd = ret;
+
        return 0;
 }
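The reordering above closes an unwind hazard: once the client is published on the RCU-protected list, a failing anon_inode_getfd() must unpublish it and wait out a grace period before freeing, otherwise a concurrent reader can dereference freed memory. The safe teardown ordering, as a kernel-style sketch rather than the driver's exact code:

/* unpublish first: no new reader can find the client */
spin_lock(&dev->smi_lock);
list_del_rcu(&client->list);
spin_unlock(&dev->smi_lock);

/* wait for readers already inside an RCU section to drain */
synchronize_rcu();

/* only now is it safe to release the memory */
kfifo_free(&client->fifo);
kfree(client);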
index b30656959fd862324591d3d3ad73ff465ab642b6..62139ff35476c3a94f1fb31a3393d8ec75b0450e 100644 (file)
@@ -2714,7 +2714,8 @@ static int dm_resume(void *handle)
                 * this is the case when traversing through already created
                 * MST connectors, should be skipped
                 */
-               if (aconnector->mst_port)
+               if (aconnector->dc_link &&
+                   aconnector->dc_link->type == dc_connection_mst_branch)
                        continue;
 
                mutex_lock(&aconnector->hpd_lock);
@@ -3972,7 +3973,7 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap
                                 max - min);
 }
 
-static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
+static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
                                         int bl_idx,
                                         u32 user_brightness)
 {
@@ -4003,7 +4004,8 @@ static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
                        DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx);
        }
 
-       return rc ? 0 : 1;
+       if (rc)
+               dm->actual_brightness[bl_idx] = user_brightness;
 }
 
 static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
@@ -9947,7 +9949,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
        /* restore the backlight level */
        for (i = 0; i < dm->num_of_edps; i++) {
                if (dm->backlight_dev[i] &&
-                   (amdgpu_dm_backlight_get_level(dm, i) != dm->brightness[i]))
+                   (dm->actual_brightness[i] != dm->brightness[i]))
                        amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]);
        }
 #endif
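The backlight hunks above record the last level the hardware actually accepted in actual_brightness (declared in the header hunk that follows), so the commit tail re-issues the write only when the applied level has fallen behind the requested one. A compact sketch of that cache-and-compare scheme, with a stand-in setter:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct dm { uint32_t brightness, actual_brightness; };

/* stand-in for the hardware write; returns whether it was accepted */
static bool hw_set_level(uint32_t level) { (void)level; return true; }

static void set_level(struct dm *dm, uint32_t user)
{
	dm->brightness = user;			/* what userspace asked for */
	if (hw_set_level(user))
		dm->actual_brightness = user;	/* only on success */
}

static void restore(struct dm *dm)
{
	if (dm->actual_brightness != dm->brightness)
		set_level(dm, dm->brightness);	/* retry the missed write */
}

int main(void)
{
	struct dm dm = { 0, 0 };

	set_level(&dm, 128);
	restore(&dm);
	printf("%u %u\n", dm.brightness, dm.actual_brightness);
	return 0;
}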
index 6a908d736d6ac9065fc340e6e6151fff03d036f9..7e44b04294488a39560da4a8d4da1033f492bef4 100644 (file)
@@ -540,6 +540,12 @@ struct amdgpu_display_manager {
         * cached backlight values.
         */
        u32 brightness[AMDGPU_DM_MAX_NUM_EDP];
+       /**
+        * @actual_brightness:
+        *
+        * last successfully applied backlight values.
+        */
+       u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP];
 };
 
 enum dsc_clock_force_state {
index dfba6138f53839ec7ca93e1a616515327a65d542..26feefbb8990ae52481fa6af7f044962d7ea60ba 100644 (file)
@@ -374,7 +374,7 @@ void dce_clock_read_ss_info(struct clk_mgr_internal *clk_mgr_dce)
                                clk_mgr_dce->dprefclk_ss_percentage =
                                                info.spread_spectrum_percentage;
                        }
-                       if (clk_mgr_dce->base.ctx->dc->debug.ignore_dpref_ss)
+                       if (clk_mgr_dce->base.ctx->dc->config.ignore_dpref_ss)
                                clk_mgr_dce->dprefclk_ss_percentage = 0;
                }
        }
index edda572dc57017520e4e2e19cc78c67de6331479..8be4c19706285ffbaa933cdd3dae9bdd7f768957 100644 (file)
@@ -436,57 +436,84 @@ static void dcn315_clk_mgr_helper_populate_bw_params(
                struct integrated_info *bios_info,
                const DpmClocks_315_t *clock_table)
 {
-       int i, j;
+       int i;
        struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
-       uint32_t max_dispclk = 0, max_dppclk = 0;
-
-       j = -1;
-
-       ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL);
-
-       /* Find lowest DPM, FCLK is filled in reverse order*/
-
-       for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) {
-               if (clock_table->DfPstateTable[i].FClk != 0) {
-                       j = i;
-                       break;
+       uint32_t max_dispclk, max_dppclk, max_pstate, max_socclk, max_fclk = 0, min_pstate = 0;
+       struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1];
+
+       max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled);
+       max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled);
+       max_socclk = find_max_clk_value(clock_table->SocClocks, clock_table->NumSocClkLevelsEnabled);
+
+       /* Find highest fclk pstate */
+       for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) {
+               if (clock_table->DfPstateTable[i].FClk > max_fclk) {
+                       max_fclk = clock_table->DfPstateTable[i].FClk;
+                       max_pstate = i;
                }
        }
 
-       if (j == -1) {
-               /* clock table is all 0s, just use our own hardcode */
-               ASSERT(0);
-               return;
-       }
-
-       bw_params->clk_table.num_entries = j + 1;
-
-       /* dispclk and dppclk can be max at any voltage, same number of levels for both */
-       if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS &&
-           clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) {
-               max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled);
-               max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled);
-       } else {
-               ASSERT(0);
-       }
+       /* For 315 we want to base clock table on dcfclk, need at least one entry regardless of pmfw table */
+       for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) {
+               int j;
+               uint32_t min_fclk = clock_table->DfPstateTable[0].FClk;
 
-       for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) {
-               int temp;
+               for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) {
+                       if (clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i]
+                                       && clock_table->DfPstateTable[j].FClk < min_fclk) {
+                               min_fclk = clock_table->DfPstateTable[j].FClk;
+                               min_pstate = j;
+                       }
+               }
 
-               bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk;
-               bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk;
-               bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage;
+               bw_params->clk_table.entries[i].fclk_mhz = min_fclk;
+               bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk;
+               bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage;
+               bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i];
+               bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i];
+               bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+               bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
                bw_params->clk_table.entries[i].wck_ratio = 1;
-               temp = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage);
-               if (temp)
-                       bw_params->clk_table.entries[i].dcfclk_mhz = temp;
-               temp = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage);
-               if (temp)
-                       bw_params->clk_table.entries[i].socclk_mhz = temp;
+       }
+
+       /* Make sure to include at least one entry and highest pstate */
+       if (max_pstate != min_pstate) {
+               bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
+               bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk;
+               bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage;
+               bw_params->clk_table.entries[i].dcfclk_mhz = find_clk_for_voltage(
+                               clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[max_pstate].Voltage);
+               bw_params->clk_table.entries[i].socclk_mhz = find_clk_for_voltage(
+                               clock_table, clock_table->SocClocks, clock_table->DfPstateTable[max_pstate].Voltage);
                bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
                bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+               bw_params->clk_table.entries[i].wck_ratio = 1;
+               i++;
        }
+       bw_params->clk_table.num_entries = i;
+
+       /* Include highest socclk */
+       if (bw_params->clk_table.entries[i-1].socclk_mhz < max_socclk)
+               bw_params->clk_table.entries[i-1].socclk_mhz = max_socclk;
 
+       /* Set any 0 clocks to the max default setting. Not an issue for
+        * power since we aren't doing switching in that case anyway.
+        */
+       for (i = 0; i < bw_params->clk_table.num_entries; i++) {
+               if (!bw_params->clk_table.entries[i].fclk_mhz) {
+                       bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
+                       bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz;
+                       bw_params->clk_table.entries[i].voltage = def_max.voltage;
+               }
+               if (!bw_params->clk_table.entries[i].dcfclk_mhz)
+                       bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz;
+               if (!bw_params->clk_table.entries[i].socclk_mhz)
+                       bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz;
+               if (!bw_params->clk_table.entries[i].dispclk_mhz)
+                       bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz;
+               if (!bw_params->clk_table.entries[i].dppclk_mhz)
+                       bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz;
+       }
        bw_params->vram_type = bios_info->memory_type;
        bw_params->num_channels = bios_info->ma_channel_number;
 
index 880ffea2afc6a1df3822a12c5ec3d8e673ea6354..2600313fea5797321a81688a20f028721cd2fcfb 100644 (file)
@@ -80,8 +80,8 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D
 #define VBIOSSMC_MSG_SetDppclkFreq                0x06 ///< Set DPP clock frequency in MHZ
 #define VBIOSSMC_MSG_SetHardMinDcfclkByFreq       0x07 ///< Set DCF clock frequency hard min in MHZ
 #define VBIOSSMC_MSG_SetMinDeepSleepDcfclk        0x08 ///< Set DCF clock minimum frequency in deep sleep in MHZ
-#define VBIOSSMC_MSG_SetPhyclkVoltageByFreq       0x09 ///< Set display phy clock frequency in MHZ in case VMIN does not support phy frequency
-#define VBIOSSMC_MSG_GetFclkFrequency             0x0A ///< Get FCLK frequency, return frequemcy in MHZ
+#define VBIOSSMC_MSG_GetDtbclkFreq                0x09 ///< Get display dtb clock frequency in MHZ
+#define VBIOSSMC_MSG_SetDtbClk                    0x0A ///< Enable/disable the display dtb clock
 #define VBIOSSMC_MSG_SetDisplayCount              0x0B ///< Inform PMFW of number of display connected
 #define VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown 0x0C ///< To ask PMFW turn off TMDP 48MHz refclk during display off to save power
 #define VBIOSSMC_MSG_UpdatePmeRestore             0x0D ///< To ask PMFW to write into Azalia for PME wake up event
@@ -324,15 +324,26 @@ int dcn315_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr)
        return (dprefclk_get_mhz * 1000);
 }
 
-int dcn315_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr)
+int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr)
 {
        int fclk_get_mhz = -1;
 
        if (clk_mgr->smu_present) {
                fclk_get_mhz = dcn315_smu_send_msg_with_param(
                        clk_mgr,
-                       VBIOSSMC_MSG_GetFclkFrequency,
+                       VBIOSSMC_MSG_GetDtbclkFreq,
                        0);
        }
        return (fclk_get_mhz * 1000);
 }
+
+void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+       if (!clk_mgr->smu_present)
+               return;
+
+       dcn315_smu_send_msg_with_param(
+                       clk_mgr,
+                       VBIOSSMC_MSG_SetDtbClk,
+                       enable);
+}
index 66fa42f8dd189768e414d42cd8d2506ce4d63553..5aa3275ac7d88c879c86958fafcd793be746386d 100644 (file)
@@ -37,6 +37,7 @@
 #define NUM_SOC_VOLTAGE_LEVELS  4
 #define NUM_DF_PSTATE_LEVELS    4
 
+
 typedef struct {
   uint16_t MinClock; // This is either DCFCLK or SOCCLK (in MHz)
   uint16_t MaxClock; // This is either DCFCLK or SOCCLK (in MHz)
@@ -124,5 +125,6 @@ void dcn315_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
 void dcn315_smu_request_voltage_via_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz);
 void dcn315_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr);
 int dcn315_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr);
-int dcn315_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr);
+int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr);
+void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable);
 #endif /* DAL_DC_315_SMU_H_ */
index 702d00ce7da4c439971e26eeb5a3e2d6516af1c6..3121dd2d2a91113e3aa449fd7619a35d4c101578 100644 (file)
@@ -686,8 +686,8 @@ void dcn316_clk_mgr_construct(
        clk_mgr->base.base.dprefclk_khz = dcn316_smu_get_dpref_clk(&clk_mgr->base);
        clk_mgr->base.dccg->ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz;
        dce_clock_read_ss_info(&clk_mgr->base);
-       clk_mgr->base.dccg->ref_dtbclk_khz =
-       dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz);
+       /*clk_mgr->base.dccg->ref_dtbclk_khz =
+       dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz);*/
 
        clk_mgr->base.base.bw_params = &dcn316_bw_params;
 
index f6e19efea7568de940803065da576aab11b85fc9..c436db416708e3f8f857e9cc5d8abbcc6f34d77d 100644 (file)
@@ -2389,6 +2389,8 @@ static enum surface_update_type check_update_surfaces_for_stream(
 
                if (stream_update->mst_bw_update)
                        su_flags->bits.mst_bw = 1;
+               if (stream_update->crtc_timing_adjust && dc_extended_blank_supported(dc))
+                       su_flags->bits.crtc_timing_adjust = 1;
 
                if (su_flags->raw != 0)
                        overall_type = UPDATE_TYPE_FULL;
@@ -2650,6 +2652,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
        if (update->vrr_infopacket)
                stream->vrr_infopacket = *update->vrr_infopacket;
 
+       if (update->crtc_timing_adjust)
+               stream->adjust = *update->crtc_timing_adjust;
+
        if (update->dpms_off)
                stream->dpms_off = *update->dpms_off;
 
@@ -4051,3 +4056,17 @@ void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bo
        if (pipe->stream_res.abm && pipe->stream_res.abm->funcs->set_abm_pause)
                pipe->stream_res.abm->funcs->set_abm_pause(pipe->stream_res.abm, !enable, i, pipe->stream_res.tg->inst);
 }
+/*
+ * dc_extended_blank_supported: Decide whether extended blank is supported
+ *
+ * Extended blank is a freesync optimization feature to be enabled in the future.
+ * During the extra vblank period gained from freesync, we have the ability to enter z9/z10.
+ *
+ * @param [in] dc: Current DC state
+ * @return: Indicate whether extended blank is supported (true or false)
+ */
+bool dc_extended_blank_supported(struct dc *dc)
+{
+       return dc->debug.extended_blank_optimization && !dc->debug.disable_z10
+               && dc->caps.zstate_support && dc->caps.is_apu;
+}
index cb87dd643180876011b4855557a56a410db5a713..bbaa5abdf88859a3d77cec38acaeabd2a1c400c4 100644 (file)
@@ -983,8 +983,7 @@ static bool should_verify_link_capability_destructively(struct dc_link *link,
                                destrictive = false;
                        }
                }
-       } else if (dc_is_hdmi_signal(link->local_sink->sink_signal))
-               destrictive = true;
+       }
 
        return destrictive;
 }
index 351081f574cb7bb6755412c69b9fc1271cdf41eb..95b5b5bfa1ffa407d93ed8c487347bb768abe868 100644 (file)
@@ -4440,7 +4440,7 @@ static void dp_test_get_audio_test_data(struct dc_link *link, bool disable_video
                &dpcd_pattern_type.value,
                sizeof(dpcd_pattern_type));
 
-       channel_count = dpcd_test_mode.bits.channel_count + 1;
+       channel_count = min(dpcd_test_mode.bits.channel_count + 1, AUDIO_CHANNELS_COUNT);
 
        // read pattern periods for requested channels when sawTooth pattern is requested
        if (dpcd_pattern_type.value == AUDIO_TEST_PATTERN_SAWTOOTH ||
@@ -5216,6 +5216,62 @@ static void retrieve_cable_id(struct dc_link *link)
                                &link->dpcd_caps.cable_id, &usbc_cable_id);
 }
 
+/* DPRX may take some time to respond to AUX messages after HPD is asserted.
+ * If an AUX read fails, try to wake the unresponsive DPRX by toggling DPCD SET_POWER (0x600).
+ */
+static enum dc_status wa_try_to_wake_dprx(struct dc_link *link, uint64_t timeout_ms)
+{
+       enum dc_status status = DC_ERROR_UNEXPECTED;
+       uint8_t dpcd_data = 0;
+       uint64_t start_ts = 0;
+       uint64_t current_ts = 0;
+       uint64_t time_taken_ms = 0;
+       enum dc_connection_type type = dc_connection_none;
+
+       status = core_link_read_dpcd(
+                       link,
+                       DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV,
+                       &dpcd_data,
+                       sizeof(dpcd_data));
+
+       if (status != DC_OK) {
+               DC_LOG_WARNING("%s: Read DPCD LTTPR_CAP failed - try to toggle DPCD SET_POWER for %lld ms.",
+                               __func__,
+                               timeout_ms);
+               start_ts = dm_get_timestamp(link->ctx);
+
+               do {
+                       if (!dc_link_detect_sink(link, &type) || type == dc_connection_none)
+                               break;
+
+                       dpcd_data = DP_SET_POWER_D3;
+                       status = core_link_write_dpcd(
+                                       link,
+                                       DP_SET_POWER,
+                                       &dpcd_data,
+                                       sizeof(dpcd_data));
+
+                       dpcd_data = DP_SET_POWER_D0;
+                       status = core_link_write_dpcd(
+                                       link,
+                                       DP_SET_POWER,
+                                       &dpcd_data,
+                                       sizeof(dpcd_data));
+
+                       current_ts = dm_get_timestamp(link->ctx);
+                       time_taken_ms = div_u64(dm_get_elapse_time_in_ns(link->ctx, current_ts, start_ts), 1000000);
+               } while (status != DC_OK && time_taken_ms < timeout_ms);
+
+               DC_LOG_WARNING("%s: DPCD SET_POWER %s after %lld ms%s",
+                               __func__,
+                               (status == DC_OK) ? "succeeded" : "failed",
+                               time_taken_ms,
+                               (type == dc_connection_none) ? ". Unplugged." : ".");
+       }
+
+       return status;
+}
+
 static bool retrieve_link_cap(struct dc_link *link)
 {
        /* DP_ADAPTER_CAP - DP_DPCD_REV + 1 == 16 and also DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT + 1 == 16,
@@ -5251,6 +5307,15 @@ static bool retrieve_link_cap(struct dc_link *link)
        dc_link_aux_try_to_configure_timeout(link->ddc,
                        LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD);
 
+       /* Try to ensure AUX channel active before proceeding. */
+       if (link->dc->debug.aux_wake_wa.bits.enable_wa) {
+               uint64_t timeout_ms = link->dc->debug.aux_wake_wa.bits.timeout_ms;
+
+               if (link->dc->debug.aux_wake_wa.bits.use_default_timeout)
+                       timeout_ms = LINK_AUX_WAKE_TIMEOUT_MS;
+               status = wa_try_to_wake_dprx(link, timeout_ms);
+       }
+
        is_lttpr_present = dp_retrieve_lttpr_cap(link);
        /* Read DP tunneling information. */
        status = dpcd_get_tunneling_device_data(link);
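wa_try_to_wake_dprx above bounds its SET_POWER D3/D0 toggling with a wall-clock budget instead of a retry count, so a slow DPRX gets as many attempts as fit in the window. The generic retry-until-deadline shape, with a placeholder attempt callback:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

/* placeholder for the D3/D0 SET_POWER toggle and readback */
static bool attempt(void) { return false; }

static bool retry_until(uint64_t timeout_ms)
{
	uint64_t start = now_ms();
	bool ok;

	do {
		ok = attempt();
	} while (!ok && now_ms() - start < timeout_ms);

	return ok;
}

int main(void)
{
	printf("woke: %d\n", retry_until(50));
	return 0;
}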
index 7af153434e9e4578b7217d83db2cd0b109d70cb7..d251c3f3a7140434078a7f12443f3d049ba726bb 100644 (file)
@@ -1685,8 +1685,8 @@ bool dc_is_stream_unchanged(
        if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param)
                return false;
 
-       // Only Have Audio left to check whether it is same or not. This is a corner case for Tiled sinks
-       if (old_stream->audio_info.mode_count != stream->audio_info.mode_count)
+       /* compare audio info */
+       if (memcmp(&old_stream->audio_info, &stream->audio_info, sizeof(stream->audio_info)) != 0)
                return false;
 
        return true;
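Comparing the whole audio_info with memcmp, as above, catches any field difference in one shot, but byte-wise equality is only sound when neither copy carries uninitialized padding. A sketch of the safe form, with an illustrative struct:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct audio_info_demo {
	int mode_count;
	short flags;	/* compiler may insert padding after this */
	int rate;
};

static bool audio_info_equal(const struct audio_info_demo *a,
			     const struct audio_info_demo *b)
{
	/* requires both copies were zeroed first so padding compares equal */
	return memcmp(a, b, sizeof(*a)) == 0;
}

int main(void)
{
	struct audio_info_demo x, y;

	memset(&x, 0, sizeof(x));
	memset(&y, 0, sizeof(y));
	x.mode_count = y.mode_count = 2;
	printf("%d\n", audio_info_equal(&x, &y));	/* 1 */
	return 0;
}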
index 4ffab7bb1098b6548f092f5f6ac6f7d664097b2a..9e79f60e6129fe1c5b7caef511383e33b4a7b88f 100644 (file)
@@ -188,6 +188,7 @@ struct dc_caps {
        bool psp_setup_panel_mode;
        bool extended_aux_timeout_support;
        bool dmcub_support;
+       bool zstate_support;
        uint32_t num_of_internal_disp;
        enum dp_protocol_version max_dp_protocol_version;
        unsigned int mall_size_per_mem_channel;
@@ -339,6 +340,7 @@ struct dc_config {
        bool is_asymmetric_memory;
        bool is_single_rank_dimm;
        bool use_pipe_ctx_sync_logic;
+       bool ignore_dpref_ss;
 };
 
 enum visual_confirm {
@@ -525,6 +527,22 @@ union dpia_debug_options {
        uint32_t raw;
 };
 
+/* AUX wake workaround options
+ * bit 0: enable/disable the workaround
+ * bit 1: use default timeout LINK_AUX_WAKE_TIMEOUT_MS
+ * bits 15-2: reserved
+ * bits 31-16: timeout in ms
+ */
+union aux_wake_wa_options {
+       struct {
+               uint32_t enable_wa : 1;
+               uint32_t use_default_timeout : 1;
+               uint32_t rsvd: 14;
+               uint32_t timeout_ms : 16;
+       } bits;
+       uint32_t raw;
+};
+
 struct dc_debug_data {
        uint32_t ltFailCount;
        uint32_t i2cErrorCount;
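aux_wake_wa_options above overlays one 32-bit debug knob with named fields, so a single raw value can be passed end to end and still decoded precisely. A sketch of setting and reading it; the layout is copied from the union above, though C bitfield ordering is formally implementation-defined, so the driver relies on its fixed build environment:

#include <stdint.h>
#include <stdio.h>

union aux_wake_wa_options {
	struct {
		uint32_t enable_wa : 1;
		uint32_t use_default_timeout : 1;
		uint32_t rsvd : 14;
		uint32_t timeout_ms : 16;
	} bits;
	uint32_t raw;
};

int main(void)
{
	union aux_wake_wa_options wa = { .raw = 0 };

	wa.bits.enable_wa = 1;
	wa.bits.timeout_ms = 200;

	printf("raw=0x%08x enable=%u timeout=%u ms\n",
	       wa.raw, wa.bits.enable_wa, wa.bits.timeout_ms);
	return 0;
}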
@@ -703,14 +721,15 @@ struct dc_debug_options {
        bool enable_driver_sequence_debug;
        enum det_size crb_alloc_policy;
        int crb_alloc_policy_min_disp_count;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
        bool disable_z10;
+#if defined(CONFIG_DRM_AMD_DC_DCN)
        bool enable_z9_disable_interface;
        bool enable_sw_cntl_psr;
        union dpia_debug_options dpia_debug;
 #endif
        bool apply_vendor_specific_lttpr_wa;
-       bool ignore_dpref_ss;
+       bool extended_blank_optimization;
+       union aux_wake_wa_options aux_wake_wa;
        uint8_t psr_power_use_phy_fsm;
 };
 
@@ -1369,6 +1388,8 @@ struct dc_sink_init_data {
        bool converter_disable_audio;
 };
 
+bool dc_extended_blank_supported(struct dc *dc);
+
 struct dc_sink *dc_sink_create(const struct dc_sink_init_data *init_params);
 
 /* Newer interfaces  */
index 99a750f561f81cb71a7092a329e9505bc4eb7653..c4168c11257c310778ef1e08e00635bed6aabffc 100644 (file)
@@ -131,6 +131,7 @@ union stream_update_flags {
                uint32_t wb_update:1;
                uint32_t dsc_changed : 1;
                uint32_t mst_bw : 1;
+               uint32_t crtc_timing_adjust : 1;
        } bits;
 
        uint32_t raw;
@@ -289,6 +290,7 @@ struct dc_stream_update {
        struct dc_3dlut *lut3d_func;
 
        struct test_pattern *pending_test_pattern;
+       struct dc_crtc_timing_adjust *crtc_timing_adjust;
 };
 
 bool dc_is_stream_unchanged(
index c3e141c19a77e4957eb3a9debdddcb4ba8ac8ed3..83fbea2df410908fe029cd0592092c5ddb393481 100644 (file)
@@ -1497,16 +1497,12 @@ void dcn10_init_hw(struct dc *dc)
                        link->link_status.link_active = true;
        }
 
-       /* Power gate DSCs */
-       if (!is_optimized_init_done) {
-               for (i = 0; i < res_pool->res_cap->num_dsc; i++)
-                       if (hws->funcs.dsc_pg_control != NULL)
-                               hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false);
-       }
-
        /* we want to turn off all dp displays before doing detection */
        dc_link_blank_all_dp_displays(dc);
 
+       if (hws->funcs.enable_power_gating_plane)
+               hws->funcs.enable_power_gating_plane(dc->hwseq, true);
+
        /* If taking control over from VBIOS, we may want to optimize our first
         * mode set, so we need to skip powering down pipes until we know which
         * pipes we want to use.
@@ -1559,8 +1555,6 @@ void dcn10_init_hw(struct dc *dc)
 
                REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
        }
-       if (hws->funcs.enable_power_gating_plane)
-               hws->funcs.enable_power_gating_plane(dc->hwseq, true);
 
        if (dc->clk_mgr->funcs->notify_wm_ranges)
                dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr);
@@ -2056,7 +2050,7 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size,
 {
        struct dc_context *dc_ctx = dc->ctx;
        int i, master = -1, embedded = -1;
-       struct dc_crtc_timing hw_crtc_timing[MAX_PIPES] = {0};
+       struct dc_crtc_timing *hw_crtc_timing;
        uint64_t phase[MAX_PIPES];
        uint64_t modulo[MAX_PIPES];
        unsigned int pclk;
@@ -2067,6 +2061,10 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size,
        uint32_t dp_ref_clk_100hz =
                dc->res_pool->dp_clock_source->ctx->dc->clk_mgr->dprefclk_khz*10;
 
+       hw_crtc_timing = kcalloc(MAX_PIPES, sizeof(*hw_crtc_timing), GFP_KERNEL);
+       if (!hw_crtc_timing)
+               return master;
+
        if (dc->config.vblank_alignment_dto_params &&
                dc->res_pool->dp_clock_source->funcs->override_dp_pix_clk) {
                embedded_h_total =
@@ -2130,6 +2128,8 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size,
                }
 
        }
+
+       kfree(hw_crtc_timing);
        return master;
 }
 
@@ -2522,14 +2522,18 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
        struct mpc *mpc = dc->res_pool->mpc;
        struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params);
 
-       if (per_pixel_alpha)
-               blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
-       else
-               blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
-
        blnd_cfg.overlap_only = false;
        blnd_cfg.global_gain = 0xff;
 
+       if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) {
+               blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
+               blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
+       } else if (per_pixel_alpha) {
+               blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
+       } else {
+               blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
+       }
+
        if (pipe_ctx->plane_state->global_alpha)
                blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value;
        else
index ab910deed4812bbcd227b80ac266942488e4adf6..b627c41713cc2d16147f099507c08bc3ce03a18b 100644 (file)
@@ -1857,6 +1857,7 @@ void dcn20_optimize_bandwidth(
                struct dc_state *context)
 {
        struct hubbub *hubbub = dc->res_pool->hubbub;
+       int i;
 
        /* program dchubbub watermarks */
        hubbub->funcs->program_watermarks(hubbub,
@@ -1873,6 +1874,17 @@ void dcn20_optimize_bandwidth(
                        dc->clk_mgr,
                        context,
                        true);
+       if (dc_extended_blank_supported(dc) && context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW) {
+               for (i = 0; i < dc->res_pool->pipe_count; ++i) {
+                       struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+                       if (pipe_ctx->stream && pipe_ctx->plane_res.hubp->funcs->program_extended_blank
+                               && pipe_ctx->stream->adjust.v_total_min == pipe_ctx->stream->adjust.v_total_max
+                               && pipe_ctx->stream->adjust.v_total_max > pipe_ctx->stream->timing.v_total)
+                                       pipe_ctx->plane_res.hubp->funcs->program_extended_blank(pipe_ctx->plane_res.hubp,
+                                               pipe_ctx->dlg_regs.optimized_min_dst_y_next_start);
+               }
+       }
        /* increase compbuf size */
        if (hubbub->funcs->program_compbuf_size)
                hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true);
@@ -2332,14 +2344,18 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
        struct mpc *mpc = dc->res_pool->mpc;
        struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params);
 
-       if (per_pixel_alpha)
-               blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
-       else
-               blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
-
        blnd_cfg.overlap_only = false;
        blnd_cfg.global_gain = 0xff;
 
+       if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) {
+               blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
+               blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
+       } else if (per_pixel_alpha) {
+               blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
+       } else {
+               blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
+       }
+
        if (pipe_ctx->plane_state->global_alpha)
                blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value;
        else
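Both the DCN1.0 and DCN2.0 hunks above select the MPCC blend mode the same way: combined per-pixel alpha plus global gain when the plane has both, plain per-pixel alpha otherwise, and global alpha as the fallback. The decision table as a compact sketch, with abbreviated enum names:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum blend_mode { GLOBAL_ALPHA, PER_PIXEL, PER_PIXEL_COMBINED_GLOBAL_GAIN };

struct blend_cfg { enum blend_mode mode; uint8_t global_gain; };

static struct blend_cfg pick_blend(bool per_pixel_alpha, bool global_alpha,
				   uint8_t global_alpha_value)
{
	struct blend_cfg cfg = { .mode = GLOBAL_ALPHA, .global_gain = 0xff };

	if (per_pixel_alpha && global_alpha) {
		cfg.mode = PER_PIXEL_COMBINED_GLOBAL_GAIN;
		cfg.global_gain = global_alpha_value;	/* plane alpha scales pixel alpha */
	} else if (per_pixel_alpha) {
		cfg.mode = PER_PIXEL;
	}
	return cfg;
}

int main(void)
{
	struct blend_cfg cfg = pick_blend(true, true, 0x80);

	printf("mode=%d gain=0x%02x\n", cfg.mode, cfg.global_gain);
	return 0;
}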
index d473708d53999d108304987abfa6c390870b2279..7802d603f79600d2cc53324b867dbdafc9d38319 100644 (file)
@@ -1976,7 +1976,6 @@ int dcn20_validate_apply_pipe_split_flags(
                                /*If need split for odm but 4 way split already*/
                                if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe)
                                                || !pipe->next_odm_pipe)) {
-                                       ASSERT(0); /* NOT expected yet */
                                        merge[i] = true; /* 4 -> 2 ODM */
                                } else if (split[i] == 0 && pipe->prev_odm_pipe) {
                                        ASSERT(0); /* NOT expected yet */
index 61273265677223494031329f18d186f7b5d424d6..faab59508d8292a74954ba59b602b4322647b05f 100644 (file)
@@ -644,7 +644,7 @@ static const struct dc_debug_options debug_defaults_drv = {
                .clock_trace = true,
                .disable_pplib_clock_request = true,
                .min_disp_clk_khz = 100000,
-               .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+               .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
                .force_single_disp_pipe_split = false,
                .disable_dcc = DCC_ENABLE,
                .vsr_support = true,
@@ -997,6 +997,7 @@ static struct clock_source *dcn21_clock_source_create(
                return &clk_src->base;
        }
 
+       kfree(clk_src);
        BREAK_TO_DEBUGGER();
        return NULL;
 }
index ed0a0e5fd80539e46c184c6d6de036ba2e9ae349..f61ec87638443e377922fdd7c58865f9e23c5b6d 100644 (file)
@@ -547,6 +547,9 @@ void dcn30_init_hw(struct dc *dc)
        /* we want to turn off all dp displays before doing detection */
        dc_link_blank_all_dp_displays(dc);
 
+       if (hws->funcs.enable_power_gating_plane)
+               hws->funcs.enable_power_gating_plane(dc->hwseq, true);
+
        /* If taking control over from VBIOS, we may want to optimize our first
         * mode set, so we need to skip powering down pipes until we know which
         * pipes we want to use.
@@ -624,8 +627,6 @@ void dcn30_init_hw(struct dc *dc)
 
                REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
        }
-       if (hws->funcs.enable_power_gating_plane)
-               hws->funcs.enable_power_gating_plane(dc->hwseq, true);
 
        if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks)
                dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub);
index 3e6d6ebd199ee79b7c2c72c57d6de6ca7e20641b..51c5f3685470a3aa158a122850095406a3f524fa 100644 (file)
@@ -1042,5 +1042,7 @@ void hubbub31_construct(struct dcn20_hubbub *hubbub31,
        hubbub31->detile_buf_size = det_size_kb * 1024;
        hubbub31->pixel_chunk_size = pixel_chunk_size_kb * 1024;
        hubbub31->crb_size_segs = config_return_buffer_size_kb / DCN31_CRB_SEGMENT_SIZE_KB;
+
+       hubbub31->debug_test_index_pstate = 0x6;
 }
 
index 53b792b997b7e82b14535cc138791bef13dc8a2e..8ae6117953ca001ee1bb9244b09a129458e11f18 100644 (file)
@@ -54,6 +54,13 @@ void hubp31_soft_reset(struct hubp *hubp, bool reset)
        REG_UPDATE(DCHUBP_CNTL, HUBP_SOFT_RESET, reset);
 }
 
+void hubp31_program_extended_blank(struct hubp *hubp, unsigned int min_dst_y_next_start_optimized)
+{
+       struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+       REG_SET(BLANK_OFFSET_1, 0, MIN_DST_Y_NEXT_START, min_dst_y_next_start_optimized);
+}
+
 static struct hubp_funcs dcn31_hubp_funcs = {
        .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer,
        .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled,
@@ -80,6 +87,7 @@ static struct hubp_funcs dcn31_hubp_funcs = {
        .set_unbounded_requesting = hubp31_set_unbounded_requesting,
        .hubp_soft_reset = hubp31_soft_reset,
        .hubp_in_blank = hubp1_in_blank,
+       .program_extended_blank = hubp31_program_extended_blank,
 };
 
 bool hubp31_construct(
index 4be2286809093be0ce48747d59fc3ea2edf95ae4..631d8ac63aa41ba07dbff59c1dd7dfcd0f3a9b20 100644 (file)
@@ -199,6 +199,9 @@ void dcn31_init_hw(struct dc *dc)
        /* we want to turn off all dp displays before doing detection */
        dc_link_blank_all_dp_displays(dc);
 
+       if (hws->funcs.enable_power_gating_plane)
+               hws->funcs.enable_power_gating_plane(dc->hwseq, true);
+
        /* If taking control over from VBIOS, we may want to optimize our first
         * mode set, so we need to skip powering down pipes until we know which
         * pipes we want to use.
@@ -248,8 +251,6 @@ void dcn31_init_hw(struct dc *dc)
 
                REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
        }
-       if (hws->funcs.enable_power_gating_plane)
-               hws->funcs.enable_power_gating_plane(dc->hwseq, true);
 
        if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks)
                dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub);
@@ -338,20 +339,20 @@ void dcn31_enable_power_gating_plane(
        bool enable)
 {
        bool force_on = true; /* disable power gating */
+       uint32_t org_ip_request_cntl = 0;
 
        if (enable && !hws->ctx->dc->debug.disable_hubp_power_gate)
                force_on = false;
 
+       REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
+       if (org_ip_request_cntl == 0)
+               REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
        /* DCHUBP0/1/2/3/4/5 */
        REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
-       REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000);
        REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
-       REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000);
        /* DPP0/1/2/3/4/5 */
        REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
-       REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000);
        REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
-       REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000);
 
        force_on = true; /* disable power gating */
        if (enable && !hws->ctx->dc->debug.disable_dsc_power_gate)
@@ -359,11 +360,11 @@ void dcn31_enable_power_gating_plane(
 
        /* DCS0/1/2/3/4/5 */
        REG_UPDATE(DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
-       REG_WAIT(DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000);
        REG_UPDATE(DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
-       REG_WAIT(DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000);
        REG_UPDATE(DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
-       REG_WAIT(DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000);
+
+       if (org_ip_request_cntl == 0)
+               REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0);
 }
 
 void dcn31_update_info_frame(struct pipe_ctx *pipe_ctx)
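The two hunks above move the enable_power_gating_plane() call earlier in dcn31_init_hw() and drop the per-domain REG_WAIT polls, bracketing the DOMAIN*_PG_CONFIG updates with a save/restore of IP_REQUEST_EN instead: the bit is forced on only if it was clear on entry and cleared again on exit, so a caller that already opened the register-programming window is left undisturbed. A minimal standalone sketch of that save/restore pattern (the variable and helper names are stand-ins for the driver's REG_GET/REG_SET macros, not real DC symbols):

static unsigned int dc_ip_request_cntl;   /* models the DC_IP_REQUEST_CNTL register */
#define IP_REQUEST_EN 0x1u

static void with_ip_request_enabled(void (*program_domains)(void))
{
	unsigned int org = dc_ip_request_cntl & IP_REQUEST_EN;   /* REG_GET */

	if (org == 0)
		dc_ip_request_cntl |= IP_REQUEST_EN;     /* REG_SET(..., 1): open the window */

	program_domains();                               /* the DOMAIN*_PG_CONFIG updates */

	if (org == 0)
		dc_ip_request_cntl &= ~IP_REQUEST_EN;    /* REG_SET(..., 0): restore prior state */
}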
index d7559e5a99ce8282d292934e1bfbe549692be2eb..e708f07fe75af1b40d45c66a841a194f017fad4d 100644 (file)
@@ -153,9 +153,4 @@ void dcn31_hw_sequencer_construct(struct dc *dc)
                dc->hwss.init_hw = dcn20_fpga_init_hw;
                dc->hwseq->funcs.init_pipes = NULL;
        }
-       if (dc->debug.disable_z10) {
-               /*hw not support z10 or sw disable it*/
-               dc->hwss.z10_restore = NULL;
-               dc->hwss.z10_save_init = NULL;
-       }
 }
index 8afe2130d7c54ff911acf8df48e0ab5714f42ecb..e05527a3a8ba26776ae52999f46768d648c33f3c 100644 (file)
@@ -124,7 +124,6 @@ static bool optc31_enable_crtc(struct timing_generator *optc)
 static bool optc31_disable_crtc(struct timing_generator *optc)
 {
        struct optc *optc1 = DCN10TG_FROM_TG(optc);
-
        /* disable otg request until end of the first line
         * in the vertical blank region
         */
@@ -138,6 +137,7 @@ static bool optc31_disable_crtc(struct timing_generator *optc)
        REG_WAIT(OTG_CLOCK_CONTROL,
                        OTG_BUSY, 0,
                        1, 100000);
+       optc1_clear_optc_underflow(optc);
 
        return true;
 }
@@ -158,6 +158,9 @@ static bool optc31_immediate_disable_crtc(struct timing_generator *optc)
                        OTG_BUSY, 0,
                        1, 100000);
 
+       /* clear any spurious underflow status */
+       optc1_clear_optc_underflow(optc);
+
        return true;
 }
 
index 89b7b6b7254ac8ea97d1bac7acce2fd14d44349f..63934ecf6be84207a3ba839d86564f7088604ec7 100644 (file)
@@ -2032,7 +2032,9 @@ bool dcn31_validate_bandwidth(struct dc *dc,
 
        BW_VAL_TRACE_COUNT();
 
+       DC_FP_START();
        out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
+       DC_FP_END();
 
        // Disable fast_validate to set min dcfclk in calculate_wm_and_dlg
        if (pipe_cnt == 0)
@@ -2232,6 +2234,7 @@ static bool dcn31_resource_construct(
        dc->caps.extended_aux_timeout_support = true;
        dc->caps.dmcub_support = true;
        dc->caps.is_apu = true;
+       dc->caps.zstate_support = true;
 
        /* Color pipeline capabilities */
        dc->caps.color.dpp.dcn_arch = 1;
index 2f6122153bdb53f75f798e3f674519f3dfc0a018..f93af45aeab4b44a90196e45b19628cbe4b6d391 100644 (file)
@@ -722,8 +722,10 @@ static enum dcn_zstate_support_state  decide_zstate_support(struct dc *dc, struc
 {
        int plane_count;
        int i;
+       unsigned int optimized_min_dst_y_next_start_us;
 
        plane_count = 0;
+       optimized_min_dst_y_next_start_us = 0;
        for (i = 0; i < dc->res_pool->pipe_count; i++) {
                if (context->res_ctx.pipe_ctx[i].plane_state)
                        plane_count++;
@@ -744,11 +746,22 @@ static enum dcn_zstate_support_state  decide_zstate_support(struct dc *dc, struc
                struct dc_link *link = context->streams[0]->sink->link;
                struct dc_stream_status *stream_status = &context->stream_status[0];
 
+               if (dc_extended_blank_supported(dc)) {
+                       for (i = 0; i < dc->res_pool->pipe_count; i++) {
+                               if (context->res_ctx.pipe_ctx[i].stream == context->streams[0]
+                                       && context->res_ctx.pipe_ctx[i].stream->adjust.v_total_min == context->res_ctx.pipe_ctx[i].stream->adjust.v_total_max
+                                       && context->res_ctx.pipe_ctx[i].stream->adjust.v_total_min > context->res_ctx.pipe_ctx[i].stream->timing.v_total) {
+                                               optimized_min_dst_y_next_start_us =
+                                                       context->res_ctx.pipe_ctx[i].dlg_regs.optimized_min_dst_y_next_start_us;
+                                               break;
+                               }
+                       }
+               }
                /* zstate only supported on PWRSEQ0 and when there are <2 planes */
                if (link->link_index != 0 || stream_status->plane_count > 1)
                        return DCN_ZSTATE_SUPPORT_DISALLOW;
 
-               if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0)
+               if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || optimized_min_dst_y_next_start_us > 5000)
                        return DCN_ZSTATE_SUPPORT_ALLOW;
                else if (link->psr_settings.psr_version == DC_PSR_VERSION_1 && !dc->debug.disable_psr)
                        return DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
@@ -786,8 +799,6 @@ void dcn20_calculate_dlg_params(
                                                        != dm_dram_clock_change_unsupported;
        context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
 
-       context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context);
-
        context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
 
        if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
@@ -843,6 +854,7 @@ void dcn20_calculate_dlg_params(
                                &pipes[pipe_idx].pipe);
                pipe_idx++;
        }
+       context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context);
 }
 
 static void swizzle_to_dml_params(
index e0fecf127bd5a9baecbd753685f9ac1c02117df7..53d760e169e61f5763ff2e5fab4f053453675b85 100644 (file)
@@ -1055,6 +1055,7 @@ static void dml_rq_dlg_get_dlg_params(
 
        float vba__refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;  // From VBA
        float vba__refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;  // From VBA
+       int blank_lines;
 
        memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs));
        memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs));
@@ -1080,6 +1081,18 @@ static void dml_rq_dlg_get_dlg_params(
        dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start;
 
        disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2));
+       blank_lines = (dst->vblank_end + dst->vtotal_min - dst->vblank_start - dst->vstartup_start - 1);
+       if (blank_lines < 0)
+               blank_lines = 0;
+       if (blank_lines != 0) {
+               disp_dlg_regs->optimized_min_dst_y_next_start_us =
+                       ((unsigned int) blank_lines * dst->hactive) / (unsigned int) dst->pixel_rate_mhz;
+               disp_dlg_regs->optimized_min_dst_y_next_start =
+                       (unsigned int)(((double) (dlg_vblank_start + blank_lines)) * dml_pow(2, 2));
+       } else {
+               // use unoptimized value
+               disp_dlg_regs->optimized_min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start;
+       }
        ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18));
 
        dml_print("DML_DLG: %s: min_ttu_vblank (us)         = %3.2f\n", __func__, min_ttu_vblank);
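The arithmetic added above is what feeds decide_zstate_support(): blank_lines extra lines of hactive pixels each, divided by the pixel rate in MHz (i.e. pixels per microsecond), yields the extended-blank time in microseconds, and z-state entry is later allowed when it exceeds 5000 us. A worked example with made-up timing values (chosen only to make the arithmetic visible, not taken from a real mode):

#include <stdio.h>

int main(void)
{
	/* Illustrative values only. */
	int vblank_end = 40, vtotal_min = 2250, vblank_start = 2160, vstartup_start = 100;
	unsigned int hactive = 3840, pixel_rate_mhz = 594;   /* pixels per line, Mpixels/s */

	int blank_lines = vblank_end + vtotal_min - vblank_start - vstartup_start - 1;   /* 29 */
	if (blank_lines < 0)
		blank_lines = 0;

	/* lines * pixels-per-line / pixels-per-us = microseconds of extra blank */
	unsigned int us = ((unsigned int)blank_lines * hactive) / pixel_rate_mhz;
	printf("blank_lines=%d -> ~%u us of extended blank\n", blank_lines, us);   /* 187 us */
	return 0;
}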
index 59f0a61c33cf9018deb5d792bd76a5d6619420e4..2df660cd8801b34dbd7b33a385571f16230b8838 100644 (file)
@@ -446,6 +446,8 @@ struct _vcs_dpi_display_dlg_regs_st {
        unsigned int refcyc_h_blank_end;
        unsigned int dlg_vblank_end;
        unsigned int min_dst_y_next_start;
+       unsigned int optimized_min_dst_y_next_start;
+       unsigned int optimized_min_dst_y_next_start_us;
        unsigned int refcyc_per_htotal;
        unsigned int refcyc_x_after_scaler;
        unsigned int dst_y_after_scaler;
index efc2339f1fa00b4e901a247043e4dab793619ef2..4385d19bc489193f5f93575092d9fa9da7d95eca 100644 (file)
@@ -864,11 +864,11 @@ static bool setup_dsc_config(
                min_slices_h = inc_num_slices(dsc_common_caps.slice_caps, min_slices_h);
        }
 
+       is_dsc_possible = (min_slices_h <= max_slices_h);
+
        if (pic_width % min_slices_h != 0)
                min_slices_h = 0; // DSC TODO: Maybe try increasing the number of slices first?
 
-       is_dsc_possible = (min_slices_h <= max_slices_h);
-
        if (min_slices_h == 0 && max_slices_h == 0)
                is_dsc_possible = false;
 
index ab9939db8cea881040b1f4577c0a5730ea62e695..44f167d2584f522e10104f0a7e15de83b45efb8d 100644 (file)
@@ -33,6 +33,7 @@
 #define MAX_MTP_SLOT_COUNT 64
 #define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50
 #define TRAINING_AUX_RD_INTERVAL 100 //us
+#define LINK_AUX_WAKE_TIMEOUT_MS 1500 // Timeout when trying to wake unresponsive DPRX.
 
 struct dc_link;
 struct dc_stream_state;
index e45b7993c5c574861aff875ca890ca1d3e9982e9..ad69d78c4ac347bf0c5256f02e1cf357add6ff24 100644 (file)
@@ -195,6 +195,9 @@ struct hubp_funcs {
 
        void (*hubp_set_flip_int)(struct hubp *hubp);
 
+       void (*program_extended_blank)(struct hubp *hubp,
+                       unsigned int min_dst_y_next_start_optimized);
+
        void (*hubp_wait_pipe_read_start)(struct hubp *hubp);
 };
 
index b691aa45e84fbcec96c418d9df3fa7ec327eada5..79bc207415bcb0b0a259642a20429c923d374d4d 100644 (file)
@@ -100,7 +100,8 @@ enum vsc_packet_revision {
 //PB7 = MD0
 #define MASK_VTEM_MD0__VRR_EN         0x01
 #define MASK_VTEM_MD0__M_CONST        0x02
-#define MASK_VTEM_MD0__RESERVED2      0x0C
+#define MASK_VTEM_MD0__QMS_EN         0x04
+#define MASK_VTEM_MD0__RESERVED2      0x08
 #define MASK_VTEM_MD0__FVA_FACTOR_M1  0xF0
 
 //MD1
@@ -109,7 +110,7 @@ enum vsc_packet_revision {
 //MD2
 #define MASK_VTEM_MD2__BASE_REFRESH_RATE_98  0x03
 #define MASK_VTEM_MD2__RB                    0x04
-#define MASK_VTEM_MD2__RESERVED3             0xF8
+#define MASK_VTEM_MD2__NEXT_TFR              0xF8
 
 //MD3
 #define MASK_VTEM_MD3__BASE_REFRESH_RATE_07  0xFF
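The reserved bits being carved up here follow the HDMI VTEM metadata layout: QMS_EN takes bit 2 of MD0 and NEXT_TFR the top five bits of MD2. A small hypothetical decoder showing how the masks are meant to be applied to raw payload bytes (the sample values are arbitrary):

#include <stdio.h>

#define MASK_VTEM_MD0__VRR_EN         0x01
#define MASK_VTEM_MD0__QMS_EN         0x04
#define MASK_VTEM_MD0__FVA_FACTOR_M1  0xF0
#define MASK_VTEM_MD2__NEXT_TFR       0xF8

int main(void)
{
	unsigned char md0 = 0x55, md2 = 0x4C;   /* arbitrary sample bytes */

	printf("VRR_EN=%u QMS_EN=%u FVA_FACTOR_M1=%u NEXT_TFR=%u\n",
	       !!(md0 & MASK_VTEM_MD0__VRR_EN),
	       !!(md0 & MASK_VTEM_MD0__QMS_EN),
	       (md0 & MASK_VTEM_MD0__FVA_FACTOR_M1) >> 4,
	       (md2 & MASK_VTEM_MD2__NEXT_TFR) >> 3);
	return 0;
}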
index 89fbee568be4ac514d5110536928ce379ca33c8f..72e7b5d40af6936b5a22ca1283b96a0660ce7629 100644 (file)
@@ -173,6 +173,17 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
 
        if (!pp_funcs || !pp_funcs->get_asic_baco_capability)
                return false;
+       /* Don't use BACO for reset in S3.
+        * This is a workaround for some platforms
+        * where entering BACO during suspend
+        * seems to cause reboots or hangs.
+        * This might be related to the fact that BACO controls
+        * power to the whole GPU including devices like audio and USB.
+        * Powering down/up everything may adversely affect these other
+        * devices.  Needs more investigation.
+        */
+       if (adev->in_s3)
+               return false;
 
        mutex_lock(&adev->pm.mutex);
 
@@ -416,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso
 void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
 {
        const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+       int i;
 
        if (!adev->pm.dpm_enabled)
                return;
@@ -423,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
        if (!pp_funcs->pm_compute_clocks)
                return;
 
+       if (adev->mode_info.num_crtc)
+               amdgpu_display_bandwidth_update(adev);
+
+       for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+               struct amdgpu_ring *ring = adev->rings[i];
+               if (ring && ring->sched.ready)
+                       amdgpu_fence_wait_empty(ring);
+       }
+
        mutex_lock(&adev->pm.mutex);
        pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle);
        mutex_unlock(&adev->pm.mutex);
@@ -432,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
 {
        int ret = 0;
 
+       if (adev->family == AMDGPU_FAMILY_SI) {
+               mutex_lock(&adev->pm.mutex);
+               if (enable) {
+                       adev->pm.dpm.uvd_active = true;
+                       adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
+               } else {
+                       adev->pm.dpm.uvd_active = false;
+               }
+               mutex_unlock(&adev->pm.mutex);
+
+               amdgpu_dpm_compute_clocks(adev);
+               return;
+       }
+
        ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
        if (ret)
                DRM_ERROR("DPM %s UVD failed, ret = %d.\n",
@@ -442,6 +477,21 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
 {
        int ret = 0;
 
+       if (adev->family == AMDGPU_FAMILY_SI) {
+               mutex_lock(&adev->pm.mutex);
+               if (enable) {
+                       adev->pm.dpm.vce_active = true;
+                       /* XXX select vce level based on ring/task */
+                       adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
+               } else {
+                       adev->pm.dpm.vce_active = false;
+               }
+               mutex_unlock(&adev->pm.mutex);
+
+               amdgpu_dpm_compute_clocks(adev);
+               return;
+       }
+
        ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
        if (ret)
                DRM_ERROR("DPM %s VCE failed, ret = %d.\n",
@@ -500,6 +550,9 @@ int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size)
        struct smu_context *smu = adev->powerplay.pp_handle;
        int ret = 0;
 
+       if (!is_support_sw_smu(adev))
+               return -EOPNOTSUPP;
+
        mutex_lock(&adev->pm.mutex);
        ret = smu_send_hbm_bad_pages_num(smu, size);
        mutex_unlock(&adev->pm.mutex);
@@ -512,6 +565,9 @@ int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t si
        struct smu_context *smu = adev->powerplay.pp_handle;
        int ret = 0;
 
+       if (!is_support_sw_smu(adev))
+               return -EOPNOTSUPP;
+
        mutex_lock(&adev->pm.mutex);
        ret = smu_send_hbm_bad_channel_flag(smu, size);
        mutex_unlock(&adev->pm.mutex);
index 9613c6181c177795629acc1d4219ce18fbe70828..d3fe149d8476561796015a0a98f6f43a46f0423f 100644 (file)
@@ -1028,16 +1028,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
 void amdgpu_legacy_dpm_compute_clocks(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-       int i = 0;
-
-       if (adev->mode_info.num_crtc)
-               amdgpu_display_bandwidth_update(adev);
-
-       for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
-               struct amdgpu_ring *ring = adev->rings[i];
-               if (ring && ring->sched.ready)
-                       amdgpu_fence_wait_empty(ring);
-       }
 
        amdgpu_dpm_get_active_displays(adev);
 
index caae54487f9cb753337ec10ac2ebe9fdd66620af..633dab14f51c2710491154279e286c60dc43c890 100644 (file)
@@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct amdgpu_device *adev)
 }
 #endif
 
-static int si_set_powergating_by_smu(void *handle,
-                                    uint32_t block_type,
-                                    bool gate)
-{
-       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-       switch (block_type) {
-       case AMD_IP_BLOCK_TYPE_UVD:
-               if (!gate) {
-                       adev->pm.dpm.uvd_active = true;
-                       adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
-               } else {
-                       adev->pm.dpm.uvd_active = false;
-               }
-
-               amdgpu_legacy_dpm_compute_clocks(handle);
-               break;
-       case AMD_IP_BLOCK_TYPE_VCE:
-               if (!gate) {
-                       adev->pm.dpm.vce_active = true;
-                       /* XXX select vce level based on ring/task */
-                       adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
-               } else {
-                       adev->pm.dpm.vce_active = false;
-               }
-
-               amdgpu_legacy_dpm_compute_clocks(handle);
-               break;
-       default:
-               break;
-       }
-       return 0;
-}
-
 static int si_set_sw_state(struct amdgpu_device *adev)
 {
        return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ?
@@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs = {
        .print_power_state = &si_dpm_print_power_state,
        .debugfs_print_current_performance_level = &si_dpm_debugfs_print_current_performance_level,
        .force_performance_level = &si_dpm_force_performance_level,
-       .set_powergating_by_smu = &si_set_powergating_by_smu,
        .vblank_too_short = &si_dpm_vblank_too_short,
        .set_fan_control_mode = &si_dpm_set_fan_control_mode,
        .get_fan_control_mode = &si_dpm_get_fan_control_mode,
index a2da46bf3985d0b22ed9eeb9526ac7fb2042ba14..71e9c6ce6b1a78c1a9b8109d6f7265d72f680413 100644 (file)
@@ -1487,16 +1487,6 @@ static void pp_pm_compute_clocks(void *handle)
 {
        struct pp_hwmgr *hwmgr = handle;
        struct amdgpu_device *adev = hwmgr->adev;
-       int i = 0;
-
-       if (adev->mode_info.num_crtc)
-               amdgpu_display_bandwidth_update(adev);
-
-       for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
-               struct amdgpu_ring *ring = adev->rings[i];
-               if (ring && ring->sched.ready)
-                       amdgpu_fence_wait_empty(ring);
-       }
 
        if (!amdgpu_device_has_dc_support(adev)) {
                amdgpu_dpm_get_active_displays(adev);
index 9ddd8491ff00847d5c0becbcdec1be4fa3edcb50..ede71de2343dcfba4952c9f20ad62c5614b64e12 100644 (file)
@@ -773,13 +773,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
                smum_send_msg_to_smc_with_parameter(hwmgr,
                                                PPSMC_MSG_SetHardMinFclkByFreq,
                                                hwmgr->display_config->num_display > 3 ?
-                                               data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk :
+                                               (data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk / 100) :
                                                min_mclk,
                                                NULL);
 
                smum_send_msg_to_smc_with_parameter(hwmgr,
                                                PPSMC_MSG_SetHardMinSocclkByFreq,
-                                               data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk,
+                                               data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk / 100,
                                                NULL);
                smum_send_msg_to_smc_with_parameter(hwmgr,
                                                PPSMC_MSG_SetHardMinVcn,
@@ -792,11 +792,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
                                                NULL);
                smum_send_msg_to_smc_with_parameter(hwmgr,
                                                PPSMC_MSG_SetSoftMaxFclkByFreq,
-                                               data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk,
+                                               data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk / 100,
                                                NULL);
                smum_send_msg_to_smc_with_parameter(hwmgr,
                                                PPSMC_MSG_SetSoftMaxSocclkByFreq,
-                                               data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk,
+                                               data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk / 100,
                                                NULL);
                smum_send_msg_to_smc_with_parameter(hwmgr,
                                                PPSMC_MSG_SetSoftMaxVcn,
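The /100 scaling applied above suggests the vdd_dep_on_fclk/socclk tables store clocks in 10 kHz units while the PPSMC_MSG_SetHardMin*/SetSoftMax* messages expect MHz, so sending a raw table entry would request a clock 100x too high. A toy illustration of the conversion (the 10 kHz unit is inferred from the patch, not stated in it):

#include <stdio.h>

int main(void)
{
	unsigned int clk_entry = 60000;        /* table value: 60000 x 10 kHz = 600 MHz */
	unsigned int mhz = clk_entry / 100;    /* what the SMU message expects */

	printf("table=%u -> %u MHz\n", clk_entry, mhz);   /* 600 MHz */
	return 0;
}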
index f1544755d8b469539cd3c4a20b2e6f635f8da800..f10a0256413e6461b228cd8e454588daaa599ad6 100644 (file)
@@ -1351,14 +1351,8 @@ static int smu_disable_dpms(struct smu_context *smu)
 {
        struct amdgpu_device *adev = smu->adev;
        int ret = 0;
-       /*
-        * TODO: (adev->in_suspend && !adev->in_s0ix) is added to pair
-        * the workaround which always reset the asic in suspend.
-        * It's likely that workaround will be dropped in the future.
-        * Then the change here should be dropped together.
-        */
        bool use_baco = !smu->is_apu &&
-               (((amdgpu_in_reset(adev) || (adev->in_suspend && !adev->in_s0ix)) &&
+               ((amdgpu_in_reset(adev) &&
                  (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
                 ((adev->in_runpm || adev->in_s4) && amdgpu_asic_supports_baco(adev)));
 
index 7bfac029e51382e6c331e5591b1c1f7505f0e9cc..b81711c4ff3358c126742c05137e40269f49b8d9 100644 (file)
@@ -991,7 +991,7 @@ static int smu_v13_0_5_set_performance_level(struct smu_context *smu,
                return -EINVAL;
        }
 
-       if (sclk_min && sclk_max) {
+       if (sclk_min && sclk_max && smu_v13_0_5_clk_dpm_is_enabled(smu, SMU_SCLK)) {
                ret = smu_v13_0_5_set_soft_freq_limited_range(smu,
                                                            SMU_SCLK,
                                                            sclk_min,
index 007e5a282f67dce3576babf69f95018bea4ba3be..2145b08f953463ba5bcbbb275c676bbba0fa3d36 100644 (file)
@@ -78,6 +78,7 @@ config DRM_ITE_IT6505
         tristate "ITE IT6505 DisplayPort bridge"
         depends on OF
         select DRM_KMS_HELPER
+        select DRM_DP_HELPER
         select EXTCON
         help
           ITE IT6505 DisplayPort bridge chip driver.
index 11300b53d24fcada3535881926b1eb886435c537..7a7cc44686f9749584d1e0e34c7540d0c210d355 100644 (file)
@@ -4852,6 +4852,7 @@ static void fetch_monitor_name(struct drm_dp_mst_topology_mgr *mgr,
 
        mst_edid = drm_dp_mst_get_edid(port->connector, mgr, port);
        drm_edid_get_monitor_name(mst_edid, name, namelen);
+       kfree(mst_edid);
 }
 
 /**
index 026e4e29a0f374436a839303c019eef0541fb45a..9a2cfab3a177fe17618cd765a04c480e516a938e 100644 (file)
@@ -244,21 +244,6 @@ int drm_of_find_panel_or_bridge(const struct device_node *np,
        if (panel)
                *panel = NULL;
 
-       /**
-        * Devices can also be child nodes when we also control that device
-        * through the upstream device (ie, MIPI-DCS for a MIPI-DSI device).
-        *
-        * Lookup for a child node of the given parent that isn't either port
-        * or ports.
-        */
-       for_each_available_child_of_node(np, remote) {
-               if (of_node_name_eq(remote, "port") ||
-                   of_node_name_eq(remote, "ports"))
-                       continue;
-
-               goto of_find_panel_or_bridge;
-       }
-
        /*
         * of_graph_get_remote_node() produces a noisy error message if port
         * node isn't found and the absence of the port is a legit case here,
@@ -269,8 +254,6 @@ int drm_of_find_panel_or_bridge(const struct device_node *np,
                return -ENODEV;
 
        remote = of_graph_get_remote_node(np, port, endpoint);
-
-of_find_panel_or_bridge:
        if (!remote)
                return -ENODEV;
 
index 7616a3906b9ec440b0bf648dc10a79ebf2da246d..1b774dcfb28194c9516dae35f10c2a4b3582a72f 100644 (file)
@@ -367,6 +367,44 @@ static void dmc_set_fw_offset(struct intel_dmc *dmc,
        }
 }
 
+static bool dmc_mmio_addr_sanity_check(struct intel_dmc *dmc,
+                                      const u32 *mmioaddr, u32 mmio_count,
+                                      int header_ver, u8 dmc_id)
+{
+       struct drm_i915_private *i915 = container_of(dmc, typeof(*i915), dmc);
+       u32 start_range, end_range;
+       int i;
+
+       if (dmc_id >= DMC_FW_MAX) {
+               drm_warn(&i915->drm, "Unsupported firmware id %u\n", dmc_id);
+               return false;
+       }
+
+       if (header_ver == 1) {
+               start_range = DMC_MMIO_START_RANGE;
+               end_range = DMC_MMIO_END_RANGE;
+       } else if (dmc_id == DMC_FW_MAIN) {
+               start_range = TGL_MAIN_MMIO_START;
+               end_range = TGL_MAIN_MMIO_END;
+       } else if (DISPLAY_VER(i915) >= 13) {
+               start_range = ADLP_PIPE_MMIO_START;
+               end_range = ADLP_PIPE_MMIO_END;
+       } else if (DISPLAY_VER(i915) >= 12) {
+               start_range = TGL_PIPE_MMIO_START(dmc_id);
+               end_range = TGL_PIPE_MMIO_END(dmc_id);
+       } else {
+               drm_warn(&i915->drm, "Unknown MMIO range for sanity check\n");
+               return false;
+       }
+
+       for (i = 0; i < mmio_count; i++) {
+               if (mmioaddr[i] < start_range || mmioaddr[i] > end_range)
+                       return false;
+       }
+
+       return true;
+}
+
 static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
                               const struct intel_dmc_header_base *dmc_header,
                               size_t rem_size, u8 dmc_id)
@@ -436,6 +474,12 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
                return 0;
        }
 
+       if (!dmc_mmio_addr_sanity_check(dmc, mmioaddr, mmio_count,
+                                       dmc_header->header_ver, dmc_id)) {
+               drm_err(&i915->drm, "DMC firmware has wrong MMIO addresses\n");
+               return 0;
+       }
+
        for (i = 0; i < mmio_count; i++) {
                dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]);
                dmc_info->mmiodata[i] = mmiodata[i];
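The new dmc_mmio_addr_sanity_check() rejects a firmware blob if any of its MMIO addresses falls outside the window allowed for the given DMC id and platform. The core test, reduced to a compact standalone form (the sample addresses and bounds here are placeholders, not real platform values):

#include <stdbool.h>
#include <stdio.h>

static bool addrs_in_range(const unsigned int *addr, unsigned int count,
			   unsigned int start, unsigned int end)
{
	for (unsigned int i = 0; i < count; i++)
		if (addr[i] < start || addr[i] > end)
			return false;   /* one bad address rejects the whole firmware */
	return true;
}

int main(void)
{
	unsigned int mmio[] = { 0x80030, 0x8002c, 0x91000 };   /* placeholder values */
	printf("%s\n", addrs_in_range(mmio, 3, 0x80000, 0x8ffff) ? "ok" : "rejected");
	return 0;
}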
index d667657e360624eeb80543cab361361891ae11eb..f868db8be02a6813d9b18d5fc39067afcff72072 100644 (file)
@@ -4383,13 +4383,20 @@ intel_dp_update_420(struct intel_dp *intel_dp)
 static void
 intel_dp_set_edid(struct intel_dp *intel_dp)
 {
+       struct drm_i915_private *i915 = dp_to_i915(intel_dp);
        struct intel_connector *connector = intel_dp->attached_connector;
        struct edid *edid;
+       bool vrr_capable;
 
        intel_dp_unset_edid(intel_dp);
        edid = intel_dp_get_edid(intel_dp);
        connector->detect_edid = edid;
 
+       vrr_capable = intel_vrr_is_capable(&connector->base);
+       drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] VRR capable: %s\n",
+                   connector->base.base.id, connector->base.name, str_yes_no(vrr_capable));
+       drm_connector_set_vrr_capable_property(&connector->base, vrr_capable);
+
        intel_dp_update_dfp(intel_dp, edid);
        intel_dp_update_420(intel_dp);
 
@@ -4422,6 +4429,9 @@ intel_dp_unset_edid(struct intel_dp *intel_dp)
 
        intel_dp->dfp.ycbcr_444_to_420 = false;
        connector->base.ycbcr_420_allowed = false;
+
+       drm_connector_set_vrr_capable_property(&connector->base,
+                                              false);
 }
 
 static int
@@ -4572,14 +4582,9 @@ static int intel_dp_get_modes(struct drm_connector *connector)
        int num_modes = 0;
 
        edid = intel_connector->detect_edid;
-       if (edid) {
+       if (edid)
                num_modes = intel_connector_update_modes(connector, edid);
 
-               if (intel_vrr_is_capable(connector))
-                       drm_connector_set_vrr_capable_property(connector,
-                                                              true);
-       }
-
        /* Also add fixed mode, which may or may not be present in EDID */
        if (intel_dp_is_edp(intel_attached_dp(intel_connector)) &&
            intel_connector->panel.fixed_mode) {
index 97cf3cac0105872f880f995427987ed309e38299..fb6cf30ee6281dd15f66c403882b3e38013bf821 100644 (file)
 
 #define INTEL_EDP_BRIGHTNESS_OPTIMIZATION_1                            0x359
 
+enum intel_dp_aux_backlight_modparam {
+       INTEL_DP_AUX_BACKLIGHT_AUTO = -1,
+       INTEL_DP_AUX_BACKLIGHT_OFF = 0,
+       INTEL_DP_AUX_BACKLIGHT_ON = 1,
+       INTEL_DP_AUX_BACKLIGHT_FORCE_VESA = 2,
+       INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL = 3,
+};
+
 /* Intel EDP backlight callbacks */
 static bool
 intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector)
@@ -126,6 +134,24 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector)
                return false;
        }
 
+       /*
+        * If we don't have HDR static metadata, there is no way to
+        * detect at runtime the range used for nits-based control. For
+        * now, do not use the Intel proprietary eDP backlight control
+        * if this data is missing from the panel EDID. If we ever find
+        * a panel that supports only nits-based control but doesn't
+        * provide HDR static metadata, we will need to start
+        * maintaining a table of ranges for such panels.
+        */
+       if (i915->params.enable_dpcd_backlight != INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL &&
+           !(connector->base.hdr_sink_metadata.hdmi_type1.metadata_type &
+             BIT(HDMI_STATIC_METADATA_TYPE1))) {
+               drm_info(&i915->drm,
+                        "Panel is missing HDR static metadata. Possible support for Intel HDR backlight interface is not used. If your backlight controls don't work, try booting with i915.enable_dpcd_backlight=%d. If your panel needs this, please file a _new_ bug report on drm/i915, see " FDO_BUG_URL " for details.\n",
+                        INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL);
+               return false;
+       }
+
        panel->backlight.edp.intel.sdr_uses_aux =
                tcon_cap[2] & INTEL_EDP_SDR_TCON_BRIGHTNESS_AUX_CAP;
 
@@ -413,14 +439,6 @@ static const struct intel_panel_bl_funcs intel_dp_vesa_bl_funcs = {
        .get = intel_dp_aux_vesa_get_backlight,
 };
 
-enum intel_dp_aux_backlight_modparam {
-       INTEL_DP_AUX_BACKLIGHT_AUTO = -1,
-       INTEL_DP_AUX_BACKLIGHT_OFF = 0,
-       INTEL_DP_AUX_BACKLIGHT_ON = 1,
-       INTEL_DP_AUX_BACKLIGHT_FORCE_VESA = 2,
-       INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL = 3,
-};
-
 int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector)
 {
        struct drm_device *dev = connector->base.dev;
index 87f4af3fd523ec0d1a9a2191aeec6bbd422c43db..3e61a89362453727c9c1dc95f43c059ec85354c8 100644 (file)
@@ -1037,7 +1037,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state,
        struct intel_plane_state *plane_state =
                intel_atomic_get_new_plane_state(state, plane);
        const struct drm_framebuffer *fb = plane_state->hw.fb;
-       struct intel_crtc *crtc = to_intel_crtc(plane_state->uapi.crtc);
+       struct intel_crtc *crtc = to_intel_crtc(plane_state->hw.crtc);
        const struct intel_crtc_state *crtc_state;
        struct intel_fbc *fbc = plane->fbc;
 
index bff8c2d73cdfb858c467a116d05f90b3a6f16143..6c9e6e7f0afd05f8db056eb1e70ff8306f85987b 100644 (file)
@@ -887,6 +887,20 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
                return false;
        }
 
+       /* Wa_16011303918:adl-p */
+       if (crtc_state->vrr.enable &&
+           IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) {
+               drm_dbg_kms(&dev_priv->drm,
+                           "PSR2 not enabled, not compatible with HW stepping + VRR\n");
+               return false;
+       }
+
+       if (!_compute_psr2_sdp_prior_scanline_indication(intel_dp, crtc_state)) {
+               drm_dbg_kms(&dev_priv->drm,
+                           "PSR2 not enabled, PSR2 SDP indication does not fit in hblank\n");
+               return false;
+       }
+
        if (HAS_PSR2_SEL_FETCH(dev_priv)) {
                if (!intel_psr2_sel_fetch_config_valid(intel_dp, crtc_state) &&
                    !HAS_PSR_HW_TRACKING(dev_priv)) {
@@ -900,12 +914,12 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
        if (!crtc_state->enable_psr2_sel_fetch &&
            IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) {
                drm_dbg_kms(&dev_priv->drm, "PSR2 HW tracking is not supported on this display stepping\n");
-               return false;
+               goto unsupported;
        }
 
        if (!psr2_granularity_check(intel_dp, crtc_state)) {
                drm_dbg_kms(&dev_priv->drm, "PSR2 not enabled, SU granularity not compatible\n");
-               return false;
+               goto unsupported;
        }
 
        if (!crtc_state->enable_psr2_sel_fetch &&
@@ -914,25 +928,15 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
                            "PSR2 not enabled, resolution %dx%d > max supported %dx%d\n",
                            crtc_hdisplay, crtc_vdisplay,
                            psr_max_h, psr_max_v);
-               return false;
-       }
-
-       if (!_compute_psr2_sdp_prior_scanline_indication(intel_dp, crtc_state)) {
-               drm_dbg_kms(&dev_priv->drm,
-                           "PSR2 not enabled, PSR2 SDP indication do not fit in hblank\n");
-               return false;
-       }
-
-       /* Wa_16011303918:adl-p */
-       if (crtc_state->vrr.enable &&
-           IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) {
-               drm_dbg_kms(&dev_priv->drm,
-                           "PSR2 not enabled, not compatible with HW stepping + VRR\n");
-               return false;
+               goto unsupported;
        }
 
        tgl_dc3co_exitline_compute_config(intel_dp, crtc_state);
        return true;
+
+unsupported:
+       crtc_state->enable_psr2_sel_fetch = false;
+       return false;
 }
 
 void intel_psr_compute_config(struct intel_dp *intel_dp,
index d42f437149c953577390f6efff9a60bdb2c62926..6ca8929cf6e128634e38fe5e136cfae98810823e 100644 (file)
@@ -1252,14 +1252,12 @@ static void *reloc_iomap(struct i915_vma *batch,
                 * Only attempt to pin the batch buffer to ggtt if the current batch
                 * is not inside ggtt, or the batch buffer is not misplaced.
                 */
-               if (!i915_is_ggtt(batch->vm)) {
+               if (!i915_is_ggtt(batch->vm) ||
+                   !i915_vma_misplaced(batch, 0, 0, PIN_MAPPABLE)) {
                        vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
                                                          PIN_MAPPABLE |
                                                          PIN_NONBLOCK /* NOWARN */ |
                                                          PIN_NOEVICT);
-               } else if (i915_vma_is_map_and_fenceable(batch)) {
-                       __i915_vma_pin(batch);
-                       vma = batch;
                }
 
                if (vma == ERR_PTR(-EDEADLK))
index c3ea243d414da3dd5757ead748145fd6367b7d19..0c5c43852e24dcbb69548481e1317da7460caff1 100644 (file)
@@ -70,7 +70,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
         * mmap ioctl is disallowed for all discrete platforms,
         * and for all platforms with GRAPHICS_VER > 12.
         */
-       if (IS_DGFX(i915) || GRAPHICS_VER(i915) > 12)
+       if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) > IP_VER(12, 0))
                return -EOPNOTSUPP;
 
        if (args->flags & ~(I915_MMAP_WC))
index 82713264b96c11bec1031f656f5315b677cc746d..b7c6d4462ec5555deed48e0b11f7f0516e1b8b2d 100644 (file)
@@ -806,7 +806,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
                __intel_engine_reset(engine, stalled_mask & engine->mask);
        local_bh_enable();
 
-       intel_uc_reset(&gt->uc, true);
+       intel_uc_reset(&gt->uc, ALL_ENGINES);
 
        intel_ggtt_restore_fences(gt->ggtt);
 
index bf7079480d472dd9a041b893311c0a825ef46ae3..2488d1197f3e5c5236679d072025cabb955768cb 100644 (file)
@@ -438,7 +438,7 @@ int intel_guc_global_policies_update(struct intel_guc *guc);
 void intel_guc_context_ban(struct intel_context *ce, struct i915_request *rq);
 
 void intel_guc_submission_reset_prepare(struct intel_guc *guc);
-void intel_guc_submission_reset(struct intel_guc *guc, bool stalled);
+void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled);
 void intel_guc_submission_reset_finish(struct intel_guc *guc);
 void intel_guc_submission_cancel_requests(struct intel_guc *guc);
 
index 1ce7e04aa837bab4c1fe894bbfb0613e97883123..28f9aac0201ddfa69b61e741b312c09b5c4340a9 100644 (file)
@@ -1590,9 +1590,9 @@ __unwind_incomplete_requests(struct intel_context *ce)
        spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
 
-static void __guc_reset_context(struct intel_context *ce, bool stalled)
+static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
 {
-       bool local_stalled;
+       bool guilty;
        struct i915_request *rq;
        unsigned long flags;
        u32 head;
@@ -1620,7 +1620,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
                if (!intel_context_is_pinned(ce))
                        goto next_context;
 
-               local_stalled = false;
+               guilty = false;
                rq = intel_context_find_active_request(ce);
                if (!rq) {
                        head = ce->ring->tail;
@@ -1628,14 +1628,14 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
                }
 
                if (i915_request_started(rq))
-                       local_stalled = true;
+                       guilty = stalled & ce->engine->mask;
 
                GEM_BUG_ON(i915_active_is_idle(&ce->active));
                head = intel_ring_wrap(ce->ring, rq->head);
 
-               __i915_request_reset(rq, local_stalled && stalled);
+               __i915_request_reset(rq, guilty);
 out_replay:
-               guc_reset_state(ce, head, local_stalled && stalled);
+               guc_reset_state(ce, head, guilty);
 next_context:
                if (i != number_children)
                        ce = list_next_entry(ce, parallel.child_link);
@@ -1645,7 +1645,7 @@ next_context:
        intel_context_put(parent);
 }
 
-void intel_guc_submission_reset(struct intel_guc *guc, bool stalled)
+void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
 {
        struct intel_context *ce;
        unsigned long index;
@@ -4013,7 +4013,7 @@ static void guc_context_replay(struct intel_context *ce)
 {
        struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
 
-       __guc_reset_context(ce, true);
+       __guc_reset_context(ce, ce->engine->mask);
        tasklet_hi_schedule(&sched_engine->tasklet);
 }
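With stalled now an engine mask rather than a bool, a request is only marked guilty when the reset actually covered the engine its context runs on; __guc_reset_context(ce, ce->engine->mask) and intel_uc_reset(&gt->uc, ALL_ENGINES) are the two extremes of that mask. A standalone sketch of the test (the mask values are illustrative, not real engine ids):

#include <stdio.h>

typedef unsigned int engine_mask_t;   /* stand-in for intel_engine_mask_t */
#define RCS0        (1u << 0)
#define VCS0        (1u << 1)
#define ALL_ENGINES (~0u)

int main(void)
{
	engine_mask_t stalled = RCS0;    /* only the render engine was reset */
	engine_mask_t ctx_engine = VCS0; /* this context runs on a video engine */

	int guilty = !!(stalled & ctx_engine);   /* mirrors: guilty = stalled & ce->engine->mask */
	printf("guilty=%d\n", guilty);           /* 0: request replayed rather than marked guilty */
	return 0;
}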
 
index da199aa6989fba35fcc129f48f8d86d5dd886ed7..8eb34de2f20c03612d030d4684a79faa1a223df5 100644 (file)
@@ -593,7 +593,7 @@ sanitize:
        __uc_sanitize(uc);
 }
 
-void intel_uc_reset(struct intel_uc *uc, bool stalled)
+void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled)
 {
        struct intel_guc *guc = &uc->guc;
 
index 866b462821c005314b771ce48d3c736cb10c6d3d..a8f38c2c60e23f2d5bb298aaead52a6214298032 100644 (file)
@@ -42,7 +42,7 @@ void intel_uc_driver_late_release(struct intel_uc *uc);
 void intel_uc_driver_remove(struct intel_uc *uc);
 void intel_uc_init_mmio(struct intel_uc *uc);
 void intel_uc_reset_prepare(struct intel_uc *uc);
-void intel_uc_reset(struct intel_uc *uc, bool stalled);
+void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled);
 void intel_uc_reset_finish(struct intel_uc *uc);
 void intel_uc_cancel_requests(struct intel_uc *uc);
 void intel_uc_suspend(struct intel_uc *uc);
index 3c87d77d2cf63156fd26ba013334b49a5347a7cf..fe960c2043621dfc0756e2baa45537afc9718c25 100644 (file)
 #define _DSPAADDR                              0x70184
 #define _DSPASTRIDE                            0x70188
 #define _DSPAPOS                               0x7018C /* reserved */
-#define   DISP_POS_Y_MASK              REG_GENMASK(31, 0)
+#define   DISP_POS_Y_MASK              REG_GENMASK(31, 16)
 #define   DISP_POS_Y(y)                        REG_FIELD_PREP(DISP_POS_Y_MASK, (y))
 #define   DISP_POS_X_MASK              REG_GENMASK(15, 0)
 #define   DISP_POS_X(x)                        REG_FIELD_PREP(DISP_POS_X_MASK, (x))
 #define _DSPASIZE                              0x70190
-#define   DISP_HEIGHT_MASK             REG_GENMASK(31, 0)
+#define   DISP_HEIGHT_MASK             REG_GENMASK(31, 16)
 #define   DISP_HEIGHT(h)               REG_FIELD_PREP(DISP_HEIGHT_MASK, (h))
 #define   DISP_WIDTH_MASK              REG_GENMASK(15, 0)
 #define   DISP_WIDTH(w)                        REG_FIELD_PREP(DISP_WIDTH_MASK, (w))
 #define _SEL_FETCH_PLANE_BASE_6_A              0x70940
 #define _SEL_FETCH_PLANE_BASE_7_A              0x70960
 #define _SEL_FETCH_PLANE_BASE_CUR_A            0x70880
-#define _SEL_FETCH_PLANE_BASE_1_B              0x70990
+#define _SEL_FETCH_PLANE_BASE_1_B              0x71890
 
 #define _SEL_FETCH_PLANE_BASE_A(plane) _PICK(plane, \
                                             _SEL_FETCH_PLANE_BASE_1_A, \
 /* MMIO address range for DMC program (0x80000 - 0x82FFF) */
 #define DMC_MMIO_START_RANGE   0x80000
 #define DMC_MMIO_END_RANGE     0x8FFFF
+#define DMC_V1_MMIO_START_RANGE        0x80000
+#define TGL_MAIN_MMIO_START    0x8F000
+#define TGL_MAIN_MMIO_END      0x8FFFF
+#define _TGL_PIPEA_MMIO_START  0x92000
+#define _TGL_PIPEA_MMIO_END    0x93FFF
+#define _TGL_PIPEB_MMIO_START  0x96000
+#define _TGL_PIPEB_MMIO_END    0x97FFF
+#define ADLP_PIPE_MMIO_START   0x5F000
+#define ADLP_PIPE_MMIO_END     0x5FFFF
+
+#define TGL_PIPE_MMIO_START(dmc_id)    _PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_START,\
+                                               _TGL_PIPEB_MMIO_START)
+
+#define TGL_PIPE_MMIO_END(dmc_id)      _PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_END,\
+                                               _TGL_PIPEB_MMIO_END)
+
 #define SKL_DMC_DC3_DC5_COUNT  _MMIO(0x80030)
 #define SKL_DMC_DC5_DC6_COUNT  _MMIO(0x8002C)
 #define BXT_DMC_DC3_DC5_COUNT  _MMIO(0x80038)
index 94fcdb7bd21d3c8e39642132768b6ded8b9e5a9e..eeaa8d0d0407572e8a0b0e7ee5ae99a0e5a1df8d 100644 (file)
@@ -1605,17 +1605,17 @@ void i915_vma_close(struct i915_vma *vma)
 
 static void __i915_vma_remove_closed(struct i915_vma *vma)
 {
-       struct intel_gt *gt = vma->vm->gt;
-
-       spin_lock_irq(&gt->closed_lock);
        list_del_init(&vma->closed_link);
-       spin_unlock_irq(&gt->closed_lock);
 }
 
 void i915_vma_reopen(struct i915_vma *vma)
 {
+       struct intel_gt *gt = vma->vm->gt;
+
+       spin_lock_irq(&gt->closed_lock);
        if (i915_vma_is_closed(vma))
                __i915_vma_remove_closed(vma);
+       spin_unlock_irq(&gt->closed_lock);
 }
 
 void i915_vma_release(struct kref *ref)
@@ -1641,6 +1641,7 @@ static void force_unbind(struct i915_vma *vma)
 static void release_references(struct i915_vma *vma)
 {
        struct drm_i915_gem_object *obj = vma->obj;
+       struct intel_gt *gt = vma->vm->gt;
 
        GEM_BUG_ON(i915_vma_is_active(vma));
 
@@ -1650,7 +1651,9 @@ static void release_references(struct i915_vma *vma)
                rb_erase(&vma->obj_node, &obj->vma.tree);
        spin_unlock(&obj->vma.lock);
 
+       spin_lock_irq(&gt->closed_lock);
        __i915_vma_remove_closed(vma);
+       spin_unlock_irq(&gt->closed_lock);
 
        __i915_vma_put(vma);
 }
index 87428fb23d9ffa5e53d8b20a76c3d71402265a77..a2277a0d6d06fb55f129531b1e7dd660e269113f 100644 (file)
@@ -222,6 +222,7 @@ static int dw_hdmi_imx_probe(struct platform_device *pdev)
        struct device_node *np = pdev->dev.of_node;
        const struct of_device_id *match = of_match_node(dw_hdmi_imx_dt_ids, np);
        struct imx_hdmi *hdmi;
+       int ret;
 
        hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL);
        if (!hdmi)
@@ -243,10 +244,15 @@ static int dw_hdmi_imx_probe(struct platform_device *pdev)
        hdmi->bridge = of_drm_find_bridge(np);
        if (!hdmi->bridge) {
                dev_err(hdmi->dev, "Unable to find bridge\n");
+               dw_hdmi_remove(hdmi->hdmi);
                return -ENODEV;
        }
 
-       return component_add(&pdev->dev, &dw_hdmi_imx_ops);
+       ret = component_add(&pdev->dev, &dw_hdmi_imx_ops);
+       if (ret)
+               dw_hdmi_remove(hdmi->hdmi);
+
+       return ret;
 }
 
 static int dw_hdmi_imx_remove(struct platform_device *pdev)
index e5078d03020d90e84b2ca6acbc877ae52234d4ac..fb0e951248f685ed0ace2f601c1fba6478db342f 100644 (file)
@@ -572,6 +572,8 @@ static int imx_ldb_panel_ddc(struct device *dev,
                edidp = of_get_property(child, "edid", &edid_len);
                if (edidp) {
                        channel->edid = kmemdup(edidp, edid_len, GFP_KERNEL);
+                       if (!channel->edid)
+                               return -ENOMEM;
                } else if (!channel->panel) {
                        /* fallback to display-timings node */
                        ret = of_get_drm_display_mode(child,
index 06cb1a59b9bcd6fd38f573abfdbca904f9a69cc2..63ba2ad84679183af60a635ea3f014f2f47d0bba 100644 (file)
@@ -75,8 +75,10 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector)
                ret = of_get_drm_display_mode(np, &imxpd->mode,
                                              &imxpd->bus_flags,
                                              OF_USE_NATIVE_MODE);
-               if (ret)
+               if (ret) {
+                       drm_mode_destroy(connector->dev, mode);
                        return ret;
+               }
 
                drm_mode_copy(mode, &imxpd->mode);
                mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
index 83c31b2ad865b19c62caab55a89130739e81f5b3..ccc4fcf7a630f49a62201927f871160803debc53 100644 (file)
@@ -1742,7 +1742,7 @@ a6xx_create_private_address_space(struct msm_gpu *gpu)
                return ERR_CAST(mmu);
 
        return msm_gem_address_space_create(mmu,
-               "gpu", 0x100000000ULL, 0x1ffffffffULL);
+               "gpu", 0x100000000ULL, SZ_4G);
 }
 
 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
index 89cfd84760d7e1887703f0f02c43d5621637ec63..8706bcdd1472e6c8e0371238e1efb53aedc04cd0 100644 (file)
@@ -599,43 +599,91 @@ static const struct of_device_id dt_match[] = {
        {}
 };
 
-#ifdef CONFIG_PM
-static int adreno_resume(struct device *dev)
+static int adreno_runtime_resume(struct device *dev)
 {
        struct msm_gpu *gpu = dev_to_gpu(dev);
 
        return gpu->funcs->pm_resume(gpu);
 }
 
-static int active_submits(struct msm_gpu *gpu)
+static int adreno_runtime_suspend(struct device *dev)
 {
-       int active_submits;
-       mutex_lock(&gpu->active_lock);
-       active_submits = gpu->active_submits;
-       mutex_unlock(&gpu->active_lock);
-       return active_submits;
+       struct msm_gpu *gpu = dev_to_gpu(dev);
+
+       /*
+        * We should be holding a runpm ref, which will prevent
+        * runtime suspend.  In the system suspend path, we've
+        * already waited for active jobs to complete.
+        */
+       WARN_ON_ONCE(gpu->active_submits);
+
+       return gpu->funcs->pm_suspend(gpu);
+}
+
+static void suspend_scheduler(struct msm_gpu *gpu)
+{
+       int i;
+
+       /*
+        * Shut down the scheduler before we force suspend, so that
+        * suspend isn't racing with scheduler kthread feeding us
+        * more work.
+        *
+        * Note, we just want to park the thread, and let any jobs
+        * that are already on the hw queue complete normally, as
+        * opposed to the drm_sched_stop() path used for handling
+        * faulting/timed-out jobs.  We can't really cancel any jobs
+        * already on the hw queue without racing with the GPU.
+        */
+       for (i = 0; i < gpu->nr_rings; i++) {
+               struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched;
+               kthread_park(sched->thread);
+       }
 }
 
-static int adreno_suspend(struct device *dev)
+static void resume_scheduler(struct msm_gpu *gpu)
+{
+       int i;
+
+       for (i = 0; i < gpu->nr_rings; i++) {
+               struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched;
+               kthread_unpark(sched->thread);
+       }
+}
+
+static int adreno_system_suspend(struct device *dev)
 {
        struct msm_gpu *gpu = dev_to_gpu(dev);
-       int remaining;
+       int remaining, ret;
+
+       suspend_scheduler(gpu);
 
        remaining = wait_event_timeout(gpu->retire_event,
-                                      active_submits(gpu) == 0,
+                                      gpu->active_submits == 0,
                                       msecs_to_jiffies(1000));
        if (remaining == 0) {
                dev_err(dev, "Timeout waiting for GPU to suspend\n");
-               return -EBUSY;
+               ret = -EBUSY;
+               goto out;
        }
 
-       return gpu->funcs->pm_suspend(gpu);
+       ret = pm_runtime_force_suspend(dev);
+out:
+       if (ret)
+               resume_scheduler(gpu);
+
+       return ret;
+}
+
+static int adreno_system_resume(struct device *dev)
+{
+       resume_scheduler(dev_to_gpu(dev));
+       return pm_runtime_force_resume(dev);
 }
-#endif
 
 static const struct dev_pm_ops adreno_pm_ops = {
-       SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
-       SET_RUNTIME_PM_OPS(adreno_suspend, adreno_resume, NULL)
+       SYSTEM_SLEEP_PM_OPS(adreno_system_suspend, adreno_system_resume)
+       RUNTIME_PM_OPS(adreno_runtime_suspend, adreno_runtime_resume, NULL)
 };
 
 static struct platform_driver adreno_driver = {
index c515b7cf922c8515753174429bef61c228dc872a..c61b5b283f08d8ad4da80608cb32c50ffb9c49ab 100644 (file)
@@ -54,87 +54,87 @@ struct dpu_intr_reg {
  * When making changes be sure to sync with dpu_hw_intr_reg
  */
 static const struct dpu_intr_reg dpu_intr_set[] = {
-       {
+       [MDP_SSPP_TOP0_INTR] = {
                MDP_SSPP_TOP0_OFF+INTR_CLEAR,
                MDP_SSPP_TOP0_OFF+INTR_EN,
                MDP_SSPP_TOP0_OFF+INTR_STATUS
        },
-       {
+       [MDP_SSPP_TOP0_INTR2] = {
                MDP_SSPP_TOP0_OFF+INTR2_CLEAR,
                MDP_SSPP_TOP0_OFF+INTR2_EN,
                MDP_SSPP_TOP0_OFF+INTR2_STATUS
        },
-       {
+       [MDP_SSPP_TOP0_HIST_INTR] = {
                MDP_SSPP_TOP0_OFF+HIST_INTR_CLEAR,
                MDP_SSPP_TOP0_OFF+HIST_INTR_EN,
                MDP_SSPP_TOP0_OFF+HIST_INTR_STATUS
        },
-       {
+       [MDP_INTF0_INTR] = {
                MDP_INTF_0_OFF+INTF_INTR_CLEAR,
                MDP_INTF_0_OFF+INTF_INTR_EN,
                MDP_INTF_0_OFF+INTF_INTR_STATUS
        },
-       {
+       [MDP_INTF1_INTR] = {
                MDP_INTF_1_OFF+INTF_INTR_CLEAR,
                MDP_INTF_1_OFF+INTF_INTR_EN,
                MDP_INTF_1_OFF+INTF_INTR_STATUS
        },
-       {
+       [MDP_INTF2_INTR] = {
                MDP_INTF_2_OFF+INTF_INTR_CLEAR,
                MDP_INTF_2_OFF+INTF_INTR_EN,
                MDP_INTF_2_OFF+INTF_INTR_STATUS
        },
-       {
+       [MDP_INTF3_INTR] = {
                MDP_INTF_3_OFF+INTF_INTR_CLEAR,
                MDP_INTF_3_OFF+INTF_INTR_EN,
                MDP_INTF_3_OFF+INTF_INTR_STATUS
        },
-       {
+       [MDP_INTF4_INTR] = {
                MDP_INTF_4_OFF+INTF_INTR_CLEAR,
                MDP_INTF_4_OFF+INTF_INTR_EN,
                MDP_INTF_4_OFF+INTF_INTR_STATUS
        },
-       {
+       [MDP_INTF5_INTR] = {
                MDP_INTF_5_OFF+INTF_INTR_CLEAR,
                MDP_INTF_5_OFF+INTF_INTR_EN,
                MDP_INTF_5_OFF+INTF_INTR_STATUS
        },
-       {
+       [MDP_AD4_0_INTR] = {
                MDP_AD4_0_OFF + MDP_AD4_INTR_CLEAR_OFF,
                MDP_AD4_0_OFF + MDP_AD4_INTR_EN_OFF,
                MDP_AD4_0_OFF + MDP_AD4_INTR_STATUS_OFF,
        },
-       {
+       [MDP_AD4_1_INTR] = {
                MDP_AD4_1_OFF + MDP_AD4_INTR_CLEAR_OFF,
                MDP_AD4_1_OFF + MDP_AD4_INTR_EN_OFF,
                MDP_AD4_1_OFF + MDP_AD4_INTR_STATUS_OFF,
        },
-       {
+       [MDP_INTF0_7xxx_INTR] = {
                MDP_INTF_0_OFF_REV_7xxx+INTF_INTR_CLEAR,
                MDP_INTF_0_OFF_REV_7xxx+INTF_INTR_EN,
                MDP_INTF_0_OFF_REV_7xxx+INTF_INTR_STATUS
        },
-       {
+       [MDP_INTF1_7xxx_INTR] = {
                MDP_INTF_1_OFF_REV_7xxx+INTF_INTR_CLEAR,
                MDP_INTF_1_OFF_REV_7xxx+INTF_INTR_EN,
                MDP_INTF_1_OFF_REV_7xxx+INTF_INTR_STATUS
        },
-       {
+       [MDP_INTF2_7xxx_INTR] = {
                MDP_INTF_2_OFF_REV_7xxx+INTF_INTR_CLEAR,
                MDP_INTF_2_OFF_REV_7xxx+INTF_INTR_EN,
                MDP_INTF_2_OFF_REV_7xxx+INTF_INTR_STATUS
        },
-       {
+       [MDP_INTF3_7xxx_INTR] = {
                MDP_INTF_3_OFF_REV_7xxx+INTF_INTR_CLEAR,
                MDP_INTF_3_OFF_REV_7xxx+INTF_INTR_EN,
                MDP_INTF_3_OFF_REV_7xxx+INTF_INTR_STATUS
        },
-       {
+       [MDP_INTF4_7xxx_INTR] = {
                MDP_INTF_4_OFF_REV_7xxx+INTF_INTR_CLEAR,
                MDP_INTF_4_OFF_REV_7xxx+INTF_INTR_EN,
                MDP_INTF_4_OFF_REV_7xxx+INTF_INTR_STATUS
        },
-       {
+       [MDP_INTF5_7xxx_INTR] = {
                MDP_INTF_5_OFF_REV_7xxx+INTF_INTR_CLEAR,
                MDP_INTF_5_OFF_REV_7xxx+INTF_INTR_EN,
                MDP_INTF_5_OFF_REV_7xxx+INTF_INTR_STATUS
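Converting dpu_intr_set[] to designated initializers ties each register triplet to its MDP_*_INTR enumerator, so a reordered or sparse enum can no longer silently shift every following entry. A minimal illustration of the property (the enum and offsets are made up):

#include <stdio.h>

enum irq_reg { REG_TOP, REG_INTF0, REG_INTF1, REG_MAX };

/* Entries land at their enum index regardless of the order they are written. */
static const unsigned int clear_off[REG_MAX] = {
	[REG_INTF1] = 0x6b800,
	[REG_TOP]   = 0x1010,
	[REG_INTF0] = 0x6a800,
};

int main(void)
{
	printf("INTF0 clear offset: 0x%x\n", clear_off[REG_INTF0]);   /* 0x6a800 */
	return 0;
}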
index 1ee824600995805c96fc777f6d793440d75eef90..c478d25f7825a8a9d48293086bdbcc90731a8200 100644 (file)
@@ -98,7 +98,10 @@ static void mdp5_plane_reset(struct drm_plane *plane)
                __drm_atomic_helper_plane_destroy_state(plane->state);
 
        kfree(to_mdp5_plane_state(plane->state));
+       plane->state = NULL;
        mdp5_state = kzalloc(sizeof(*mdp5_state), GFP_KERNEL);
+       if (!mdp5_state)
+               return;
        __drm_atomic_helper_plane_reset(plane, &mdp5_state->base);
 }
 
index 5d2ff679105869a5d626dc13a82a2caad5e91294..acfe1b31e0792eabc697d41e4d1ee9e2dfe54b7f 100644 (file)
@@ -176,6 +176,8 @@ void msm_disp_snapshot_add_block(struct msm_disp_state *disp_state, u32 len,
        va_list va;
 
        new_blk = kzalloc(sizeof(struct msm_disp_state_block), GFP_KERNEL);
+       if (!new_blk)
+               return;
 
        va_start(va, fmt);
 
index f1418722c54928a4b8896501f60f487cceea610f..26f4b6959c31df25b94cb4b6703584e1bd159ec6 100644 (file)
@@ -206,17 +206,6 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
                        rc = -ETIMEDOUT;
                        goto end;
                }
-
-               /* fail safe edid */
-               mutex_lock(&connector->dev->mode_config.mutex);
-               if (drm_add_modes_noedid(connector, 640, 480))
-                       drm_set_preferred_mode(connector, 640, 480);
-               mutex_unlock(&connector->dev->mode_config.mutex);
-       } else {
-               /* always add fail-safe mode as backup mode */
-               mutex_lock(&connector->dev->mode_config.mutex);
-               drm_add_modes_noedid(connector, 640, 480);
-               mutex_unlock(&connector->dev->mode_config.mutex);
        }
 
        if (panel->aux_cfg_update_done) {
index 0c1b7dde377c949e2e4fc685716b97e7df8d49d8..9f6af0f0fe0053a049fe335ef5a32e02c716b7d3 100644 (file)
@@ -638,7 +638,7 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id)
        return connector;
 
 fail:
-       connector->funcs->destroy(msm_dsi->connector);
+       connector->funcs->destroy(connector);
        return ERR_PTR(ret);
 }
 
index 02b9ae65a96a8a56c17632db9942a20b4ed5916b..a4f61972667b52140ad98f767b97aa55484b4eca 100644 (file)
@@ -926,6 +926,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m,
                                        get_pid_task(aspace->pid, PIDTYPE_PID);
                                if (task) {
                                        comm = kstrdup(task->comm, GFP_KERNEL);
+                                       put_task_struct(task);
                                } else {
                                        comm = NULL;
                                }
index daf9f87477ba13cf3e6c1c3bdd24ff027a42a115..a2141d3d9b1d2bbc3048f6e397a358a915b1058d 100644 (file)
@@ -46,8 +46,9 @@ static bool
 nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE],
                           struct nouveau_backlight *bl)
 {
-       const int nb = ida_simple_get(&bl_ida, 0, 0, GFP_KERNEL);
-       if (nb < 0 || nb >= 100)
+       const int nb = ida_alloc_max(&bl_ida, 99, GFP_KERNEL);
+
+       if (nb < 0)
                return false;
        if (nb > 0)
                snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight%d", nb);
@@ -414,7 +415,7 @@ nouveau_backlight_init(struct drm_connector *connector)
                                            nv_encoder, ops, &props);
        if (IS_ERR(bl->dev)) {
                if (bl->id >= 0)
-                       ida_simple_remove(&bl_ida, bl->id);
+                       ida_free(&bl_ida, bl->id);
                ret = PTR_ERR(bl->dev);
                goto fail_alloc;
        }
@@ -442,7 +443,7 @@ nouveau_backlight_fini(struct drm_connector *connector)
                return;
 
        if (bl->id >= 0)
-               ida_simple_remove(&bl_ida, bl->id);
+               ida_free(&bl_ida, bl->id);
 
        backlight_device_unregister(bl->dev);
        nv_conn->backlight = NULL;
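ida_simple_get()/ida_simple_remove() are deprecated wrappers; ida_alloc_max() allocates an ID in the inclusive range [0, max] (hence 99 for at most 100 backlights) and returns a negative errno on failure, which is why the old explicit `nb >= 100` check disappears. Usage in isolation:

	#include <linux/idr.h>

	static DEFINE_IDA(example_ida);

	int id = ida_alloc_max(&example_ida, 99, GFP_KERNEL); /* ID in [0, 99] */
	if (id < 0)
		return id;	/* -ENOMEM or -ENOSPC */
	/* ... use id ... */
	ida_free(&example_ida, id);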
index 992cc285f2fecfb7c126fdc59e107e4cb415a904..2ed528c065fae6ba09d801a1401aa72fd0fa505d 100644 (file)
@@ -123,7 +123,7 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
 
        mutex_init(&tdev->iommu.mutex);
 
-       if (iommu_present(&platform_bus_type)) {
+       if (device_iommu_mapped(dev)) {
                tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type);
                if (!tdev->iommu.domain)
                        goto error;
index e1772211b0a4b1567456e0eea30f5db88155f739..612310d5d4812eb056d6a9097acb64beb09c7cdc 100644 (file)
@@ -216,6 +216,7 @@ gm20b_pmu = {
        .intr = gt215_pmu_intr,
        .recv = gm20b_pmu_recv,
        .initmsg = gm20b_pmu_initmsg,
+       .reset = gf100_pmu_reset,
 };
 
 #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
index 6bf7fc1bd1e3b1deb1808286b748eaef39b301f5..1a6f9c3af5ecde69df97d5892d3cd9dc926ae857 100644 (file)
@@ -23,7 +23,7 @@
  */
 #include "priv.h"
 
-static void
+void
 gp102_pmu_reset(struct nvkm_pmu *pmu)
 {
        struct nvkm_device *device = pmu->subdev.device;
index ba1583bb618b2ef8379dcf5cfa99b0ccfdbd264e..94cfb1791af6ea50665b5a1a4a84b406538b096e 100644 (file)
@@ -83,6 +83,7 @@ gp10b_pmu = {
        .intr = gt215_pmu_intr,
        .recv = gm20b_pmu_recv,
        .initmsg = gm20b_pmu_initmsg,
+       .reset = gp102_pmu_reset,
 };
 
 #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
index bcaade758ff728bd528fafb262fb608f0b08a09a..21abf31f44420247efd4ffb0c95ab3a988cab7f5 100644 (file)
@@ -41,6 +41,7 @@ int gt215_pmu_send(struct nvkm_pmu *, u32[2], u32, u32, u32, u32);
 
 bool gf100_pmu_enabled(struct nvkm_pmu *);
 void gf100_pmu_reset(struct nvkm_pmu *);
+void gp102_pmu_reset(struct nvkm_pmu *pmu);
 
 void gk110_pmu_pgob(struct nvkm_pmu *, bool);
 
index a07ef26234e57ccf71b5bc000b15772cc1601d15..6826f4d4826a4b12520c813da4540f47f2296efd 100644 (file)
@@ -612,8 +612,10 @@ static int ili9341_dbi_probe(struct spi_device *spi, struct gpio_desc *dc,
        int ret;
 
        vcc = devm_regulator_get_optional(dev, "vcc");
-       if (IS_ERR(vcc))
+       if (IS_ERR(vcc)) {
                dev_err(dev, "get optional vcc failed\n");
+               vcc = NULL;
+       }
 
        dbidev = devm_drm_dev_alloc(dev, &ili9341_dbi_driver,
                                    struct mipi_dbi_dev, drm);
index 46029c5610c80814682a7c7fc32d1e2bf08ff42b..145047e193946a5e4f852802d87c2903029edd20 100644 (file)
@@ -229,7 +229,7 @@ static void rpi_touchscreen_i2c_write(struct rpi_touchscreen *ts,
 
        ret = i2c_smbus_write_byte_data(ts->i2c, reg, val);
        if (ret)
-               dev_err(&ts->dsi->dev, "I2C write failed: %d\n", ret);
+               dev_err(&ts->i2c->dev, "I2C write failed: %d\n", ret);
 }
 
 static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val)
@@ -265,7 +265,7 @@ static int rpi_touchscreen_noop(struct drm_panel *panel)
        return 0;
 }
 
-static int rpi_touchscreen_enable(struct drm_panel *panel)
+static int rpi_touchscreen_prepare(struct drm_panel *panel)
 {
        struct rpi_touchscreen *ts = panel_to_ts(panel);
        int i;
@@ -295,6 +295,13 @@ static int rpi_touchscreen_enable(struct drm_panel *panel)
        rpi_touchscreen_write(ts, DSI_STARTDSI, 0x01);
        msleep(100);
 
+       return 0;
+}
+
+static int rpi_touchscreen_enable(struct drm_panel *panel)
+{
+       struct rpi_touchscreen *ts = panel_to_ts(panel);
+
        /* Turn on the backlight. */
        rpi_touchscreen_i2c_write(ts, REG_PWM, 255);
 
@@ -349,7 +356,7 @@ static int rpi_touchscreen_get_modes(struct drm_panel *panel,
 static const struct drm_panel_funcs rpi_touchscreen_funcs = {
        .disable = rpi_touchscreen_disable,
        .unprepare = rpi_touchscreen_noop,
-       .prepare = rpi_touchscreen_noop,
+       .prepare = rpi_touchscreen_prepare,
        .enable = rpi_touchscreen_enable,
        .get_modes = rpi_touchscreen_get_modes,
 };
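The split matters because of when the DRM core invokes each hook: .prepare runs before the host starts the video stream and .enable after it is running, so panel/bridge initialization belongs in .prepare while the backlight should only come on once valid frames are being scanned out. The resulting call order, sketched:

	/* Driven by the DRM core / DSI host, conceptually: */
	drm_panel_prepare(panel); /* rpi_touchscreen_prepare(): bridge + DSI setup */
	/* ... host enables the video stream ... */
	drm_panel_enable(panel);  /* rpi_touchscreen_enable(): backlight on */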
index b991ba1bcd51308d84afff666d4f56d2d4d91941..f63efd8d5e524b42eb65745e25b310cfee2c7209 100644 (file)
@@ -96,7 +96,7 @@ int radeon_sync_resv(struct radeon_device *rdev,
        struct dma_fence *f;
        int r = 0;
 
-       dma_resv_for_each_fence(&cursor, resv, shared, f) {
+       dma_resv_for_each_fence(&cursor, resv, !shared, f) {
                fence = to_radeon_fence(f);
                if (fence && fence->rdev == rdev)
                        radeon_sync_fence(sync, fence);
index 56ae38389db0b25b79d02aab3b941f27a4a0ac5c..462fae73eae98e13a9c09502513135afbdcbb1c8 100644 (file)
@@ -222,13 +222,11 @@ void sun4i_frontend_update_buffer(struct sun4i_frontend *frontend,
 
        /* Set the physical address of the buffer in memory */
        paddr = drm_fb_cma_get_gem_addr(fb, state, 0);
-       paddr -= PHYS_OFFSET;
        DRM_DEBUG_DRIVER("Setting buffer #0 address to %pad\n", &paddr);
        regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR0_REG, paddr);
 
        if (fb->format->num_planes > 1) {
                paddr = drm_fb_cma_get_gem_addr(fb, state, swap ? 2 : 1);
-               paddr -= PHYS_OFFSET;
                DRM_DEBUG_DRIVER("Setting buffer #1 address to %pad\n", &paddr);
                regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR1_REG,
                             paddr);
@@ -236,7 +234,6 @@ void sun4i_frontend_update_buffer(struct sun4i_frontend *frontend,
 
        if (fb->format->num_planes > 2) {
                paddr = drm_fb_cma_get_gem_addr(fb, state, swap ? 1 : 2);
-               paddr -= PHYS_OFFSET;
                DRM_DEBUG_DRIVER("Setting buffer #2 address to %pad\n", &paddr);
                regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR2_REG,
                             paddr);
index de3424fed2fc7cc1fb88073b039eb8affcddec7b..6cf2621786e684dbd637b5376e3711a35eccc9ff 100644 (file)
@@ -2,6 +2,9 @@
 config DRM_VC4
        tristate "Broadcom VC4 Graphics"
        depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST
+       # Make sure not 'y' when RASPBERRYPI_FIRMWARE is 'm'. This can only
+       # happen when COMPILE_TEST=y, hence the added !RASPBERRYPI_FIRMWARE.
+       depends on RASPBERRYPI_FIRMWARE || (COMPILE_TEST && !RASPBERRYPI_FIRMWARE)
        depends on DRM
        depends on SND && SND_SOC
        depends on COMMON_CLK
index 752f921735c67c9a3d4a159c07a8ec1acf8a2de6..98308a17e4ed708b3a9d38cef5b5a06beb001219 100644 (file)
@@ -846,7 +846,7 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder)
        unsigned long phy_clock;
        int ret;
 
-       ret = pm_runtime_get_sync(dev);
+       ret = pm_runtime_resume_and_get(dev);
        if (ret) {
                DRM_ERROR("Failed to runtime PM enable on DSI%d\n", dsi->variant->port);
                return;
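pm_runtime_get_sync() increments the usage counter even when the resume fails, so callers that simply return on error leak a reference. pm_runtime_resume_and_get() is roughly equivalent to:

	ret = pm_runtime_get_sync(dev);		/* usage count always bumped */
	if (ret < 0) {
		pm_runtime_put_noidle(dev);	/* undo the bump on failure */
		return ret;
	}
	return 0;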
index 6c58b0fd13fbb002cdecbb10618493b6e6ee8eb5..98b78ec6b37d6d9ed49acff047eb6ad614720669 100644 (file)
@@ -38,6 +38,7 @@
 #include <drm/drm_scdc_helper.h>
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/of_address.h>
 #include <linux/of_gpio.h>
index 31aecc46624b3fa27fea8e5d3bb29b145e7810e8..04c8a378aeed6c3644dbaf8003ae37d72cf903c7 100644 (file)
@@ -46,6 +46,21 @@ vmw_buffer_object(struct ttm_buffer_object *bo)
        return container_of(bo, struct vmw_buffer_object, base);
 }
 
+/**
+ * bo_is_vmw - check if the buffer object is a &vmw_buffer_object
+ * @bo: ttm buffer object to be checked
+ *
+ * Uses the destroy function associated with the object to determine if this is
+ * a &vmw_buffer_object.
+ *
+ * Returns:
+ * true if the object is of &vmw_buffer_object type, false if not.
+ */
+static bool bo_is_vmw(struct ttm_buffer_object *bo)
+{
+       return bo->destroy == &vmw_bo_bo_free ||
+              bo->destroy == &vmw_gem_destroy;
+}
 
 /**
  * vmw_bo_pin_in_placement - Validate a buffer to placement.
@@ -615,8 +630,9 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data,
 
                ret = vmw_user_bo_synccpu_grab(vbo, arg->flags);
                vmw_bo_unreference(&vbo);
-               if (unlikely(ret != 0 && ret != -ERESTARTSYS &&
-                            ret != -EBUSY)) {
+               if (unlikely(ret != 0)) {
+                       if (ret == -ERESTARTSYS || ret == -EBUSY)
+                               return -EBUSY;
                        DRM_ERROR("Failed synccpu grab on handle 0x%08x.\n",
                                  (unsigned int) arg->handle);
                        return ret;
@@ -798,7 +814,7 @@ int vmw_dumb_create(struct drm_file *file_priv,
 void vmw_bo_swap_notify(struct ttm_buffer_object *bo)
 {
        /* Is @bo embedded in a struct vmw_buffer_object? */
-       if (vmw_bo_is_vmw_bo(bo))
+       if (!bo_is_vmw(bo))
                return;
 
        /* Kill any cached kernel maps before swapout */
@@ -822,7 +838,7 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo,
        struct vmw_buffer_object *vbo;
 
        /* Make sure @bo is embedded in a struct vmw_buffer_object. */
-       if (vmw_bo_is_vmw_bo(bo))
+       if (!bo_is_vmw(bo))
                return;
 
        vbo = container_of(bo, struct vmw_buffer_object, base);
@@ -843,22 +859,3 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo,
        if (mem->mem_type != VMW_PL_MOB && bo->resource->mem_type == VMW_PL_MOB)
                vmw_resource_unbind_list(vbo);
 }
-
-/**
- * vmw_bo_is_vmw_bo - check if the buffer object is a &vmw_buffer_object
- * @bo: buffer object to be checked
- *
- * Uses destroy function associated with the object to determine if this is
- * a &vmw_buffer_object.
- *
- * Returns:
- * true if the object is of &vmw_buffer_object type, false if not.
- */
-bool vmw_bo_is_vmw_bo(struct ttm_buffer_object *bo)
-{
-       if (bo->destroy == &vmw_bo_bo_free ||
-           bo->destroy == &vmw_gem_destroy)
-               return true;
-
-       return false;
-}
index a3bfbb6c3e14aa30a6431293c46252294b5668ac..162dfeb1cc5ada1bd580cf5bf40c3afbd72b7e5c 100644 (file)
@@ -528,7 +528,7 @@ int vmw_cmd_send_fence(struct vmw_private *dev_priv, uint32_t *seqno)
                *seqno = atomic_add_return(1, &dev_priv->marker_seq);
        } while (*seqno == 0);
 
-       if (!(vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_FENCE)) {
+       if (!vmw_has_fences(dev_priv)) {
 
                /*
                 * Don't request hardware to send a fence. The
@@ -675,11 +675,14 @@ int vmw_cmd_emit_dummy_query(struct vmw_private *dev_priv,
  */
 bool vmw_cmd_supported(struct vmw_private *vmw)
 {
-       if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS |
-                                 SVGA_CAP_CMD_BUFFERS_2)) != 0)
-               return true;
+       bool has_cmdbufs =
+               (vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS |
+                                     SVGA_CAP_CMD_BUFFERS_2)) != 0;
+       if (vmw_is_svga_v3(vmw))
+               return (has_cmdbufs &&
+                       (vmw->capabilities & SVGA_CAP_GBOBJECTS) != 0);
        /*
         * We have FIFO cmds
         */
-       return vmw->fifo_mem != NULL;
+       return has_cmdbufs || vmw->fifo_mem != NULL;
 }
index 26eb5478394aa0b1bd923141074c8ab97af458e5..163c00793eb1c727bba3d06ab3a5ab2ec0f61108 100644 (file)
@@ -998,13 +998,10 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id)
                goto out_no_fman;
        }
 
-       drm_vma_offset_manager_init(&dev_priv->vma_manager,
-                                   DRM_FILE_PAGE_OFFSET_START,
-                                   DRM_FILE_PAGE_OFFSET_SIZE);
        ret = ttm_device_init(&dev_priv->bdev, &vmw_bo_driver,
                              dev_priv->drm.dev,
                              dev_priv->drm.anon_inode->i_mapping,
-                             &dev_priv->vma_manager,
+                             dev_priv->drm.vma_offset_manager,
                              dev_priv->map_mode == vmw_dma_alloc_coherent,
                              false);
        if (unlikely(ret != 0)) {
@@ -1174,7 +1171,6 @@ static void vmw_driver_unload(struct drm_device *dev)
        vmw_devcaps_destroy(dev_priv);
        vmw_vram_manager_fini(dev_priv);
        ttm_device_fini(&dev_priv->bdev);
-       drm_vma_offset_manager_destroy(&dev_priv->vma_manager);
        vmw_release_device_late(dev_priv);
        vmw_fence_manager_takedown(dev_priv->fman);
        if (dev_priv->capabilities & SVGA_CAP_IRQMASK)
@@ -1398,7 +1394,7 @@ vmw_get_unmapped_area(struct file *file, unsigned long uaddr,
        struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
 
        return drm_get_unmapped_area(file, uaddr, len, pgoff, flags,
-                                    &dev_priv->vma_manager);
+                                    dev_priv->drm.vma_offset_manager);
 }
 
 static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
index ea3ecdda561dc3fe9a2e565e1273b167f138edb7..6de0b9ef5c7734bc58f852ddb2ac0982923d5386 100644 (file)
@@ -1679,4 +1679,12 @@ static inline void vmw_irq_status_write(struct vmw_private *vmw,
                outl(status, vmw->io_start + SVGA_IRQSTATUS_PORT);
 }
 
+static inline bool vmw_has_fences(struct vmw_private *vmw)
+{
+       if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS |
+                                 SVGA_CAP_CMD_BUFFERS_2)) != 0)
+               return true;
+       return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0;
+}
+
 #endif
index 8ee34576c7d08ac66632beadf513ecbc4c811fba..adf17c740656d72f0c8752095b583977f1a0a7db 100644 (file)
@@ -483,7 +483,7 @@ static int vmw_fb_kms_detach(struct vmw_fb_par *par,
 
 static int vmw_fb_kms_framebuffer(struct fb_info *info)
 {
-       struct drm_mode_fb_cmd2 mode_cmd;
+       struct drm_mode_fb_cmd2 mode_cmd = {0};
        struct vmw_fb_par *par = info->par;
        struct fb_var_screeninfo *var = &info->var;
        struct drm_framebuffer *cur_fb;
index 59d6a2dd4c2e41e0eb7084afb0c4ae4ef5c711c6..66cc35dc223e709089e5d2b4051658bc1a335d9d 100644 (file)
@@ -82,6 +82,22 @@ fman_from_fence(struct vmw_fence_obj *fence)
        return container_of(fence->base.lock, struct vmw_fence_manager, lock);
 }
 
+static u32 vmw_fence_goal_read(struct vmw_private *vmw)
+{
+       if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0)
+               return vmw_read(vmw, SVGA_REG_FENCE_GOAL);
+       else
+               return vmw_fifo_mem_read(vmw, SVGA_FIFO_FENCE_GOAL);
+}
+
+static void vmw_fence_goal_write(struct vmw_private *vmw, u32 value)
+{
+       if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0)
+               vmw_write(vmw, SVGA_REG_FENCE_GOAL, value);
+       else
+               vmw_fifo_mem_write(vmw, SVGA_FIFO_FENCE_GOAL, value);
+}
+
 /*
  * Note on fencing subsystem usage of irqs:
  * Typically the vmw_fences_update function is called
@@ -392,7 +408,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
        if (likely(!fman->seqno_valid))
                return false;
 
-       goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL);
+       goal_seqno = vmw_fence_goal_read(fman->dev_priv);
        if (likely(passed_seqno - goal_seqno >= VMW_FENCE_WRAP))
                return false;
 
@@ -400,9 +416,8 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
        list_for_each_entry(fence, &fman->fence_list, head) {
                if (!list_empty(&fence->seq_passed_actions)) {
                        fman->seqno_valid = true;
-                       vmw_fifo_mem_write(fman->dev_priv,
-                                          SVGA_FIFO_FENCE_GOAL,
-                                          fence->base.seqno);
+                       vmw_fence_goal_write(fman->dev_priv,
+                                            fence->base.seqno);
                        break;
                }
        }
@@ -434,13 +449,12 @@ static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence)
        if (dma_fence_is_signaled_locked(&fence->base))
                return false;
 
-       goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL);
+       goal_seqno = vmw_fence_goal_read(fman->dev_priv);
        if (likely(fman->seqno_valid &&
                   goal_seqno - fence->base.seqno < VMW_FENCE_WRAP))
                return false;
 
-       vmw_fifo_mem_write(fman->dev_priv, SVGA_FIFO_FENCE_GOAL,
-                          fence->base.seqno);
+       vmw_fence_goal_write(fman->dev_priv, fence->base.seqno);
        fman->seqno_valid = true;
 
        return true;
index c5191de365ca1c1e5268654817113974372f565d..fe4732bf2c9d23a4d22cf856b46031455d9e1ba4 100644 (file)
 
 #define VMW_FENCE_WRAP (1 << 24)
 
+static u32 vmw_irqflag_fence_goal(struct vmw_private *vmw)
+{
+       if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0)
+               return SVGA_IRQFLAG_REG_FENCE_GOAL;
+       else
+               return SVGA_IRQFLAG_FENCE_GOAL;
+}
+
 /**
  * vmw_thread_fn - Deferred (process context) irq handler
  *
@@ -96,7 +104,7 @@ static irqreturn_t vmw_irq_handler(int irq, void *arg)
                wake_up_all(&dev_priv->fifo_queue);
 
        if ((masked_status & (SVGA_IRQFLAG_ANY_FENCE |
-                             SVGA_IRQFLAG_FENCE_GOAL)) &&
+                             vmw_irqflag_fence_goal(dev_priv))) &&
            !test_and_set_bit(VMW_IRQTHREAD_FENCE, dev_priv->irqthread_pending))
                ret = IRQ_WAKE_THREAD;
 
@@ -137,8 +145,7 @@ bool vmw_seqno_passed(struct vmw_private *dev_priv,
        if (likely(dev_priv->last_read_seqno - seqno < VMW_FENCE_WRAP))
                return true;
 
-       if (!(vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_FENCE) &&
-           vmw_fifo_idle(dev_priv, seqno))
+       if (!vmw_has_fences(dev_priv) && vmw_fifo_idle(dev_priv, seqno))
                return true;
 
        /**
@@ -160,6 +167,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
                      unsigned long timeout)
 {
        struct vmw_fifo_state *fifo_state = dev_priv->fifo;
+       bool fifo_down = false;
 
        uint32_t count = 0;
        uint32_t signal_seq;
@@ -176,12 +184,14 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
         */
 
        if (fifo_idle) {
-               down_read(&fifo_state->rwsem);
                if (dev_priv->cman) {
                        ret = vmw_cmdbuf_idle(dev_priv->cman, interruptible,
                                              10*HZ);
                        if (ret)
                                goto out_err;
+               } else if (fifo_state) {
+                       down_read(&fifo_state->rwsem);
+                       fifo_down = true;
                }
        }
 
@@ -218,12 +228,12 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
                }
        }
        finish_wait(&dev_priv->fence_queue, &__wait);
-       if (ret == 0 && fifo_idle)
+       if (ret == 0 && fifo_idle && fifo_state)
                vmw_fence_write(dev_priv, signal_seq);
 
        wake_up_all(&dev_priv->fence_queue);
 out_err:
-       if (fifo_idle)
+       if (fifo_down)
                up_read(&fifo_state->rwsem);
 
        return ret;
@@ -266,13 +276,13 @@ void vmw_seqno_waiter_remove(struct vmw_private *dev_priv)
 
 void vmw_goal_waiter_add(struct vmw_private *dev_priv)
 {
-       vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_FENCE_GOAL,
+       vmw_generic_waiter_add(dev_priv, vmw_irqflag_fence_goal(dev_priv),
                               &dev_priv->goal_queue_waiters);
 }
 
 void vmw_goal_waiter_remove(struct vmw_private *dev_priv)
 {
-       vmw_generic_waiter_remove(dev_priv, SVGA_IRQFLAG_FENCE_GOAL,
+       vmw_generic_waiter_remove(dev_priv, vmw_irqflag_fence_goal(dev_priv),
                                  &dev_priv->goal_queue_waiters);
 }
 
index bbd2f4ec08ec13f95e718d6434b3d11664f6f8f2..93431e8f6606014ef33f660ed3879383385a4065 100644 (file)
@@ -1344,7 +1344,6 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv,
                ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb,
                                                      mode_cmd,
                                                      is_bo_proxy);
-
                /*
                 * vmw_create_bo_proxy() adds a reference that is no longer
                 * needed
@@ -1385,13 +1384,16 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
        ret = vmw_user_lookup_handle(dev_priv, file_priv,
                                     mode_cmd->handles[0],
                                     &surface, &bo);
-       if (ret)
+       if (ret) {
+               DRM_ERROR("Invalid buffer object handle %u (0x%x).\n",
+                         mode_cmd->handles[0], mode_cmd->handles[0]);
                goto err_out;
+       }
 
 
        if (!bo &&
            !vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)) {
-               DRM_ERROR("Surface size cannot exceed %dx%d",
+               DRM_ERROR("Surface size cannot exceed %dx%d\n",
                        dev_priv->texture_max_width,
                        dev_priv->texture_max_height);
                goto err_out;
index 00e8e27e48846206244a14c49447f0b092348d6e..ace7ca150b0362ec69c4ec22530ad19d7ccc3ae7 100644 (file)
@@ -683,6 +683,9 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base)
            container_of(base, struct vmw_user_surface, prime.base);
        struct vmw_resource *res = &user_srf->srf.res;
 
+       if (base->shareable && res && res->backup)
+               drm_gem_object_put(&res->backup->base.base);
+
        *p_base = NULL;
        vmw_resource_unreference(&res);
 }
@@ -857,6 +860,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
                        goto out_unlock;
                }
                vmw_bo_reference(res->backup);
+               drm_gem_object_get(&res->backup->base.base);
        }
 
        tmp = vmw_resource_reference(&srf->res);
@@ -1513,7 +1517,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
                                                        &res->backup);
                if (ret == 0)
                        vmw_bo_reference(res->backup);
-
        }
 
        if (unlikely(ret != 0)) {
@@ -1561,6 +1564,8 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
                        drm_vma_node_offset_addr(&res->backup->base.base.vma_node);
                rep->buffer_size = res->backup->base.base.size;
                rep->buffer_handle = backup_handle;
+               if (user_srf->prime.base.shareable)
+                       drm_gem_object_get(&res->backup->base.base);
        } else {
                rep->buffer_map_handle = 0;
                rep->buffer_size = 0;
index 666223c6bec4d52da44b38d3a0263f19bae2f4a6..0a34e0ab4fe60e7d4caad5d9c0272c3b4a391d05 100644 (file)
@@ -447,8 +447,9 @@ static void ipu_di_config_clock(struct ipu_di *di,
 
                error = rate / (sig->mode.pixelclock / 1000);
 
-               dev_dbg(di->ipu->dev, "  IPU clock can give %lu with divider %u, error %d.%u%%\n",
-                       rate, div, (signed)(error - 1000) / 10, error % 10);
+               dev_dbg(di->ipu->dev, "  IPU clock can give %lu with divider %u, error %c%d.%d%%\n",
+                       rate, div, error < 1000 ? '-' : '+',
+                       abs(error - 1000) / 10, abs(error - 1000) % 10);
 
                /* Allow a 1% error */
                if (error < 1010 && error >= 990) {
index 26d269ba947c85c5a87fe4d9ba1743ca6ccaa348..85a2142c9384007d2bc43a1f072ceb7b96b0c53b 100644 (file)
@@ -380,7 +380,7 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel)
         * execute:
         *
         *  (a) In the "normal (i.e., not resuming from hibernation)" path,
-        *      the full barrier in smp_store_mb() guarantees that the store
+        *      the full barrier in virt_store_mb() guarantees that the store
         *      is propagated to all CPUs before the add_channel_work work
         *      is queued.  In turn, add_channel_work is queued before the
         *      channel's ring buffer is allocated/initialized and the
@@ -392,14 +392,14 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel)
         *      recv_int_page before retrieving the channel pointer from the
         *      array of channels.
         *
-        *  (b) In the "resuming from hibernation" path, the smp_store_mb()
+        *  (b) In the "resuming from hibernation" path, the virt_store_mb()
         *      guarantees that the store is propagated to all CPUs before
         *      the VMBus connection is marked as ready for the resume event
         *      (cf. check_ready_for_resume_event()).  The interrupt handler
         *      of the VMBus driver and vmbus_chan_sched() can not run before
         *      vmbus_bus_resume() has completed execution (cf. resume_noirq).
         */
-       smp_store_mb(
+       virt_store_mb(
                vmbus_connection.channels[channel->offermsg.child_relid],
                channel);
 }
index 439f99b8b5de269bdc2c0574d4eaae2521e9d50f..3248b48f37f6128aa1dd58ff6db3582edaa5f362 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/kthread.h>
 #include <linux/completion.h>
+#include <linux/count_zeros.h>
 #include <linux/memory_hotplug.h>
 #include <linux/memory.h>
 #include <linux/notifier.h>
@@ -1130,6 +1131,7 @@ static void post_status(struct hv_dynmem_device *dm)
        struct dm_status status;
        unsigned long now = jiffies;
        unsigned long last_post = last_post_time;
+       unsigned long num_pages_avail, num_pages_committed;
 
        if (pressure_report_delay > 0) {
                --pressure_report_delay;
@@ -1154,16 +1156,21 @@ static void post_status(struct hv_dynmem_device *dm)
         * num_pages_onlined) as committed to the host, otherwise it can try
         * asking us to balloon them out.
         */
-       status.num_avail = si_mem_available();
-       status.num_committed = vm_memory_committed() +
+       num_pages_avail = si_mem_available();
+       num_pages_committed = vm_memory_committed() +
                dm->num_pages_ballooned +
                (dm->num_pages_added > dm->num_pages_onlined ?
                 dm->num_pages_added - dm->num_pages_onlined : 0) +
                compute_balloon_floor();
 
-       trace_balloon_status(status.num_avail, status.num_committed,
+       trace_balloon_status(num_pages_avail, num_pages_committed,
                             vm_memory_committed(), dm->num_pages_ballooned,
                             dm->num_pages_added, dm->num_pages_onlined);
+
+       /* Convert numbers of pages into numbers of HV_HYP_PAGEs. */
+       status.num_avail = num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE;
+       status.num_committed = num_pages_committed * NR_HV_HYP_PAGES_IN_PAGE;
+
        /*
         * If our transaction ID is no longer current, just don't
         * send the status. This can happen if we were interrupted
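The host-side protocol counts 4 KiB Hyper-V pages, while the guest may run with a larger PAGE_SIZE (64 KiB on some ARM64 configurations), so the raw guest page counts are scaled before being reported. With the usual definition NR_HV_HYP_PAGES_IN_PAGE = PAGE_SIZE / HV_HYP_PAGE_SIZE, for example:

	/* 64 KiB guest pages, 4 KiB Hyper-V pages:
	 *   NR_HV_HYP_PAGES_IN_PAGE = 65536 / 4096 = 16
	 * so 1000 free guest pages are reported as 16000 HV pages. */
	status.num_avail = num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE;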
@@ -1653,6 +1660,38 @@ static void disable_page_reporting(void)
        }
 }
 
+static int ballooning_enabled(void)
+{
+       /*
+        * Disable ballooning if the page size is not 4k (HV_HYP_PAGE_SIZE),
+        * since currently it's unclear to us whether an unballoon request can
+        * make sure all page ranges are guest page size aligned.
+        */
+       if (PAGE_SIZE != HV_HYP_PAGE_SIZE) {
+               pr_info("Ballooning disabled because page size is not 4096 bytes\n");
+               return 0;
+       }
+
+       return 1;
+}
+
+static int hot_add_enabled(void)
+{
+       /*
+        * Disable hot add on ARM64, because we currently rely on
+        * memory_add_physaddr_to_nid() to get a node id of a hot add range,
+        * however ARM64's memory_add_physaddr_to_nid() always returns 0 and
+        * DM_MEM_HOT_ADD_REQUEST doesn't have the NUMA node information for
+        * add_memory().
+        */
+       if (IS_ENABLED(CONFIG_ARM64)) {
+               pr_info("Memory hot add disabled on ARM64\n");
+               return 0;
+       }
+
+       return 1;
+}
+
 static int balloon_connect_vsp(struct hv_device *dev)
 {
        struct dm_version_request version_req;
@@ -1724,8 +1763,8 @@ static int balloon_connect_vsp(struct hv_device *dev)
         * currently still requires the bits to be set, so we have to add code
         * to fail the host's hot-add and balloon up/down requests, if any.
         */
-       cap_msg.caps.cap_bits.balloon = 1;
-       cap_msg.caps.cap_bits.hot_add = 1;
+       cap_msg.caps.cap_bits.balloon = ballooning_enabled();
+       cap_msg.caps.cap_bits.hot_add = hot_add_enabled();
 
        /*
         * Specify our alignment requirements as it relates
index c1dd21d0d7ef8d1ba96523b602ba71a48f0963b1..ae68298c0dcac59add8948c0510499b6958ea434 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/panic_notifier.h>
 #include <linux/ptrace.h>
 #include <linux/slab.h>
+#include <linux/dma-map-ops.h>
 #include <asm/hyperv-tlfs.h>
 #include <asm/mshyperv.h>
 
@@ -218,6 +219,16 @@ bool hv_query_ext_cap(u64 cap_query)
 }
 EXPORT_SYMBOL_GPL(hv_query_ext_cap);
 
+void hv_setup_dma_ops(struct device *dev, bool coherent)
+{
+       /*
+        * Hyper-V does not offer a vIOMMU in the guest
+        * VM, so pass 0/NULL for the IOMMU settings
+        */
+       arch_setup_dma_ops(dev, 0, 0, NULL, coherent);
+}
+EXPORT_SYMBOL_GPL(hv_setup_dma_ops);
+
 bool hv_is_hibernation_supported(void)
 {
        return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4);
index 71efacb90965946f12e6513efc0a9f05bd8ba89f..3d215d9dec433b33417f69de5513c1b231a27e36 100644 (file)
@@ -439,7 +439,16 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
 static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
 {
        u32 priv_read_loc = rbi->priv_read_index;
-       u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index);
+       u32 write_loc;
+
+       /*
+        * The Hyper-V host writes the packet data, then uses
+        * store_release() to update the write_index.  Use load_acquire()
+        * here to prevent loads of the packet data from being re-ordered
+        * before the read of the write_index and potentially getting
+        * stale data.
+        */
+       write_loc = virt_load_acquire(&rbi->ring_buffer->write_index);
 
        if (write_loc >= priv_read_loc)
                return write_loc - priv_read_loc;
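The acquire pairs with the host's release: the producer publishes the packet bytes first and only then store-releases write_index; the consumer load-acquires write_index before touching the bytes. Sketched with the virt_* barrier helpers:

	/* Producer (host side), conceptually: */
	memcpy(&ring[write_loc], pkt, len);	/* 1: publish the data      */
	virt_store_release(&rbi->ring_buffer->write_index, new_index); /* 2 */

	/* Consumer (guest): */
	write_loc = virt_load_acquire(&rbi->ring_buffer->write_index);
	/* reads of ring data after this point cannot be hoisted above it */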
index 60ee8b329f9e374787d20b6e4f33831fdbad0426..14de17087864ac3a0d1efa475d8a44fb4d684cae 100644 (file)
@@ -77,8 +77,8 @@ static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
 
        /*
         * Hyper-V should be notified only once about a panic.  If we will be
-        * doing hyperv_report_panic_msg() later with kmsg data, don't do
-        * the notification here.
+        * doing hv_kmsg_dump() with kmsg data later, don't do the notification
+        * here.
         */
        if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE
            && hyperv_report_reg()) {
@@ -100,8 +100,8 @@ static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
 
        /*
         * Hyper-V should be notified only once about a panic.  If we will be
-        * doing hyperv_report_panic_msg() later with kmsg data, don't do
-        * the notification here.
+        * doing hv_kmsg_dump() with kmsg data later, don't do the notification
+        * here.
         */
        if (hyperv_report_reg())
                hyperv_report_panic(regs, val, true);
@@ -920,6 +920,21 @@ static int vmbus_probe(struct device *child_device)
        return ret;
 }
 
+/*
+ * vmbus_dma_configure -- Configure DMA coherence for VMbus device
+ */
+static int vmbus_dma_configure(struct device *child_device)
+{
+       /*
+        * On ARM64, propagate the DMA coherence setting from the top level
+        * VMbus ACPI device to the child VMbus device being added here.
+        * On x86/x64 coherence is assumed and these calls have no effect.
+        */
+       hv_setup_dma_ops(child_device,
+               device_get_dma_attr(&hv_acpi_dev->dev) == DEV_DMA_COHERENT);
+       return 0;
+}
+
 /*
  * vmbus_remove - Remove a vmbus device
  */
@@ -1040,6 +1055,7 @@ static struct bus_type  hv_bus = {
        .remove =               vmbus_remove,
        .probe =                vmbus_probe,
        .uevent =               vmbus_uevent,
+       .dma_configure =        vmbus_dma_configure,
        .dev_groups =           vmbus_dev_groups,
        .drv_groups =           vmbus_drv_groups,
        .bus_groups =           vmbus_bus_groups,
@@ -1546,14 +1562,20 @@ static int vmbus_bus_init(void)
        if (ret)
                goto err_connect;
 
+       if (hv_is_isolation_supported())
+               sysctl_record_panic_msg = 0;
+
        /*
         * Only register if the crash MSRs are available
         */
        if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
                u64 hyperv_crash_ctl;
                /*
-                * Sysctl registration is not fatal, since by default
-                * reporting is enabled.
+                * Panic message recording (sysctl_record_panic_msg)
+                * is enabled by default in non-isolated guests and
+                * disabled by default in isolated guests; the panic
+                * message recording won't be available in isolated
+                * guests should the following registration fail.
                 */
                hv_ctl_table_hdr = register_sysctl_table(hv_root_table);
                if (!hv_ctl_table_hdr)
@@ -2097,6 +2119,10 @@ int vmbus_device_register(struct hv_device *child_device_obj)
        child_device_obj->device.parent = &hv_acpi_dev->dev;
        child_device_obj->device.release = vmbus_device_release;
 
+       child_device_obj->device.dma_parms = &child_device_obj->dma_parms;
+       child_device_obj->device.dma_mask = &child_device_obj->dma_mask;
+       dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64));
+
        /*
         * Register with the LDM. This will kick off the driver/device
         * binding...which will eventually call vmbus_match() and vmbus_probe()
@@ -2122,9 +2148,6 @@ int vmbus_device_register(struct hv_device *child_device_obj)
        }
        hv_debug_add_dev_dir(child_device_obj);
 
-       child_device_obj->device.dma_parms = &child_device_obj->dma_parms;
-       child_device_obj->device.dma_mask = &child_device_obj->dma_mask;
-       dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64));
        return 0;
 
 err_kset_unregister:
@@ -2428,6 +2451,21 @@ static int vmbus_acpi_add(struct acpi_device *device)
 
        hv_acpi_dev = device;
 
+       /*
+        * Older versions of Hyper-V for ARM64 fail to include the _CCA
+        * method on the top level VMbus device in the DSDT. But devices
+        * are hardware coherent in all current Hyper-V use cases, so fix
+        * up the ACPI device to behave as if _CCA is present and indicates
+        * hardware coherence.
+        */
+       ACPI_COMPANION_SET(&device->dev, device);
+       if (IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) &&
+           device_get_dma_attr(&device->dev) == DEV_DMA_NOT_SUPPORTED) {
+               pr_info("No ACPI _CCA found; assuming coherent device I/O\n");
+               device->flags.cca_seen = true;
+               device->flags.coherent_dma = true;
+       }
+
        result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
                                        vmbus_walk_resources, NULL);
 
@@ -2780,10 +2818,15 @@ static void __exit vmbus_exit(void)
        if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
                kmsg_dump_unregister(&hv_kmsg_dumper);
                unregister_die_notifier(&hyperv_die_block);
-               atomic_notifier_chain_unregister(&panic_notifier_list,
-                                                &hyperv_panic_block);
        }
 
+       /*
+        * The panic notifier is always registered, hence we should
+        * also unconditionally unregister it here as well.
+        */
+       atomic_notifier_chain_unregister(&panic_notifier_list,
+                                        &hyperv_panic_block);
+
        free_page((unsigned long)hv_panic_page);
        unregister_sysctl_table(hv_ctl_table_hdr);
        hv_ctl_table_hdr = NULL;
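The driver core invokes a bus's .dma_configure() while binding a driver, before .probe(), so the child's DMA ops are in place by the time the driver runs. A hedged sketch of the propagation for a hypothetical bus:

	static int example_dma_configure(struct device *dev)
	{
		/* Inherit the parent's coherence attribute. */
		bool coherent =
			device_get_dma_attr(dev->parent) == DEV_DMA_COHERENT;

		hv_setup_dma_ops(dev, coherent); /* wraps arch_setup_dma_ops() */
		return 0;
	}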
index 68a8a27ab3b7091d3d36974faa3eabb0b145c90a..f2b038fa3b84c35b3b6c98fa547023d6f8246b41 100644 (file)
@@ -960,7 +960,7 @@ config SENSORS_LTC4261
 
 config SENSORS_LTQ_CPUTEMP
        bool "Lantiq cpu temperature sensor driver"
-       depends on LANTIQ
+       depends on SOC_XWAY
        help
          If you say yes here you get support for the temperature
          sensor inside your CPU.
index fb6d14d213a18c8595e8111f6c0c72890e1c0d61..c67cd037a93fde44ee7e4d9d0fc1e53b642c27a6 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/log2.h>
 #include <linux/kthread.h>
 #include <linux/regmap.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/util_macros.h>
 
@@ -294,11 +295,10 @@ static int adt7470_update_thread(void *p)
                adt7470_read_temperatures(data);
                mutex_unlock(&data->lock);
 
-               set_current_state(TASK_INTERRUPTIBLE);
                if (kthread_should_stop())
                        break;
 
-               schedule_timeout(msecs_to_jiffies(data->auto_update_interval));
+               schedule_timeout_interruptible(msecs_to_jiffies(data->auto_update_interval));
        }
 
        return 0;
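schedule_timeout_interruptible() folds the task-state handling into the sleep itself, so a break between the state change and the sleep can no longer leave the task in TASK_INTERRUPTIBLE. It is simply:

	signed long schedule_timeout_interruptible(signed long timeout)
	{
		__set_current_state(TASK_INTERRUPTIBLE);
		return schedule_timeout(timeout);
	}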
index 8fdcb62ae52de2e97bc378df100199b86025279d..9e935e34c9983b8726d28dd17703fcb5b24506eb 100644 (file)
@@ -71,7 +71,7 @@ static const struct dmi_system_id asus_wmi_dmi_table[] = {
        DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X399-A"),
        DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X470-PRO"),
        DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI EXTREME"),
-       DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO"),
+       DMI_EXACT_MATCH_ASUS_BOARD_NAME("CROSSHAIR VI HERO"),
        DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO (WI-FI AC)"),
        DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO"),
        DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO (WI-FI)"),
index 938a8b9ec70dd337b0ea5b7b8c900b51c4dba907..6830e029995dc5caea75550a260eaccb24c274c3 100644 (file)
@@ -1578,8 +1578,9 @@ static ssize_t show_temp(struct device *dev, struct device_attribute *devattr,
                temp *= 125;
                if (sign)
                        temp -= 128000;
-       } else
-               temp = data->temp[nr] * 1000;
+       } else {
+               temp = ((s8)data->temp[nr]) * 1000;
+       }
 
        return sprintf(buf, "%d\n", temp);
 }
index 40dffd9c4cbfc09be0d1be76546b583fda6efd9e..f546f0c12497b01812b6d50f4bed2d29ef01dddb 100644 (file)
 
 #define AHE50DC_PMBUS_READ_TEMP4 0xd0
 
+static int ahe50dc_fan_write_byte(struct i2c_client *client, int page, u8 value)
+{
+       /*
+        * The CLEAR_FAULTS operation seems to sometimes (unpredictably, perhaps
+        * 5% of the time or so) trigger a problematic phenomenon in which the
+        * fan speeds surge momentarily and at least some (perhaps all?) of the
+        * system's power outputs experience a glitch.
+        *
+        * However, according to Delta it should be OK to simply not send any
+        * CLEAR_FAULTS commands (the device doesn't seem to be capable of
+        * reporting any faults anyway), so just blackhole them unconditionally.
+        */
+       return value == PMBUS_CLEAR_FAULTS ? -EOPNOTSUPP : -ENODATA;
+}
+
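The return values follow the pmbus_core convention for optional driver hooks: -ENODATA means "not handled here, perform the default register access", while any other negative value is propagated as a real error, which is how CLEAR_FAULTS gets suppressed. Roughly, in pmbus_core's _pmbus_write_byte():

	if (info->write_byte) {
		status = info->write_byte(client, page, value);
		if (status != -ENODATA)
			return status;	/* handled, or a hard error */
	}
	/* -ENODATA falls through to the generic SMBus write */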
 static int ahe50dc_fan_read_word_data(struct i2c_client *client, int page, int phase, int reg)
 {
        /* temp1 in (virtual) page 1 is remapped to mfr-specific temp4 */
@@ -68,6 +83,7 @@ static struct pmbus_driver_info ahe50dc_fan_info = {
                PMBUS_HAVE_VIN | PMBUS_HAVE_FAN12 | PMBUS_HAVE_FAN34 |
                PMBUS_HAVE_STATUS_FAN12 | PMBUS_HAVE_STATUS_FAN34 | PMBUS_PAGE_VIRTUAL,
        .func[1] = PMBUS_HAVE_TEMP | PMBUS_PAGE_VIRTUAL,
+       .write_byte = ahe50dc_fan_write_byte,
        .read_word_data = ahe50dc_fan_read_word_data,
 };
 
index b2618b1d529e2d86836364c66f5801aeefa2d466..d93574d6a1fb638cb9d0567ff0ba30db9824ed96 100644 (file)
@@ -2326,6 +2326,9 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
                data->has_status_word = true;
        }
 
+       /* Make sure PEC is disabled, will be enabled later if needed */
+       client->flags &= ~I2C_CLIENT_PEC;
+
        /* Enable PEC if the controller and bus supports it */
        if (!(data->flags & PMBUS_NO_CAPABILITY)) {
                ret = i2c_smbus_read_byte_data(client, PMBUS_CAPABILITY);
index 18fffc5d749bd98c4cff3ff6bd4a05bfcd3452c1..32bc7736d609e38565b85d0ce1640b8e408a8e5a 100644 (file)
@@ -124,7 +124,7 @@ static int xdpe122_identify(struct i2c_client *client,
        return 0;
 }
 
-static const struct regulator_desc xdpe122_reg_desc[] = {
+static const struct regulator_desc __maybe_unused xdpe122_reg_desc[] = {
        PMBUS_REGULATOR("vout", 0),
        PMBUS_REGULATOR("vout", 1),
 };
index b86d9df7105d107878f3618b5c7122452b97b6da..52c9e7d3f2ae7898e60e8fc053103af532cf5730 100644 (file)
@@ -708,10 +708,21 @@ static int tmp401_probe(struct i2c_client *client)
        return 0;
 }
 
+static const struct of_device_id __maybe_unused tmp4xx_of_match[] = {
+       { .compatible = "ti,tmp401", },
+       { .compatible = "ti,tmp411", },
+       { .compatible = "ti,tmp431", },
+       { .compatible = "ti,tmp432", },
+       { .compatible = "ti,tmp435", },
+       { },
+};
+MODULE_DEVICE_TABLE(of, tmp4xx_of_match);
+
 static struct i2c_driver tmp401_driver = {
        .class          = I2C_CLASS_HWMON,
        .driver = {
                .name   = "tmp401",
+               .of_match_table = of_match_ptr(tmp4xx_of_match),
        },
        .probe_new      = tmp401_probe,
        .id_table       = tmp401_id,
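The __maybe_unused annotation is needed because of_match_ptr() evaluates to NULL when CONFIG_OF is disabled, leaving tmp4xx_of_match otherwise unreferenced. From <linux/of.h>, approximately:

	#ifdef CONFIG_OF
	#define of_match_ptr(ptr)	(ptr)
	#else
	#define of_match_ptr(ptr)	NULL	/* table then unreferenced */
	#endif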
index 27f969b3dc072b1946775e5209e3e1b2f80bcfd4..e9e2db68b9fb62f1bc3e47bf0770827667cc5063 100644 (file)
@@ -179,6 +179,12 @@ struct imx_i2c_hwdata {
        unsigned int            ndivs;
        unsigned int            i2sr_clr_opcode;
        unsigned int            i2cr_ien_opcode;
+       /*
+        * Errata ERR007805 or e7805:
+        * I2C: When the I2C clock speed is configured for 400 kHz,
+        * the SCL low period violates the I2C spec of 1.3 uS min.
+        */
+       bool                    has_err007805;
 };
 
 struct imx_i2c_dma {
@@ -240,6 +246,16 @@ static const struct imx_i2c_hwdata imx21_i2c_hwdata = {
 
 };
 
+static const struct imx_i2c_hwdata imx6_i2c_hwdata = {
+       .devtype                = IMX21_I2C,
+       .regshift               = IMX_I2C_REGSHIFT,
+       .clk_div                = imx_i2c_clk_div,
+       .ndivs                  = ARRAY_SIZE(imx_i2c_clk_div),
+       .i2sr_clr_opcode        = I2SR_CLR_OPCODE_W0C,
+       .i2cr_ien_opcode        = I2CR_IEN_OPCODE_1,
+       .has_err007805          = true,
+};
+
 static struct imx_i2c_hwdata vf610_i2c_hwdata = {
        .devtype                = VF610_I2C,
        .regshift               = VF610_I2C_REGSHIFT,
@@ -266,6 +282,16 @@ MODULE_DEVICE_TABLE(platform, imx_i2c_devtype);
 static const struct of_device_id i2c_imx_dt_ids[] = {
        { .compatible = "fsl,imx1-i2c", .data = &imx1_i2c_hwdata, },
        { .compatible = "fsl,imx21-i2c", .data = &imx21_i2c_hwdata, },
+       { .compatible = "fsl,imx6q-i2c", .data = &imx6_i2c_hwdata, },
+       { .compatible = "fsl,imx6sl-i2c", .data = &imx6_i2c_hwdata, },
+       { .compatible = "fsl,imx6sll-i2c", .data = &imx6_i2c_hwdata, },
+       { .compatible = "fsl,imx6sx-i2c", .data = &imx6_i2c_hwdata, },
+       { .compatible = "fsl,imx6ul-i2c", .data = &imx6_i2c_hwdata, },
+       { .compatible = "fsl,imx7s-i2c", .data = &imx6_i2c_hwdata, },
+       { .compatible = "fsl,imx8mm-i2c", .data = &imx6_i2c_hwdata, },
+       { .compatible = "fsl,imx8mn-i2c", .data = &imx6_i2c_hwdata, },
+       { .compatible = "fsl,imx8mp-i2c", .data = &imx6_i2c_hwdata, },
+       { .compatible = "fsl,imx8mq-i2c", .data = &imx6_i2c_hwdata, },
        { .compatible = "fsl,vf610-i2c", .data = &vf610_i2c_hwdata, },
        { /* sentinel */ }
 };
@@ -551,6 +577,13 @@ static void i2c_imx_set_clk(struct imx_i2c_struct *i2c_imx,
        unsigned int div;
        int i;
 
+       if (i2c_imx->hwdata->has_err007805 && i2c_imx->bitrate > 384000) {
+               dev_dbg(&i2c_imx->adapter.dev,
+                       "SoC errata ERR007805 or e7805 applies, bus frequency limited from %d Hz to 384000 Hz.\n",
+                       i2c_imx->bitrate);
+               i2c_imx->bitrate = 384000;
+       }
+
        /* Divider value calculation */
        if (i2c_imx->cur_clk == i2c_clk_rate)
                return;
index f4820fd3dc13ebe7f23547ee039f049117064b89..c16157ee8c52039171f886c5920e32827991526b 100644 (file)
@@ -82,6 +82,7 @@
 
 #define ISMT_DESC_ENTRIES      2       /* number of descriptor entries */
 #define ISMT_MAX_RETRIES       3       /* number of SMBus retries to attempt */
+#define ISMT_LOG_ENTRIES       3       /* number of interrupt cause log entries */
 
 /* Hardware Descriptor Constants - Control Field */
 #define ISMT_DESC_CWRL 0x01    /* Command/Write Length */
 #define ISMT_SPGT_SPD_MASK     0xc0000000      /* SMBus Speed mask */
 #define ISMT_SPGT_SPD_80K      0x00            /* 80 kHz */
 #define ISMT_SPGT_SPD_100K     (0x1 << 30)     /* 100 kHz */
-#define ISMT_SPGT_SPD_400K     (0x2 << 30)     /* 400 kHz */
-#define ISMT_SPGT_SPD_1M       (0x3 << 30)     /* 1 MHz */
+#define ISMT_SPGT_SPD_400K     (0x2U << 30)    /* 400 kHz */
+#define ISMT_SPGT_SPD_1M       (0x3U << 30)    /* 1 MHz */
 
 
 /* MSI Control Register (MSICTL) bit definitions */
@@ -175,6 +176,8 @@ struct ismt_priv {
        u8 head;                                /* ring buffer head pointer */
        struct completion cmp;                  /* interrupt completion */
        u8 buffer[I2C_SMBUS_BLOCK_MAX + 16];    /* temp R/W data buffer */
+       dma_addr_t log_dma;
+       u32 *log;
 };
 
 static const struct pci_device_id ismt_ids[] = {
@@ -411,6 +414,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
        memset(desc, 0, sizeof(struct ismt_desc));
        desc->tgtaddr_rw = ISMT_DESC_ADDR_RW(addr, read_write);
 
+       /* Always clear the log entries */
+       memset(priv->log, 0, ISMT_LOG_ENTRIES * sizeof(u32));
+
        /* Initialize common control bits */
        if (likely(pci_dev_msi_enabled(priv->pci_dev)))
                desc->control = ISMT_DESC_INT | ISMT_DESC_FAIR;
@@ -708,6 +714,8 @@ static void ismt_hw_init(struct ismt_priv *priv)
        /* initialize the Master Descriptor Base Address (MDBA) */
        writeq(priv->io_rng_dma, priv->smba + ISMT_MSTR_MDBA);
 
+       writeq(priv->log_dma, priv->smba + ISMT_GR_SMTICL);
+
        /* initialize the Master Control Register (MCTRL) */
        writel(ISMT_MCTRL_MEIE, priv->smba + ISMT_MSTR_MCTRL);
 
@@ -795,6 +803,12 @@ static int ismt_dev_init(struct ismt_priv *priv)
        priv->head = 0;
        init_completion(&priv->cmp);
 
+       priv->log = dmam_alloc_coherent(&priv->pci_dev->dev,
+                                       ISMT_LOG_ENTRIES * sizeof(u32),
+                                       &priv->log_dma, GFP_KERNEL);
+       if (!priv->log)
+               return -ENOMEM;
+
        return 0;
 }
 
index 45fe4a7fe0c039cbba187a46b2ce5d8a40a58541..901f0fb04fee4ba3d09f2ce9650d93f45f75d98f 100644 (file)
@@ -304,7 +304,8 @@ static int mtk_i2c_probe(struct platform_device *pdev)
 
        if (i2c->bus_freq == 0) {
                dev_warn(i2c->dev, "clock-frequency 0 not supported\n");
-               return -EINVAL;
+               ret = -EINVAL;
+               goto err_disable_clk;
        }
 
        adap = &i2c->adap;
@@ -322,10 +323,15 @@ static int mtk_i2c_probe(struct platform_device *pdev)
 
        ret = i2c_add_adapter(adap);
        if (ret < 0)
-               return ret;
+               goto err_disable_clk;
 
        dev_info(&pdev->dev, "clock %u kHz\n", i2c->bus_freq / 1000);
 
+       return 0;
+
+err_disable_clk:
+       clk_disable_unprepare(i2c->clk);
+
        return ret;
 }
 
index 7728c8460dc0fd3744a46ecaf0fb088c625a1481..9028ffb58cc079697f6acaed7714cba5cf0bbf27 100644 (file)
@@ -137,6 +137,12 @@ static int pasemi_i2c_xfer_msg(struct i2c_adapter *adapter,
 
                TXFIFO_WR(smbus, msg->buf[msg->len-1] |
                          (stop ? MTXFIFO_STOP : 0));
+
+               if (stop) {
+                       err = pasemi_smb_waitready(smbus);
+                       if (err)
+                               goto reset_out;
+               }
        }
 
        return 0;
index fc1dcc19f2a170f16636e96bea37c24cfbdda9c0..5b920f0fc7dd79c5033d3182cbbc27f46e0bdb64 100644 (file)
@@ -843,10 +843,8 @@ static int geni_i2c_probe(struct platform_device *pdev)
                /* FIFO is disabled, so we can only use GPI DMA */
                gi2c->gpi_mode = true;
                ret = setup_gpi_dma(gi2c);
-               if (ret) {
-                       dev_err(dev, "Failed to setup GPI DMA mode:%d ret\n", ret);
-                       return ret;
-               }
+               if (ret)
+                       return dev_err_probe(dev, ret, "Failed to setup GPI DMA mode\n");
 
                dev_dbg(dev, "Using GPI DMA mode for I2C\n");
        } else {
index 12c90aa0900e60b63e0a14215f3692c12876c9cf..a77cd86fe75ed7401bc041b27c651b9fedf67285 100644 (file)
@@ -213,6 +213,7 @@ static int thunder_i2c_probe_pci(struct pci_dev *pdev,
        i2c->adap.bus_recovery_info = &octeon_i2c_recovery_info;
        i2c->adap.dev.parent = dev;
        i2c->adap.dev.of_node = pdev->dev.of_node;
+       i2c->adap.dev.fwnode = dev->fwnode;
        snprintf(i2c->adap.name, sizeof(i2c->adap.name),
                 "Cavium ThunderX i2c adapter at %s", dev_name(dev));
        i2c_set_adapdata(&i2c->adap, i2c);
index cf5d049342ead2ca707475196083b813b0ccb5af..ab0adaa130dae31da7658798481694628af270af 100644 (file)
@@ -557,7 +557,7 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo
                                .addr = umsg.addr,
                                .flags = umsg.flags,
                                .len = umsg.len,
-                               .buf = compat_ptr(umsg.buf)
+                               .buf = (__force __u8 *)compat_ptr(umsg.buf),
                        };
                }
 
@@ -668,16 +668,21 @@ static int i2cdev_attach_adapter(struct device *dev, void *dummy)
        i2c_dev->dev.class = i2c_dev_class;
        i2c_dev->dev.parent = &adap->dev;
        i2c_dev->dev.release = i2cdev_dev_release;
-       dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr);
+
+       res = dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr);
+       if (res)
+               goto err_put_i2c_dev;
 
        res = cdev_device_add(&i2c_dev->cdev, &i2c_dev->dev);
-       if (res) {
-               put_i2c_dev(i2c_dev, false);
-               return res;
-       }
+       if (res)
+               goto err_put_i2c_dev;
 
        pr_debug("adapter [%s] registered as minor %d\n", adap->name, adap->nr);
        return 0;
+
+err_put_i2c_dev:
+       put_i2c_dev(i2c_dev, false);
+       return res;
 }
 
 static int i2cdev_detach_adapter(struct device *dev, void *dummy)
index b7640cfe00201bf0d37053ff93b687d1bd7720ec..47551ab73ca8a03ab60448adb8fe0d8cee9aed5d 100644 (file)
@@ -69,7 +69,12 @@ static unsigned int preferred_states_mask;
 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
 
 static unsigned long auto_demotion_disable_flags;
-static bool disable_promotion_to_c1e;
+
+static enum {
+       C1E_PROMOTION_PRESERVE,
+       C1E_PROMOTION_ENABLE,
+       C1E_PROMOTION_DISABLE
+} c1e_promotion = C1E_PROMOTION_PRESERVE;
 
 struct idle_cpu {
        struct cpuidle_state *state_table;
@@ -1398,8 +1403,6 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
 
-static void c1e_promotion_enable(void);
-
 /**
  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
  *
@@ -1578,17 +1581,14 @@ static void __init spr_idle_state_table_update(void)
        unsigned long long msr;
 
        /* Check if user prefers C1E over C1. */
-       if (preferred_states_mask & BIT(2)) {
-               if (preferred_states_mask & BIT(1))
-                       /* Both can't be enabled, stick to the defaults. */
-                       return;
-
+       if ((preferred_states_mask & BIT(2)) &&
+           !(preferred_states_mask & BIT(1))) {
+               /* Disable C1 and enable C1E. */
                spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
                spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
 
                /* Enable C1E using the "C1E promotion" bit. */
-               c1e_promotion_enable();
-               disable_promotion_to_c1e = false;
+               c1e_promotion = C1E_PROMOTION_ENABLE;
        }
 
        /*
@@ -1754,7 +1754,9 @@ static int intel_idle_cpu_init(unsigned int cpu)
        if (auto_demotion_disable_flags)
                auto_demotion_disable();
 
-       if (disable_promotion_to_c1e)
+       if (c1e_promotion == C1E_PROMOTION_ENABLE)
+               c1e_promotion_enable();
+       else if (c1e_promotion == C1E_PROMOTION_DISABLE)
                c1e_promotion_disable();
 
        return 0;
@@ -1833,7 +1835,8 @@ static int __init intel_idle_init(void)
        if (icpu) {
                cpuidle_state_table = icpu->state_table;
                auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
-               disable_promotion_to_c1e = icpu->disable_promotion_to_c1e;
+               if (icpu->disable_promotion_to_c1e)
+                       c1e_promotion = C1E_PROMOTION_DISABLE;
                if (icpu->use_acpi || force_use_acpi)
                        intel_idle_acpi_cst_extract();
        } else if (!intel_idle_acpi_cst_extract()) {
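Replacing the bool with a tri-state enum lets the init path distinguish "leave the firmware's C1E promotion setting alone" (the default) from an explicit enable or disable, which the SPR update above requires. The per-CPU decision is equivalent to:

	switch (c1e_promotion) {
	case C1E_PROMOTION_ENABLE:
		c1e_promotion_enable();		/* set the MSR promotion bit */
		break;
	case C1E_PROMOTION_DISABLE:
		c1e_promotion_disable();	/* clear it */
		break;
	case C1E_PROMOTION_PRESERVE:
		break;				/* keep the firmware default */
	}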
index ef9d27759961f8a2f92fdffdac81405436b388b7..ec9acbf12b9a5d6fe24990cb31a334be8e4711bf 100644 (file)
@@ -745,7 +745,7 @@ static int ad7280a_write_thresh(struct iio_dev *indio_dev,
                case IIO_EV_DIR_RISING:
                        addr = AD7280A_CELL_OVERVOLTAGE_REG;
                        ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr,
-                                          1, val);
+                                          1, value);
                        if (ret)
                                break;
                        st->cell_threshhigh = value;
@@ -753,7 +753,7 @@ static int ad7280a_write_thresh(struct iio_dev *indio_dev,
                case IIO_EV_DIR_FALLING:
                        addr = AD7280A_CELL_UNDERVOLTAGE_REG;
                        ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr,
-                                          1, val);
+                                          1, value);
                        if (ret)
                                break;
                        st->cell_threshlow = value;
@@ -770,18 +770,18 @@ static int ad7280a_write_thresh(struct iio_dev *indio_dev,
                case IIO_EV_DIR_RISING:
                        addr = AD7280A_AUX_ADC_OVERVOLTAGE_REG;
                        ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr,
-                                          1, val);
+                                          1, value);
                        if (ret)
                                break;
-                       st->aux_threshhigh = val;
+                       st->aux_threshhigh = value;
                        break;
                case IIO_EV_DIR_FALLING:
                        addr = AD7280A_AUX_ADC_UNDERVOLTAGE_REG;
                        ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr,
-                                          1, val);
+                                          1, value);
                        if (ret)
                                break;
-                       st->aux_threshlow = val;
+                       st->aux_threshlow = value;
                        break;
                default:
                        ret = -EINVAL;
index 20d4e7584e923297cce16f91e22b44fbbb622621..37143b5526ee640c74b8b677b5a512419034ace8 100644 (file)
@@ -471,12 +471,15 @@ static ssize_t calibration_forced_value_store(struct device *dev,
        ret = scd4x_write_and_fetch(state, CMD_FRC, arg, &val, sizeof(val));
        mutex_unlock(&state->lock);
 
+       if (ret)
+               return ret;
+
        if (val == 0xff) {
                dev_err(dev, "forced calibration has failed");
                return -EINVAL;
        }
 
-       return ret ?: len;
+       return len;
 }
 
 static IIO_DEVICE_ATTR_RW(calibration_auto_enable, 0);
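
The scd4x hunk above checks the transfer result before interpreting the fetched value: val is only defined when scd4x_write_and_fetch() succeeded, so folding the error into the old "return ret ?: len;" could mis-report a failed transfer whose stale val happened to equal 0xff. The general shape, sketched with an illustrative do_write_and_fetch() helper:

	static ssize_t example_store(struct device *dev, const char *buf,
				     size_t len)
	{
		u16 val;
		int ret;

		ret = do_write_and_fetch(dev, buf, &val);
		if (ret)
			return ret;	/* val is meaningful only on success */

		if (val == 0xff)	/* device-specific failure marker */
			return -EINVAL;

		return len;
	}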
index 97f13c0b96312884876aaeda1fd6c6e5b2ba9ed8..d5ea1a1be1226bf57c231bc9a8f3634d005028f6 100644 (file)
@@ -656,7 +656,7 @@ static int ad3552r_reset(struct ad3552r_desc *dac)
 {
        struct reg_addr_pool addr;
        int ret;
-       u16 val;
+       int val;
 
        dac->gpio_reset = devm_gpiod_get_optional(&dac->spi->dev, "reset",
                                                  GPIOD_OUT_LOW);
@@ -809,10 +809,10 @@ static int ad3552r_configure_custom_gain(struct ad3552r_desc *dac,
 
        gain_child = fwnode_get_named_child_node(child,
                                                 "custom-output-range-config");
-       if (IS_ERR(gain_child)) {
+       if (!gain_child) {
                dev_err(dev,
                        "mandatory custom-output-range-config property missing\n");
-               return PTR_ERR(gain_child);
+               return -EINVAL;
        }
 
        dac->ch_data[ch].range_override = 1;
index 14cfabacbea5309c5e0d03d01313af649ce8c911..fdf824041497323a1b54ad66faaf4fea55bdf1bd 100644 (file)
@@ -178,7 +178,7 @@ static int ad5446_read_raw(struct iio_dev *indio_dev,
 
        switch (m) {
        case IIO_CHAN_INFO_RAW:
-               *val = st->cached_val;
+               *val = st->cached_val >> chan->scan_type.shift;
                return IIO_VAL_INT;
        case IIO_CHAN_INFO_SCALE:
                *val = st->vref_mv;
index a424b7220b61aacdc146f74f349e58c24fdb2090..4434c1b2a32210987657d1212c8a587a6a02ba27 100644 (file)
@@ -522,7 +522,7 @@ static int ad5592r_alloc_channels(struct iio_dev *iio_dev)
                if (!ret)
                        st->channel_modes[reg] = tmp;
 
-               fwnode_property_read_u32(child, "adi,off-state", &tmp);
+               ret = fwnode_property_read_u32(child, "adi,off-state", &tmp);
                if (!ret)
                        st->channel_offstate[reg] = tmp;
        }
index e41861d29767c3f261a82bb21cbc2f149d4fdca5..2f9c384885f4d0870d1e884fb0594ef5bd7d68d4 100644 (file)
@@ -298,7 +298,7 @@ static int ltc2688_read_raw(struct iio_dev *indio_dev,
                if (ret)
                        return ret;
 
-               *val = 16;
+               *val2 = 16;
                return IIO_VAL_FRACTIONAL_LOG2;
        case IIO_CHAN_INFO_CALIBBIAS:
                ret = regmap_read(st->regmap,
index 4a3b8d8755183ce50732707dfb29d8eed10f88cb..0b775f943db3e7da6c9915976aca6181e110bea5 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/i2c.h>
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
+#include <linux/property.h>
 #include <linux/regulator/consumer.h>
 
 enum chip_id {
@@ -311,6 +312,7 @@ static int dac5571_probe(struct i2c_client *client,
        const struct dac5571_spec *spec;
        struct dac5571_data *data;
        struct iio_dev *indio_dev;
+       enum chip_id chip_id;
        int ret, i;
 
        indio_dev = devm_iio_device_alloc(dev, sizeof(*data));
@@ -326,7 +328,13 @@ static int dac5571_probe(struct i2c_client *client,
        indio_dev->modes = INDIO_DIRECT_MODE;
        indio_dev->channels = dac5571_channels;
 
-       spec = &dac5571_spec[id->driver_data];
+       if (dev_fwnode(dev))
+               chip_id = (uintptr_t)device_get_match_data(dev);
+       else
+               chip_id = id->driver_data;
+
+       spec = &dac5571_spec[chip_id];
+
        indio_dev->num_channels = spec->num_channels;
        data->spec = spec;
 
@@ -385,15 +393,15 @@ static int dac5571_remove(struct i2c_client *i2c)
 }
 
 static const struct of_device_id dac5571_of_id[] = {
-       {.compatible = "ti,dac5571"},
-       {.compatible = "ti,dac6571"},
-       {.compatible = "ti,dac7571"},
-       {.compatible = "ti,dac5574"},
-       {.compatible = "ti,dac6574"},
-       {.compatible = "ti,dac7574"},
-       {.compatible = "ti,dac5573"},
-       {.compatible = "ti,dac6573"},
-       {.compatible = "ti,dac7573"},
+       {.compatible = "ti,dac5571", .data = (void *)single_8bit},
+       {.compatible = "ti,dac6571", .data = (void *)single_10bit},
+       {.compatible = "ti,dac7571", .data = (void *)single_12bit},
+       {.compatible = "ti,dac5574", .data = (void *)quad_8bit},
+       {.compatible = "ti,dac6574", .data = (void *)quad_10bit},
+       {.compatible = "ti,dac7574", .data = (void *)quad_12bit},
+       {.compatible = "ti,dac5573", .data = (void *)quad_8bit},
+       {.compatible = "ti,dac6573", .data = (void *)quad_10bit},
+       {.compatible = "ti,dac7573", .data = (void *)quad_12bit},
        {}
 };
 MODULE_DEVICE_TABLE(of, dac5571_of_id);
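
The dac5571 change above stores the chip variant in each OF match entry's .data, so devicetree-based instantiation no longer depends on the I2C id table; device_get_match_data() is used when a firmware node exists, with the id table as the legacy fallback. A hedged sketch of that lookup order (dac5571_get_chip_id() is a hypothetical helper; the probe hunk open-codes the same logic):

	#include <linux/property.h>

	static enum chip_id dac5571_get_chip_id(struct device *dev,
						const struct i2c_device_id *id)
	{
		/* Prefer firmware match data (OF/ACPI) when present. */
		if (dev_fwnode(dev))
			return (uintptr_t)device_get_match_data(dev);

		/* Fall back to the legacy i2c_device_id table. */
		return id->driver_data;
	}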
index 3ae35817ad82796f8720d96e2fbd4045d3eaf930..a85b345ea14efc383e2eacfa23ed6d6d0aaa5e8c 100644 (file)
@@ -8,6 +8,7 @@ menu "Filters"
 config ADMV8818
        tristate "Analog Devices ADMV8818 High-Pass and Low-Pass Filter"
        depends on SPI && COMMON_CLK && 64BIT
+       select REGMAP_SPI
        help
          Say yes here to build support for Analog Devices ADMV8818
          2 GHz to 18 GHz, Digitally Tunable, High-Pass and Low-Pass Filter.
index 824b5124a5f55779a29284576496d9a84f181526..01336105792ee25b4c58f72e16879c495d4981ee 100644 (file)
@@ -730,7 +730,7 @@ static int bmi160_chip_init(struct bmi160_data *data, bool use_spi)
 
        ret = regmap_write(data->regmap, BMI160_REG_CMD, BMI160_CMD_SOFTRESET);
        if (ret)
-               return ret;
+               goto disable_regulator;
 
        usleep_range(BMI160_SOFTRESET_USLEEP, BMI160_SOFTRESET_USLEEP + 1);
 
@@ -741,29 +741,37 @@ static int bmi160_chip_init(struct bmi160_data *data, bool use_spi)
        if (use_spi) {
                ret = regmap_read(data->regmap, BMI160_REG_DUMMY, &val);
                if (ret)
-                       return ret;
+                       goto disable_regulator;
        }
 
        ret = regmap_read(data->regmap, BMI160_REG_CHIP_ID, &val);
        if (ret) {
                dev_err(dev, "Error reading chip id\n");
-               return ret;
+               goto disable_regulator;
        }
        if (val != BMI160_CHIP_ID_VAL) {
                dev_err(dev, "Wrong chip id, got %x expected %x\n",
                        val, BMI160_CHIP_ID_VAL);
-               return -ENODEV;
+               ret = -ENODEV;
+               goto disable_regulator;
        }
 
        ret = bmi160_set_mode(data, BMI160_ACCEL, true);
        if (ret)
-               return ret;
+               goto disable_regulator;
 
        ret = bmi160_set_mode(data, BMI160_GYRO, true);
        if (ret)
-               return ret;
+               goto disable_accel;
 
        return 0;
+
+disable_accel:
+       bmi160_set_mode(data, BMI160_ACCEL, false);
+
+disable_regulator:
+       regulator_bulk_disable(ARRAY_SIZE(data->supplies), data->supplies);
+       return ret;
 }
 
 static int bmi160_data_rdy_trigger_set_state(struct iio_trigger *trig,
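
The bmi160 hunk converts early returns in bmi160_chip_init() into a goto-based unwind, so the regulators enabled at the top of the function are switched back off on every failure path, and the accelerometer is powered down again if enabling the gyro fails. The idiom, reduced to a skeleton with illustrative names:

	static int init_with_unwind(struct my_data *data)
	{
		int ret;

		ret = enable_a(data);		/* e.g. bulk regulators */
		if (ret)
			return ret;

		ret = enable_b(data);		/* e.g. accel mode */
		if (ret)
			goto err_disable_a;

		ret = enable_c(data);		/* e.g. gyro mode */
		if (ret)
			goto err_disable_b;

		return 0;

	err_disable_b:
		disable_b(data);
	err_disable_a:
		disable_a(data);
		return ret;
	}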
index 33d9afb1ba914618330b92b4d734ee27f0b45276..d4a692b838d0f0c6dff8d779a7e6004a74ebc22e 100644 (file)
@@ -18,12 +18,15 @@ static int inv_icm42600_i2c_bus_setup(struct inv_icm42600_state *st)
        unsigned int mask, val;
        int ret;
 
-       /* setup interface registers */
-       ret = regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG6,
-                                INV_ICM42600_INTF_CONFIG6_MASK,
-                                INV_ICM42600_INTF_CONFIG6_I3C_EN);
-       if (ret)
-               return ret;
+       /*
+        * Set up the interface registers.
+        * This register write to REG_INTF_CONFIG6 enables a spike filter that
+        * affects the line and can prevent the I2C ACK from being seen by the
+        * controller, so we don't check the return value.
+        */
+       regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG6,
+                          INV_ICM42600_INTF_CONFIG6_MASK,
+                          INV_ICM42600_INTF_CONFIG6_I3C_EN);
 
        ret = regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG4,
                                 INV_ICM42600_INTF_CONFIG4_I3C_BUS_ONLY, 0);
index 088f748b683ebbca7bd088e063ba4cb6c5c035d7..2432e697150c655a15a28fa99f01e93fc887357b 100644 (file)
@@ -416,6 +416,7 @@ static int ak8975_power_on(const struct ak8975_data *data)
        if (ret) {
                dev_warn(&data->client->dev,
                         "Failed to enable specified Vid supply\n");
+               regulator_disable(data->vdd);
                return ret;
        }
 
index 0d9bbbb50cb45772a083d7a351824465b5eaf8eb..70c37f664f6da7c2cf2cba5a2007e0836375d5e3 100644 (file)
 #define SX9324_REG_AFE_PH2             0x2a
 #define SX9324_REG_AFE_PH3             0x2b
 #define SX9324_REG_AFE_CTRL8           0x2c
-#define SX9324_REG_AFE_CTRL8_RESFILTN_4KOHM 0x02
+#define SX9324_REG_AFE_CTRL8_RESERVED  0x10
+#define SX9324_REG_AFE_CTRL8_RESFILTIN_4KOHM 0x02
 #define SX9324_REG_AFE_CTRL9           0x2d
 #define SX9324_REG_AFE_CTRL9_AGAIN_1   0x08
 
 #define SX9324_REG_PROX_CTRL0          0x30
 #define SX9324_REG_PROX_CTRL0_GAIN_MASK        GENMASK(5, 3)
-#define SX9324_REG_PROX_CTRL0_GAIN_1           0x80
+#define SX9324_REG_PROX_CTRL0_GAIN_SHIFT       3
+#define SX9324_REG_PROX_CTRL0_GAIN_RSVD                0x0
+#define SX9324_REG_PROX_CTRL0_GAIN_1           0x1
+#define SX9324_REG_PROX_CTRL0_GAIN_8           0x4
 #define SX9324_REG_PROX_CTRL0_RAWFILT_MASK     GENMASK(2, 0)
 #define SX9324_REG_PROX_CTRL0_RAWFILT_1P50     0x01
 #define SX9324_REG_PROX_CTRL1          0x31
@@ -379,7 +383,14 @@ static int sx9324_read_gain(struct sx_common_data *data,
        if (ret)
                return ret;
 
-       *val = 1 << FIELD_GET(SX9324_REG_PROX_CTRL0_GAIN_MASK, regval);
+       regval = FIELD_GET(SX9324_REG_PROX_CTRL0_GAIN_MASK, regval);
+       if (regval)
+               regval--;
+       else if (regval == SX9324_REG_PROX_CTRL0_GAIN_RSVD ||
+                regval > SX9324_REG_PROX_CTRL0_GAIN_8)
+               return -EINVAL;
+
+       *val = 1 << regval;
 
        return IIO_VAL_INT;
 }
@@ -725,8 +736,12 @@ static int sx9324_write_gain(struct sx_common_data *data,
        unsigned int gain, reg;
        int ret;
 
-       gain = ilog2(val);
        reg = SX9324_REG_PROX_CTRL0 + chan->channel / 2;
+
+       gain = ilog2(val) + 1;
+       if (val <= 0 || gain > SX9324_REG_PROX_CTRL0_GAIN_8)
+               return -EINVAL;
+
        gain = FIELD_PREP(SX9324_REG_PROX_CTRL0_GAIN_MASK, gain);
 
        mutex_lock(&data->mutex);
@@ -781,12 +796,15 @@ static const struct sx_common_reg_default sx9324_default_regs[] = {
        { SX9324_REG_AFE_PH2, 0x1a },
        { SX9324_REG_AFE_PH3, 0x16 },
 
-       { SX9324_REG_AFE_CTRL8, SX9324_REG_AFE_CTRL8_RESFILTN_4KOHM },
+       { SX9324_REG_AFE_CTRL8, SX9324_REG_AFE_CTRL8_RESERVED |
+               SX9324_REG_AFE_CTRL8_RESFILTIN_4KOHM },
        { SX9324_REG_AFE_CTRL9, SX9324_REG_AFE_CTRL9_AGAIN_1 },
 
-       { SX9324_REG_PROX_CTRL0, SX9324_REG_PROX_CTRL0_GAIN_1 |
+       { SX9324_REG_PROX_CTRL0,
+               SX9324_REG_PROX_CTRL0_GAIN_1 << SX9324_REG_PROX_CTRL0_GAIN_SHIFT |
                SX9324_REG_PROX_CTRL0_RAWFILT_1P50 },
-       { SX9324_REG_PROX_CTRL1, SX9324_REG_PROX_CTRL0_GAIN_1 |
+       { SX9324_REG_PROX_CTRL1,
+               SX9324_REG_PROX_CTRL0_GAIN_1 << SX9324_REG_PROX_CTRL0_GAIN_SHIFT |
                SX9324_REG_PROX_CTRL0_RAWFILT_1P50 },
        { SX9324_REG_PROX_CTRL2, SX9324_REG_PROX_CTRL2_AVGNEG_THRESH_16K },
        { SX9324_REG_PROX_CTRL3, SX9324_REG_PROX_CTRL3_AVGDEB_2SAMPLES |
index a7c07316a0a91e9dd823b7b80182b4c97cb5a152..8ad814d96b7e291b3f8bf6bd473371901540b8d1 100644 (file)
@@ -521,6 +521,7 @@ int sx_common_probe(struct i2c_client *client,
                return dev_err_probe(dev, ret, "error reading WHOAMI\n");
 
        ACPI_COMPANION_SET(&indio_dev->dev, ACPI_COMPANION(dev));
+       indio_dev->dev.of_node = client->dev.of_node;
        indio_dev->modes = INDIO_DIRECT_MODE;
 
        indio_dev->channels =  data->chip_info->iio_channels;
index 35f0d5e7533d60b6038d033095053bdf18e61aa7..1c107d6d03b990d9ea0a33dc4bb1e634b073ed87 100644 (file)
@@ -2824,6 +2824,7 @@ static int cm_dreq_handler(struct cm_work *work)
        switch (cm_id_priv->id.state) {
        case IB_CM_REP_SENT:
        case IB_CM_DREQ_SENT:
+       case IB_CM_MRA_REP_RCVD:
                ib_cancel_mad(cm_id_priv->msg);
                break;
        case IB_CM_ESTABLISHED:
@@ -2831,8 +2832,6 @@ static int cm_dreq_handler(struct cm_work *work)
                    cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
                        ib_cancel_mad(cm_id_priv->msg);
                break;
-       case IB_CM_MRA_REP_RCVD:
-               break;
        case IB_CM_TIMEWAIT:
                atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
                                                     [CM_DREQ_COUNTER]);
index 876cc78a22cca16e2ff184fc73daf0a1e89dc9e0..7333646021bb809d4b60908456eb9f01c5f1ab4f 100644 (file)
@@ -80,6 +80,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
        unsigned long flags;
        struct list_head del_list;
 
+       /* Prevent freeing of mm until we are completely finished. */
+       mmgrab(handler->mn.mm);
+
        /* Unregister first so we don't get any more notifications. */
        mmu_notifier_unregister(&handler->mn, handler->mn.mm);
 
@@ -102,6 +105,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
 
        do_remove(handler, &del_list);
 
+       /* Now the mm may be freed. */
+       mmdrop(handler->mn.mm);
+
        kfree(handler);
 }
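
The hfi1 fix above takes an mmgrab() reference before mmu_notifier_unregister() and drops it with mmdrop() only after teardown completes, so the mm_struct cannot be freed while handler->mn.mm is still being dereferenced. The pattern in isolation, as a sketch:

	#include <linux/sched/mm.h>
	#include <linux/mmu_notifier.h>

	static void teardown(struct mmu_notifier *mn)
	{
		struct mm_struct *mm = mn->mm;

		mmgrab(mm);			/* pin the mm_struct itself */
		mmu_notifier_unregister(mn, mm);

		/* ... flush work and free per-mm state that uses mm ... */

		mmdrop(mm);			/* now the mm may be freed */
	}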
 
index dedb3b7edd8d6b43827b2bdb32cdade2f4906fdf..638bf4a1ed9463d83ea11229073a3e756932f996 100644 (file)
@@ -2308,10 +2308,8 @@ err:
        return NULL;
 }
 
-static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
+static void irdma_destroy_connection(struct irdma_cm_node *cm_node)
 {
-       struct irdma_cm_node *cm_node =
-                           container_of(rcu_head, struct irdma_cm_node, rcu_head);
        struct irdma_cm_core *cm_core = cm_node->cm_core;
        struct irdma_qp *iwqp;
        struct irdma_cm_info nfo;
@@ -2359,7 +2357,6 @@ static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
        }
 
        cm_core->cm_free_ah(cm_node);
-       kfree(cm_node);
 }
 
 /**
@@ -2387,8 +2384,9 @@ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node)
 
        spin_unlock_irqrestore(&cm_core->ht_lock, flags);
 
-       /* wait for all list walkers to exit their grace period */
-       call_rcu(&cm_node->rcu_head, irdma_cm_node_free_cb);
+       irdma_destroy_connection(cm_node);
+
+       kfree_rcu(cm_node, rcu_head);
 }
 
 /**
@@ -3246,15 +3244,10 @@ int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver)
  */
 void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core)
 {
-       unsigned long flags;
-
        if (!cm_core)
                return;
 
-       spin_lock_irqsave(&cm_core->ht_lock, flags);
-       if (timer_pending(&cm_core->tcp_timer))
-               del_timer_sync(&cm_core->tcp_timer);
-       spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+       del_timer_sync(&cm_core->tcp_timer);
 
        destroy_workqueue(cm_core->event_wq);
        cm_core->dev->ws_reset(&cm_core->iwdev->vsi);
@@ -3467,12 +3460,6 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
        }
 
        cm_id = iwqp->cm_id;
-       /* make sure we havent already closed this connection */
-       if (!cm_id) {
-               spin_unlock_irqrestore(&iwqp->lock, flags);
-               return;
-       }
-
        original_hw_tcp_state = iwqp->hw_tcp_state;
        original_ibqp_state = iwqp->ibqp_state;
        last_ae = iwqp->last_aeq;
@@ -3494,11 +3481,11 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
                        disconn_status = -ECONNRESET;
        }
 
-       if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
-            original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
-            last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
-            last_ae == IRDMA_AE_BAD_CLOSE ||
-            last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) {
+       if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
+           original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
+           last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
+           last_ae == IRDMA_AE_BAD_CLOSE ||
+           last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) {
                issue_close = 1;
                iwqp->cm_id = NULL;
                qp->term_flags = 0;
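
In the irdma hunks above, the RCU callback used to both tear down the connection and free the node; teardown is safe to run synchronously, and only the kfree() must wait for readers, so call_rcu() with a custom callback collapses into kfree_rcu(). Schematically, with generic names:

	struct node {
		struct rcu_head rcu_head;
		/* ... */
	};

	static void release_node(struct node *n)
	{
		teardown_connection(n);		/* runs immediately */
		kfree_rcu(n, rcu_head);		/* freed after a grace period */
	}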
index 346c2c5dabdf0a2c9da72a76b1bdb49d1a4c7379..81760415d66c85cbe13b73c53db0ac27445c7741 100644 (file)
@@ -258,18 +258,16 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
        u32 local_ipaddr[4] = {};
        bool ipv4 = true;
 
-       real_dev = rdma_vlan_dev_real_dev(netdev);
-       if (!real_dev)
-               real_dev = netdev;
-
-       ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
-       if (!ibdev)
-               return NOTIFY_DONE;
-
-       iwdev = to_iwdev(ibdev);
-
        switch (event) {
        case NETEVENT_NEIGH_UPDATE:
+               real_dev = rdma_vlan_dev_real_dev(netdev);
+               if (!real_dev)
+                       real_dev = netdev;
+               ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+               if (!ibdev)
+                       return NOTIFY_DONE;
+
+               iwdev = to_iwdev(ibdev);
                p = (__be32 *)neigh->primary_key;
                if (neigh->tbl->family == AF_INET6) {
                        ipv4 = false;
@@ -290,13 +288,12 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
                        irdma_manage_arp_cache(iwdev->rf, neigh->ha,
                                               local_ipaddr, ipv4,
                                               IRDMA_ARP_DELETE);
+               ib_device_put(ibdev);
                break;
        default:
                break;
        }
 
-       ib_device_put(ibdev);
-
        return NOTIFY_DONE;
 }
 
index 46f475394af5f5d1f995317a967851c1331b8aad..52f3e88f85695728b294533d7482cd0cfc9b3373 100644 (file)
@@ -1618,13 +1618,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 
        if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) {
                if (dont_wait) {
-                       if (iwqp->cm_id && iwqp->hw_tcp_state) {
+                       if (iwqp->hw_tcp_state) {
                                spin_lock_irqsave(&iwqp->lock, flags);
                                iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
                                iwqp->last_aeq = IRDMA_AE_RESET_SENT;
                                spin_unlock_irqrestore(&iwqp->lock, flags);
-                               irdma_cm_disconn(iwqp);
                        }
+                       irdma_cm_disconn(iwqp);
                } else {
                        int close_timer_started;
 
index 956f8e875daa517d84314ced52ba3e58aa06b85d..32ef67e9a6a7295330b5beaeab832574abc307c9 100644 (file)
@@ -574,8 +574,10 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
                spin_lock_irq(&ent->lock);
                if (ent->disabled)
                        goto out;
-               if (need_delay)
+               if (need_delay) {
                        queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
+                       goto out;
+               }
                remove_cache_mr_locked(ent);
                queue_adjust_cache_locked(ent);
        }
@@ -625,6 +627,7 @@ static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
        struct mlx5_cache_ent *ent = mr->cache_ent;
 
+       WRITE_ONCE(dev->cache.last_add, jiffies);
        spin_lock_irq(&ent->lock);
        list_add_tail(&mr->list, &ent->head);
        ent->available_mrs++;
index ae50b56e891321fb8bf98b7c2b28bd626b5169c5..8ef112f883a772db51f1b218990408db3b934b1a 100644 (file)
@@ -3190,7 +3190,11 @@ serr_no_r_lock:
        spin_lock_irqsave(&sqp->s_lock, flags);
        rvt_send_complete(sqp, wqe, send_status);
        if (sqp->ibqp.qp_type == IB_QPT_RC) {
-               int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+               int lastwqe;
+
+               spin_lock(&sqp->r_lock);
+               lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+               spin_unlock(&sqp->r_lock);
 
                sqp->s_flags &= ~RVT_S_BUSY;
                spin_unlock_irqrestore(&sqp->s_lock, flags);
index ae8f11cb704afe5482c14bcbfc9194540341da7f..873a9b10307c0ac0131b8ffcb77a01e72dc9f190 100644 (file)
@@ -38,13 +38,13 @@ static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
 }
 
 /**
- * rxe_mcast_delete - delete multicast address from rxe device
+ * rxe_mcast_del - delete multicast address from rxe device
  * @rxe: rxe device object
  * @mgid: multicast address as a gid
  *
  * Returns 0 on success else an error
  */
-static int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
+static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
 {
        unsigned char ll_addr[ETH_ALEN];
 
@@ -143,11 +143,10 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
 struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 {
        struct rxe_mcg *mcg;
-       unsigned long flags;
 
-       spin_lock_irqsave(&rxe->mcg_lock, flags);
+       spin_lock_bh(&rxe->mcg_lock);
        mcg = __rxe_lookup_mcg(rxe, mgid);
-       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+       spin_unlock_bh(&rxe->mcg_lock);
 
        return mcg;
 }
@@ -159,17 +158,10 @@ struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
  * @mcg: new mcg object
  *
  * Context: caller should hold rxe->mcg lock
- * Returns: 0 on success else an error
  */
-static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
-                         struct rxe_mcg *mcg)
+static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
+                          struct rxe_mcg *mcg)
 {
-       int err;
-
-       err = rxe_mcast_add(rxe, mgid);
-       if (unlikely(err))
-               return err;
-
        kref_init(&mcg->ref_cnt);
        memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
        INIT_LIST_HEAD(&mcg->qp_list);
@@ -184,8 +176,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
         */
        kref_get(&mcg->ref_cnt);
        __rxe_insert_mcg(mcg);
-
-       return 0;
 }
 
 /**
@@ -198,7 +188,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
 static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 {
        struct rxe_mcg *mcg, *tmp;
-       unsigned long flags;
        int err;
 
        if (rxe->attr.max_mcast_grp == 0)
@@ -209,36 +198,38 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
        if (mcg)
                return mcg;
 
+       /* check to see if we have reached the limit */

+       if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
+               err = -ENOMEM;
+               goto err_dec;
+       }
+
        /* speculative alloc of new mcg */
        mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
        if (!mcg)
                return ERR_PTR(-ENOMEM);
 
-       spin_lock_irqsave(&rxe->mcg_lock, flags);
+       spin_lock_bh(&rxe->mcg_lock);
        /* re-check to see if someone else just added it */
        tmp = __rxe_lookup_mcg(rxe, mgid);
        if (tmp) {
+               spin_unlock_bh(&rxe->mcg_lock);
+               atomic_dec(&rxe->mcg_num);
                kfree(mcg);
-               mcg = tmp;
-               goto out;
+               return tmp;
        }
 
-       if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
-               err = -ENOMEM;
-               goto err_dec;
-       }
+       __rxe_init_mcg(rxe, mgid, mcg);
+       spin_unlock_bh(&rxe->mcg_lock);
 
-       err = __rxe_init_mcg(rxe, mgid, mcg);
-       if (err)
-               goto err_dec;
-out:
-       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
-       return mcg;
+       /* add mcast address outside of lock */
+       err = rxe_mcast_add(rxe, mgid);
+       if (!err)
+               return mcg;
 
+       kfree(mcg);
 err_dec:
        atomic_dec(&rxe->mcg_num);
-       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
-       kfree(mcg);
        return ERR_PTR(err);
 }
 
@@ -268,7 +259,6 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
        __rxe_remove_mcg(mcg);
        kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
 
-       rxe_mcast_delete(mcg->rxe, &mcg->mgid);
        atomic_dec(&rxe->mcg_num);
 }
 
@@ -280,11 +270,12 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
  */
 static void rxe_destroy_mcg(struct rxe_mcg *mcg)
 {
-       unsigned long flags;
+       /* delete mcast address outside of lock */
+       rxe_mcast_del(mcg->rxe, &mcg->mgid);
 
-       spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
+       spin_lock_bh(&mcg->rxe->mcg_lock);
        __rxe_destroy_mcg(mcg);
-       spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags);
+       spin_unlock_bh(&mcg->rxe->mcg_lock);
 }
 
 /**
@@ -339,25 +330,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 {
        struct rxe_dev *rxe = mcg->rxe;
        struct rxe_mca *mca, *tmp;
-       unsigned long flags;
        int err;
 
        /* check to see if the qp is already a member of the group */
-       spin_lock_irqsave(&rxe->mcg_lock, flags);
+       spin_lock_bh(&rxe->mcg_lock);
        list_for_each_entry(mca, &mcg->qp_list, qp_list) {
                if (mca->qp == qp) {
-                       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+                       spin_unlock_bh(&rxe->mcg_lock);
                        return 0;
                }
        }
-       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+       spin_unlock_bh(&rxe->mcg_lock);
 
        /* speculative alloc new mca without using GFP_ATOMIC */
        mca = kzalloc(sizeof(*mca), GFP_KERNEL);
        if (!mca)
                return -ENOMEM;
 
-       spin_lock_irqsave(&rxe->mcg_lock, flags);
+       spin_lock_bh(&rxe->mcg_lock);
        /* re-check to see if someone else just attached qp */
        list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
                if (tmp->qp == qp) {
@@ -371,7 +361,7 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
        if (err)
                kfree(mca);
 out:
-       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+       spin_unlock_bh(&rxe->mcg_lock);
        return err;
 }
 
@@ -405,9 +395,8 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 {
        struct rxe_dev *rxe = mcg->rxe;
        struct rxe_mca *mca, *tmp;
-       unsigned long flags;
 
-       spin_lock_irqsave(&rxe->mcg_lock, flags);
+       spin_lock_bh(&rxe->mcg_lock);
        list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
                if (mca->qp == qp) {
                        __rxe_cleanup_mca(mca, mcg);
@@ -421,13 +410,13 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
                        if (atomic_read(&mcg->qp_num) <= 0)
                                __rxe_destroy_mcg(mcg);
 
-                       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+                       spin_unlock_bh(&rxe->mcg_lock);
                        return 0;
                }
        }
 
        /* we didn't find the qp on the list */
-       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+       spin_unlock_bh(&rxe->mcg_lock);
        return -EINVAL;
 }
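
The rxe_mcast rework drops spin_lock_irqsave() in favor of spin_lock_bh() throughout: mcg_lock is only taken from process and softirq context, so disabling bottom halves suffices and the flags variable disappears. Side by side, as a sketch:

	/* before: also hardirq-safe, heavier than the lock's users need */
	unsigned long flags;

	spin_lock_irqsave(&rxe->mcg_lock, flags);
	/* ... critical section ... */
	spin_unlock_irqrestore(&rxe->mcg_lock, flags);

	/* after: the lock is never taken from hardirq context */
	spin_lock_bh(&rxe->mcg_lock);
	/* ... critical section ... */
	spin_unlock_bh(&rxe->mcg_lock);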
 
index 16fc7ea1298d8d61d04caac621a0727347938fd8..9cd0eaff98dea4f358c5a698d14ed63cbe6f59c2 100644 (file)
@@ -680,6 +680,11 @@ static struct resp_res *rxe_prepare_read_res(struct rxe_qp *qp,
  * It is assumed that the access permissions, if originally good,
  * are still OK and that the mappings are unchanged.
  *
+ * TODO: If someone reregisters an MR to change its size or
+ * access permissions during the processing of an RDMA read
+ * we should kill the responder resource and complete the
+ * operation with an error.
+ *
  * Return: mr on success else NULL
  */
 static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
@@ -690,23 +695,27 @@ static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
 
        if (rkey_is_mw(rkey)) {
                mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
-               if (!mw || mw->rkey != rkey)
+               if (!mw)
                        return NULL;
 
-               if (mw->state != RXE_MW_STATE_VALID) {
+               mr = mw->mr;
+               if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
+                   !mr || mr->state != RXE_MR_STATE_VALID) {
                        rxe_put(mw);
                        return NULL;
                }
 
-               mr = mw->mr;
+               rxe_get(mr);
                rxe_put(mw);
-       } else {
-               mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
-               if (!mr || mr->rkey != rkey)
-                       return NULL;
+
+               return mr;
        }
 
-       if (mr->state != RXE_MR_STATE_VALID) {
+       mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
+       if (!mr)
+               return NULL;
+
+       if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
                rxe_put(mr);
                return NULL;
        }
@@ -736,8 +745,14 @@ static enum resp_states read_reply(struct rxe_qp *qp,
        }
 
        if (res->state == rdatm_res_state_new) {
-               mr = qp->resp.mr;
-               qp->resp.mr = NULL;
+               if (!res->replay) {
+                       mr = qp->resp.mr;
+                       qp->resp.mr = NULL;
+               } else {
+                       mr = rxe_recheck_mr(qp, res->read.rkey);
+                       if (!mr)
+                               return RESPST_ERR_RKEY_VIOLATION;
+               }
 
                if (res->read.resid <= mtu)
                        opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
index 7acdd3c3a599d05161d4ed64c3581d0e2dd20f20..17f34d584cd9e8d5cf80f179a458eeea17aae5bf 100644 (file)
@@ -968,14 +968,15 @@ static void siw_accept_newconn(struct siw_cep *cep)
 
                siw_cep_set_inuse(new_cep);
                rv = siw_proc_mpareq(new_cep);
-               siw_cep_set_free(new_cep);
-
                if (rv != -EAGAIN) {
                        siw_cep_put(cep);
                        new_cep->listen_cep = NULL;
-                       if (rv)
+                       if (rv) {
+                               siw_cep_set_free(new_cep);
                                goto error;
+                       }
                }
+               siw_cep_set_free(new_cep);
        }
        return;
 
index c28996028e8030d6e2ad844826bcb0610c1d8f70..9a23eed6a4f4106603069387f4c90e6a5055685f 100644 (file)
@@ -61,6 +61,14 @@ static irqreturn_t cypress_sf_irq_handler(int irq, void *devid)
        return IRQ_HANDLED;
 }
 
+static void cypress_sf_disable_regulators(void *arg)
+{
+       struct cypress_sf_data *touchkey = arg;
+
+       regulator_bulk_disable(ARRAY_SIZE(touchkey->regulators),
+                              touchkey->regulators);
+}
+
 static int cypress_sf_probe(struct i2c_client *client)
 {
        struct cypress_sf_data *touchkey;
@@ -121,6 +129,12 @@ static int cypress_sf_probe(struct i2c_client *client)
                return error;
        }
 
+       error = devm_add_action_or_reset(&client->dev,
+                                        cypress_sf_disable_regulators,
+                                        touchkey);
+       if (error)
+               return error;
+
        touchkey->input_dev = devm_input_allocate_device(&client->dev);
        if (!touchkey->input_dev) {
                dev_err(&client->dev, "Failed to allocate input device\n");
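
The cypress_sf fix registers the regulator disable as a devm action immediately after the enable succeeds, so every subsequent failure path in probe, as well as eventual unbind, powers the supplies back down without explicit error handling. The core of the idiom, mirroring the hunk (note that devm_add_action_or_reset() already runs the action itself if the registration fails):

	error = regulator_bulk_enable(ARRAY_SIZE(touchkey->regulators),
				      touchkey->regulators);
	if (error)
		return error;

	error = devm_add_action_or_reset(&client->dev,
					 cypress_sf_disable_regulators,
					 touchkey);
	if (error)
		return error;	/* regulators were already disabled */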
index 43375b38ee5926f31a653dfe1efb801b927332b3..8a7ce41b8c56e1b62bd8e8fe4f51204ca55c54c8 100644 (file)
@@ -393,7 +393,7 @@ static int omap4_keypad_probe(struct platform_device *pdev)
         * revision register.
         */
        error = pm_runtime_get_sync(dev);
-       if (error) {
+       if (error < 0) {
                dev_err(dev, "pm_runtime_get_sync() failed\n");
                pm_runtime_put_noidle(dev);
                return error;
index 2bd407d86bae529648d6e142da48f7343adcbab8..e9bd36adbe47dab23913aa2cc5cc9f5f315ec6de 100644 (file)
@@ -756,15 +756,12 @@ static int ili251x_firmware_reset(struct i2c_client *client)
        return ili251x_firmware_busy(client);
 }
 
-static void ili251x_hardware_reset(struct device *dev)
+static void ili210x_hardware_reset(struct gpio_desc *reset_gpio)
 {
-       struct i2c_client *client = to_i2c_client(dev);
-       struct ili210x *priv = i2c_get_clientdata(client);
-
        /* Reset the controller */
-       gpiod_set_value_cansleep(priv->reset_gpio, 1);
-       usleep_range(10000, 15000);
-       gpiod_set_value_cansleep(priv->reset_gpio, 0);
+       gpiod_set_value_cansleep(reset_gpio, 1);
+       usleep_range(12000, 15000);
+       gpiod_set_value_cansleep(reset_gpio, 0);
        msleep(300);
 }
 
@@ -773,6 +770,7 @@ static ssize_t ili210x_firmware_update_store(struct device *dev,
                                             const char *buf, size_t count)
 {
        struct i2c_client *client = to_i2c_client(dev);
+       struct ili210x *priv = i2c_get_clientdata(client);
        const char *fwname = ILI251X_FW_FILENAME;
        const struct firmware *fw;
        u16 ac_end, df_end;
@@ -803,7 +801,7 @@ static ssize_t ili210x_firmware_update_store(struct device *dev,
 
        dev_dbg(dev, "Firmware update started, firmware=%s\n", fwname);
 
-       ili251x_hardware_reset(dev);
+       ili210x_hardware_reset(priv->reset_gpio);
 
        error = ili251x_firmware_reset(client);
        if (error)
@@ -858,7 +856,7 @@ static ssize_t ili210x_firmware_update_store(struct device *dev,
        error = count;
 
 exit:
-       ili251x_hardware_reset(dev);
+       ili210x_hardware_reset(priv->reset_gpio);
        dev_dbg(dev, "Firmware update ended, error=%i\n", error);
        enable_irq(client->irq);
        kfree(fwbuf);
@@ -951,9 +949,7 @@ static int ili210x_i2c_probe(struct i2c_client *client,
                if (error)
                        return error;
 
-               usleep_range(50, 100);
-               gpiod_set_value_cansleep(reset_gpio, 0);
-               msleep(100);
+               ili210x_hardware_reset(reset_gpio);
        }
 
        priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
index 9050ca1f4285cf0a9f9ff9190396a83a35d13f5d..808f6e7a80482796b23f05899147c788bf91a814 100644 (file)
@@ -1087,9 +1087,15 @@ static int of_count_icc_providers(struct device_node *np)
 {
        struct device_node *child;
        int count = 0;
+       const struct of_device_id __maybe_unused ignore_list[] = {
+               { .compatible = "qcom,sc7180-ipa-virt" },
+               { .compatible = "qcom,sdx55-ipa-virt" },
+               {}
+       };
 
        for_each_available_child_of_node(np, child) {
-               if (of_property_read_bool(child, "#interconnect-cells"))
+               if (of_property_read_bool(child, "#interconnect-cells") &&
+                   likely(!of_match_node(ignore_list, child)))
                        count++;
                count += of_count_icc_providers(child);
        }
index 12d59c36df5302aa0c33bdcdb7a4d40c84a46cbb..5f7c0f85fa8e3e5f24591bb6493254f59deaeb18 100644 (file)
@@ -47,7 +47,6 @@ DEFINE_QNODE(qnm_mnoc_sf, SC7180_MASTER_MNOC_SF_MEM_NOC, 1, 32, SC7180_SLAVE_GEM
 DEFINE_QNODE(qnm_snoc_gc, SC7180_MASTER_SNOC_GC_MEM_NOC, 1, 8, SC7180_SLAVE_LLCC);
 DEFINE_QNODE(qnm_snoc_sf, SC7180_MASTER_SNOC_SF_MEM_NOC, 1, 16, SC7180_SLAVE_LLCC);
 DEFINE_QNODE(qxm_gpu, SC7180_MASTER_GFX3D, 2, 32, SC7180_SLAVE_GEM_NOC_SNOC, SC7180_SLAVE_LLCC);
-DEFINE_QNODE(ipa_core_master, SC7180_MASTER_IPA_CORE, 1, 8, SC7180_SLAVE_IPA_CORE);
 DEFINE_QNODE(llcc_mc, SC7180_MASTER_LLCC, 2, 4, SC7180_SLAVE_EBI1);
 DEFINE_QNODE(qhm_mnoc_cfg, SC7180_MASTER_CNOC_MNOC_CFG, 1, 4, SC7180_SLAVE_SERVICE_MNOC);
 DEFINE_QNODE(qxm_camnoc_hf0, SC7180_MASTER_CAMNOC_HF0, 2, 32, SC7180_SLAVE_MNOC_HF_MEM_NOC);
@@ -129,7 +128,6 @@ DEFINE_QNODE(qhs_mdsp_ms_mpu_cfg, SC7180_SLAVE_MSS_PROC_MS_MPU_CFG, 1, 4);
 DEFINE_QNODE(qns_gem_noc_snoc, SC7180_SLAVE_GEM_NOC_SNOC, 1, 8, SC7180_MASTER_GEM_NOC_SNOC);
 DEFINE_QNODE(qns_llcc, SC7180_SLAVE_LLCC, 1, 16, SC7180_MASTER_LLCC);
 DEFINE_QNODE(srvc_gemnoc, SC7180_SLAVE_SERVICE_GEM_NOC, 1, 4);
-DEFINE_QNODE(ipa_core_slave, SC7180_SLAVE_IPA_CORE, 1, 8);
 DEFINE_QNODE(ebi, SC7180_SLAVE_EBI1, 2, 4);
 DEFINE_QNODE(qns_mem_noc_hf, SC7180_SLAVE_MNOC_HF_MEM_NOC, 1, 32, SC7180_MASTER_MNOC_HF_MEM_NOC);
 DEFINE_QNODE(qns_mem_noc_sf, SC7180_SLAVE_MNOC_SF_MEM_NOC, 1, 32, SC7180_MASTER_MNOC_SF_MEM_NOC);
@@ -160,7 +158,6 @@ DEFINE_QBCM(bcm_mc0, "MC0", true, &ebi);
 DEFINE_QBCM(bcm_sh0, "SH0", true, &qns_llcc);
 DEFINE_QBCM(bcm_mm0, "MM0", false, &qns_mem_noc_hf);
 DEFINE_QBCM(bcm_ce0, "CE0", false, &qxm_crypto);
-DEFINE_QBCM(bcm_ip0, "IP0", false, &ipa_core_slave);
 DEFINE_QBCM(bcm_cn0, "CN0", true, &qnm_snoc, &xm_qdss_dap, &qhs_a1_noc_cfg, &qhs_a2_noc_cfg, &qhs_ahb2phy0, &qhs_aop, &qhs_aoss, &qhs_boot_rom, &qhs_camera_cfg, &qhs_camera_nrt_throttle_cfg, &qhs_camera_rt_throttle_cfg, &qhs_clk_ctl, &qhs_cpr_cx, &qhs_cpr_mx, &qhs_crypto0_cfg, &qhs_dcc_cfg, &qhs_ddrss_cfg, &qhs_display_cfg, &qhs_display_rt_throttle_cfg, &qhs_display_throttle_cfg, &qhs_glm, &qhs_gpuss_cfg, &qhs_imem_cfg, &qhs_ipa, &qhs_mnoc_cfg, &qhs_mss_cfg, &qhs_npu_cfg, &qhs_npu_dma_throttle_cfg, &qhs_npu_dsp_throttle_cfg, &qhs_pimem_cfg, &qhs_prng, &qhs_qdss_cfg, &qhs_qm_cfg, &qhs_qm_mpu_cfg, &qhs_qup0, &qhs_qup1, &qhs_security, &qhs_snoc_cfg, &qhs_tcsr, &qhs_tlmm_1, &qhs_tlmm_2, &qhs_tlmm_3, &qhs_ufs_mem_cfg, &qhs_usb3, &qhs_venus_cfg, &qhs_venus_throttle_cfg, &qhs_vsense_ctrl_cfg, &srvc_cnoc);
 DEFINE_QBCM(bcm_mm1, "MM1", false, &qxm_camnoc_hf0_uncomp, &qxm_camnoc_hf1_uncomp, &qxm_camnoc_sf_uncomp, &qhm_mnoc_cfg, &qxm_mdp0, &qxm_rot, &qxm_venus0, &qxm_venus_arm9);
 DEFINE_QBCM(bcm_sh2, "SH2", false, &acm_sys_tcu);
@@ -372,22 +369,6 @@ static struct qcom_icc_desc sc7180_gem_noc = {
        .num_bcms = ARRAY_SIZE(gem_noc_bcms),
 };
 
-static struct qcom_icc_bcm *ipa_virt_bcms[] = {
-       &bcm_ip0,
-};
-
-static struct qcom_icc_node *ipa_virt_nodes[] = {
-       [MASTER_IPA_CORE] = &ipa_core_master,
-       [SLAVE_IPA_CORE] = &ipa_core_slave,
-};
-
-static struct qcom_icc_desc sc7180_ipa_virt = {
-       .nodes = ipa_virt_nodes,
-       .num_nodes = ARRAY_SIZE(ipa_virt_nodes),
-       .bcms = ipa_virt_bcms,
-       .num_bcms = ARRAY_SIZE(ipa_virt_bcms),
-};
-
 static struct qcom_icc_bcm *mc_virt_bcms[] = {
        &bcm_acv,
        &bcm_mc0,
@@ -519,8 +500,6 @@ static const struct of_device_id qnoc_of_match[] = {
          .data = &sc7180_dc_noc},
        { .compatible = "qcom,sc7180-gem-noc",
          .data = &sc7180_gem_noc},
-       { .compatible = "qcom,sc7180-ipa-virt",
-         .data = &sc7180_ipa_virt},
        { .compatible = "qcom,sc7180-mc-virt",
          .data = &sc7180_mc_virt},
        { .compatible = "qcom,sc7180-mmss-noc",
index 03d604f84cc57fdab892d7c343896e6ab1e66b16..e3ac25a997b7109fe011c6acf67c71d22946a5ec 100644 (file)
@@ -18,7 +18,6 @@
 #include "icc-rpmh.h"
 #include "sdx55.h"
 
-DEFINE_QNODE(ipa_core_master, SDX55_MASTER_IPA_CORE, 1, 8, SDX55_SLAVE_IPA_CORE);
 DEFINE_QNODE(llcc_mc, SDX55_MASTER_LLCC, 4, 4, SDX55_SLAVE_EBI_CH0);
 DEFINE_QNODE(acm_tcu, SDX55_MASTER_TCU_0, 1, 8, SDX55_SLAVE_LLCC, SDX55_SLAVE_MEM_NOC_SNOC, SDX55_SLAVE_MEM_NOC_PCIE_SNOC);
 DEFINE_QNODE(qnm_snoc_gc, SDX55_MASTER_SNOC_GC_MEM_NOC, 1, 8, SDX55_SLAVE_LLCC);
@@ -40,7 +39,6 @@ DEFINE_QNODE(xm_pcie, SDX55_MASTER_PCIE, 1, 8, SDX55_SLAVE_ANOC_SNOC);
 DEFINE_QNODE(xm_qdss_etr, SDX55_MASTER_QDSS_ETR, 1, 8, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_EMAC_CFG, SDX55_SLAVE_USB3, SDX55_SLAVE_AOSS, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_SNOC_MEM_NOC_GC, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_SPMI_VGI_COEX, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_AUDIO, SDX55_SLAVE_AOSS, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_TCU, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG);
 DEFINE_QNODE(xm_sdc1, SDX55_MASTER_SDCC_1, 1, 8, SDX55_SLAVE_AOSS, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_ANOC_SNOC, SDX55_SLAVE_AOP, SDX55_SLAVE_AUDIO);
 DEFINE_QNODE(xm_usb3, SDX55_MASTER_USB3, 1, 8, SDX55_SLAVE_ANOC_SNOC);
-DEFINE_QNODE(ipa_core_slave, SDX55_SLAVE_IPA_CORE, 1, 8);
 DEFINE_QNODE(ebi, SDX55_SLAVE_EBI_CH0, 1, 4);
 DEFINE_QNODE(qns_llcc, SDX55_SLAVE_LLCC, 1, 16, SDX55_SLAVE_EBI_CH0);
 DEFINE_QNODE(qns_memnoc_snoc, SDX55_SLAVE_MEM_NOC_SNOC, 1, 8, SDX55_MASTER_MEM_NOC_SNOC);
@@ -82,7 +80,6 @@ DEFINE_QNODE(xs_sys_tcu_cfg, SDX55_SLAVE_TCU, 1, 8);
 DEFINE_QBCM(bcm_mc0, "MC0", true, &ebi);
 DEFINE_QBCM(bcm_sh0, "SH0", true, &qns_llcc);
 DEFINE_QBCM(bcm_ce0, "CE0", false, &qxm_crypto);
-DEFINE_QBCM(bcm_ip0, "IP0", false, &ipa_core_slave);
 DEFINE_QBCM(bcm_pn0, "PN0", false, &qhm_snoc_cfg);
 DEFINE_QBCM(bcm_sh3, "SH3", false, &xm_apps_rdwr);
 DEFINE_QBCM(bcm_sh4, "SH4", false, &qns_memnoc_snoc, &qns_sys_pcie);
@@ -219,22 +216,6 @@ static const struct qcom_icc_desc sdx55_system_noc = {
        .num_bcms = ARRAY_SIZE(system_noc_bcms),
 };
 
-static struct qcom_icc_bcm *ipa_virt_bcms[] = {
-       &bcm_ip0,
-};
-
-static struct qcom_icc_node *ipa_virt_nodes[] = {
-       [MASTER_IPA_CORE] = &ipa_core_master,
-       [SLAVE_IPA_CORE] = &ipa_core_slave,
-};
-
-static const struct qcom_icc_desc sdx55_ipa_virt = {
-       .nodes = ipa_virt_nodes,
-       .num_nodes = ARRAY_SIZE(ipa_virt_nodes),
-       .bcms = ipa_virt_bcms,
-       .num_bcms = ARRAY_SIZE(ipa_virt_bcms),
-};
-
 static const struct of_device_id qnoc_of_match[] = {
        { .compatible = "qcom,sdx55-mc-virt",
          .data = &sdx55_mc_virt},
@@ -242,8 +223,6 @@ static const struct of_device_id qnoc_of_match[] = {
          .data = &sdx55_mem_noc},
        { .compatible = "qcom,sdx55-system-noc",
          .data = &sdx55_system_noc},
-       { .compatible = "qcom,sdx55-ipa-virt",
-         .data = &sdx55_ipa_virt},
        { }
 };
 MODULE_DEVICE_TABLE(of, qnoc_of_match);
index decafb07ad083167ca89fcc8406b69088d7efcc0..8af0242a90d91f40b62ed1ac3fd9a9ea1c8fe79b 100644 (file)
@@ -773,6 +773,7 @@ static const struct iommu_ops apple_dart_iommu_ops = {
        .get_resv_regions = apple_dart_get_resv_regions,
        .put_resv_regions = generic_iommu_put_resv_regions,
        .pgsize_bitmap = -1UL, /* Restricted during dart probe */
+       .owner = THIS_MODULE,
        .default_domain_ops = &(const struct iommu_domain_ops) {
                .attach_dev     = apple_dart_attach_dev,
                .detach_dev     = apple_dart_detach_dev,
@@ -859,16 +860,15 @@ static int apple_dart_probe(struct platform_device *pdev)
        dart->dev = dev;
        spin_lock_init(&dart->lock);
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       dart->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+       if (IS_ERR(dart->regs))
+               return PTR_ERR(dart->regs);
+
        if (resource_size(res) < 0x4000) {
                dev_err(dev, "MMIO region too small (%pr)\n", res);
                return -EINVAL;
        }
 
-       dart->regs = devm_ioremap_resource(dev, res);
-       if (IS_ERR(dart->regs))
-               return PTR_ERR(dart->regs);
-
        dart->irq = platform_get_irq(pdev, 0);
        if (dart->irq < 0)
                return -ENODEV;
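
The apple-dart hunk folds platform_get_resource() plus devm_ioremap_resource() into devm_platform_get_and_ioremap_resource(), which also returns the struct resource through its third argument so the size check can be kept. A minimal before/after sketch:

	/* before: two calls, and res was used before ioremap validated it */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	regs = devm_ioremap_resource(&pdev->dev, res);

	/* after: one call; res comes back for the sanity check */
	regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
	if (IS_ERR(regs))
		return PTR_ERR(regs);
	if (resource_size(res) < 0x4000)
		return -EINVAL;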
index 22ddd05bbdcd0dbf2c0a50207ca7aa61c8fcdc8a..c623dae1e1154573bfddc4697c4ecdb72fe7b380 100644 (file)
@@ -183,7 +183,14 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn,
 {
        struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
        struct arm_smmu_domain *smmu_domain = smmu_mn->domain;
-       size_t size = end - start + 1;
+       size_t size;
+
+       /*
+        * mm_types defines vm_end as the first byte after the end address,
+        * whereas the IOMMU subsystem uses the last address of an address
+        * range. Translate between the two by computing the size directly.
+        */
+       size = end - start;
 
        if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM))
                arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid,
index 01e9b50b10a18d564853a1a4a9b06709f0c8039c..87bf522b9d2eec034d66bf6c5d144384ae7093fa 100644 (file)
@@ -258,6 +258,34 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi
                        dev_name(dev), err);
 }
 
+static int nvidia_smmu_init_context(struct arm_smmu_domain *smmu_domain,
+                                   struct io_pgtable_cfg *pgtbl_cfg,
+                                   struct device *dev)
+{
+       struct arm_smmu_device *smmu = smmu_domain->smmu;
+       const struct device_node *np = smmu->dev->of_node;
+
+       /*
+        * Tegra194 and Tegra234 SoCs have an erratum that causes walk cache
+        * entries to not be invalidated correctly. The problem is that the
+        * walk cache index generated for an IOVA is not the same across
+        * translation and invalidation requests. This leads to page faults
+        * when a PMD entry is released during unmap and repopulated with a
+        * new PTE table during a subsequent map request. Disabling large
+        * page mappings avoids releasing the PMD entry and prevents
+        * translations from seeing a stale PMD entry in the walk cache.
+        * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194
+        * and Tegra234.
+        */
+       if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
+           of_device_is_compatible(np, "nvidia,tegra194-smmu")) {
+               smmu->pgsize_bitmap = PAGE_SIZE;
+               pgtbl_cfg->pgsize_bitmap = smmu->pgsize_bitmap;
+       }
+
+       return 0;
+}
+
 static const struct arm_smmu_impl nvidia_smmu_impl = {
        .read_reg = nvidia_smmu_read_reg,
        .write_reg = nvidia_smmu_write_reg,
@@ -268,10 +296,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = {
        .global_fault = nvidia_smmu_global_fault,
        .context_fault = nvidia_smmu_context_fault,
        .probe_finalize = nvidia_smmu_probe_finalize,
+       .init_context = nvidia_smmu_init_context,
 };
 
 static const struct arm_smmu_impl nvidia_smmu_single_impl = {
        .probe_finalize = nvidia_smmu_probe_finalize,
+       .init_context = nvidia_smmu_init_context,
 };
 
 struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
index df5c62ecf942b8e0b402cf05dbb79a9ee95835ed..0ea47e17b379e5edd0a90d7e292289e6a5eefa84 100644 (file)
@@ -1588,7 +1588,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
                                  unsigned long pfn, unsigned int pages,
                                  int ih, int map)
 {
-       unsigned int mask = ilog2(__roundup_pow_of_two(pages));
+       unsigned int aligned_pages = __roundup_pow_of_two(pages);
+       unsigned int mask = ilog2(aligned_pages);
        uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
        u16 did = domain->iommu_did[iommu->seq_id];
 
@@ -1600,10 +1601,30 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
        if (domain_use_first_level(domain)) {
                qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih);
        } else {
+               unsigned long bitmask = aligned_pages - 1;
+
+               /*
+                * PSI masks the low order bits of the base address. If the
+                * address isn't aligned to the mask, then compute a mask value
+                * needed to ensure the target range is flushed.
+                */
+               if (unlikely(bitmask & pfn)) {
+                       unsigned long end_pfn = pfn + pages - 1, shared_bits;
+
+                       /*
+                        * Since end_pfn <= pfn + bitmask, the only way bits
+                        * higher than bitmask can differ in pfn and end_pfn is
+                        * by carrying. This means after masking out bitmask,
+                        * high bits starting with the first set bit in
+                        * shared_bits are all equal in both pfn and end_pfn.
+                        */
+                       shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
+                       mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
+               }
+
                /*
                 * Fallback to domain selective flush if no PSI support or
-                * the size is too big. PSI requires page size to be 2 ^ x,
-                * and the base address is naturally aligned to the size.
+                * the size is too big.
                 */
                if (!cap_pgsel_inv(iommu->cap) ||
                    mask > cap_max_amask_val(iommu->cap))
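
The intel-iommu change widens the invalidation mask when the base pfn is not aligned to the power-of-two page count, since PSI clears the low "mask" bits of the base address. A worked example of the hunk's computation, for pfn = 0x3 and pages = 2:

	unsigned long pfn = 0x3, pages = 2;
	unsigned long aligned_pages = 2;	/* __roundup_pow_of_two(2) */
	unsigned long bitmask = 0x1;		/* aligned_pages - 1; 0x1 & pfn != 0 */
	unsigned long end_pfn = 0x4;		/* pfn + pages - 1 */
	unsigned long shared_bits = ~(0x3 ^ 0x4) & ~0x1;	/* ...fff8 */
	unsigned int mask = __ffs(shared_bits);	/* 3 */

With mask = 3 the flush covers the 8-page aligned block 0x0-0x7, which contains the target range 0x3-0x4; the old mask of 1 would only have covered 0x2-0x3 and missed 0x4.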
index 23a38763c1d1ff6efdc519c42acf88f78621f500..7ee37d996e157234147fe12e6d870ef3ef23025b 100644 (file)
@@ -757,6 +757,10 @@ bad_req:
                        goto bad_req;
                }
 
+               /* Drop Stop Marker message. No need for a response. */
+               if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
+                       goto prq_advance;
+
                if (!svm || svm->pasid != req->pasid) {
                        /*
                         * It can't go away, because the driver is not permitted
index f2c45b85b9fc252ad1e08352ac32e6053064a2c3..857d4c2fd1a206a0a944f3acf5e6ad79f4c9f21b 100644 (file)
@@ -506,6 +506,13 @@ int iommu_get_group_resv_regions(struct iommu_group *group,
        list_for_each_entry(device, &group->devices, list) {
                struct list_head dev_resv_regions;
 
+               /*
+                * Non-API groups still expose reserved_regions in sysfs,
+                * so filter out calls that get here that way.
+                */
+               if (!device->dev->iommu)
+                       break;
+
                INIT_LIST_HEAD(&dev_resv_regions);
                iommu_get_resv_regions(device->dev, &dev_resv_regions);
                ret = iommu_insert_device_resv_regions(&dev_resv_regions, head);
@@ -3019,7 +3026,7 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
        if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
                return -EACCES;
 
-       if (WARN_ON(!group))
+       if (WARN_ON(!group) || !group->default_domain)
                return -EINVAL;
 
        if (sysfs_streq(buf, "identity"))
index 4aab631ef51738a802fff2f70d610c1693cc1f7c..d9cf2820c02eae1ad3b1c0db47a36074671c7f4c 100644 (file)
@@ -1661,7 +1661,7 @@ static struct iommu_device *omap_iommu_probe_device(struct device *dev)
        num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus",
                                                     sizeof(phandle));
        if (num_iommus < 0)
-               return 0;
+               return ERR_PTR(-ENODEV);
 
        arch_data = kcalloc(num_iommus + 1, sizeof(*arch_data), GFP_KERNEL);
        if (!arch_data)
index 680d2fcf2686386bfbee14e6e7eb94e13303070f..135c156673a733a7ff713e1a15e11191f40f51b4 100644 (file)
@@ -257,6 +257,18 @@ config ST_IRQCHIP
        help
          Enables SysCfg Controlled IRQs on STi based platforms.
 
+config SUN4I_INTC
+       bool
+
+config SUN6I_R_INTC
+       bool
+       select IRQ_DOMAIN_HIERARCHY
+       select IRQ_FASTEOI_HIERARCHY_HANDLERS
+
+config SUNXI_NMI_INTC
+       bool
+       select GENERIC_IRQ_CHIP
+
 config TB10X_IRQC
        bool
        select IRQ_DOMAIN
@@ -433,6 +445,7 @@ config QCOM_PDC
 config QCOM_MPM
        tristate "QCOM MPM"
        depends on ARCH_QCOM
+       depends on MAILBOX
        select IRQ_DOMAIN_HIERARCHY
        help
          MSM Power Manager driver to manage and configure wakeup
index 160a1d8ceaa96e6f255c4db8a6332f4aa37481b9..9b1ffb0f98cc090135cfa54b58b129e64ed5656d 100644 (file)
@@ -23,9 +23,9 @@ obj-$(CONFIG_OMPIC)                   += irq-ompic.o
 obj-$(CONFIG_OR1K_PIC)                 += irq-or1k-pic.o
 obj-$(CONFIG_ORION_IRQCHIP)            += irq-orion.o
 obj-$(CONFIG_OMAP_IRQCHIP)             += irq-omap-intc.o
-obj-$(CONFIG_ARCH_SUNXI)               += irq-sun4i.o
-obj-$(CONFIG_ARCH_SUNXI)               += irq-sun6i-r.o
-obj-$(CONFIG_ARCH_SUNXI)               += irq-sunxi-nmi.o
+obj-$(CONFIG_SUN4I_INTC)               += irq-sun4i.o
+obj-$(CONFIG_SUN6I_R_INTC)             += irq-sun6i-r.o
+obj-$(CONFIG_SUNXI_NMI_INTC)           += irq-sunxi-nmi.o
 obj-$(CONFIG_ARCH_SPEAR3XX)            += spear-shirq.o
 obj-$(CONFIG_ARM_GIC)                  += irq-gic.o irq-gic-common.o
 obj-$(CONFIG_ARM_GIC_PM)               += irq-gic-pm.o
index 5b8d571c041dccfe80fbad1756ebfd77cf7fb7ed..ee18eb3e72b72ff64e116b86c7d97e7dbb53de59 100644 (file)
@@ -209,15 +209,29 @@ static struct msi_domain_info armada_370_xp_msi_domain_info = {
 
 static void armada_370_xp_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 {
+       unsigned int cpu = cpumask_first(irq_data_get_effective_affinity_mask(data));
+
        msg->address_lo = lower_32_bits(msi_doorbell_addr);
        msg->address_hi = upper_32_bits(msi_doorbell_addr);
-       msg->data = 0xf00 | (data->hwirq + PCI_MSI_DOORBELL_START);
+       msg->data = BIT(cpu + 8) | (data->hwirq + PCI_MSI_DOORBELL_START);
 }
 
 static int armada_370_xp_msi_set_affinity(struct irq_data *irq_data,
                                          const struct cpumask *mask, bool force)
 {
-        return -EINVAL;
+       unsigned int cpu;
+
+       if (!force)
+               cpu = cpumask_any_and(mask, cpu_online_mask);
+       else
+               cpu = cpumask_first(mask);
+
+       if (cpu >= nr_cpu_ids)
+               return -EINVAL;
+
+       irq_data_update_effective_affinity(irq_data, cpumask_of(cpu));
+
+       return IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip armada_370_xp_msi_bottom_irq_chip = {
@@ -264,11 +278,21 @@ static const struct irq_domain_ops armada_370_xp_msi_domain_ops = {
        .free   = armada_370_xp_msi_free,
 };
 
-static int armada_370_xp_msi_init(struct device_node *node,
-                                 phys_addr_t main_int_phys_base)
+static void armada_370_xp_msi_reenable_percpu(void)
 {
        u32 reg;
 
+       /* Enable MSI doorbell mask and combined cpu local interrupt */
+       reg = readl(per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS)
+               | PCI_MSI_DOORBELL_MASK;
+       writel(reg, per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS);
+       /* Unmask local doorbell interrupt */
+       writel(1, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
+}
+
+static int armada_370_xp_msi_init(struct device_node *node,
+                                 phys_addr_t main_int_phys_base)
+{
        msi_doorbell_addr = main_int_phys_base +
                ARMADA_370_XP_SW_TRIG_INT_OFFS;
 
@@ -287,18 +311,13 @@ static int armada_370_xp_msi_init(struct device_node *node,
                return -ENOMEM;
        }
 
-       reg = readl(per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS)
-               | PCI_MSI_DOORBELL_MASK;
-
-       writel(reg, per_cpu_int_base +
-              ARMADA_370_XP_IN_DRBEL_MSK_OFFS);
-
-       /* Unmask IPI interrupt */
-       writel(1, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
+       armada_370_xp_msi_reenable_percpu();
 
        return 0;
 }
 #else
+static void armada_370_xp_msi_reenable_percpu(void) {}
+
 static inline int armada_370_xp_msi_init(struct device_node *node,
                                         phys_addr_t main_int_phys_base)
 {
@@ -308,7 +327,16 @@ static inline int armada_370_xp_msi_init(struct device_node *node,
 
 static void armada_xp_mpic_perf_init(void)
 {
-       unsigned long cpuid = cpu_logical_map(smp_processor_id());
+       unsigned long cpuid;
+
+       /*
+        * This Performance Counter Overflow interrupt is specific to
+        * Armada 370 and XP. It is not available on Armada 375, 38x and 39x.
+        */
+       if (!of_machine_is_compatible("marvell,armada-370-xp"))
+               return;
+
+       cpuid = cpu_logical_map(smp_processor_id());
 
        /* Enable Performance Counter Overflow interrupts */
        writel(ARMADA_370_XP_INT_CAUSE_PERF(cpuid),
@@ -501,6 +529,8 @@ static void armada_xp_mpic_reenable_percpu(void)
        }
 
        ipi_resume();
+
+       armada_370_xp_msi_reenable_percpu();
 }
 
 static int armada_xp_mpic_starting_cpu(unsigned int cpu)
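
The hunk above changes the MSI doorbell payload from the old 0xf00 mask, which raised the doorbell summary bit for every CPU at once, to BIT(cpu + 8), which targets exactly the CPU recorded in the effective affinity. A minimal sketch of that encoding, assuming the driver's doorbell numbering (the EXAMPLE_* constant below is an illustrative stand-in for PCI_MSI_DOORBELL_START):

#include <linux/bits.h>
#include <linux/types.h>

/* Illustrative stand-in for the driver's PCI_MSI_DOORBELL_START. */
#define EXAMPLE_MSI_DOORBELL_START	16

/*
 * Compose the MSI data word: bits 8 and up carry one per-CPU doorbell
 * select bit, the low bits carry the doorbell number for this MSI.
 */
static u32 example_msi_data(unsigned int cpu, unsigned long hwirq)
{
	return BIT(cpu + 8) | (hwirq + EXAMPLE_MSI_DOORBELL_START);
}
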
index a47db16ff9603e1ab18575521d768c0d64198447..9c9fc3e2967ede2e3be21895af3d20229ee4ab8e 100644 (file)
@@ -77,8 +77,8 @@ static int __init aspeed_i2c_ic_of_init(struct device_node *node,
        }
 
        i2c_ic->parent_irq = irq_of_parse_and_map(node, 0);
-       if (i2c_ic->parent_irq < 0) {
-               ret = i2c_ic->parent_irq;
+       if (!i2c_ic->parent_irq) {
+               ret = -EINVAL;
                goto err_iounmap;
        }
 
index 18b77c3e6db4ba939b79152b5df8b79316c60e86..279e92cf0b16bcddd40088cbc3cc8de9da820617 100644 (file)
@@ -157,8 +157,8 @@ static int aspeed_scu_ic_of_init_common(struct aspeed_scu_ic *scu_ic,
        }
 
        irq = irq_of_parse_and_map(node, 0);
-       if (irq < 0) {
-               rc = irq;
+       if (!irq) {
+               rc = -EINVAL;
                goto err;
        }
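
Both Aspeed fixes above correct the same misuse: irq_of_parse_and_map() returns an unsigned virq number that is 0 on failure, never a negative errno, so a `< 0` check can never trigger. A minimal sketch of the correct pattern in a hypothetical probe path:

#include <linux/of_irq.h>
#include <linux/errno.h>

/*
 * irq_of_parse_and_map() returns a virq, or 0 on failure -- it never
 * returns a negative errno, so test for 0 and supply your own errno.
 */
static int example_get_parent_irq(struct device_node *node)
{
	unsigned int irq = irq_of_parse_and_map(node, 0);

	if (!irq)
		return -EINVAL;

	return irq;
}
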
 
index fd079215c17fd366b8bd420395a2dbd7e9c10e85..142a7431745f940cc2ce327d8afc5ab70f7e6908 100644 (file)
@@ -315,7 +315,7 @@ static int __init bcm6345_l1_of_init(struct device_node *dn,
                        cpumask_set_cpu(idx, &intc->cpumask);
        }
 
-       if (!cpumask_weight(&intc->cpumask)) {
+       if (cpumask_empty(&intc->cpumask)) {
                ret = -ENODEV;
                goto out_free;
        }
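
cpumask_empty() is preferred here for more than spelling: it stops at the first set bit, whereas cpumask_weight() always counts the entire mask just to compare the total against zero. The idiomatic test, as a trivial sketch:

#include <linux/cpumask.h>
#include <linux/errno.h>

/* Emptiness check that bails out at the first set bit. */
static int example_check_cpus(const struct cpumask *mask)
{
	return cpumask_empty(mask) ? -ENODEV : 0;
}
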
index d36f536506ba48472bed47c93832aac7234a17f7..42d8a2438ebc2910f79bb852146a1ba338c4cbe9 100644 (file)
@@ -136,11 +136,11 @@ static inline bool handle_irq_perbit(struct pt_regs *regs, u32 hwirq,
                                     u32 irq_base)
 {
        if (hwirq == 0)
-               return 0;
+               return false;
 
        generic_handle_domain_irq(root_domain, irq_base + __fls(hwirq));
 
-       return 1;
+       return true;
 }
 
 /* gx6605s 64 irqs interrupt controller */
index cd772973114afab89e9afe734fa91053eaa2121b..5ff09de6c48fcbec5a9b954fa284c02617204de4 100644 (file)
@@ -1624,7 +1624,7 @@ static int its_select_cpu(struct irq_data *d,
 
                cpu = cpumask_pick_least_loaded(d, tmpmask);
        } else {
-               cpumask_and(tmpmask, irq_data_get_affinity_mask(d), cpu_online_mask);
+               cpumask_copy(tmpmask, aff_mask);
 
                /* If we cannot cross sockets, limit the search to that node */
                if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) &&
@@ -3011,18 +3011,12 @@ static int __init allocate_lpi_tables(void)
        return 0;
 }
 
-static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set)
+static u64 read_vpend_dirty_clear(void __iomem *vlpi_base)
 {
        u32 count = 1000000;    /* 1s! */
        bool clean;
        u64 val;
 
-       val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
-       val &= ~GICR_VPENDBASER_Valid;
-       val &= ~clr;
-       val |= set;
-       gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
-
        do {
                val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
                clean = !(val & GICR_VPENDBASER_Dirty);
@@ -3033,10 +3027,26 @@ static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set)
                }
        } while (!clean && count);
 
-       if (unlikely(val & GICR_VPENDBASER_Dirty)) {
+       if (unlikely(!clean))
                pr_err_ratelimited("ITS virtual pending table not cleaning\n");
+
+       return val;
+}
+
+static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set)
+{
+       u64 val;
+
+       /* Make sure we wait until the RD is done with the initial scan */
+       val = read_vpend_dirty_clear(vlpi_base);
+       val &= ~GICR_VPENDBASER_Valid;
+       val &= ~clr;
+       val |= set;
+       gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
+
+       val = read_vpend_dirty_clear(vlpi_base);
+       if (unlikely(val & GICR_VPENDBASER_Dirty))
                val |= GICR_VPENDBASER_PendingLast;
-       }
 
        return val;
 }
index 0efe1a9a9f3b234930c2a7a899a4ec16d0d4447f..2be8dea6b6b00149efa613ed541260723239e61e 100644 (file)
@@ -206,11 +206,11 @@ static inline void __iomem *gic_dist_base(struct irq_data *d)
        }
 }
 
-static void gic_do_wait_for_rwp(void __iomem *base)
+static void gic_do_wait_for_rwp(void __iomem *base, u32 bit)
 {
        u32 count = 1000000;    /* 1s! */
 
-       while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) {
+       while (readl_relaxed(base + GICD_CTLR) & bit) {
                count--;
                if (!count) {
                        pr_err_ratelimited("RWP timeout, gone fishing\n");
@@ -224,13 +224,13 @@ static void gic_do_wait_for_rwp(void __iomem *base)
 /* Wait for completion of a distributor change */
 static void gic_dist_wait_for_rwp(void)
 {
-       gic_do_wait_for_rwp(gic_data.dist_base);
+       gic_do_wait_for_rwp(gic_data.dist_base, GICD_CTLR_RWP);
 }
 
 /* Wait for completion of a redistributor change */
 static void gic_redist_wait_for_rwp(void)
 {
-       gic_do_wait_for_rwp(gic_data_rdist_rd_base());
+       gic_do_wait_for_rwp(gic_data_rdist_rd_base(), GICR_CTLR_RWP);
 }
 
 #ifdef CONFIG_ARM64
@@ -352,28 +352,27 @@ static int gic_peek_irq(struct irq_data *d, u32 offset)
 
 static void gic_poke_irq(struct irq_data *d, u32 offset)
 {
-       void (*rwp_wait)(void);
        void __iomem *base;
        u32 index, mask;
 
        offset = convert_offset_index(d, offset, &index);
        mask = 1 << (index % 32);
 
-       if (gic_irq_in_rdist(d)) {
+       if (gic_irq_in_rdist(d))
                base = gic_data_rdist_sgi_base();
-               rwp_wait = gic_redist_wait_for_rwp;
-       } else {
+       else
                base = gic_data.dist_base;
-               rwp_wait = gic_dist_wait_for_rwp;
-       }
 
        writel_relaxed(mask, base + offset + (index / 32) * 4);
-       rwp_wait();
 }
 
 static void gic_mask_irq(struct irq_data *d)
 {
        gic_poke_irq(d, GICD_ICENABLER);
+       if (gic_irq_in_rdist(d))
+               gic_redist_wait_for_rwp();
+       else
+               gic_dist_wait_for_rwp();
 }
 
 static void gic_eoimode1_mask_irq(struct irq_data *d)
@@ -420,7 +419,11 @@ static int gic_irq_set_irqchip_state(struct irq_data *d,
                break;
 
        case IRQCHIP_STATE_MASKED:
-               reg = val ? GICD_ICENABLER : GICD_ISENABLER;
+               if (val) {
+                       gic_mask_irq(d);
+                       return 0;
+               }
+               reg = GICD_ISENABLER;
                break;
 
        default:
@@ -556,7 +559,8 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
 
 static void gic_eoi_irq(struct irq_data *d)
 {
-       gic_write_eoir(gic_irq(d));
+       write_gicreg(gic_irq(d), ICC_EOIR1_EL1);
+       isb();
 }
 
 static void gic_eoimode1_eoi_irq(struct irq_data *d)
@@ -574,7 +578,6 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 {
        enum gic_intid_range range;
        unsigned int irq = gic_irq(d);
-       void (*rwp_wait)(void);
        void __iomem *base;
        u32 offset, index;
        int ret;
@@ -590,17 +593,14 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
            type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING)
                return -EINVAL;
 
-       if (gic_irq_in_rdist(d)) {
+       if (gic_irq_in_rdist(d))
                base = gic_data_rdist_sgi_base();
-               rwp_wait = gic_redist_wait_for_rwp;
-       } else {
+       else
                base = gic_data.dist_base;
-               rwp_wait = gic_dist_wait_for_rwp;
-       }
 
        offset = convert_offset_index(d, GICD_ICFGR, &index);
 
-       ret = gic_configure_irq(index, type, base + offset, rwp_wait);
+       ret = gic_configure_irq(index, type, base + offset, NULL);
        if (ret && (range == PPI_RANGE || range == EPPI_RANGE)) {
                /* Misconfigured PPIs are usually not fatal */
                pr_warn("GIC: PPI INTID%d is secure or misconfigured\n", irq);
@@ -640,82 +640,101 @@ static void gic_deactivate_unhandled(u32 irqnr)
                if (irqnr < 8192)
                        gic_write_dir(irqnr);
        } else {
-               gic_write_eoir(irqnr);
+               write_gicreg(irqnr, ICC_EOIR1_EL1);
+               isb();
        }
 }
 
-static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
+/*
+ * Follow a read of the IAR with any HW maintenance that needs to happen prior
+ * to invoking the relevant IRQ handler. We must do two things:
+ *
+ * (1) Ensure instruction ordering between a read of IAR and subsequent
+ *     instructions in the IRQ handler using an ISB.
+ *
+ *     It is possible for the IAR to report an IRQ which was signalled *after*
+ *     the CPU took an IRQ exception as multiple interrupts can race to be
+ *     recognized by the GIC, earlier interrupts could be withdrawn, and/or
+ *     later interrupts could be prioritized by the GIC.
+ *
+ *     For devices which are tightly coupled to the CPU, such as PMUs, a
+ *     context synchronization event is necessary to ensure that system
+ *     register state is not stale, as these may have been indirectly written
+ *     *after* exception entry.
+ *
+ * (2) Deactivate the interrupt when EOI mode 1 is in use.
+ */
+static inline void gic_complete_ack(u32 irqnr)
 {
-       bool irqs_enabled = interrupts_enabled(regs);
-       int err;
-
-       if (irqs_enabled)
-               nmi_enter();
-
        if (static_branch_likely(&supports_deactivate_key))
-               gic_write_eoir(irqnr);
-       /*
-        * Leave the PSR.I bit set to prevent other NMIs to be
-        * received while handling this one.
-        * PSR.I will be restored when we ERET to the
-        * interrupted context.
-        */
-       err = generic_handle_domain_nmi(gic_data.domain, irqnr);
-       if (err)
-               gic_deactivate_unhandled(irqnr);
+               write_gicreg(irqnr, ICC_EOIR1_EL1);
 
-       if (irqs_enabled)
-               nmi_exit();
+       isb();
 }
 
-static u32 do_read_iar(struct pt_regs *regs)
+static bool gic_rpr_is_nmi_prio(void)
 {
-       u32 iar;
+       if (!gic_supports_nmi())
+               return false;
 
-       if (gic_supports_nmi() && unlikely(!interrupts_enabled(regs))) {
-               u64 pmr;
+       return unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI));
+}
 
-               /*
-                * We were in a context with IRQs disabled. However, the
-                * entry code has set PMR to a value that allows any
-                * interrupt to be acknowledged, and not just NMIs. This can
-                * lead to surprising effects if the NMI has been retired in
-                * the meantime, and that there is an IRQ pending. The IRQ
-                * would then be taken in NMI context, something that nobody
-                * wants to debug twice.
-                *
-                * Until we sort this, drop PMR again to a level that will
-                * actually only allow NMIs before reading IAR, and then
-                * restore it to what it was.
-                */
-               pmr = gic_read_pmr();
-               gic_pmr_mask_irqs();
-               isb();
+static bool gic_irqnr_is_special(u32 irqnr)
+{
+       return irqnr >= 1020 && irqnr <= 1023;
+}
+
+static void __gic_handle_irq(u32 irqnr, struct pt_regs *regs)
+{
+       if (gic_irqnr_is_special(irqnr))
+               return;
 
-               iar = gic_read_iar();
+       gic_complete_ack(irqnr);
 
-               gic_write_pmr(pmr);
-       } else {
-               iar = gic_read_iar();
+       if (generic_handle_domain_irq(gic_data.domain, irqnr)) {
+               WARN_ONCE(true, "Unexpected interrupt (irqnr %u)\n", irqnr);
+               gic_deactivate_unhandled(irqnr);
        }
+}
 
-       return iar;
+static void __gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
+{
+       if (gic_irqnr_is_special(irqnr))
+               return;
+
+       gic_complete_ack(irqnr);
+
+       if (generic_handle_domain_nmi(gic_data.domain, irqnr)) {
+               WARN_ONCE(true, "Unexpected pseudo-NMI (irqnr %u)\n", irqnr);
+               gic_deactivate_unhandled(irqnr);
+       }
 }
 
-static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
+/*
+ * An exception has been taken from a context with IRQs enabled, and this could
+ * be an IRQ or an NMI.
+ *
+ * The entry code called us with DAIF.IF set to keep NMIs masked. We must clear
+ * DAIF.IF (and update ICC_PMR_EL1 to mask regular IRQs) prior to returning,
+ * after handling any NMI but before handling any IRQ.
+ *
+ * The entry code has performed IRQ entry, and if an NMI is detected we must
+ * perform NMI entry/exit around invoking the handler.
+ */
+static void __gic_handle_irq_from_irqson(struct pt_regs *regs)
 {
+       bool is_nmi;
        u32 irqnr;
 
-       irqnr = do_read_iar(regs);
+       irqnr = gic_read_iar();
 
-       /* Check for special IDs first */
-       if ((irqnr >= 1020 && irqnr <= 1023))
-               return;
+       is_nmi = gic_rpr_is_nmi_prio();
 
-       if (gic_supports_nmi() &&
-           unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI))) {
-               gic_handle_nmi(irqnr, regs);
-               return;
+       if (is_nmi) {
+               nmi_enter();
+               __gic_handle_nmi(irqnr, regs);
+               nmi_exit();
        }
 
        if (gic_prio_masking_enabled()) {
@@ -723,15 +742,52 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
                gic_arch_enable_irqs();
        }
 
-       if (static_branch_likely(&supports_deactivate_key))
-               gic_write_eoir(irqnr);
-       else
-               isb();
+       if (!is_nmi)
+               __gic_handle_irq(irqnr, regs);
+}
 
-       if (generic_handle_domain_irq(gic_data.domain, irqnr)) {
-               WARN_ONCE(true, "Unexpected interrupt received!\n");
-               gic_deactivate_unhandled(irqnr);
-       }
+/*
+ * An exception has been taken from a context with IRQs disabled, which can only
+ * be an NMI.
+ *
+ * The entry code called us with DAIF.IF set to keep NMIs masked. We must leave
+ * DAIF.IF (and ICC_PMR_EL1) unchanged.
+ *
+ * The entry code has performed NMI entry.
+ */
+static void __gic_handle_irq_from_irqsoff(struct pt_regs *regs)
+{
+       u64 pmr;
+       u32 irqnr;
+
+       /*
+        * We were in a context with IRQs disabled. However, the
+        * entry code has set PMR to a value that allows any
+        * interrupt to be acknowledged, and not just NMIs. This can
+        * lead to surprising effects if the NMI has been retired in
+        * the meantime, and that there is an IRQ pending. The IRQ
+        * would then be taken in NMI context, something that nobody
+        * wants to debug twice.
+        *
+        * Until we sort this, drop PMR again to a level that will
+        * actually only allow NMIs before reading IAR, and then
+        * restore it to what it was.
+        */
+       pmr = gic_read_pmr();
+       gic_pmr_mask_irqs();
+       isb();
+       irqnr = gic_read_iar();
+       gic_write_pmr(pmr);
+
+       __gic_handle_nmi(irqnr, regs);
+}
+
+static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
+{
+       if (unlikely(gic_supports_nmi() && !interrupts_enabled(regs)))
+               __gic_handle_irq_from_irqsoff(regs);
+       else
+               __gic_handle_irq_from_irqson(regs);
 }
 
 static u32 gic_get_pribits(void)
@@ -807,8 +863,8 @@ static void __init gic_dist_init(void)
        for (i = 0; i < GIC_ESPI_NR; i += 4)
                writel_relaxed(GICD_INT_DEF_PRI_X4, base + GICD_IPRIORITYRnE + i);
 
-       /* Now do the common stuff, and wait for the distributor to drain */
-       gic_dist_config(base, GIC_LINE_NR, gic_dist_wait_for_rwp);
+       /* Now do the common stuff */
+       gic_dist_config(base, GIC_LINE_NR, NULL);
 
        val = GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1;
        if (gic_data.rdists.gicd_typer2 & GICD_TYPER2_nASSGIcap) {
@@ -816,8 +872,9 @@ static void __init gic_dist_init(void)
                val |= GICD_CTLR_nASSGIreq;
        }
 
-       /* Enable distributor with ARE, Group1 */
+       /* Enable distributor with ARE, Group1, and wait for it to drain */
        writel_relaxed(val, base + GICD_CTLR);
+       gic_dist_wait_for_rwp();
 
        /*
         * Set all global interrupts to the boot CPU only. ARE must be
@@ -919,6 +976,7 @@ static int __gic_update_rdist_properties(struct redist_region *region,
                                         void __iomem *ptr)
 {
        u64 typer = gic_read_typer(ptr + GICR_TYPER);
+       u32 ctlr = readl_relaxed(ptr + GICR_CTLR);
 
        /* Boot-time cleanup */
        if ((typer & GICR_TYPER_VLPIS) && (typer & GICR_TYPER_RVPEID)) {
@@ -938,9 +996,18 @@ static int __gic_update_rdist_properties(struct redist_region *region,
 
        gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS);
 
-       /* RVPEID implies some form of DirectLPI, no matter what the doc says... :-/ */
+       /*
+        * TYPER.RVPEID implies some form of DirectLPI, no matter what the
+        * doc says... :-/ And CTLR.IR implies another subset of DirectLPI
+        * that the ITS driver can make use of for LPIs (and not VLPIs).
+        *
+        * These are 3 different ways to express the same thing, depending
+        * on the revision of the architecture and its relaxations over
+        * time. Just group them under the 'direct_lpi' banner.
+        */
        gic_data.rdists.has_rvpeid &= !!(typer & GICR_TYPER_RVPEID);
        gic_data.rdists.has_direct_lpi &= (!!(typer & GICR_TYPER_DirectLPIS) |
+                                          !!(ctlr & GICR_CTLR_IR) |
                                           gic_data.rdists.has_rvpeid);
        gic_data.rdists.has_vpend_valid_dirty &= !!(typer & GICR_TYPER_DIRTY);
 
@@ -962,7 +1029,11 @@ static void gic_update_rdist_properties(void)
        gic_iterate_rdists(__gic_update_rdist_properties);
        if (WARN_ON(gic_data.ppi_nr == UINT_MAX))
                gic_data.ppi_nr = 0;
-       pr_info("%d PPIs implemented\n", gic_data.ppi_nr);
+       pr_info("GICv3 features: %d PPIs%s%s\n",
+               gic_data.ppi_nr,
+               gic_data.has_rss ? ", RSS" : "",
+               gic_data.rdists.has_direct_lpi ? ", DirectLPI" : "");
+
        if (gic_data.rdists.has_vlpis)
                pr_info("GICv4 features: %s%s%s\n",
                        gic_data.rdists.has_direct_lpi ? "DirectLPI " : "",
@@ -1284,8 +1355,6 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
         */
        if (enabled)
                gic_unmask_irq(d);
-       else
-               gic_dist_wait_for_rwp();
 
        irq_data_update_effective_affinity(d, cpumask_of(cpu));
 
@@ -1466,6 +1535,12 @@ static int gic_irq_domain_translate(struct irq_domain *d,
                if (fwspec->param_count != 2)
                        return -EINVAL;
 
+               if (fwspec->param[0] < 16) {
+                       pr_err(FW_BUG "Illegal GSI%d translation request\n",
+                              fwspec->param[0]);
+                       return -EINVAL;
+               }
+
                *hwirq = fwspec->param[0];
                *type = fwspec->param[1];
 
@@ -1797,8 +1872,6 @@ static int __init gic_init_bases(void __iomem *dist_base,
        irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED);
 
        gic_data.has_rss = !!(typer & GICD_TYPER_RSS);
-       pr_info("Distributor has %sRange Selector support\n",
-               gic_data.has_rss ? "" : "no ");
 
        if (typer & GICD_TYPER_MBIS) {
                err = mbi_init(handle, gic_data.domain);
@@ -1974,10 +2047,10 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
        u32 nr_redist_regions;
        int err, i;
 
-       dist_base = of_iomap(node, 0);
-       if (!dist_base) {
+       dist_base = of_io_request_and_map(node, 0, "GICD");
+       if (IS_ERR(dist_base)) {
                pr_err("%pOF: unable to map gic dist registers\n", node);
-               return -ENXIO;
+               return PTR_ERR(dist_base);
        }
 
        err = gic_validate_dist_version(dist_base);
@@ -2001,8 +2074,8 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
                int ret;
 
                ret = of_address_to_resource(node, 1 + i, &res);
-               rdist_regs[i].redist_base = of_iomap(node, 1 + i);
-               if (ret || !rdist_regs[i].redist_base) {
+               rdist_regs[i].redist_base = of_io_request_and_map(node, 1 + i, "GICR");
+               if (ret || IS_ERR(rdist_regs[i].redist_base)) {
                        pr_err("%pOF: couldn't map region %d\n", node, i);
                        err = -ENODEV;
                        goto out_unmap_rdist;
@@ -2028,7 +2101,7 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
 
 out_unmap_rdist:
        for (i = 0; i < nr_redist_regions; i++)
-               if (rdist_regs[i].redist_base)
+               if (rdist_regs[i].redist_base && !IS_ERR(rdist_regs[i].redist_base))
                        iounmap(rdist_regs[i].redist_base);
        kfree(rdist_regs);
 out_unmap_dist:
@@ -2075,6 +2148,7 @@ gic_acpi_parse_madt_redist(union acpi_subtable_headers *header,
                pr_err("Couldn't map GICR region @%llx\n", redist->base_address);
                return -ENOMEM;
        }
+       request_mem_region(redist->base_address, redist->length, "GICR");
 
        gic_acpi_register_redist(redist->base_address, redist_base);
        return 0;
@@ -2097,6 +2171,7 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header,
        redist_base = ioremap(gicc->gicr_base_address, size);
        if (!redist_base)
                return -ENOMEM;
+       request_mem_region(gicc->gicr_base_address, size, "GICR");
 
        gic_acpi_register_redist(gicc->gicr_base_address, redist_base);
        return 0;
@@ -2298,6 +2373,7 @@ gic_acpi_init(union acpi_subtable_headers *header, const unsigned long end)
                pr_err("Unable to map GICD registers\n");
                return -ENOMEM;
        }
+       request_mem_region(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE, "GICD");
 
        err = gic_validate_dist_version(acpi_data.dist_base);
        if (err) {
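
The GICv3 mapping changes above do two things at once: the MMIO windows are now reserved as well as mapped (so they appear in /proc/iomem and overlaps are caught), and the failure convention moves from NULL to ERR_PTR, which is why the error paths switch to IS_ERR(). A hedged sketch of the resulting pattern:

#include <linux/err.h>
#include <linux/of_address.h>

/*
 * of_io_request_and_map() both request_mem_region()s and ioremap()s the
 * resource; it returns an ERR_PTR -- not NULL -- on failure, so both
 * the failure test and any unwind must use IS_ERR().
 */
static void __iomem *example_map_window(struct device_node *node, int index)
{
	void __iomem *base = of_io_request_and_map(node, index, "example");

	if (IS_ERR(base))
		return NULL;	/* callers may prefer to propagate PTR_ERR(base) */

	return base;
}
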
index 58ba835bee1f393fe4be3b0c3dab04367fc0c9bb..820404cb56bc7396a5c5aa51ef8c76c3ccb782a9 100644 (file)
@@ -1115,7 +1115,8 @@ static int gic_irq_domain_translate(struct irq_domain *d,
                *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
 
                /* Make it clear that broken DTs are... broken */
-               WARN_ON(*type == IRQ_TYPE_NONE);
+               WARN(*type == IRQ_TYPE_NONE,
+                    "HW irq %ld has invalid type\n", *hwirq);
                return 0;
        }
 
@@ -1123,10 +1124,17 @@ static int gic_irq_domain_translate(struct irq_domain *d,
                if (fwspec->param_count != 2)
                        return -EINVAL;
 
+               if (fwspec->param[0] < 16) {
+                       pr_err(FW_BUG "Illegal GSI%d translation request\n",
+                              fwspec->param[0]);
+                       return -EINVAL;
+               }
+
                *hwirq = fwspec->param[0];
                *type = fwspec->param[1];
 
-               WARN_ON(*type == IRQ_TYPE_NONE);
+               WARN(*type == IRQ_TYPE_NONE,
+                    "HW irq %ld has invalid type\n", *hwirq);
                return 0;
        }
 
index 8d91a02593fc2755d63375c9a4c24655aac831f3..96230a04ec23803771235045e4b555499f6ca7c2 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
+#include <linux/pm_runtime.h>
 #include <linux/spinlock.h>
 
 #define CTRL_STRIDE_OFF(_t, _r)        (_t * 4 * _r)
@@ -70,7 +71,7 @@ static void imx_irqsteer_irq_mask(struct irq_data *d)
        raw_spin_unlock_irqrestore(&data->lock, flags);
 }
 
-static struct irq_chip imx_irqsteer_irq_chip = {
+static const struct irq_chip imx_irqsteer_irq_chip = {
        .name           = "irqsteer",
        .irq_mask       = imx_irqsteer_irq_mask,
        .irq_unmask     = imx_irqsteer_irq_unmask,
@@ -175,7 +176,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev)
        data->irq_count = DIV_ROUND_UP(irqs_num, 64);
        data->reg_num = irqs_num / 32;
 
-       if (IS_ENABLED(CONFIG_PM_SLEEP)) {
+       if (IS_ENABLED(CONFIG_PM)) {
                data->saved_reg = devm_kzalloc(&pdev->dev,
                                        sizeof(u32) * data->reg_num,
                                        GFP_KERNEL);
@@ -199,6 +200,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev)
                ret = -ENOMEM;
                goto out;
        }
+       irq_domain_set_pm_device(data->domain, &pdev->dev);
 
        if (!data->irq_count || data->irq_count > CHAN_MAX_OUTPUT_INT) {
                ret = -EINVAL;
@@ -219,6 +221,9 @@ static int imx_irqsteer_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, data);
 
+       pm_runtime_set_active(&pdev->dev);
+       pm_runtime_enable(&pdev->dev);
+
        return 0;
 out:
        clk_disable_unprepare(data->ipg_clk);
@@ -241,7 +246,7 @@ static int imx_irqsteer_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_PM
 static void imx_irqsteer_save_regs(struct irqsteer_data *data)
 {
        int i;
@@ -288,7 +293,10 @@ static int imx_irqsteer_resume(struct device *dev)
 #endif
 
 static const struct dev_pm_ops imx_irqsteer_pm_ops = {
-       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_irqsteer_suspend, imx_irqsteer_resume)
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+                                     pm_runtime_force_resume)
+       SET_RUNTIME_PM_OPS(imx_irqsteer_suspend,
+                          imx_irqsteer_resume, NULL)
 };
 
 static const struct of_device_id imx_irqsteer_dt_ids[] = {
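
The irqsteer conversion above routes system sleep through runtime PM: the runtime callbacks own the register save/restore, and the noirq sleep hooks merely force the device down and back up that same path. A hedged sketch of the wiring, with the callback bodies elided (the example_* names are placeholders):

#include <linux/pm.h>
#include <linux/pm_runtime.h>

/* Placeholder callbacks standing in for the driver's save/restore. */
static int example_rt_suspend(struct device *dev) { return 0; }
static int example_rt_resume(struct device *dev) { return 0; }

/*
 * Runtime PM does the real work; system sleep just forces the device
 * through the same runtime-PM path.
 */
static const struct dev_pm_ops example_pm_ops = {
	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
				      pm_runtime_force_resume)
	SET_RUNTIME_PM_OPS(example_rt_suspend, example_rt_resume, NULL)
};
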
index eea5a753618c5e1f5a376d0514a8359507a4ee0d..d30614661eea69242c78a8e46d9952d40508a72c 100644 (file)
@@ -375,7 +375,7 @@ static int qcom_mpm_init(struct device_node *np, struct device_node *parent)
        raw_spin_lock_init(&priv->lock);
 
        priv->base = devm_platform_ioremap_resource(pdev, 0);
-       if (!priv->base)
+       if (IS_ERR(priv->base))
                return PTR_ERR(priv->base);
 
        for (i = 0; i < priv->reg_stride; i++) {
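
The qcom-mpm fix works because devm_platform_ioremap_resource() reports failure as an ERR_PTR and never returns NULL, so the old `!priv->base` test could never fire and an error pointer would have been handed on as a register base. A minimal sketch of the correct check:

#include <linux/err.h>
#include <linux/platform_device.h>

/*
 * devm_platform_ioremap_resource() yields a valid mapping or an
 * ERR_PTR(); it never yields NULL, so IS_ERR() is the only valid
 * failure test.
 */
static int example_probe(struct platform_device *pdev)
{
	void __iomem *base = devm_platform_ioremap_resource(pdev, 0);

	if (IS_ERR(base))
		return PTR_ERR(base);

	/* ... program base ... */
	return 0;
}
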
index abd011fcecf4a30d020c0c1e48fb44fd92acca8b..c7db617e1a2f62a9b9526cb43eb8dac93cf7fc59 100644 (file)
@@ -37,11 +37,26 @@ struct exiu_irq_data {
        u32             spi_base;
 };
 
-static void exiu_irq_eoi(struct irq_data *d)
+static void exiu_irq_ack(struct irq_data *d)
 {
        struct exiu_irq_data *data = irq_data_get_irq_chip_data(d);
 
        writel(BIT(d->hwirq), data->base + EIREQCLR);
+}
+
+static void exiu_irq_eoi(struct irq_data *d)
+{
+       struct exiu_irq_data *data = irq_data_get_irq_chip_data(d);
+
+       /*
+        * Level triggered interrupts are latched and must be cleared during
+        * EOI or the interrupt will be jammed on. Of course if a level
+        * triggered interrupt is still asserted then the write will not clear
+        * the interrupt.
+        */
+       if (irqd_is_level_type(d))
+               writel(BIT(d->hwirq), data->base + EIREQCLR);
+
        irq_chip_eoi_parent(d);
 }
 
@@ -91,10 +106,13 @@ static int exiu_irq_set_type(struct irq_data *d, unsigned int type)
        writel_relaxed(val, data->base + EILVL);
 
        val = readl_relaxed(data->base + EIEDG);
-       if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH)
+       if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH) {
                val &= ~BIT(d->hwirq);
-       else
+               irq_set_handler_locked(d, handle_fasteoi_irq);
+       } else {
                val |= BIT(d->hwirq);
+               irq_set_handler_locked(d, handle_fasteoi_ack_irq);
+       }
        writel_relaxed(val, data->base + EIEDG);
 
        writel_relaxed(BIT(d->hwirq), data->base + EIREQCLR);
@@ -104,6 +122,7 @@ static int exiu_irq_set_type(struct irq_data *d, unsigned int type)
 
 static struct irq_chip exiu_irq_chip = {
        .name                   = "EXIU",
+       .irq_ack                = exiu_irq_ack,
        .irq_eoi                = exiu_irq_eoi,
        .irq_enable             = exiu_irq_enable,
        .irq_mask               = exiu_irq_mask,
index 4cd3e533740bf7669f38c1e66a57f6f89b3b9f07..a01e440494154ecbf93e07a0fcb5f052ecb478a9 100644 (file)
@@ -249,11 +249,13 @@ static int sun6i_r_intc_domain_alloc(struct irq_domain *domain,
        for (i = 0; i < nr_irqs; ++i, ++hwirq, ++virq) {
                if (hwirq == nmi_hwirq) {
                        irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
-                                                     &sun6i_r_intc_nmi_chip, 0);
+                                                     &sun6i_r_intc_nmi_chip,
+                                                     NULL);
                        irq_set_handler(virq, handle_fasteoi_ack_irq);
                } else {
                        irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
-                                                     &sun6i_r_intc_wakeup_chip, 0);
+                                                     &sun6i_r_intc_wakeup_chip,
+                                                     NULL);
                }
        }
 
index 27933338f7b363d8d0061b86acbe260e688a4ce6..8c581c985aa7ddb0e9c96008807fb945eb1361ea 100644 (file)
@@ -151,14 +151,25 @@ static struct irq_chip xtensa_mx_irq_chip = {
        .irq_set_affinity = xtensa_mx_irq_set_affinity,
 };
 
+static void __init xtensa_mx_init_common(struct irq_domain *root_domain)
+{
+       unsigned int i;
+
+       irq_set_default_host(root_domain);
+       secondary_init_irq();
+
+       /* Initialize default IRQ routing to CPU 0 */
+       for (i = 0; i < XCHAL_NUM_EXTINTERRUPTS; ++i)
+               set_er(1, MIROUT(i));
+}
+
 int __init xtensa_mx_init_legacy(struct device_node *interrupt_parent)
 {
        struct irq_domain *root_domain =
                irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0,
                                &xtensa_mx_irq_domain_ops,
                                &xtensa_mx_irq_chip);
-       irq_set_default_host(root_domain);
-       secondary_init_irq();
+       xtensa_mx_init_common(root_domain);
        return 0;
 }
 
@@ -168,8 +179,7 @@ static int __init xtensa_mx_init(struct device_node *np,
        struct irq_domain *root_domain =
                irq_domain_add_linear(np, NR_IRQS, &xtensa_mx_irq_domain_ops,
                                &xtensa_mx_irq_chip);
-       irq_set_default_host(root_domain);
-       secondary_init_irq();
+       xtensa_mx_init_common(root_domain);
        return 0;
 }
 IRQCHIP_DECLARE(xtensa_mx_irq_chip, "cdns,xtensa-mx", xtensa_mx_init);
index 097577ae3c47177a6ec0706e106aa14e5a0b66e2..ce13c272c3872366eedd8a7d94894a81ea78d98c 100644 (file)
@@ -336,7 +336,7 @@ static int bch_allocator_thread(void *arg)
                                mutex_unlock(&ca->set->bucket_lock);
                                blkdev_issue_discard(ca->bdev,
                                        bucket_to_sector(ca->set, bucket),
-                                       ca->sb.bucket_size, GFP_KERNEL, 0);
+                                       ca->sb.bucket_size, GFP_KERNEL);
                                mutex_lock(&ca->set->bucket_lock);
                        }
 
index 6230dfdd9286ee1fe780a6989fab2783dac4ccba..7510d1c983a5edff59299e16641d60c66860b8de 100644 (file)
@@ -107,15 +107,16 @@ void bch_btree_verify(struct btree *b)
 
 void bch_data_verify(struct cached_dev *dc, struct bio *bio)
 {
+       unsigned int nr_segs = bio_segments(bio);
        struct bio *check;
        struct bio_vec bv, cbv;
        struct bvec_iter iter, citer = { 0 };
 
-       check = bio_kmalloc(GFP_NOIO, bio_segments(bio));
+       check = bio_kmalloc(nr_segs, GFP_NOIO);
        if (!check)
                return;
-       bio_set_dev(check, bio->bi_bdev);
-       check->bi_opf = REQ_OP_READ;
+       bio_init(check, bio->bi_bdev, check->bi_inline_vecs, nr_segs,
+                REQ_OP_READ);
        check->bi_iter.bi_sector = bio->bi_iter.bi_sector;
        check->bi_iter.bi_size = bio->bi_iter.bi_size;
 
@@ -146,7 +147,8 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
 
        bio_free_pages(check);
 out_put:
-       bio_put(check);
+       bio_uninit(check);
+       kfree(check);
 }
 
 #endif
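
The bcache change above follows the reworked bio_kmalloc() contract: the allocator no longer initializes the bio, so callers pair bio_kmalloc() with bio_init() on the inline vecs and tear down with bio_uninit() + kfree() rather than bio_put(). A hedged sketch of that lifecycle:

#include <linux/bio.h>
#include <linux/slab.h>

/* Allocate and initialize a throwaway read bio over its inline vecs. */
static struct bio *example_alloc_bio(struct block_device *bdev,
				     unsigned short nr_vecs)
{
	struct bio *bio = bio_kmalloc(nr_vecs, GFP_NOIO);

	if (!bio)
		return NULL;
	bio_init(bio, bdev, bio->bi_inline_vecs, nr_vecs, REQ_OP_READ);
	return bio;
}

/* Tear-down counterpart: bio_uninit() + kfree(), not bio_put(). */
static void example_free_bio(struct bio *bio)
{
	bio_uninit(bio);
	kfree(bio);
}
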
index 7c2ca52ca3e43f96f20793d08bd1ff74f7d30522..df5347ea450b56c966d17764e0fec63e6374b9fc 100644 (file)
@@ -771,12 +771,12 @@ static void journal_write_unlocked(struct closure *cl)
 
                bio_reset(bio, ca->bdev, REQ_OP_WRITE | 
                          REQ_SYNC | REQ_META | REQ_PREFLUSH | REQ_FUA);
-               bch_bio_map(bio, w->data);
                bio->bi_iter.bi_sector  = PTR_OFFSET(k, i);
                bio->bi_iter.bi_size = sectors << 9;
 
                bio->bi_end_io  = journal_write_endio;
                bio->bi_private = w;
+               bch_bio_map(bio, w->data);
 
                trace_bcache_journal_write(bio, w->data->keys);
                bio_list_add(&list, bio);
index fdd0194f84dd089572f34cdcad1401c8748db9b0..9c5dde73da88e9ea498125941fff86182bd2310c 100644 (file)
@@ -685,7 +685,7 @@ static void do_bio_hook(struct search *s,
 {
        struct bio *bio = &s->bio.bio;
 
-       bio_init_clone(bio->bi_bdev, bio, orig_bio, GFP_NOIO);
+       bio_init_clone(orig_bio->bi_bdev, bio, orig_bio, GFP_NOIO);
        /*
         * bi_end_io can be set separately somewhere else, e.g. the
         * variants in,
@@ -1005,7 +1005,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
                bio_get(s->iop.bio);
 
                if (bio_op(bio) == REQ_OP_DISCARD &&
-                   !blk_queue_discard(bdev_get_queue(dc->bdev)))
+                   !bdev_max_discard_sectors(dc->bdev))
                        goto insert_data;
 
                /* I/O request sent to backing device */
@@ -1115,7 +1115,7 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
        bio->bi_private = ddip;
 
        if ((bio_op(bio) == REQ_OP_DISCARD) &&
-           !blk_queue_discard(bdev_get_queue(dc->bdev)))
+           !bdev_max_discard_sectors(dc->bdev))
                bio->bi_end_io(bio);
        else
                submit_bio_noacct(bio);
index bf3de149d3c9f8ff6695877ffd786df24bb04ff5..2f49e31142f6231c593fd0cc2ccd4a32a280e87b 100644 (file)
@@ -973,7 +973,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 
        blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
        blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
-       blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);
 
        blk_queue_write_cache(q, true, true);
 
@@ -2350,7 +2349,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
        ca->bdev->bd_holder = ca;
        ca->sb_disk = sb_disk;
 
-       if (blk_queue_discard(bdev_get_queue(bdev)))
+       if (bdev_max_discard_sectors(bdev))
                ca->discard = CACHE_DISCARD(&ca->sb);
 
        ret = cache_alloc(ca);
index d1029d71ff3bc26d64518addbb20b39a8d3fc6ed..c6f677059214d33b4f1177a0e4ec4b506194d20b 100644 (file)
@@ -1151,7 +1151,7 @@ STORE(__bch_cache)
        if (attr == &sysfs_discard) {
                bool v = strtoul_or_return(buf);
 
-               if (blk_queue_discard(bdev_get_queue(ca->bdev)))
+               if (bdev_max_discard_sectors(ca->bdev))
                        ca->discard = v;
 
                if (v != CACHE_DISCARD(&ca->sb)) {
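
With QUEUE_FLAG_DISCARD gone, the checks above all reduce to one question: does the queue advertise a non-zero discard budget? A one-line helper capturing the new idiom:

#include <linux/blkdev.h>

/* A device supports discard iff its max discard budget is non-zero. */
static bool example_supports_discard(struct block_device *bdev)
{
	return bdev_max_discard_sectors(bdev) != 0;
}
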
index e9cbc70d5a0eec5d5b65571d3ede45943f3263c1..5ffa1dcf84cfc8f441398eacaaac1b16425ef544 100644 (file)
@@ -611,7 +611,8 @@ static void bio_complete(struct bio *bio)
 {
        struct dm_buffer *b = bio->bi_private;
        blk_status_t status = bio->bi_status;
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
        b->end_io(b, status);
 }
 
@@ -626,16 +627,14 @@ static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
        if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT))
                vec_size += 2;
 
-       bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size);
+       bio = bio_kmalloc(vec_size, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN);
        if (!bio) {
 dmio:
                use_dmio(b, rw, sector, n_sectors, offset);
                return;
        }
-
+       bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, rw);
        bio->bi_iter.bi_sector = sector;
-       bio_set_dev(bio, b->c->bdev);
-       bio_set_op_attrs(bio, rw, 0);
        bio->bi_end_io = bio_complete;
        bio->bi_private = b;
 
index 780a61bc6cc03912f9142334d3636846689c8bc3..28c5de8eca4a0fbd82aa692046d50e476663e736 100644 (file)
@@ -3329,13 +3329,6 @@ static int cache_iterate_devices(struct dm_target *ti,
        return r;
 }
 
-static bool origin_dev_supports_discard(struct block_device *origin_bdev)
-{
-       struct request_queue *q = bdev_get_queue(origin_bdev);
-
-       return blk_queue_discard(q);
-}
-
 /*
  * If discard_passdown was enabled verify that the origin device
  * supports discards.  Disable discard_passdown if not.
@@ -3349,7 +3342,7 @@ static void disable_passdown_if_not_supported(struct cache *cache)
        if (!cache->features.discard_passdown)
                return;
 
-       if (!origin_dev_supports_discard(origin_bdev))
+       if (!bdev_max_discard_sectors(origin_bdev))
                reason = "discard unsupported";
 
        else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
index 128316a73d0163a8513d86e30bcfcc306c4dc4af..811b0a5379d03d5487fd79b05fa4a1e71a437a23 100644 (file)
@@ -2016,13 +2016,6 @@ static void clone_resume(struct dm_target *ti)
        do_waker(&clone->waker.work);
 }
 
-static bool bdev_supports_discards(struct block_device *bdev)
-{
-       struct request_queue *q = bdev_get_queue(bdev);
-
-       return (q && blk_queue_discard(q));
-}
-
 /*
  * If discard_passdown was enabled verify that the destination device supports
  * discards. Disable discard_passdown if not.
@@ -2036,7 +2029,7 @@ static void disable_passdown_if_not_supported(struct clone *clone)
        if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
                return;
 
-       if (!bdev_supports_discards(dest_dev))
+       if (!bdev_max_discard_sectors(dest_dev))
                reason = "discard unsupported";
        else if (dest_limits->max_discard_sectors < clone->region_size)
                reason = "max discard sectors smaller than a region";
index ad2d5faa2ebbb4e4371f9f532e3558d551967465..36ae30b73a6e0eae654528741c964c6cb915a814 100644 (file)
@@ -4399,6 +4399,7 @@ try_smaller_buffer:
        }
 
        if (ic->internal_hash) {
+               size_t recalc_tags_size;
                ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
                if (!ic->recalc_wq) {
                        ti->error = "Cannot allocate workqueue";
@@ -4412,8 +4413,10 @@ try_smaller_buffer:
                        r = -ENOMEM;
                        goto bad;
                }
-               ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block,
-                                                ic->tag_size, GFP_KERNEL);
+               recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size;
+               if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size)
+                       recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size;
+               ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL);
                if (!ic->recalc_tags) {
                        ti->error = "Cannot allocate tags for recalculating";
                        r = -ENOMEM;
index 5762366333a27406bb293c39c1297b17a3f8028d..e4b95eaeec8c75514d4cb1b994cb863dd0906f48 100644 (file)
@@ -311,7 +311,7 @@ static void do_region(int op, int op_flags, unsigned region,
         * Reject unsupported discard and write same requests.
         */
        if (op == REQ_OP_DISCARD)
-               special_cmd_max_sectors = q->limits.max_discard_sectors;
+               special_cmd_max_sectors = bdev_max_discard_sectors(where->bdev);
        else if (op == REQ_OP_WRITE_ZEROES)
                special_cmd_max_sectors = q->limits.max_write_zeroes_sectors;
        if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) &&
index c9d036d6bb2ee60d23985070eb3fad3382e13bfd..e194226c89e54082e9c010a395ff9b624aa365bb 100644 (file)
@@ -866,9 +866,8 @@ static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv,
 static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
        struct log_writes_c *lc = ti->private;
-       struct request_queue *q = bdev_get_queue(lc->dev->bdev);
 
-       if (!q || !blk_queue_discard(q)) {
+       if (!bdev_max_discard_sectors(lc->dev->bdev)) {
                lc->device_supports_discard = false;
                limits->discard_granularity = lc->sectorsize;
                limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT);
index 875bca30a0dd5fb3181b8685441504f3f6ba5d51..82f2a06153dc0689ac317571a39346011afc5224 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/blkdev.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/sched/clock.h>
 
 
 #define DM_MSG_PREFIX  "multipath historical-service-time"
@@ -433,7 +432,7 @@ static struct dm_path *hst_select_path(struct path_selector *ps,
 {
        struct selector *s = ps->context;
        struct path_info *pi = NULL, *best = NULL;
-       u64 time_now = sched_clock();
+       u64 time_now = ktime_get_ns();
        struct dm_path *ret = NULL;
        unsigned long flags;
 
@@ -474,7 +473,7 @@ static int hst_start_io(struct path_selector *ps, struct dm_path *path,
 
 static u64 path_service_time(struct path_info *pi, u64 start_time)
 {
-       u64 sched_now = ktime_get_ns();
+       u64 now = ktime_get_ns();
 
        /* if a previous disk request has finished after this IO was
         * sent to the hardware, pretend the submission happened
@@ -483,11 +482,11 @@ static u64 path_service_time(struct path_info *pi, u64 start_time)
        if (time_after64(pi->last_finish, start_time))
                start_time = pi->last_finish;
 
-       pi->last_finish = sched_now;
-       if (time_before64(sched_now, start_time))
+       pi->last_finish = now;
+       if (time_before64(now, start_time))
                return 0;
 
-       return sched_now - start_time;
+       return now - start_time;
 }
 
 static int hst_end_io(struct path_selector *ps, struct dm_path *path,
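
The clock swap above matters because sched_clock() is only guaranteed monotonic per CPU, so a start stamp taken on one CPU is not safely comparable with a finish stamp taken on another; ktime_get_ns() is monotonic system-wide. A hedged sketch of cross-CPU interval measurement:

#include <linux/ktime.h>
#include <linux/types.h>

/*
 * Measure an interval that may start on one CPU and finish on another:
 * ktime_get_ns() is globally monotonic, so the difference stays
 * meaningful across CPU migrations.
 */
static u64 example_elapsed_ns(u64 start_ns)
{
	u64 now = ktime_get_ns();

	return now > start_ns ? now - start_ns : 0;
}
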
index 2b26435a6946e8e7f774bf387d0f724ff51a3b96..9526ccbedafbac9ad74c413b2968a3bdc5da776d 100644 (file)
@@ -2963,13 +2963,8 @@ static void configure_discard_support(struct raid_set *rs)
        raid456 = rs_is_raid456(rs);
 
        for (i = 0; i < rs->raid_disks; i++) {
-               struct request_queue *q;
-
-               if (!rs->dev[i].rdev.bdev)
-                       continue;
-
-               q = bdev_get_queue(rs->dev[i].rdev.bdev);
-               if (!q || !blk_queue_discard(q))
+               if (!rs->dev[i].rdev.bdev ||
+                   !bdev_max_discard_sectors(rs->dev[i].rdev.bdev))
                        return;
 
                if (raid456) {
index 03541cfc2317cb0e17780fa8970ab8959d47b171..e7d42f6335a2af2c869b88e1a52cd48d04c886c8 100644 (file)
@@ -1820,9 +1820,7 @@ static int device_dax_write_cache_enabled(struct dm_target *ti,
 static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
                                sector_t start, sector_t len, void *data)
 {
-       struct request_queue *q = bdev_get_queue(dev->bdev);
-
-       return !blk_queue_nonrot(q);
+       return !bdev_nonrot(dev->bdev);
 }
 
 static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
@@ -1890,9 +1888,7 @@ static bool dm_table_supports_nowait(struct dm_table *t)
 static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
                                      sector_t start, sector_t len, void *data)
 {
-       struct request_queue *q = bdev_get_queue(dev->bdev);
-
-       return !blk_queue_discard(q);
+       return !bdev_max_discard_sectors(dev->bdev);
 }
 
 static bool dm_table_supports_discards(struct dm_table *t)
@@ -1924,9 +1920,7 @@ static int device_not_secure_erase_capable(struct dm_target *ti,
                                           struct dm_dev *dev, sector_t start,
                                           sector_t len, void *data)
 {
-       struct request_queue *q = bdev_get_queue(dev->bdev);
-
-       return !blk_queue_secure_erase(q);
+       return !bdev_max_secure_erase_sectors(dev->bdev);
 }
 
 static bool dm_table_supports_secure_erase(struct dm_table *t)
@@ -1952,9 +1946,7 @@ static int device_requires_stable_pages(struct dm_target *ti,
                                        struct dm_dev *dev, sector_t start,
                                        sector_t len, void *data)
 {
-       struct request_queue *q = bdev_get_queue(dev->bdev);
-
-       return blk_queue_stable_writes(q);
+       return bdev_stable_writes(dev->bdev);
 }
 
 int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
@@ -1974,18 +1966,15 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
                blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
 
        if (!dm_table_supports_discards(t)) {
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
-               /* Must also clear discard limits... */
                q->limits.max_discard_sectors = 0;
                q->limits.max_hw_discard_sectors = 0;
                q->limits.discard_granularity = 0;
                q->limits.discard_alignment = 0;
                q->limits.discard_misaligned = 0;
-       } else
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+       }
 
-       if (dm_table_supports_secure_erase(t))
-               blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
+       if (!dm_table_supports_secure_erase(t))
+               q->limits.max_secure_erase_sectors = 0;
 
        if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
                wc = true;
index 4d25d0e270313a9c1a6b6628c040f7a3b16d9b07..84c083f766736f37e6b514e273045a72e3fe8bd8 100644 (file)
@@ -398,8 +398,8 @@ static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t da
        sector_t s = block_to_sectors(tc->pool, data_b);
        sector_t len = block_to_sectors(tc->pool, data_e - data_b);
 
-       return __blkdev_issue_discard(tc->pool_dev->bdev, s, len,
-                                     GFP_NOWAIT, 0, &op->bio);
+       return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOWAIT,
+                                     &op->bio);
 }
 
 static void end_discard(struct discard_op *op, int r)
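
The issue_discard() change above reflects the trimmed __blkdev_issue_discard() signature: with secure erase split into its own request op, the flags argument disappears, leaving the sector range, the gfp mask, and the chained-bio pointer. A small sketch of the call (GFP_NOFS chosen only for illustration):

#include <linux/blkdev.h>

/*
 * Queue a discard for a sector range; any bio that gets built is
 * chained into *biop for the caller to submit and wait on.
 */
static int example_discard_range(struct block_device *bdev, sector_t sector,
				 sector_t nr_sects, struct bio **biop)
{
	return __blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS, biop);
}
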
@@ -2802,13 +2802,6 @@ static void requeue_bios(struct pool *pool)
 /*----------------------------------------------------------------
  * Binding of control targets to a pool object
  *--------------------------------------------------------------*/
-static bool data_dev_supports_discard(struct pool_c *pt)
-{
-       struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
-
-       return blk_queue_discard(q);
-}
-
 static bool is_factor(sector_t block_size, uint32_t n)
 {
        return !sector_div(block_size, n);
@@ -2828,7 +2821,7 @@ static void disable_passdown_if_not_supported(struct pool_c *pt)
        if (!pt->adjusted_pf.discard_passdown)
                return;
 
-       if (!data_dev_supports_discard(pt))
+       if (!bdev_max_discard_sectors(pt->data_dev->bdev))
                reason = "discard unsupported";
 
        else if (data_limits->max_discard_sectors < pool->sectors_per_block)
@@ -4057,8 +4050,6 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
                /*
                 * Must explicitly disallow stacking discard limits otherwise the
                 * block layer will stack them if pool's data device has support.
-                * QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the
-                * user to see that, so make sure to set all discard limits to 0.
                 */
                limits->discard_granularity = 0;
                return;
index c1ca9be4b79e9f27da01a4c08f88835314b6f1c9..57daa86c19cf0eb4bf7d12c2f3d05e2d09f67db2 100644 (file)
@@ -360,16 +360,20 @@ static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
        return 0;
 }
 
+struct orig_bio_details {
+       unsigned int op;
+       unsigned int nr_sectors;
+};
+
 /*
  * First phase of BIO mapping for targets with zone append emulation:
  * check all BIO that change a zone writer pointer and change zone
  * append operations into regular write operations.
  */
 static bool dm_zone_map_bio_begin(struct mapped_device *md,
-                                 struct bio *orig_bio, struct bio *clone)
+                                 unsigned int zno, struct bio *clone)
 {
        sector_t zsectors = blk_queue_zone_sectors(md->queue);
-       unsigned int zno = bio_zone_no(orig_bio);
        unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
 
        /*
@@ -384,7 +388,7 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md,
                WRITE_ONCE(md->zwp_offset[zno], zwp_offset);
        }
 
-       switch (bio_op(orig_bio)) {
+       switch (bio_op(clone)) {
        case REQ_OP_ZONE_RESET:
        case REQ_OP_ZONE_FINISH:
                return true;
@@ -401,9 +405,8 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md,
                 * target zone.
                 */
                clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
-                       (orig_bio->bi_opf & (~REQ_OP_MASK));
-               clone->bi_iter.bi_sector =
-                       orig_bio->bi_iter.bi_sector + zwp_offset;
+                       (clone->bi_opf & (~REQ_OP_MASK));
+               clone->bi_iter.bi_sector += zwp_offset;
                break;
        default:
                DMWARN_LIMIT("Invalid BIO operation");
@@ -423,11 +426,10 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md,
  * data written to a zone. Note that at this point, the remapped clone BIO
  * may already have completed, so we do not touch it.
  */
-static blk_status_t dm_zone_map_bio_end(struct mapped_device *md,
-                                       struct bio *orig_bio,
+static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno,
+                                       struct orig_bio_details *orig_bio_details,
                                        unsigned int nr_sectors)
 {
-       unsigned int zno = bio_zone_no(orig_bio);
        unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
 
        /* The clone BIO may already have been completed and failed */
@@ -435,7 +437,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md,
                return BLK_STS_IOERR;
 
        /* Update the zone wp offset */
-       switch (bio_op(orig_bio)) {
+       switch (orig_bio_details->op) {
        case REQ_OP_ZONE_RESET:
                WRITE_ONCE(md->zwp_offset[zno], 0);
                return BLK_STS_OK;
@@ -452,7 +454,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md,
                 * Check that the target did not truncate the write operation
                 * emulating a zone append.
                 */
-               if (nr_sectors != bio_sectors(orig_bio)) {
+               if (nr_sectors != orig_bio_details->nr_sectors) {
                        DMWARN_LIMIT("Truncated write for zone append");
                        return BLK_STS_IOERR;
                }
@@ -488,7 +490,7 @@ static inline void dm_zone_unlock(struct request_queue *q,
        bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
 }
 
-static bool dm_need_zone_wp_tracking(struct bio *orig_bio)
+static bool dm_need_zone_wp_tracking(struct bio *bio)
 {
        /*
         * Special processing is not needed for operations that do not need the
@@ -496,15 +498,15 @@ static bool dm_need_zone_wp_tracking(struct bio *orig_bio)
         * zones and all operations that do not modify directly a sequential
         * zone write pointer.
         */
-       if (op_is_flush(orig_bio->bi_opf) && !bio_sectors(orig_bio))
+       if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
                return false;
-       switch (bio_op(orig_bio)) {
+       switch (bio_op(bio)) {
        case REQ_OP_WRITE_ZEROES:
        case REQ_OP_WRITE:
        case REQ_OP_ZONE_RESET:
        case REQ_OP_ZONE_FINISH:
        case REQ_OP_ZONE_APPEND:
-               return bio_zone_is_seq(orig_bio);
+               return bio_zone_is_seq(bio);
        default:
                return false;
        }
@@ -519,8 +521,8 @@ int dm_zone_map_bio(struct dm_target_io *tio)
        struct dm_target *ti = tio->ti;
        struct mapped_device *md = io->md;
        struct request_queue *q = md->queue;
-       struct bio *orig_bio = io->orig_bio;
        struct bio *clone = &tio->clone;
+       struct orig_bio_details orig_bio_details;
        unsigned int zno;
        blk_status_t sts;
        int r;
@@ -529,18 +531,21 @@ int dm_zone_map_bio(struct dm_target_io *tio)
         * IOs that do not change a zone write pointer do not need
         * any additional special processing.
         */
-       if (!dm_need_zone_wp_tracking(orig_bio))
+       if (!dm_need_zone_wp_tracking(clone))
                return ti->type->map(ti, clone);
 
        /* Lock the target zone */
-       zno = bio_zone_no(orig_bio);
+       zno = bio_zone_no(clone);
        dm_zone_lock(q, zno, clone);
 
+       orig_bio_details.nr_sectors = bio_sectors(clone);
+       orig_bio_details.op = bio_op(clone);
+
        /*
         * Check that the bio and the target zone write pointer offset are
         * both valid, and if the bio is a zone append, remap it to a write.
         */
-       if (!dm_zone_map_bio_begin(md, orig_bio, clone)) {
+       if (!dm_zone_map_bio_begin(md, zno, clone)) {
                dm_zone_unlock(q, zno, clone);
                return DM_MAPIO_KILL;
        }
@@ -560,7 +565,8 @@ int dm_zone_map_bio(struct dm_target_io *tio)
                 * The target submitted the clone BIO. The target zone will
                 * be unlocked on completion of the clone.
                 */
-               sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr);
+               sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
+                                         *tio->len_ptr);
                break;
        case DM_MAPIO_REMAPPED:
                /*
@@ -568,7 +574,8 @@ int dm_zone_map_bio(struct dm_target_io *tio)
                 * unlock the target zone here as the clone will not be
                 * submitted.
                 */
-               sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr);
+               sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
+                                         *tio->len_ptr);
                if (sts != BLK_STS_OK)
                        dm_zone_unlock(q, zno, clone);
                break;
index cac295cc8840efe5d78d60438f3276359c3b2c1d..0ec5d8b9b1a4e8040ddf93ecad27d377cc680dc6 100644 (file)
@@ -1001,7 +1001,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
        blk_limits_io_min(limits, DMZ_BLOCK_SIZE);
        blk_limits_io_opt(limits, DMZ_BLOCK_SIZE);
 
-       limits->discard_alignment = DMZ_BLOCK_SIZE;
+       limits->discard_alignment = 0;
        limits->discard_granularity = DMZ_BLOCK_SIZE;
        limits->max_discard_sectors = chunk_sectors;
        limits->max_hw_discard_sectors = chunk_sectors;
index 3c5fad7c4ee68c18d9951bdd2706eb35f4c96578..39081338ca6162e4e6c337ed827f90ffde389ba2 100644 (file)
@@ -955,7 +955,6 @@ void disable_discard(struct mapped_device *md)
 
        /* device doesn't really support DISCARD, disable it */
        limits->max_discard_sectors = 0;
-       blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue);
 }
 
 void disable_write_zeroes(struct mapped_device *md)
@@ -982,7 +981,7 @@ static void clone_endio(struct bio *bio)
 
        if (unlikely(error == BLK_STS_TARGET)) {
                if (bio_op(bio) == REQ_OP_DISCARD &&
-                   !q->limits.max_discard_sectors)
+                   !bdev_max_discard_sectors(bio->bi_bdev))
                        disable_discard(md);
                else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
                         !q->limits.max_write_zeroes_sectors)
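
This hunk belongs to the series-wide removal of QUEUE_FLAG_DISCARD: whether a device supports discard is now inferred from its discard limit rather than from a queue flag. A sketch of the equivalence, assuming only that bdev_max_discard_sectors() reads max_discard_sectors from the bdev's queue limits:

	/* "Supports discard" is now simply a non-zero discard limit. */
	static bool sketch_bdev_supports_discard(struct block_device *bdev)
	{
		return bdev_max_discard_sectors(bdev) != 0;
	}
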
@@ -1323,8 +1322,7 @@ static void __map_bio(struct bio *clone)
 }
 
 static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
-                               struct dm_target *ti, unsigned num_bios,
-                               unsigned *len)
+                               struct dm_target *ti, unsigned num_bios)
 {
        struct bio *bio;
        int try;
@@ -1335,7 +1333,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
                if (try)
                        mutex_lock(&ci->io->md->table_devices_lock);
                for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
-                       bio = alloc_tio(ci, ti, bio_nr, len,
+                       bio = alloc_tio(ci, ti, bio_nr, NULL,
                                        try ? GFP_NOIO : GFP_NOWAIT);
                        if (!bio)
                                break;
@@ -1363,11 +1361,11 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
                break;
        case 1:
                clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
-               dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
                __map_bio(clone);
                break;
        default:
-               alloc_multiple_bios(&blist, ci, ti, num_bios, len);
+               /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */
+               alloc_multiple_bios(&blist, ci, ti, num_bios);
                while ((clone = bio_list_pop(&blist))) {
                        dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
                        __map_bio(clone);
@@ -1392,6 +1390,7 @@ static void __send_empty_flush(struct clone_info *ci)
 
        ci->bio = &flush_bio;
        ci->sector_count = 0;
+       ci->io->tio.clone.bi_iter.bi_size = 0;
 
        while ((ti = dm_table_get_target(ci->map, target_nr++)))
                __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
@@ -1407,14 +1406,10 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target
        len = min_t(sector_t, ci->sector_count,
                    max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector)));
 
-       /*
-        * dm_accept_partial_bio cannot be used with duplicate bios,
-        * so update clone_info cursor before __send_duplicate_bios().
-        */
+       __send_duplicate_bios(ci, ti, num_bios, &len);
+
        ci->sector += len;
        ci->sector_count -= len;
-
-       __send_duplicate_bios(ci, ti, num_bios, &len);
 }
 
 static bool is_abnormal_io(struct bio *bio)
index bfd6026d78099b9cfe07bb2e509e077c091760ce..d87f674ab7622d46d7c20c6cd8ace4707d9015a2 100644 (file)
@@ -639,14 +639,6 @@ re_read:
        daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
        write_behind = le32_to_cpu(sb->write_behind);
        sectors_reserved = le32_to_cpu(sb->sectors_reserved);
-       /* Setup nodes/clustername only if bitmap version is
-        * cluster-compatible
-        */
-       if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
-               nodes = le32_to_cpu(sb->nodes);
-               strlcpy(bitmap->mddev->bitmap_info.cluster_name,
-                               sb->cluster_name, 64);
-       }
 
        /* verify that the bitmap-specific fields are valid */
        if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@@ -668,6 +660,16 @@ re_read:
                goto out;
        }
 
+       /*
+        * Setup nodes/clustername only if bitmap version is
+        * cluster-compatible
+        */
+       if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
+               nodes = le32_to_cpu(sb->nodes);
+               strscpy(bitmap->mddev->bitmap_info.cluster_name,
+                               sb->cluster_name, 64);
+       }
+
        /* keep the array size field of the bitmap superblock up to date */
        sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
 
@@ -695,14 +697,13 @@ re_read:
        if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
                set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
        bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
-       strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
        err = 0;
 
 out:
        kunmap_atomic(sb);
-       /* Assigning chunksize is required for "re_read" */
-       bitmap->mddev->bitmap_info.chunksize = chunksize;
        if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
+               /* Assigning chunksize is required for "re_read" */
+               bitmap->mddev->bitmap_info.chunksize = chunksize;
                err = md_setup_cluster(bitmap->mddev, nodes);
                if (err) {
                        pr_warn("%s: Could not setup cluster service (%d)\n",
@@ -713,18 +714,18 @@ out:
                goto re_read;
        }
 
-
 out_no_sb:
-       if (test_bit(BITMAP_STALE, &bitmap->flags))
-               bitmap->events_cleared = bitmap->mddev->events;
-       bitmap->mddev->bitmap_info.chunksize = chunksize;
-       bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
-       bitmap->mddev->bitmap_info.max_write_behind = write_behind;
-       bitmap->mddev->bitmap_info.nodes = nodes;
-       if (bitmap->mddev->bitmap_info.space == 0 ||
-           bitmap->mddev->bitmap_info.space > sectors_reserved)
-               bitmap->mddev->bitmap_info.space = sectors_reserved;
-       if (err) {
+       if (err == 0) {
+               if (test_bit(BITMAP_STALE, &bitmap->flags))
+                       bitmap->events_cleared = bitmap->mddev->events;
+               bitmap->mddev->bitmap_info.chunksize = chunksize;
+               bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
+               bitmap->mddev->bitmap_info.max_write_behind = write_behind;
+               bitmap->mddev->bitmap_info.nodes = nodes;
+               if (bitmap->mddev->bitmap_info.space == 0 ||
+                       bitmap->mddev->bitmap_info.space > sectors_reserved)
+                       bitmap->mddev->bitmap_info.space = sectors_reserved;
+       } else {
                md_bitmap_print_sb(bitmap);
                if (bitmap->cluster_slot < 0)
                        md_cluster_stop(bitmap->mddev);
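
The strlcpy() to strscpy() conversions in this file (and in md-cluster.c below) are more than a rename: strscpy() never reads past the end of an unterminated source buffer, and it reports truncation through its return value instead of returning the would-be source length. A hedged usage sketch:

	/* strlcpy() returns strlen(src), so spotting truncation needs a
	 * comparison; strscpy() returns the copied length or -E2BIG. */
	static void sketch_copy_name(char *dst, const char *src, size_t size)
	{
		if (strscpy(dst, src, size) < 0)
			pr_warn("md: name truncated to %zu bytes\n", size - 1);
	}
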
index 1c8a06b77c853b0be249b37c70926dc0323dc627..37cbcce3cc66bcb30c0629733e276b4f19f9420e 100644 (file)
@@ -201,7 +201,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
                pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
                goto out_err;
        }
-       strlcpy(res->name, name, namelen + 1);
+       strscpy(res->name, name, namelen + 1);
        if (with_lvb) {
                res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
                if (!res->lksb.sb_lvbptr) {
index 0f55b079371b136abf1fb5c8b338c91f4e7eba6f..138a3b25c5c82ce6a591e4a4b90b5a89628d260e 100644 (file)
@@ -64,7 +64,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
        struct linear_conf *conf;
        struct md_rdev *rdev;
        int i, cnt;
-       bool discard_supported = false;
 
        conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL);
        if (!conf)
@@ -96,9 +95,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
 
                conf->array_sectors += rdev->sectors;
                cnt++;
-
-               if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
-                       discard_supported = true;
        }
        if (cnt != raid_disks) {
                pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
@@ -106,11 +102,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
                goto out;
        }
 
-       if (!discard_supported)
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
-       else
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
-
        /*
         * Here we calculate the device offsets.
         */
@@ -252,7 +243,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
                start_sector + data_offset;
 
        if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-                    !blk_queue_discard(bio->bi_bdev->bd_disk->queue))) {
+                    !bdev_max_discard_sectors(bio->bi_bdev))) {
                /* Just ignore it */
                bio_endio(bio);
        } else {
index 309b3af906ad39c7e19608b54203c3465494d6cc..707e802d0082a1ea8a1fd08f6cf259e8cc495ecd 100644 (file)
@@ -2627,14 +2627,16 @@ static void sync_sbs(struct mddev *mddev, int nospares)
 
 static bool does_sb_need_changing(struct mddev *mddev)
 {
-       struct md_rdev *rdev;
+       struct md_rdev *rdev = NULL, *iter;
        struct mdp_superblock_1 *sb;
        int role;
 
        /* Find a good rdev */
-       rdev_for_each(rdev, mddev)
-               if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
+       rdev_for_each(iter, mddev)
+               if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
+                       rdev = iter;
                        break;
+               }
 
        /* No good device found. */
        if (!rdev)
@@ -2645,11 +2647,11 @@ static bool does_sb_need_changing(struct mddev *mddev)
        rdev_for_each(rdev, mddev) {
                role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
                /* Device activated? */
-               if (role == 0xffff && rdev->raid_disk >=0 &&
+               if (role == MD_DISK_ROLE_SPARE && rdev->raid_disk >= 0 &&
                    !test_bit(Faulty, &rdev->flags))
                        return true;
                /* Device turned faulty? */
-               if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
+               if (test_bit(Faulty, &rdev->flags) && (role < MD_DISK_ROLE_MAX))
                        return true;
        }
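
The iter/rdev split above fixes a classic list_for_each_entry() pitfall: when such a loop runs to completion without a break, the cursor is not NULL, it points at the pseudo entry embedded in the list head, so the old `if (!rdev)` test could never fire. The safe search pattern, sketched with the same rdev_for_each() iterator:

	struct md_rdev *found = NULL, *iter;

	/* Copy the cursor out only on a hit; test the copy, not the cursor. */
	rdev_for_each(iter, mddev) {
		if (iter->raid_disk >= 0 && !test_bit(Faulty, &iter->flags)) {
			found = iter;
			break;
		}
	}
	if (!found)
		return false;	/* loop completed: no usable member */
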
 
@@ -2984,10 +2986,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 
        if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
                md_error(rdev->mddev, rdev);
-               if (test_bit(Faulty, &rdev->flags))
-                       err = 0;
-               else
+
+               if (test_bit(MD_BROKEN, &rdev->mddev->flags))
                        err = -EBUSY;
+               else
+                       err = 0;
        } else if (cmd_match(buf, "remove")) {
                if (rdev->mddev->pers) {
                        clear_bit(Blocked, &rdev->flags);
@@ -4028,7 +4031,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
        oldpriv = mddev->private;
        mddev->pers = pers;
        mddev->private = priv;
-       strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+       strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
        mddev->level = mddev->new_level;
        mddev->layout = mddev->new_layout;
        mddev->chunk_sectors = mddev->new_chunk_sectors;
@@ -4353,10 +4356,9 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
  *     like active, but no writes have been seen for a while (100msec).
  *
  * broken
- *     RAID0/LINEAR-only: same as clean, but array is missing a member.
- *     It's useful because RAID0/LINEAR mounted-arrays aren't stopped
- *     when a member is gone, so this state will at least alert the
- *     user that something is wrong.
+ *     Array is failed. It's useful because mounted arrays aren't stopped
+ *     when the array fails, so this state will at least alert the user that
+ *     something is wrong.
  */
 enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
                   write_pending, active_idle, broken, bad_word};
@@ -5763,7 +5765,7 @@ static int add_named_array(const char *val, const struct kernel_param *kp)
                len--;
        if (len >= DISK_NAME_LEN)
                return -E2BIG;
-       strlcpy(buf, val, len+1);
+       strscpy(buf, val, len+1);
        if (strncmp(buf, "md_", 3) == 0)
                return md_alloc(0, buf);
        if (strncmp(buf, "md", 2) == 0 &&
@@ -5896,7 +5898,7 @@ int md_run(struct mddev *mddev)
                mddev->level = pers->level;
                mddev->new_level = pers->level;
        }
-       strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+       strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 
        if (mddev->reshape_position != MaxSector &&
            pers->start_reshape == NULL) {
@@ -5991,8 +5993,7 @@ int md_run(struct mddev *mddev)
                bool nonrot = true;
 
                rdev_for_each(rdev, mddev) {
-                       if (rdev->raid_disk >= 0 &&
-                           !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
+                       if (rdev->raid_disk >= 0 && !bdev_nonrot(rdev->bdev)) {
                                nonrot = false;
                                break;
                        }
@@ -7444,7 +7445,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
                err =  -ENODEV;
        else {
                md_error(mddev, rdev);
-               if (!test_bit(Faulty, &rdev->flags))
+               if (test_bit(MD_BROKEN, &mddev->flags))
                        err = -EBUSY;
        }
        rcu_read_unlock();
@@ -7985,13 +7986,16 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
 
        if (!mddev->pers || !mddev->pers->error_handler)
                return;
-       mddev->pers->error_handler(mddev,rdev);
-       if (mddev->degraded)
+       mddev->pers->error_handler(mddev, rdev);
+
+       if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
                set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
        sysfs_notify_dirent_safe(rdev->sysfs_state);
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-       md_wakeup_thread(mddev->thread);
+       if (!test_bit(MD_BROKEN, &mddev->flags)) {
+               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+               md_wakeup_thread(mddev->thread);
+       }
        if (mddev->event_work.func)
                queue_work(md_misc_wq, &mddev->event_work);
        md_new_event();
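
With this hunk, md_error() no longer schedules recovery once the personality has declared the whole array dead. The resulting contract, sketched under the assumption that the personality's error_handler sets MD_BROKEN itself, as the raid1/raid10/raid5 hunks below do; last_working_device() is a hypothetical stand-in for each personality's own check:

	static void sketch_error_handler(struct mddev *mddev,
					 struct md_rdev *rdev)
	{
		if (last_working_device(mddev, rdev))	/* hypothetical */
			set_bit(MD_BROKEN, &mddev->flags); /* array failed */
		else
			set_bit(Faulty, &rdev->flags);	/* degrade, recover */
	}
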
@@ -8585,7 +8589,7 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 {
        struct bio *discard_bio = NULL;
 
-       if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, 0,
+       if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO,
                        &discard_bio) || !discard_bio)
                return;
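
__blkdev_issue_discard() and blkdev_issue_discard() lost their flags argument in this series; the one remaining flag, secure erase, became a separate helper. A sketch assuming the v5.19 signatures (blkdev_issue_secure_erase() for the secure variant):

	static int sketch_discard_range(struct block_device *bdev,
					sector_t sector, sector_t nr_sects)
	{
		/* Plain discard: no flags argument any more. */
		return blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOIO);
	}
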
 
@@ -9671,7 +9675,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
                role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
 
                if (test_bit(Candidate, &rdev2->flags)) {
-                       if (role == 0xfffe) {
+                       if (role == MD_DISK_ROLE_FAULTY) {
                                pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
                                md_kick_rdev_from_array(rdev2);
                                continue;
@@ -9684,7 +9688,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
                        /*
                         * got activated except reshape is happening.
                         */
-                       if (rdev2->raid_disk == -1 && role != 0xffff &&
+                       if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
                            !(le32_to_cpu(sb->feature_map) &
                              MD_FEATURE_RESHAPE_ACTIVE)) {
                                rdev2->saved_raid_disk = role;
@@ -9701,7 +9705,8 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
                         * as faulty. The recovery is performed by the
                         * one who initiated the error.
                         */
-                       if ((role == 0xfffe) || (role == 0xfffd)) {
+                       if (role == MD_DISK_ROLE_FAULTY ||
+                           role == MD_DISK_ROLE_JOURNAL) {
                                md_error(mddev, rdev2);
                                clear_bit(Blocked, &rdev2->flags);
                        }
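
The raw role numbers replaced in these hunks correspond to named constants from include/uapi/linux/raid/md_p.h:

	#define MD_DISK_ROLE_SPARE	0xffff
	#define MD_DISK_ROLE_FAULTY	0xfffe
	#define MD_DISK_ROLE_JOURNAL	0xfffd
	#define MD_DISK_ROLE_MAX	0xff00	/* max value of regular disk role */

Note that the earlier does_sb_need_changing() hunk also tightens `role < 0xfffd` to `role < MD_DISK_ROLE_MAX`, so every reserved role value above 0xff00 is now excluded, not just the faulty and journal ones.
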
@@ -9791,16 +9796,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
 
 void md_reload_sb(struct mddev *mddev, int nr)
 {
-       struct md_rdev *rdev;
+       struct md_rdev *rdev = NULL, *iter;
        int err;
 
        /* Find the rdev */
-       rdev_for_each_rcu(rdev, mddev) {
-               if (rdev->desc_nr == nr)
+       rdev_for_each_rcu(iter, mddev) {
+               if (iter->desc_nr == nr) {
+                       rdev = iter;
                        break;
+               }
        }
 
-       if (!rdev || rdev->desc_nr != nr) {
+       if (!rdev) {
                pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
                return;
        }
index 6ac28386453368aaadaaf295bbeedae7dad9b5f0..cf2cbb17acbd423ccebf7d1a008fc47ae07ed6a5 100644 (file)
@@ -234,34 +234,42 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                                int is_new);
 struct md_cluster_info;
 
-/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
+/**
+ * enum mddev_flags - md device flags.
+ * @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
+ * @MD_CLOSING: If set, we are closing the array, do not open it then.
+ * @MD_JOURNAL_CLEAN: A raid with journal is already clean.
+ * @MD_HAS_JOURNAL: The raid array has journal feature set.
+ * @MD_CLUSTER_RESYNC_LOCKED: cluster raid only; the node already took the
+ *                            resync lock, so it needs to release the lock.
+ * @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
+ *                         calls to md_error() will never cause the array to
+ *                         become failed.
+ * @MD_HAS_PPL:  The raid array has PPL feature set.
+ * @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
+ * @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
+ *                      without taking reconfig_mutex.
+ * @MD_UPDATING_SB: md_check_recovery is updating the metadata without
+ *                  explicitly holding reconfig_mutex.
+ * @MD_NOT_READY: do_md_run() is active, so 'array_state' must not report that
+ *                the array is ready yet.
+ * @MD_BROKEN: This is used to stop writes and mark array as failed.
+ *
+ * change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
+ */
 enum mddev_flags {
-       MD_ARRAY_FIRST_USE,     /* First use of array, needs initialization */
-       MD_CLOSING,             /* If set, we are closing the array, do not open
-                                * it then */
-       MD_JOURNAL_CLEAN,       /* A raid with journal is already clean */
-       MD_HAS_JOURNAL,         /* The raid array has journal feature set */
-       MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
-                                  * already took resync lock, need to
-                                  * release the lock */
-       MD_FAILFAST_SUPPORTED,  /* Using MD_FAILFAST on metadata writes is
-                                * supported as calls to md_error() will
-                                * never cause the array to become failed.
-                                */
-       MD_HAS_PPL,             /* The raid array has PPL feature set */
-       MD_HAS_MULTIPLE_PPLS,   /* The raid array has multiple PPLs feature set */
-       MD_ALLOW_SB_UPDATE,     /* md_check_recovery is allowed to update
-                                * the metadata without taking reconfig_mutex.
-                                */
-       MD_UPDATING_SB,         /* md_check_recovery is updating the metadata
-                                * without explicitly holding reconfig_mutex.
-                                */
-       MD_NOT_READY,           /* do_md_run() is active, so 'array_state'
-                                * must not report that array is ready yet
-                                */
-       MD_BROKEN,              /* This is used in RAID-0/LINEAR only, to stop
-                                * I/O in case an array member is gone/failed.
-                                */
+       MD_ARRAY_FIRST_USE,
+       MD_CLOSING,
+       MD_JOURNAL_CLEAN,
+       MD_HAS_JOURNAL,
+       MD_CLUSTER_RESYNC_LOCKED,
+       MD_FAILFAST_SUPPORTED,
+       MD_HAS_PPL,
+       MD_HAS_MULTIPLE_PPLS,
+       MD_ALLOW_SB_UPDATE,
+       MD_UPDATING_SB,
+       MD_NOT_READY,
+       MD_BROKEN,
 };
 
 enum mddev_sb_flags {
index b21e101183f444054ef24a32e6d3ec783392c776..e11701e394ca0b40ea520996468506d4b4d4452e 100644 (file)
@@ -128,21 +128,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
        pr_debug("md/raid0:%s: FINAL %d zones\n",
                 mdname(mddev), conf->nr_strip_zones);
 
-       if (conf->nr_strip_zones == 1) {
-               conf->layout = RAID0_ORIG_LAYOUT;
-       } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
-                  mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
-               conf->layout = mddev->layout;
-       } else if (default_layout == RAID0_ORIG_LAYOUT ||
-                  default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
-               conf->layout = default_layout;
-       } else {
-               pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
-                      mdname(mddev));
-               pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
-               err = -ENOTSUPP;
-               goto abort;
-       }
        /*
         * now since we have the hard sector sizes, we can make sure
         * chunk size is a multiple of that sector size
@@ -273,6 +258,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
                         (unsigned long long)smallest->sectors);
        }
 
+       if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) {
+               conf->layout = RAID0_ORIG_LAYOUT;
+       } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
+                  mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+               conf->layout = mddev->layout;
+       } else if (default_layout == RAID0_ORIG_LAYOUT ||
+                  default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+               conf->layout = default_layout;
+       } else {
+               pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
+                      mdname(mddev));
+               pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
+               err = -EOPNOTSUPP;
+               goto abort;
+       }
+
        pr_debug("md/raid0:%s: done.\n", mdname(mddev));
        *private_conf = conf;
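
The -ENOTSUPP to -EOPNOTSUPP switch above is not cosmetic: ENOTSUPP is a kernel-internal value that reaches userspace as an unknown errno, while EOPNOTSUPP has a proper strerror() string, which is why checkpatch warns about new ENOTSUPP uses. Per the errno headers:

	#define EOPNOTSUPP	95	/* Operation not supported on transport endpoint */
	#define ENOTSUPP	524	/* Operation is not supported */
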
 
@@ -399,7 +400,6 @@ static int raid0_run(struct mddev *mddev)
        conf = mddev->private;
        if (mddev->queue) {
                struct md_rdev *rdev;
-               bool discard_supported = false;
 
                blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
                blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
@@ -412,13 +412,7 @@ static int raid0_run(struct mddev *mddev)
                rdev_for_each(rdev, mddev) {
                        disk_stack_limits(mddev->gendisk, rdev->bdev,
                                          rdev->data_offset << 9);
-                       if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
-                               discard_supported = true;
                }
-               if (!discard_supported)
-                       blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
-               else
-                       blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
        }
 
        /* calculate array device size */
index 99d5464a51f810dd7f479df6d8ab61c123926a87..99d5af1362d7675b5b67bee69d5150ab9395a93c 100644 (file)
@@ -165,9 +165,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
         * Allocate bios : 1 for reading, n-1 for writing
         */
        for (j = pi->raid_disks ; j-- ; ) {
-               bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
+               bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
                if (!bio)
                        goto out_free_bio;
+               bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
                r1_bio->bios[j] = bio;
        }
        /*
@@ -206,8 +207,10 @@ out_free_pages:
                resync_free_pages(&rps[j]);
 
 out_free_bio:
-       while (++j < pi->raid_disks)
-               bio_put(r1_bio->bios[j]);
+       while (++j < pi->raid_disks) {
+               bio_uninit(r1_bio->bios[j]);
+               kfree(r1_bio->bios[j]);
+       }
        kfree(rps);
 
 out_free_r1bio:
@@ -225,7 +228,8 @@ static void r1buf_pool_free(void *__r1_bio, void *data)
        for (i = pi->raid_disks; i--; ) {
                rp = get_resync_pages(r1bio->bios[i]);
                resync_free_pages(rp);
-               bio_put(r1bio->bios[i]);
+               bio_uninit(r1bio->bios[i]);
+               kfree(r1bio->bios[i]);
        }
 
        /* resync pages array stored in the 1st bio's .bi_private */
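
These hunks adapt to the v5.19 bio_kmalloc() rework: the function now takes (nr_vecs, gfp), returns a bare uninitialized bio with inline bvecs, and the caller owns both initialization and teardown, so bio_put() is no longer the right way to free it. The lifecycle, sketched:

	struct bio *bio = bio_kmalloc(RESYNC_PAGES, GFP_KERNEL);

	if (bio) {
		/* The caller now pairs an explicit bio_init() ... */
		bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
		/* ... use the bio ... */
		/* ... with bio_uninit() + kfree() instead of bio_put(). */
		bio_uninit(bio);
		kfree(bio);
	}
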
@@ -704,7 +708,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
                        /* At least two disks to choose from so failfast is OK */
                        set_bit(R1BIO_FailFast, &r1_bio->state);
 
-               nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+               nonrot = bdev_nonrot(rdev->bdev);
                has_nonrot_disk |= nonrot;
                pending = atomic_read(&rdev->nr_pending);
                dist = abs(this_sector - conf->mirrors[disk].head_position);
@@ -802,7 +806,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
                if (test_bit(Faulty, &rdev->flags)) {
                        bio_io_error(bio);
                } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-                                   !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
+                                   !bdev_max_discard_sectors(bio->bi_bdev)))
                        /* Just ignore it */
                        bio_endio(bio);
                else
@@ -1637,30 +1641,39 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
        seq_printf(seq, "]");
 }
 
+/**
+ * raid1_error() - RAID1 error handler.
+ * @mddev: affected md device.
+ * @rdev: member device to fail.
+ *
+ * The routine acknowledges the &rdev failure and determines the new @mddev
+ * state. If the array failed, then:
+ *     - the &MD_BROKEN flag is set in &mddev->flags.
+ *     - recovery is disabled.
+ * Otherwise, the array must be degraded:
+ *     - recovery is interrupted.
+ *     - &mddev->degraded is bumped.
+ *
+ * @rdev is marked as &Faulty except when the array has failed and
+ * &mddev->fail_last_dev is off.
+ */
 static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r1conf *conf = mddev->private;
        unsigned long flags;
 
-       /*
-        * If it is not operational, then we have already marked it as dead
-        * else if it is the last working disks with "fail_last_dev == false",
-        * ignore the error, let the next level up know.
-        * else mark the drive as failed
-        */
        spin_lock_irqsave(&conf->device_lock, flags);
-       if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
-           && (conf->raid_disks - mddev->degraded) == 1) {
-               /*
-                * Don't fail the drive, act as though we were just a
-                * normal single drive.
-                * However don't try a recovery from this drive as
-                * it is very likely to fail.
-                */
-               conf->recovery_disabled = mddev->recovery_disabled;
-               spin_unlock_irqrestore(&conf->device_lock, flags);
-               return;
+
+       if (test_bit(In_sync, &rdev->flags) &&
+           (conf->raid_disks - mddev->degraded) == 1) {
+               set_bit(MD_BROKEN, &mddev->flags);
+
+               if (!mddev->fail_last_dev) {
+                       conf->recovery_disabled = mddev->recovery_disabled;
+                       spin_unlock_irqrestore(&conf->device_lock, flags);
+                       return;
+               }
        }
        set_bit(Blocked, &rdev->flags);
        if (test_and_clear_bit(In_sync, &rdev->flags))
@@ -1826,8 +1839,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                        break;
                }
        }
-       if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
        print_conf(conf);
        return err;
 }
@@ -3106,7 +3117,6 @@ static int raid1_run(struct mddev *mddev)
        int i;
        struct md_rdev *rdev;
        int ret;
-       bool discard_supported = false;
 
        if (mddev->level != 1) {
                pr_warn("md/raid1:%s: raid level not set to mirroring (%d)\n",
@@ -3141,8 +3151,6 @@ static int raid1_run(struct mddev *mddev)
                        continue;
                disk_stack_limits(mddev->gendisk, rdev->bdev,
                                  rdev->data_offset << 9);
-               if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
-                       discard_supported = true;
        }
 
        mddev->degraded = 0;
@@ -3179,15 +3187,6 @@ static int raid1_run(struct mddev *mddev)
 
        md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
 
-       if (mddev->queue) {
-               if (discard_supported)
-                       blk_queue_flag_set(QUEUE_FLAG_DISCARD,
-                                               mddev->queue);
-               else
-                       blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
-                                                 mddev->queue);
-       }
-
        ret = md_integrity_register(mddev);
        if (ret) {
                md_unregister_thread(&mddev->thread);
index dfe7d62d3fbdd1b5b2e1a387ad68de279097886f..dfa576cdf11cd7c382aad044c49c2422125e9fd6 100644 (file)
@@ -145,15 +145,17 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
         * Allocate bios.
         */
        for (j = nalloc ; j-- ; ) {
-               bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
+               bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
                if (!bio)
                        goto out_free_bio;
+               bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
                r10_bio->devs[j].bio = bio;
                if (!conf->have_replacement)
                        continue;
-               bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
+               bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
                if (!bio)
                        goto out_free_bio;
+               bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
                r10_bio->devs[j].repl_bio = bio;
        }
        /*
@@ -197,9 +199,11 @@ out_free_pages:
 out_free_bio:
        for ( ; j < nalloc; j++) {
                if (r10_bio->devs[j].bio)
-                       bio_put(r10_bio->devs[j].bio);
+                       bio_uninit(r10_bio->devs[j].bio);
+               kfree(r10_bio->devs[j].bio);
                if (r10_bio->devs[j].repl_bio)
-                       bio_put(r10_bio->devs[j].repl_bio);
+                       bio_uninit(r10_bio->devs[j].repl_bio);
+               kfree(r10_bio->devs[j].repl_bio);
        }
        kfree(rps);
 out_free_r10bio:
@@ -220,12 +224,15 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
                if (bio) {
                        rp = get_resync_pages(bio);
                        resync_free_pages(rp);
-                       bio_put(bio);
+                       bio_uninit(bio);
+                       kfree(bio);
                }
 
                bio = r10bio->devs[j].repl_bio;
-               if (bio)
-                       bio_put(bio);
+               if (bio) {
+                       bio_uninit(bio);
+                       kfree(bio);
+               }
        }
 
        /* resync pages array stored in the 1st bio's .bi_private */
@@ -796,7 +803,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
                if (!do_balance)
                        break;
 
-               nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+               nonrot = bdev_nonrot(rdev->bdev);
                has_nonrot_disk |= nonrot;
                pending = atomic_read(&rdev->nr_pending);
                if (min_pending > pending && nonrot) {
@@ -888,7 +895,7 @@ static void flush_pending_writes(struct r10conf *conf)
                        if (test_bit(Faulty, &rdev->flags)) {
                                bio_io_error(bio);
                        } else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
-                                           !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
+                                           !bdev_max_discard_sectors(bio->bi_bdev)))
                                /* Just ignore it */
                                bio_endio(bio);
                        else
@@ -1083,7 +1090,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
                if (test_bit(Faulty, &rdev->flags)) {
                        bio_io_error(bio);
                } else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
-                                   !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
+                                   !bdev_max_discard_sectors(bio->bi_bdev)))
                        /* Just ignore it */
                        bio_endio(bio);
                else
@@ -1963,32 +1970,40 @@ static int enough(struct r10conf *conf, int ignore)
                _enough(conf, 1, ignore);
 }
 
+/**
+ * raid10_error() - RAID10 error handler.
+ * @mddev: affected md device.
+ * @rdev: member device to fail.
+ *
+ * The routine acknowledges the &rdev failure and determines the new @mddev
+ * state. If the array failed, then:
+ *     - the &MD_BROKEN flag is set in &mddev->flags.
+ * Otherwise, the array must be degraded:
+ *     - recovery is interrupted.
+ *     - &mddev->degraded is bumped.
+ *
+ * @rdev is marked as &Faulty except when the array has failed and
+ * &mddev->fail_last_dev is off.
+ */
 static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 {
        char b[BDEVNAME_SIZE];
        struct r10conf *conf = mddev->private;
        unsigned long flags;
 
-       /*
-        * If it is not operational, then we have already marked it as dead
-        * else if it is the last working disks with "fail_last_dev == false",
-        * ignore the error, let the next level up know.
-        * else mark the drive as failed
-        */
        spin_lock_irqsave(&conf->device_lock, flags);
-       if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
-           && !enough(conf, rdev->raid_disk)) {
-               /*
-                * Don't fail the drive, just return an IO error.
-                */
-               spin_unlock_irqrestore(&conf->device_lock, flags);
-               return;
+
+       if (test_bit(In_sync, &rdev->flags) && !enough(conf, rdev->raid_disk)) {
+               set_bit(MD_BROKEN, &mddev->flags);
+
+               if (!mddev->fail_last_dev) {
+                       spin_unlock_irqrestore(&conf->device_lock, flags);
+                       return;
+               }
        }
        if (test_and_clear_bit(In_sync, &rdev->flags))
                mddev->degraded++;
-       /*
-        * If recovery is running, make sure it aborts.
-        */
+
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
        set_bit(Blocked, &rdev->flags);
        set_bit(Faulty, &rdev->flags);
@@ -2144,8 +2159,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                rcu_assign_pointer(p->rdev, rdev);
                break;
        }
-       if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
 
        print_conf(conf);
        return err;
@@ -4069,7 +4082,6 @@ static int raid10_run(struct mddev *mddev)
        sector_t size;
        sector_t min_offset_diff = 0;
        int first = 1;
-       bool discard_supported = false;
 
        if (mddev_init_writes_pending(mddev) < 0)
                return -ENOMEM;
@@ -4140,20 +4152,9 @@ static int raid10_run(struct mddev *mddev)
                                          rdev->data_offset << 9);
 
                disk->head_position = 0;
-
-               if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
-                       discard_supported = true;
                first = 0;
        }
 
-       if (mddev->queue) {
-               if (discard_supported)
-                       blk_queue_flag_set(QUEUE_FLAG_DISCARD,
-                                               mddev->queue);
-               else
-                       blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
-                                                 mddev->queue);
-       }
        /* need to check that every block has at least one working mirror */
        if (!enough(conf, -1)) {
                pr_err("md/raid10:%s: not enough operational mirrors.\n",
index a7d50ff9020a82140d28af088703d120cbf84e7a..094a4042589eb5cfdb53393a2cf2660d60196295 100644 (file)
@@ -1318,7 +1318,7 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 
        r5l_write_super(log, end);
 
-       if (!blk_queue_discard(bdev_get_queue(bdev)))
+       if (!bdev_max_discard_sectors(bdev))
                return;
 
        mddev = log->rdev->mddev;
@@ -1344,14 +1344,14 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
        if (log->last_checkpoint < end) {
                blkdev_issue_discard(bdev,
                                log->last_checkpoint + log->rdev->data_offset,
-                               end - log->last_checkpoint, GFP_NOIO, 0);
+                               end - log->last_checkpoint, GFP_NOIO);
        } else {
                blkdev_issue_discard(bdev,
                                log->last_checkpoint + log->rdev->data_offset,
                                log->device_size - log->last_checkpoint,
-                               GFP_NOIO, 0);
+                               GFP_NOIO);
                blkdev_issue_discard(bdev, log->rdev->data_offset, end,
-                               GFP_NOIO, 0);
+                               GFP_NOIO);
        }
 }
 
index d3962d92df18a02b2531340901227a7d9a14ef04..55d065a87b8940409b6445fb5c7a3dd28a15ef85 100644 (file)
@@ -883,7 +883,9 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
                                 (unsigned long long)r_sector, dd_idx,
                                 (unsigned long long)sector);
 
-                       rdev = conf->disks[dd_idx].rdev;
+                       /* Array has not started so rcu dereference is safe */
+                       rdev = rcu_dereference_protected(
+                                       conf->disks[dd_idx].rdev, 1);
                        if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
                                      sector >= rdev->recovery_offset)) {
                                pr_debug("%s:%*s data member disk %d missing\n",
@@ -934,7 +936,10 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
                parity_sector = raid5_compute_sector(conf, r_sector_first + i,
                                0, &disk, &sh);
                BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
-               parity_rdev = conf->disks[sh.pd_idx].rdev;
+
+               /* Array has not started so rcu dereference is safe */
+               parity_rdev = rcu_dereference_protected(
+                                       conf->disks[sh.pd_idx].rdev, 1);
 
                BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
                pr_debug("%s:%*s write parity at sector %llu, disk %s\n",
@@ -1404,7 +1409,9 @@ int ppl_init_log(struct r5conf *conf)
 
        for (i = 0; i < ppl_conf->count; i++) {
                struct ppl_log *log = &ppl_conf->child_logs[i];
-               struct md_rdev *rdev = conf->disks[i].rdev;
+               /* Array has not started so rcu dereference is safe */
+               struct md_rdev *rdev =
+                       rcu_dereference_protected(conf->disks[i].rdev, 1);
 
                mutex_init(&log->io_mutex);
                spin_lock_init(&log->io_list_lock);
index 351d341a1ffa4cf2e9a6641541a39ed48b1a4609..39038fa8b1c8047756d080b5a6232badd58ea790 100644 (file)
@@ -79,18 +79,21 @@ static inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect)
 }
 
 static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
+       __acquires(&conf->device_lock)
 {
        spin_lock_irq(conf->hash_locks + hash);
        spin_lock(&conf->device_lock);
 }
 
 static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
+       __releases(&conf->device_lock)
 {
        spin_unlock(&conf->device_lock);
        spin_unlock_irq(conf->hash_locks + hash);
 }
 
 static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
+       __acquires(&conf->device_lock)
 {
        int i;
        spin_lock_irq(conf->hash_locks);
@@ -100,6 +103,7 @@ static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
 }
 
 static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
+       __releases(&conf->device_lock)
 {
        int i;
        spin_unlock(&conf->device_lock);
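
The annotations added throughout this file are sparse context markers, checked with `make C=1`; they replace free-form comments such as the removed "should hold conf->device_lock already". A sketch of the three forms used here:

	static void sketch_take(struct r5conf *conf)
		__acquires(&conf->device_lock)	/* returns with lock held */
	{
		spin_lock_irq(&conf->device_lock);
	}

	static void sketch_drop(struct r5conf *conf)
		__releases(&conf->device_lock)	/* enters with lock held */
	{
		spin_unlock_irq(&conf->device_lock);
	}

	static void sketch_body(struct r5conf *conf)
		__must_hold(&conf->device_lock)	/* caller holds the lock */
	{
		/* touch device_lock-protected state here */
	}
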
@@ -164,6 +168,7 @@ static bool stripe_is_lowprio(struct stripe_head *sh)
 }
 
 static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
+       __must_hold(&sh->raid_conf->device_lock)
 {
        struct r5conf *conf = sh->raid_conf;
        struct r5worker_group *group;
@@ -211,6 +216,7 @@ static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
 
 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
                              struct list_head *temp_inactive_list)
+       __must_hold(&conf->device_lock)
 {
        int i;
        int injournal = 0;      /* number of data pages with R5_InJournal */
@@ -296,6 +302,7 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
 
 static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
                             struct list_head *temp_inactive_list)
+       __must_hold(&conf->device_lock)
 {
        if (atomic_dec_and_test(&sh->count))
                do_release_stripe(conf, sh, temp_inactive_list);
@@ -350,9 +357,9 @@ static void release_inactive_stripe_list(struct r5conf *conf,
        }
 }
 
-/* should hold conf->device_lock already */
 static int release_stripe_list(struct r5conf *conf,
                               struct list_head *temp_inactive_list)
+       __must_hold(&conf->device_lock)
 {
        struct stripe_head *sh, *t;
        int count = 0;
@@ -629,6 +636,10 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
  * This is because some failed devices may only affect one
  * of the two sections, and some non-in_sync devices may
  * be insync in the section most affected by failed devices.
+ *
+ * Most calls to this function hold &conf->device_lock. Calls
+ * in raid5_run() do not require the lock as no other threads
+ * have been started yet.
  */
 int raid5_calc_degraded(struct r5conf *conf)
 {
@@ -686,17 +697,17 @@ int raid5_calc_degraded(struct r5conf *conf)
        return degraded;
 }
 
-static int has_failed(struct r5conf *conf)
+static bool has_failed(struct r5conf *conf)
 {
-       int degraded;
+       int degraded = conf->mddev->degraded;
 
-       if (conf->mddev->reshape_position == MaxSector)
-               return conf->mddev->degraded > conf->max_degraded;
+       if (test_bit(MD_BROKEN, &conf->mddev->flags))
+               return true;
 
-       degraded = raid5_calc_degraded(conf);
-       if (degraded > conf->max_degraded)
-               return 1;
-       return 0;
+       if (conf->mddev->reshape_position != MaxSector)
+               degraded = raid5_calc_degraded(conf);
+
+       return degraded > conf->max_degraded;
 }
 
 struct stripe_head *
@@ -2648,6 +2659,28 @@ static void shrink_stripes(struct r5conf *conf)
        conf->slab_cache = NULL;
 }
 
+/*
+ * This helper wraps rcu_dereference_protected() and can be used when
+ * it is known that the nr_pending of the rdev is elevated.
+ */
+static struct md_rdev *rdev_pend_deref(struct md_rdev __rcu *rdev)
+{
+       return rcu_dereference_protected(rdev,
+                       atomic_read(&rcu_access_pointer(rdev)->nr_pending));
+}
+
+/*
+ * This helper wraps rcu_dereference_protected() and should be used
+ * when it is known that mddev_lock() is held. This is safe because
+ * raid5_remove_disk() holds the same lock.
+ */
+static struct md_rdev *rdev_mdlock_deref(struct mddev *mddev,
+                                        struct md_rdev __rcu *rdev)
+{
+       return rcu_dereference_protected(rdev,
+                       lockdep_is_held(&mddev->reconfig_mutex));
+}
+
 static void raid5_end_read_request(struct bio * bi)
 {
        struct stripe_head *sh = bi->bi_private;
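
rdev_pend_deref() and rdev_mdlock_deref() above are thin wrappers around rcu_dereference_protected(), the accessor for code that does not need rcu_read_lock() because the supplied condition proves the pointer cannot change underneath it; with CONFIG_PROVE_RCU the condition is actually evaluated and warned on. A hedged usage sketch:

	static void sketch_deref_usage(struct mddev *mddev,
				       struct r5conf *conf, int i)
	{
		struct md_rdev *rdev;

		/* Holding nr_pending pins the rdev, no RCU read lock needed. */
		rdev = rdev_pend_deref(conf->disks[i].rdev);

		/* reconfig_mutex serializes against raid5_remove_disk(). */
		rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
		(void)rdev;
	}
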
@@ -2674,9 +2707,9 @@ static void raid5_end_read_request(struct bio * bi)
                 * In that case it moved down to 'rdev'.
                 * rdev is not removed until all requests are finished.
                 */
-               rdev = conf->disks[i].replacement;
+               rdev = rdev_pend_deref(conf->disks[i].replacement);
        if (!rdev)
-               rdev = conf->disks[i].rdev;
+               rdev = rdev_pend_deref(conf->disks[i].rdev);
 
        if (use_new_offset(conf, sh))
                s = sh->sector + rdev->new_data_offset;
@@ -2790,11 +2823,11 @@ static void raid5_end_write_request(struct bio *bi)
 
        for (i = 0 ; i < disks; i++) {
                if (bi == &sh->dev[i].req) {
-                       rdev = conf->disks[i].rdev;
+                       rdev = rdev_pend_deref(conf->disks[i].rdev);
                        break;
                }
                if (bi == &sh->dev[i].rreq) {
-                       rdev = conf->disks[i].replacement;
+                       rdev = rdev_pend_deref(conf->disks[i].replacement);
                        if (rdev)
                                replacement = 1;
                        else
@@ -2802,7 +2835,7 @@ static void raid5_end_write_request(struct bio *bi)
                                 * replaced it.  rdev is not removed
                                 * until all requests are finished.
                                 */
-                               rdev = conf->disks[i].rdev;
+                               rdev = rdev_pend_deref(conf->disks[i].rdev);
                        break;
                }
        }
@@ -2863,34 +2896,31 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
        unsigned long flags;
        pr_debug("raid456: error called\n");
 
+       pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
+               mdname(mddev), bdevname(rdev->bdev, b));
+
        spin_lock_irqsave(&conf->device_lock, flags);
+       set_bit(Faulty, &rdev->flags);
+       clear_bit(In_sync, &rdev->flags);
+       mddev->degraded = raid5_calc_degraded(conf);
 
-       if (test_bit(In_sync, &rdev->flags) &&
-           mddev->degraded == conf->max_degraded) {
-               /*
-                * Don't allow to achieve failed state
-                * Don't try to recover this device
-                */
+       if (has_failed(conf)) {
+               set_bit(MD_BROKEN, &conf->mddev->flags);
                conf->recovery_disabled = mddev->recovery_disabled;
-               spin_unlock_irqrestore(&conf->device_lock, flags);
-               return;
+
+               pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n",
+                       mdname(mddev), mddev->degraded, conf->raid_disks);
+       } else {
+               pr_crit("md/raid:%s: Operation continuing on %d devices.\n",
+                       mdname(mddev), conf->raid_disks - mddev->degraded);
        }
 
-       set_bit(Faulty, &rdev->flags);
-       clear_bit(In_sync, &rdev->flags);
-       mddev->degraded = raid5_calc_degraded(conf);
        spin_unlock_irqrestore(&conf->device_lock, flags);
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 
        set_bit(Blocked, &rdev->flags);
        set_mask_bits(&mddev->sb_flags, 0,
                      BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
-       pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
-               "md/raid:%s: Operation continuing on %d devices.\n",
-               mdname(mddev),
-               bdevname(rdev->bdev, b),
-               mdname(mddev),
-               conf->raid_disks - mddev->degraded);
        r5c_update_on_rdev_error(mddev, rdev);
 }
 
@@ -5213,23 +5243,23 @@ finish:
                        struct r5dev *dev = &sh->dev[i];
                        if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
                                /* We own a safe reference to the rdev */
-                               rdev = conf->disks[i].rdev;
+                               rdev = rdev_pend_deref(conf->disks[i].rdev);
                                if (!rdev_set_badblocks(rdev, sh->sector,
                                                        RAID5_STRIPE_SECTORS(conf), 0))
                                        md_error(conf->mddev, rdev);
                                rdev_dec_pending(rdev, conf->mddev);
                        }
                        if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
-                               rdev = conf->disks[i].rdev;
+                               rdev = rdev_pend_deref(conf->disks[i].rdev);
                                rdev_clear_badblocks(rdev, sh->sector,
                                                     RAID5_STRIPE_SECTORS(conf), 0);
                                rdev_dec_pending(rdev, conf->mddev);
                        }
                        if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
-                               rdev = conf->disks[i].replacement;
+                               rdev = rdev_pend_deref(conf->disks[i].replacement);
                                if (!rdev)
                                        /* rdev have been moved down */
-                                       rdev = conf->disks[i].rdev;
+                                       rdev = rdev_pend_deref(conf->disks[i].rdev);
                                rdev_clear_badblocks(rdev, sh->sector,
                                                     RAID5_STRIPE_SECTORS(conf), 0);
                                rdev_dec_pending(rdev, conf->mddev);
@@ -5256,6 +5286,7 @@ finish:
 }
 
 static void raid5_activate_delayed(struct r5conf *conf)
+       __must_hold(&conf->device_lock)
 {
        if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
                while (!list_empty(&conf->delayed_list)) {
@@ -5273,9 +5304,9 @@ static void raid5_activate_delayed(struct r5conf *conf)
 }
 
 static void activate_bit_delay(struct r5conf *conf,
-       struct list_head *temp_inactive_list)
+               struct list_head *temp_inactive_list)
+       __must_hold(&conf->device_lock)
 {
-       /* device_lock is held */
        struct list_head head;
        list_add(&head, &conf->bitmap_list);
        list_del_init(&conf->bitmap_list);
@@ -5500,6 +5531,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
  * handle_list.
  */
 static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
+       __must_hold(&conf->device_lock)
 {
        struct stripe_head *sh, *tmp;
        struct list_head *handle_list = NULL;
@@ -6288,7 +6320,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
         */
        rcu_read_lock();
        for (i = 0; i < conf->raid_disks; i++) {
-               struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
+               struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
 
                if (rdev == NULL || test_bit(Faulty, &rdev->flags))
                        still_degraded = 1;
@@ -6371,8 +6403,7 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio,
 static int handle_active_stripes(struct r5conf *conf, int group,
                                 struct r5worker *worker,
                                 struct list_head *temp_inactive_list)
-               __releases(&conf->device_lock)
-               __acquires(&conf->device_lock)
+               __must_hold(&conf->device_lock)
 {
        struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
        int i, batch_size = 0, hash;
@@ -7166,7 +7197,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        int i;
        int group_cnt;
        struct r5worker_group *new_group;
-       int ret;
+       int ret = -ENOMEM;
 
        if (mddev->new_level != 5
            && mddev->new_level != 4
@@ -7225,6 +7256,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        spin_lock_init(&conf->device_lock);
        seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock);
        mutex_init(&conf->cache_size_mutex);
+
        init_waitqueue_head(&conf->wait_for_quiescent);
        init_waitqueue_head(&conf->wait_for_stripe);
        init_waitqueue_head(&conf->wait_for_overlap);
@@ -7242,7 +7274,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        rdev_for_each(rdev, mddev) {
                if (test_bit(Journal, &rdev->flags))
                        continue;
-               if (blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
+               if (bdev_nonrot(rdev->bdev)) {
                        conf->batch_bio_dispatch = false;
                        break;
                }
@@ -7302,11 +7334,13 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 
        conf->level = mddev->new_level;
        conf->chunk_sectors = mddev->new_chunk_sectors;
-       if (raid5_alloc_percpu(conf) != 0)
+       ret = raid5_alloc_percpu(conf);
+       if (ret)
                goto abort;
 
        pr_debug("raid456: run(%s) called.\n", mdname(mddev));
 
+       ret = -EIO;
        rdev_for_each(rdev, mddev) {
                raid_disk = rdev->raid_disk;
                if (raid_disk >= max_disks
@@ -7317,11 +7351,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
                if (test_bit(Replacement, &rdev->flags)) {
                        if (disk->replacement)
                                goto abort;
-                       disk->replacement = rdev;
+                       RCU_INIT_POINTER(disk->replacement, rdev);
                } else {
                        if (disk->rdev)
                                goto abort;
-                       disk->rdev = rdev;
+                       RCU_INIT_POINTER(disk->rdev, rdev);
                }
 
                if (test_bit(In_sync, &rdev->flags)) {
@@ -7370,6 +7404,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        if (grow_stripes(conf, conf->min_nr_stripes)) {
                pr_warn("md/raid:%s: couldn't allocate %dkB for buffers\n",
                        mdname(mddev), memory);
+               ret = -ENOMEM;
                goto abort;
        } else
                pr_debug("md/raid:%s: allocated %dkB\n", mdname(mddev), memory);
@@ -7383,7 +7418,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        conf->shrinker.count_objects = raid5_cache_count;
        conf->shrinker.batch = 128;
        conf->shrinker.flags = 0;
-       if (register_shrinker(&conf->shrinker)) {
+       ret = register_shrinker(&conf->shrinker);
+       if (ret) {
                pr_warn("md/raid:%s: couldn't register shrinker.\n",
                        mdname(mddev));
                goto abort;
@@ -7394,17 +7430,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        if (!conf->thread) {
                pr_warn("md/raid:%s: couldn't allocate thread.\n",
                        mdname(mddev));
+               ret = -ENOMEM;
                goto abort;
        }
 
        return conf;
 
  abort:
-       if (conf) {
+       if (conf)
                free_conf(conf);
-               return ERR_PTR(-EIO);
-       } else
-               return ERR_PTR(-ENOMEM);
+       return ERR_PTR(ret);
 }
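
All of the setup_conf() hunks above serve one error-handling pattern: ret starts at -ENOMEM (the most common failure in this function), each site that can fail differently overwrites it with the real errno before jumping, and a single abort: label frees everything and returns ERR_PTR(ret). Condensed, with a hypothetical helper:

static struct r5conf *example_setup(void)
{
        struct r5conf *conf;
        int ret = -ENOMEM;              /* default: allocation failure */

        conf = kzalloc(sizeof(*conf), GFP_KERNEL);
        if (!conf)
                goto abort;             /* ret is already -ENOMEM */

        ret = example_register(conf);   /* hypothetical failing step */
        if (ret)
                goto abort;             /* keep the real errno */

        return conf;

abort:
        kfree(conf);                    /* kfree(NULL) is a no-op */
        return ERR_PTR(ret);
}
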
 
 static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
@@ -7621,17 +7656,18 @@ static int raid5_run(struct mddev *mddev)
 
        for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
             i++) {
-               rdev = conf->disks[i].rdev;
+               rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
                if (!rdev && conf->disks[i].replacement) {
                        /* The replacement is all we have yet */
-                       rdev = conf->disks[i].replacement;
+                       rdev = rdev_mdlock_deref(mddev,
+                                                conf->disks[i].replacement);
                        conf->disks[i].replacement = NULL;
                        clear_bit(Replacement, &rdev->flags);
-                       conf->disks[i].rdev = rdev;
+                       rcu_assign_pointer(conf->disks[i].rdev, rdev);
                }
                if (!rdev)
                        continue;
-               if (conf->disks[i].replacement &&
+               if (rcu_access_pointer(conf->disks[i].replacement) &&
                    conf->reshape_progress != MaxSector) {
                        /* replacements and reshape simply do not mix. */
                        pr_warn("md: cannot handle concurrent replacement and reshape.\n");
@@ -7749,7 +7785,6 @@ static int raid5_run(struct mddev *mddev)
                 */
                stripe = stripe * PAGE_SIZE;
                stripe = roundup_pow_of_two(stripe);
-               mddev->queue->limits.discard_alignment = stripe;
                mddev->queue->limits.discard_granularity = stripe;
 
                blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
@@ -7776,14 +7811,10 @@ static int raid5_run(struct mddev *mddev)
                 * A better idea might be to turn DISCARD into WRITE_ZEROES
                 * requests, as that is required to be safe.
                 */
-               if (devices_handle_discard_safely &&
-                   mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
-                   mddev->queue->limits.discard_granularity >= stripe)
-                       blk_queue_flag_set(QUEUE_FLAG_DISCARD,
-                                               mddev->queue);
-               else
-                       blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
-                                               mddev->queue);
+               if (!devices_handle_discard_safely ||
+                   mddev->queue->limits.max_discard_sectors < (stripe >> 9) ||
+                   mddev->queue->limits.discard_granularity < stripe)
+                       blk_queue_max_discard_sectors(mddev->queue, 0);
 
                blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
        }
@@ -7832,8 +7863,8 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
 
 static void print_raid5_conf (struct r5conf *conf)
 {
+       struct md_rdev *rdev;
        int i;
-       struct disk_info *tmp;
 
        pr_debug("RAID conf printout:\n");
        if (!conf) {
@@ -7844,50 +7875,54 @@ static void print_raid5_conf (struct r5conf *conf)
               conf->raid_disks,
               conf->raid_disks - conf->mddev->degraded);
 
+       rcu_read_lock();
        for (i = 0; i < conf->raid_disks; i++) {
                char b[BDEVNAME_SIZE];
-               tmp = conf->disks + i;
-               if (tmp->rdev)
+               rdev = rcu_dereference(conf->disks[i].rdev);
+               if (rdev)
                        pr_debug(" disk %d, o:%d, dev:%s\n",
-                              i, !test_bit(Faulty, &tmp->rdev->flags),
-                              bdevname(tmp->rdev->bdev, b));
+                              i, !test_bit(Faulty, &rdev->flags),
+                              bdevname(rdev->bdev, b));
        }
+       rcu_read_unlock();
 }
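
print_raid5_conf() is the template for the lockless read side of this RCU conversion: take rcu_read_lock() around the walk and fetch each __rcu pointer through rcu_dereference(). The shape, as a sketch:

rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) {
        struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);

        if (rdev)
                pr_debug("disk %d operational=%d\n",
                         i, !test_bit(Faulty, &rdev->flags));
        /* rdev must not be used once rcu_read_unlock() runs */
}
rcu_read_unlock();
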
 
 static int raid5_spare_active(struct mddev *mddev)
 {
        int i;
        struct r5conf *conf = mddev->private;
-       struct disk_info *tmp;
+       struct md_rdev *rdev, *replacement;
        int count = 0;
        unsigned long flags;
 
        for (i = 0; i < conf->raid_disks; i++) {
-               tmp = conf->disks + i;
-               if (tmp->replacement
-                   && tmp->replacement->recovery_offset == MaxSector
-                   && !test_bit(Faulty, &tmp->replacement->flags)
-                   && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
+               rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
+               replacement = rdev_mdlock_deref(mddev,
+                                               conf->disks[i].replacement);
+               if (replacement
+                   && replacement->recovery_offset == MaxSector
+                   && !test_bit(Faulty, &replacement->flags)
+                   && !test_and_set_bit(In_sync, &replacement->flags)) {
                        /* Replacement has just become active. */
-                       if (!tmp->rdev
-                           || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
+                       if (!rdev
+                           || !test_and_clear_bit(In_sync, &rdev->flags))
                                count++;
-                       if (tmp->rdev) {
+                       if (rdev) {
                                /* Replaced device not technically faulty,
                                 * but we need to be sure it gets removed
                                 * and never re-added.
                                 */
-                               set_bit(Faulty, &tmp->rdev->flags);
+                               set_bit(Faulty, &rdev->flags);
                                sysfs_notify_dirent_safe(
-                                       tmp->rdev->sysfs_state);
+                                       rdev->sysfs_state);
                        }
-                       sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
-               } else if (tmp->rdev
-                   && tmp->rdev->recovery_offset == MaxSector
-                   && !test_bit(Faulty, &tmp->rdev->flags)
-                   && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
+                       sysfs_notify_dirent_safe(replacement->sysfs_state);
+               } else if (rdev
+                   && rdev->recovery_offset == MaxSector
+                   && !test_bit(Faulty, &rdev->flags)
+                   && !test_and_set_bit(In_sync, &rdev->flags)) {
                        count++;
-                       sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
+                       sysfs_notify_dirent_safe(rdev->sysfs_state);
                }
        }
        spin_lock_irqsave(&conf->device_lock, flags);
@@ -7902,8 +7937,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
        struct r5conf *conf = mddev->private;
        int err = 0;
        int number = rdev->raid_disk;
-       struct md_rdev **rdevp;
+       struct md_rdev __rcu **rdevp;
        struct disk_info *p = conf->disks + number;
+       struct md_rdev *tmp;
 
        print_raid5_conf(conf);
        if (test_bit(Journal, &rdev->flags) && conf->log) {
@@ -7921,9 +7957,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                log_exit(conf);
                return 0;
        }
-       if (rdev == p->rdev)
+       if (rdev == rcu_access_pointer(p->rdev))
                rdevp = &p->rdev;
-       else if (rdev == p->replacement)
+       else if (rdev == rcu_access_pointer(p->replacement))
                rdevp = &p->replacement;
        else
                return 0;
@@ -7943,18 +7979,20 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
        if (!test_bit(Faulty, &rdev->flags) &&
            mddev->recovery_disabled != conf->recovery_disabled &&
            !has_failed(conf) &&
-           (!p->replacement || p->replacement == rdev) &&
+           (!rcu_access_pointer(p->replacement) ||
+            rcu_access_pointer(p->replacement) == rdev) &&
            number < conf->raid_disks) {
                err = -EBUSY;
                goto abort;
        }
        *rdevp = NULL;
        if (!test_bit(RemoveSynchronized, &rdev->flags)) {
+               lockdep_assert_held(&mddev->reconfig_mutex);
                synchronize_rcu();
                if (atomic_read(&rdev->nr_pending)) {
                        /* lost the race, try later */
                        err = -EBUSY;
-                       *rdevp = rdev;
+                       rcu_assign_pointer(*rdevp, rdev);
                }
        }
        if (!err) {
@@ -7962,17 +8000,19 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                if (err)
                        goto abort;
        }
-       if (p->replacement) {
+
+       tmp = rcu_access_pointer(p->replacement);
+       if (tmp) {
                /* We must have just cleared 'rdev' */
-               p->rdev = p->replacement;
-               clear_bit(Replacement, &p->replacement->flags);
+               rcu_assign_pointer(p->rdev, tmp);
+               clear_bit(Replacement, &tmp->flags);
                smp_mb(); /* Make sure other CPUs may see both as identical
                           * but will never see neither - if they are careful
                           */
-               p->replacement = NULL;
+               rcu_assign_pointer(p->replacement, NULL);
 
                if (!err)
-                       err = log_modify(conf, p->rdev, true);
+                       err = log_modify(conf, tmp, true);
        }
 
        clear_bit(WantReplacement, &rdev->flags);
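
The removal path above is the standard RCU unpublish protocol, and the -EBUSY branch is easy to misread, so a simplified sketch:

rcu_assign_pointer(p->rdev, NULL);      /* unpublish the device */
synchronize_rcu();                      /* wait out all current readers */
if (atomic_read(&rdev->nr_pending)) {
        /*
         * A reader took a reference before the NULL became visible:
         * republish the pointer and let the caller retry later.
         */
        rcu_assign_pointer(p->rdev, rdev);
        err = -EBUSY;
}
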
@@ -7988,6 +8028,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
        int ret, err = -EEXIST;
        int disk;
        struct disk_info *p;
+       struct md_rdev *tmp;
        int first = 0;
        int last = conf->raid_disks - 1;
 
@@ -8045,7 +8086,8 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
        }
        for (disk = first; disk <= last; disk++) {
                p = conf->disks + disk;
-               if (test_bit(WantReplacement, &p->rdev->flags) &&
+               tmp = rdev_mdlock_deref(mddev, p->rdev);
+               if (test_bit(WantReplacement, &tmp->flags) &&
                    p->replacement == NULL) {
                        clear_bit(In_sync, &rdev->flags);
                        set_bit(Replacement, &rdev->flags);
@@ -8336,6 +8378,7 @@ static void end_reshape(struct r5conf *conf)
 static void raid5_finish_reshape(struct mddev *mddev)
 {
        struct r5conf *conf = mddev->private;
+       struct md_rdev *rdev;
 
        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 
@@ -8347,10 +8390,12 @@ static void raid5_finish_reshape(struct mddev *mddev)
                        for (d = conf->raid_disks ;
                             d < conf->raid_disks - mddev->delta_disks;
                             d++) {
-                               struct md_rdev *rdev = conf->disks[d].rdev;
+                               rdev = rdev_mdlock_deref(mddev,
+                                                        conf->disks[d].rdev);
                                if (rdev)
                                        clear_bit(In_sync, &rdev->flags);
-                               rdev = conf->disks[d].replacement;
+                               rdev = rdev_mdlock_deref(mddev,
+                                               conf->disks[d].replacement);
                                if (rdev)
                                        clear_bit(In_sync, &rdev->flags);
                        }
index 9e8486a9e4451df367b9ececb15ff34476d535b8..638d29863503b836899d72dcf54be1af1b88b6b3 100644 (file)
@@ -473,7 +473,8 @@ enum {
  */
 
 struct disk_info {
-       struct md_rdev  *rdev, *replacement;
+       struct md_rdev  __rcu *rdev;
+       struct md_rdev  __rcu *replacement;
        struct page     *extra_page; /* extra page to use in prexor */
 };
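
The __rcu marker added here is a sparse address-space annotation rather than a runtime construct: plain loads and stores of the pointer now warn, which forces every access through the RCU accessors (rcu_dereference() under rcu_read_lock(), rcu_assign_pointer() for publication, rcu_access_pointer() for NULL-tests that never dereference). A sketch of a read-side helper:

static bool example_slot_faulty(struct disk_info *p)
{
        struct md_rdev *rdev;
        bool faulty = false;

        rcu_read_lock();
        rdev = rcu_dereference(p->rdev);
        if (rdev)
                faulty = test_bit(Faulty, &rdev->flags);
        rcu_read_unlock();
        return faulty;          /* rdev itself is dead past this point */
}
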
 
@@ -560,6 +561,16 @@ struct r5pending_data {
        struct bio_list bios;
 };
 
+struct raid5_percpu {
+       struct page     *spare_page; /* Used when checking P/Q in raid6 */
+       void            *scribble;  /* space for constructing buffer
+                                    * lists and performing address
+                                    * conversions
+                                    */
+       int             scribble_obj_size;
+       local_lock_t    lock;
+};
+
 struct r5conf {
        struct hlist_head       *stripe_hashtbl;
        /* only protect corresponding hash list and inactive_list */
@@ -635,15 +646,7 @@ struct r5conf {
                                            */
        int                     recovery_disabled;
        /* per cpu variables */
-       struct raid5_percpu {
-               struct page     *spare_page; /* Used when checking P/Q in raid6 */
-               void            *scribble;  /* space for constructing buffer
-                                            * lists and performing address
-                                            * conversions
-                                            */
-               int             scribble_obj_size;
-               local_lock_t    lock;
-       } __percpu *percpu;
+       struct raid5_percpu __percpu *percpu;
        int scribble_disks;
        int scribble_sectors;
        struct hlist_node node;
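
Hoisting raid5_percpu out of r5conf is cosmetic, but the local_lock_t inside it deserves a note: on a mainline kernel local_lock() only disables preemption around the per-CPU scratch buffers, while on PREEMPT_RT it becomes a real lock. The access pattern raid5 uses with it looks roughly like:

struct raid5_percpu *percpu;

local_lock(&conf->percpu->lock);        /* pin to this CPU */
percpu = this_cpu_ptr(conf->percpu);
/* ... use percpu->scribble / percpu->spare_page ... */
local_unlock(&conf->percpu->lock);
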
index 28f2bafc14d281ee3723771ae7a9ab225e020033..5afa373e534f23052caccd735e1bc92f23051a19 100644 (file)
@@ -7,6 +7,7 @@ comment "NXP media platform drivers"
 config VIDEO_IMX_MIPI_CSIS
        tristate "NXP MIPI CSI-2 CSIS receiver found on i.MX7 and i.MX8 models"
        depends on ARCH_MXC || COMPILE_TEST
+       depends on VIDEO_DEV
        select MEDIA_CONTROLLER
        select V4L2_FWNODE
        select VIDEO_V4L2_SUBDEV_API
index 4de5e8d2b261bd9939ec0a20f2e54c75b8fb45b2..3d3d1062e2122b49d166c3fe7e3b045ff370e740 100644 (file)
@@ -892,7 +892,7 @@ static int rga_probe(struct platform_device *pdev)
        }
        rga->dst_mmu_pages =
                (unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3);
-       if (rga->dst_mmu_pages) {
+       if (!rga->dst_mmu_pages) {
                ret = -ENOMEM;
                goto free_src_pages;
        }
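
The rga fix is a one-character logic inversion: __get_free_pages() returns 0 on failure, so the -ENOMEM branch has to fire on the zero case. Sketch:

pages = (unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3);
if (!pages)                     /* 0 means the allocation failed */
        return -ENOMEM;         /* the old "if (pages)" errored on every success */
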
index 47029746b89eedf0d7a6beedb06a2f60ac5865c3..0de587b412d4e7deaa715eeca94ab17cd273f6dd 100644 (file)
@@ -77,16 +77,16 @@ err_mutex_unlock:
 }
 
 static const struct si2157_tuner_info si2157_tuners[] = {
-       { SI2141, false, 0x60, SI2141_60_FIRMWARE, SI2141_A10_FIRMWARE },
-       { SI2141, false, 0x61, SI2141_61_FIRMWARE, SI2141_A10_FIRMWARE },
-       { SI2146, false, 0x11, SI2146_11_FIRMWARE, NULL },
-       { SI2147, false, 0x50, SI2147_50_FIRMWARE, NULL },
-       { SI2148, true,  0x32, SI2148_32_FIRMWARE, SI2158_A20_FIRMWARE },
-       { SI2148, true,  0x33, SI2148_33_FIRMWARE, SI2158_A20_FIRMWARE },
-       { SI2157, false, 0x50, SI2157_50_FIRMWARE, SI2157_A30_FIRMWARE },
-       { SI2158, false, 0x50, SI2158_50_FIRMWARE, SI2158_A20_FIRMWARE },
-       { SI2158, false, 0x51, SI2158_51_FIRMWARE, SI2158_A20_FIRMWARE },
-       { SI2177, false, 0x50, SI2177_50_FIRMWARE, SI2157_A30_FIRMWARE },
+       { SI2141, 0x60, false, SI2141_60_FIRMWARE, SI2141_A10_FIRMWARE },
+       { SI2141, 0x61, false, SI2141_61_FIRMWARE, SI2141_A10_FIRMWARE },
+       { SI2146, 0x11, false, SI2146_11_FIRMWARE, NULL },
+       { SI2147, 0x50, false, SI2147_50_FIRMWARE, NULL },
+       { SI2148, 0x32, true,  SI2148_32_FIRMWARE, SI2158_A20_FIRMWARE },
+       { SI2148, 0x33, true,  SI2148_33_FIRMWARE, SI2158_A20_FIRMWARE },
+       { SI2157, 0x50, false, SI2157_50_FIRMWARE, SI2157_A30_FIRMWARE },
+       { SI2158, 0x50, false, SI2158_50_FIRMWARE, SI2158_A20_FIRMWARE },
+       { SI2158, 0x51, false, SI2158_51_FIRMWARE, SI2158_A20_FIRMWARE },
+       { SI2177, 0x50, false, SI2177_50_FIRMWARE, SI2157_A30_FIRMWARE },
 };
 
 static int si2157_load_firmware(struct dvb_frontend *fe,
@@ -178,7 +178,7 @@ static int si2157_find_and_load_firmware(struct dvb_frontend *fe)
                }
        }
 
-       if (!fw_name && !fw_alt_name) {
+       if (required && !fw_name && !fw_alt_name) {
                dev_err(&client->dev,
                        "unknown chip version Si21%d-%c%c%c ROM 0x%02x\n",
                        part_id, cmd.args[1], cmd.args[3], cmd.args[4], rom_id);
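
The table churn above exists because the initializers are positional and the struct's field order changed (the ROM id now precedes the bool). Designated initializers would make such tables immune to reordering; a hedged sketch, with field names inferred from the swap rather than quoted from the driver header:

static const struct si2157_tuner_info example_entry = {
        .part_id     = SI2157,
        .rom_id      = 0x50,
        .required    = false,
        .fw_name     = SI2157_50_FIRMWARE,
        .fw_alt_name = SI2157_A30_FIRMWARE,
};
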
index c267283b01fdaf74c2ef094926fd65da72af0ca5..e749dcb3ddea93335a6ca5f5dd6e1de0e9a52418 100644 (file)
@@ -544,20 +544,27 @@ static int atmel_ebi_probe(struct platform_device *pdev)
        smc_np = of_parse_phandle(dev->of_node, "atmel,smc", 0);
 
        ebi->smc.regmap = syscon_node_to_regmap(smc_np);
-       if (IS_ERR(ebi->smc.regmap))
-               return PTR_ERR(ebi->smc.regmap);
+       if (IS_ERR(ebi->smc.regmap)) {
+               ret = PTR_ERR(ebi->smc.regmap);
+               goto put_node;
+       }
 
        ebi->smc.layout = atmel_hsmc_get_reg_layout(smc_np);
-       if (IS_ERR(ebi->smc.layout))
-               return PTR_ERR(ebi->smc.layout);
+       if (IS_ERR(ebi->smc.layout)) {
+               ret = PTR_ERR(ebi->smc.layout);
+               goto put_node;
+       }
 
        ebi->smc.clk = of_clk_get(smc_np, 0);
        if (IS_ERR(ebi->smc.clk)) {
-               if (PTR_ERR(ebi->smc.clk) != -ENOENT)
-                       return PTR_ERR(ebi->smc.clk);
+               if (PTR_ERR(ebi->smc.clk) != -ENOENT) {
+                       ret = PTR_ERR(ebi->smc.clk);
+                       goto put_node;
+               }
 
                ebi->smc.clk = NULL;
        }
+       of_node_put(smc_np);
        ret = clk_prepare_enable(ebi->smc.clk);
        if (ret)
                return ret;
@@ -608,6 +615,10 @@ static int atmel_ebi_probe(struct platform_device *pdev)
        }
 
        return of_platform_populate(np, NULL, NULL, dev);
+
+put_node:
+       of_node_put(smc_np);
+       return ret;
 }
 
 static __maybe_unused int atmel_ebi_resume(struct device *dev)
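
The atmel-ebi hunks are a refcount repair: of_parse_phandle() returns smc_np with its refcount raised, and the early returns leaked it. The rule, condensed into a hypothetical probe:

static int example_probe(struct device *dev)
{
        struct device_node *np;
        struct regmap *map;
        int ret = 0;

        np = of_parse_phandle(dev->of_node, "atmel,smc", 0);
        map = syscon_node_to_regmap(np);
        if (IS_ERR(map)) {
                ret = PTR_ERR(map);     /* was a bare return: np leaked */
                goto put_node;
        }
        /* ... remaining setup ... */

put_node:
        of_node_put(np);                /* drop the reference exactly once */
        return ret;
}
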
index 2f6939da21cdcef233be890d16fb8aadfe13a3ee..e83b61c925a4fdd061c89f8901a81bfeb50a7c3a 100644 (file)
@@ -287,8 +287,7 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev)
        }
 
        /* legacy dts may still use "simple-bus" compatible */
-       ret = of_platform_populate(dev->dev.of_node, NULL, NULL,
-                                       &dev->dev);
+       ret = of_platform_default_populate(dev->dev.of_node, NULL, &dev->dev);
        if (ret)
                goto err_free_nandirq;
 
index e4cc64f560196d5579839e53dc7fc6d100c5f48e..019a0822bde0e4134c4910653af0dda8d405d85b 100644 (file)
@@ -164,25 +164,39 @@ static const struct regmap_access_table rpcif_volatile_table = {
 
 
 /*
- * Custom accessor functions to ensure SMRDR0 and SMWDR0 are always accessed
- * with proper width. Requires SMENR_SPIDE to be correctly set before!
+ * Custom accessor functions to ensure SM[RW]DR[01] are always accessed with
+ * proper width.  Requires rpcif.xfer_size to be correctly set before!
  */
 static int rpcif_reg_read(void *context, unsigned int reg, unsigned int *val)
 {
        struct rpcif *rpc = context;
 
-       if (reg == RPCIF_SMRDR0 || reg == RPCIF_SMWDR0) {
-               u32 spide = readl(rpc->base + RPCIF_SMENR) & RPCIF_SMENR_SPIDE(0xF);
-
-               if (spide == 0x8) {
+       switch (reg) {
+       case RPCIF_SMRDR0:
+       case RPCIF_SMWDR0:
+               switch (rpc->xfer_size) {
+               case 1:
                        *val = readb(rpc->base + reg);
                        return 0;
-               } else if (spide == 0xC) {
+
+               case 2:
                        *val = readw(rpc->base + reg);
                        return 0;
-               } else if (spide != 0xF) {
+
+               case 4:
+               case 8:
+                       *val = readl(rpc->base + reg);
+                       return 0;
+
+               default:
                        return -EILSEQ;
                }
+
+       case RPCIF_SMRDR1:
+       case RPCIF_SMWDR1:
+               if (rpc->xfer_size != 8)
+                       return -EILSEQ;
+               break;
        }
 
        *val = readl(rpc->base + reg);
@@ -193,18 +207,34 @@ static int rpcif_reg_write(void *context, unsigned int reg, unsigned int val)
 {
        struct rpcif *rpc = context;
 
-       if (reg == RPCIF_SMRDR0 || reg == RPCIF_SMWDR0) {
-               u32 spide = readl(rpc->base + RPCIF_SMENR) & RPCIF_SMENR_SPIDE(0xF);
-
-               if (spide == 0x8) {
+       switch (reg) {
+       case RPCIF_SMWDR0:
+               switch (rpc->xfer_size) {
+               case 1:
                        writeb(val, rpc->base + reg);
                        return 0;
-               } else if (spide == 0xC) {
+
+               case 2:
                        writew(val, rpc->base + reg);
                        return 0;
-               } else if (spide != 0xF) {
+
+               case 4:
+               case 8:
+                       writel(val, rpc->base + reg);
+                       return 0;
+
+               default:
                        return -EILSEQ;
                }
+
+       case RPCIF_SMWDR1:
+               if (rpc->xfer_size != 8)
+                       return -EILSEQ;
+               break;
+
+       case RPCIF_SMRDR0:
+       case RPCIF_SMRDR1:
+               return -EPERM;
        }
 
        writel(val, rpc->base + reg);
@@ -469,6 +499,7 @@ int rpcif_manual_xfer(struct rpcif *rpc)
 
                        smenr |= RPCIF_SMENR_SPIDE(rpcif_bits_set(rpc, nbytes));
                        regmap_write(rpc->regmap, RPCIF_SMENR, smenr);
+                       rpc->xfer_size = nbytes;
 
                        memcpy(data, rpc->buffer + pos, nbytes);
                        if (nbytes == 8) {
@@ -533,6 +564,7 @@ int rpcif_manual_xfer(struct rpcif *rpc)
                        regmap_write(rpc->regmap, RPCIF_SMENR, smenr);
                        regmap_write(rpc->regmap, RPCIF_SMCR,
                                     rpc->smcr | RPCIF_SMCR_SPIE);
+                       rpc->xfer_size = nbytes;
                        ret = wait_msg_xfer_end(rpc);
                        if (ret)
                                goto err_out;
@@ -651,6 +683,7 @@ static int rpcif_probe(struct platform_device *pdev)
        struct platform_device *vdev;
        struct device_node *flash;
        const char *name;
+       int ret;
 
        flash = of_get_next_child(pdev->dev.of_node, NULL);
        if (!flash) {
@@ -674,7 +707,14 @@ static int rpcif_probe(struct platform_device *pdev)
                return -ENOMEM;
        vdev->dev.parent = &pdev->dev;
        platform_set_drvdata(pdev, vdev);
-       return platform_device_add(vdev);
+
+       ret = platform_device_add(vdev);
+       if (ret) {
+               platform_device_put(vdev);
+               return ret;
+       }
+
+       return 0;
 }
 
 static int rpcif_remove(struct platform_device *pdev)
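
The probe change encodes a device-model lifetime rule: once platform_device_alloc() has succeeded, the only correct way to dispose of the device is platform_device_put(), including when platform_device_add() fails; a plain kfree() would bypass the embedded kobject's release path. Condensed:

vdev = platform_device_alloc(name, pdev->id);
if (!vdev)
        return -ENOMEM;
vdev->dev.parent = &pdev->dev;

ret = platform_device_add(vdev);
if (ret) {
        platform_device_put(vdev);      /* drops the alloc-time reference */
        return ret;
}
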
index e90adfa5795050bdd41a48005d088e793d9bcd24..9b3ba2df71c75b913768139272b00d232151a174 100644 (file)
@@ -6658,13 +6658,13 @@ static int mpt_summary_proc_show(struct seq_file *m, void *v)
 static int mpt_version_proc_show(struct seq_file *m, void *v)
 {
        u8       cb_idx;
-       int      scsi, fc, sas, lan, ctl, targ, dmp;
+       int      scsi, fc, sas, lan, ctl, targ;
        char    *drvname;
 
        seq_printf(m, "%s-%s\n", "mptlinux", MPT_LINUX_VERSION_COMMON);
        seq_printf(m, "  Fusion MPT base driver\n");
 
-       scsi = fc = sas = lan = ctl = targ = dmp = 0;
+       scsi = fc = sas = lan = ctl = targ = 0;
        for (cb_idx = MPT_MAX_PROTOCOL_DRIVERS-1; cb_idx; cb_idx--) {
                drvname = NULL;
                if (MptCallbacks[cb_idx]) {
index 91f96abbb3f9f58165329638e6e7db9ac11918e1..8d169a35cf130c231255ee84a6de52aade3e2d84 100644 (file)
@@ -31,6 +31,8 @@
  */
 
 #define        FM25_SN_LEN     8               /* serial number length */
+#define EE_MAXADDRLEN  3               /* 24 bit addresses, up to 2 MBytes */
+
 struct at25_data {
        struct spi_eeprom       chip;
        struct spi_device       *spi;
@@ -39,6 +41,7 @@ struct at25_data {
        struct nvmem_config     nvmem_config;
        struct nvmem_device     *nvmem;
        u8 sernum[FM25_SN_LEN];
+       u8 command[EE_MAXADDRLEN + 1];
 };
 
 #define        AT25_WREN       0x06            /* latch the write enable */
@@ -61,8 +64,6 @@ struct at25_data {
 
 #define        FM25_ID_LEN     9               /* ID length */
 
-#define EE_MAXADDRLEN  3               /* 24 bit addresses, up to 2 MBytes */
-
 /*
  * Specs often allow 5ms for a page write, sometimes 20ms;
  * it's important to recover from write timeouts.
@@ -78,7 +79,6 @@ static int at25_ee_read(void *priv, unsigned int offset,
 {
        struct at25_data *at25 = priv;
        char *buf = val;
-       u8                      command[EE_MAXADDRLEN + 1];
        u8                      *cp;
        ssize_t                 status;
        struct spi_transfer     t[2];
@@ -92,12 +92,15 @@ static int at25_ee_read(void *priv, unsigned int offset,
        if (unlikely(!count))
                return -EINVAL;
 
-       cp = command;
+       cp = at25->command;
 
        instr = AT25_READ;
        if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
                if (offset >= BIT(at25->addrlen * 8))
                        instr |= AT25_INSTR_BIT3;
+
+       mutex_lock(&at25->lock);
+
        *cp++ = instr;
 
        /* 8/16/24-bit address is written MSB first */
@@ -116,7 +119,7 @@ static int at25_ee_read(void *priv, unsigned int offset,
        spi_message_init(&m);
        memset(t, 0, sizeof(t));
 
-       t[0].tx_buf = command;
+       t[0].tx_buf = at25->command;
        t[0].len = at25->addrlen + 1;
        spi_message_add_tail(&t[0], &m);
 
@@ -124,8 +127,6 @@ static int at25_ee_read(void *priv, unsigned int offset,
        t[1].len = count;
        spi_message_add_tail(&t[1], &m);
 
-       mutex_lock(&at25->lock);
-
        /*
         * Read it all at once.
         *
@@ -152,7 +153,7 @@ static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command,
        spi_message_init(&m);
        memset(t, 0, sizeof(t));
 
-       t[0].tx_buf = &command;
+       t[0].tx_buf = at25->command;
        t[0].len = 1;
        spi_message_add_tail(&t[0], &m);
 
@@ -162,6 +163,8 @@ static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command,
 
        mutex_lock(&at25->lock);
 
+       at25->command[0] = command;
+
        status = spi_sync(at25->spi, &m);
        dev_dbg(&at25->spi->dev, "read %d aux bytes --> %d\n", len, status);
 
index e008d82e4ba3a276de80a3fa81e9a23d397ddb91..a13506dd81194532fa047da82423fe9946d8a2a5 100644 (file)
@@ -111,10 +111,10 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 
        if (contiguous) {
                if (is_power_of_2(page_size))
-                       paddr = (u64) (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool,
-                                                               total_size, NULL, page_size);
+                       paddr = (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool,
+                                                                    total_size, NULL, page_size);
                else
-                       paddr = (u64) (uintptr_t) gen_pool_alloc(vm->dram_pg_pool, total_size);
+                       paddr = gen_pool_alloc(vm->dram_pg_pool, total_size);
                if (!paddr) {
                        dev_err(hdev->dev,
                                "failed to allocate %llu contiguous pages with total size of %llu\n",
@@ -150,12 +150,12 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
                for (i = 0 ; i < num_pgs ; i++) {
                        if (is_power_of_2(page_size))
                                phys_pg_pack->pages[i] =
-                                               (u64) gen_pool_dma_alloc_align(vm->dram_pg_pool,
-                                                                               page_size, NULL,
-                                                                               page_size);
+                                       (uintptr_t)gen_pool_dma_alloc_align(vm->dram_pg_pool,
+                                                                           page_size, NULL,
+                                                                           page_size);
                        else
-                               phys_pg_pack->pages[i] = (u64) gen_pool_alloc(vm->dram_pg_pool,
-                                                                               page_size);
+                               phys_pg_pack->pages[i] = gen_pool_alloc(vm->dram_pg_pool,
+                                                                       page_size);
                        if (!phys_pg_pack->pages[i]) {
                                dev_err(hdev->dev,
                                        "Failed to allocate device memory (out of memory)\n");
index 4e67c1403cc93bb48302fe8747deec8a7195cea8..506dc900f5c7c391ef8c04dd0158754c0feba705 100644 (file)
@@ -993,7 +993,7 @@ static int mmc_blk_reset(struct mmc_blk_data *md, struct mmc_host *host,
                return -EEXIST;
 
        md->reset_done |= type;
-       err = mmc_hw_reset(host);
+       err = mmc_hw_reset(host->card);
        /* Ensure we switch back to the correct partition */
        if (err) {
                struct mmc_blk_data *main_md =
@@ -1880,6 +1880,31 @@ static inline bool mmc_blk_rq_error(struct mmc_blk_request *brq)
               brq->data.error || brq->cmd.resp[0] & CMD_ERRORS;
 }
 
+static int mmc_spi_err_check(struct mmc_card *card)
+{
+       u32 status = 0;
+       int err;
+
+       /*
+        * SPI does not have a TRAN state we have to wait on, instead the
+        * card is ready again when it no longer holds the line LOW.
+        * We still have to ensure two things here before we know the write
+        * was successful:
+        * 1. The card has not disconnected during busy and we actually read our
+        * own pull-up, thinking it was still connected, so ensure it
+        * still responds.
+        * 2. Check for any error bits, in particular R1_SPI_IDLE to catch a
+        * just reconnected card after being disconnected during busy.
+        */
+       err = __mmc_send_status(card, &status, 0);
+       if (err)
+               return err;
+       /* All R1 and R2 bits of SPI are errors in our case */
+       if (status)
+               return -EIO;
+       return 0;
+}
+
 static int mmc_blk_busy_cb(void *cb_data, bool *busy)
 {
        struct mmc_blk_busy_data *data = cb_data;
@@ -1903,9 +1928,16 @@ static int mmc_blk_card_busy(struct mmc_card *card, struct request *req)
        struct mmc_blk_busy_data cb_data;
        int err;
 
-       if (mmc_host_is_spi(card->host) || rq_data_dir(req) == READ)
+       if (rq_data_dir(req) == READ)
                return 0;
 
+       if (mmc_host_is_spi(card->host)) {
+               err = mmc_spi_err_check(card);
+               if (err)
+                       mqrq->brq.data.bytes_xfered = 0;
+               return err;
+       }
+
        cb_data.card = card;
        cb_data.status = 0;
        err = __mmc_poll_for_busy(card->host, 0, MMC_BLK_TIMEOUT_MS,
@@ -2350,6 +2382,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
        struct mmc_blk_data *md;
        int devidx, ret;
        char cap_str[10];
+       bool cache_enabled = false;
+       bool fua_enabled = false;
 
        devidx = ida_simple_get(&mmc_blk_ida, 0, max_devices, GFP_KERNEL);
        if (devidx < 0) {
@@ -2429,13 +2463,17 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
                        md->flags |= MMC_BLK_CMD23;
        }
 
-       if (mmc_card_mmc(card) &&
-           md->flags & MMC_BLK_CMD23 &&
+       if (md->flags & MMC_BLK_CMD23 &&
            ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||
             card->ext_csd.rel_sectors)) {
                md->flags |= MMC_BLK_REL_WR;
-               blk_queue_write_cache(md->queue.queue, true, true);
+               fua_enabled = true;
+               cache_enabled = true;
        }
+       if (mmc_cache_enabled(card->host))
+               cache_enabled  = true;
+
+       blk_queue_write_cache(md->queue.queue, cache_enabled, fua_enabled);
 
        string_get_size((u64)size, 512, STRING_UNITS_2,
                        cap_str, sizeof(cap_str));
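
The cache/FUA rework changes when the decision is made, not what it means: blk_queue_write_cache(q, cache, fua) is now called exactly once with accumulated flags, so a card whose cache was switched on via mmc_cache_enabled() gets flush support even without the reliable-write path. An equivalent call, for illustration only:

/* cache=true honours flushes; fua=true passes REQ_FUA through */
blk_queue_write_cache(md->queue.queue,
                      mmc_cache_enabled(card->host) ||
                      (md->flags & MMC_BLK_REL_WR),
                      md->flags & MMC_BLK_REL_WR);
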
index 368f10405e132ceedfb722278271bf99e422afeb..c6ae16d40766804357ef51dc01ec50b7a9716a21 100644 (file)
@@ -1995,7 +1995,7 @@ static void mmc_hw_reset_for_init(struct mmc_host *host)
 
 /**
  * mmc_hw_reset - reset the card in hardware
- * @host: MMC host to which the card is attached
+ * @card: card to be reset
  *
  * Hard reset the card. This function is only for upper layers, like the
  * block layer or card drivers. You cannot use it in host drivers (struct
@@ -2003,8 +2003,9 @@ static void mmc_hw_reset_for_init(struct mmc_host *host)
  *
  * Return: 0 on success, -errno on failure
  */
-int mmc_hw_reset(struct mmc_host *host)
+int mmc_hw_reset(struct mmc_card *card)
 {
+       struct mmc_host *host = card->host;
        int ret;
 
        ret = host->bus_ops->hw_reset(host);
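
The new signature ripples through the callers updated in this diff (mmc_blk_reset() and mmc_test_reset()): a hardware reset is a card-level operation, so taking the card and deriving the host internally removes the host-to-card layering inversion at every call site. Caller sketch:

err = mmc_hw_reset(card);               /* was: mmc_hw_reset(card->host) */
if (err)
        pr_warn("%s: hw reset failed: %d\n",
                mmc_hostname(card->host), err);
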
index e7ea45386c22f7e1880a521a281397893a3a6612..efa95dc4fc4eee88d91ee3792fffa16201554ddf 100644 (file)
@@ -1384,13 +1384,17 @@ static int mmc_select_hs400es(struct mmc_card *card)
                goto out_err;
        }
 
+       /*
+        * Bump to HS timing and frequency. Some cards don't handle
+        * SEND_STATUS reliably at the initial frequency.
+        */
        mmc_set_timing(host, MMC_TIMING_MMC_HS);
+       mmc_set_bus_speed(card);
+
        err = mmc_switch_status(card, true);
        if (err)
                goto out_err;
 
-       mmc_set_clock(host, card->ext_csd.hs_max_dtr);
-
        /* Switch card to DDR with strobe bit */
        val = EXT_CSD_DDR_BUS_WIDTH_8 | EXT_CSD_BUS_WIDTH_STROBE;
        err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
@@ -1448,7 +1452,7 @@ out_err:
 static int mmc_select_hs200(struct mmc_card *card)
 {
        struct mmc_host *host = card->host;
-       unsigned int old_timing, old_signal_voltage;
+       unsigned int old_timing, old_signal_voltage, old_clock;
        int err = -EINVAL;
        u8 val;
 
@@ -1479,8 +1483,17 @@ static int mmc_select_hs200(struct mmc_card *card)
                                   false, true, MMC_CMD_RETRIES);
                if (err)
                        goto err;
+
+               /*
+                * Bump to HS timing and frequency. Some cards don't handle
+                * SEND_STATUS reliably at the initial frequency.
+                * NB: We can't move to full (HS200) speeds until after we've
+                * successfully switched over.
+                */
                old_timing = host->ios.timing;
+               old_clock = host->ios.clock;
                mmc_set_timing(host, MMC_TIMING_MMC_HS200);
+               mmc_set_clock(card->host, card->ext_csd.hs_max_dtr);
 
                /*
                 * For HS200, CRC errors are not a reliable way to know the
@@ -1493,8 +1506,10 @@ static int mmc_select_hs200(struct mmc_card *card)
                 * mmc_select_timing() assumes timing has not changed if
                 * it is a switch error.
                 */
-               if (err == -EBADMSG)
+               if (err == -EBADMSG) {
+                       mmc_set_clock(host, old_clock);
                        mmc_set_timing(host, old_timing);
+               }
        }
 err:
        if (err) {
index 180d7e9d3400a5305b3d5d9b416b9f0942212762..81c55bfd6e0c263e5762c07e814874b016cb7b86 100644 (file)
@@ -21,7 +21,7 @@
 
 #define MMC_BKOPS_TIMEOUT_MS           (120 * 1000) /* 120s */
 #define MMC_SANITIZE_TIMEOUT_MS                (240 * 1000) /* 240s */
-#define MMC_OP_COND_PERIOD_US          (1 * 1000) /* 1ms */
+#define MMC_OP_COND_PERIOD_US          (4 * 1000) /* 4ms */
 #define MMC_OP_COND_TIMEOUT_MS         1000 /* 1s */
 
 static const u8 tuning_blk_pattern_4bit[] = {
index e6a2fd2c6d5c94b62374314c3cd11ed19502abdb..8d9bceeff9864b02c7488432e0cb5d8399b70608 100644 (file)
@@ -2325,10 +2325,9 @@ static int mmc_test_profile_sglen_r_nonblock_perf(struct mmc_test_card *test)
 static int mmc_test_reset(struct mmc_test_card *test)
 {
        struct mmc_card *card = test->card;
-       struct mmc_host *host = card->host;
        int err;
 
-       err = mmc_hw_reset(host);
+       err = mmc_hw_reset(card);
        if (!err) {
                /*
                 * Reset will re-enable the card's command queue, but tests
index c69b2d9df6f16df48f5ded2e9df6a49cf0ff9787..a3d4460055716f2bc7db856b5632b85f891be67d 100644 (file)
@@ -183,14 +183,13 @@ static void mmc_queue_setup_discard(struct request_queue *q,
        if (!max_discard)
                return;
 
-       blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
        blk_queue_max_discard_sectors(q, max_discard);
        q->limits.discard_granularity = card->pref_erase << 9;
        /* granularity must not be greater than max. discard */
        if (card->pref_erase > max_discard)
                q->limits.discard_granularity = SECTOR_SIZE;
        if (mmc_can_secure_erase_trim(card))
-               blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
+               blk_queue_max_secure_erase_sectors(q, max_discard);
 }
 
 static unsigned short mmc_get_max_segments(struct mmc_host *host)
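
These MMC hunks (and the mtd_blktrans and raid5 ones earlier) track a v5.19 block-layer convention change: QUEUE_FLAG_DISCARD and QUEUE_FLAG_SECERASE are gone, and support is expressed purely through queue limits. The convention, sketched:

/* discard supported: advertise a non-zero cap */
blk_queue_max_discard_sectors(q, max_discard);
q->limits.discard_granularity = granularity;

/* discard not (safely) supported: zero the cap, as raid5 now does */
blk_queue_max_discard_sectors(q, 0);

/* secure erase is likewise a sector limit now */
blk_queue_max_secure_erase_sectors(q, max_discard);
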
index 9c13f2c313658b5dd05ef23b7796b42a93c8117a..4566d7fc9055af49e4f4b98bb4edf65856c9d4c4 100644 (file)
@@ -62,8 +62,8 @@ static int sdmmc_idma_validate_data(struct mmci_host *host,
         * excepted the last element which has no constraint on idmasize
         */
        for_each_sg(data->sg, sg, data->sg_len - 1, i) {
-               if (!IS_ALIGNED(data->sg->offset, sizeof(u32)) ||
-                   !IS_ALIGNED(data->sg->length, SDMMC_IDMA_BURST)) {
+               if (!IS_ALIGNED(sg->offset, sizeof(u32)) ||
+                   !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST)) {
                        dev_err(mmc_dev(host->mmc),
                                "unaligned scatterlist: ofst:%x length:%d\n",
                                data->sg->offset, data->sg->length);
@@ -71,7 +71,7 @@ static int sdmmc_idma_validate_data(struct mmci_host *host,
                }
        }
 
-       if (!IS_ALIGNED(data->sg->offset, sizeof(u32))) {
+       if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
                dev_err(mmc_dev(host->mmc),
                        "unaligned last scatterlist: ofst:%x length:%d\n",
                        data->sg->offset, data->sg->length);
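
The mmci fix is the classic for_each_sg() pitfall: the loop body must read through the iteration cursor, because the list head only ever describes element zero. Sketch:

struct scatterlist *sg;
int i;

for_each_sg(data->sg, sg, data->sg_len, i) {
        /* 'sg' advances each pass; data->sg stays on element 0 */
        if (!IS_ALIGNED(sg->offset, sizeof(u32)) ||
            !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST))
                return -EINVAL;
}
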
index 2797a9c0f17d86f3ee774a45d02d3566f4ed723e..ddb5ca2f559e2b9b10d985e39ac803643807818f 100644 (file)
@@ -144,9 +144,9 @@ static unsigned int renesas_sdhi_clk_update(struct tmio_mmc_host *host,
                return clk_get_rate(priv->clk);
 
        if (priv->clkh) {
+               /* HS400 with 4TAP needs different clock settings */
                bool use_4tap = priv->quirks && priv->quirks->hs400_4taps;
-               bool need_slow_clkh = (host->mmc->ios.timing == MMC_TIMING_UHS_SDR104) ||
-                                     (host->mmc->ios.timing == MMC_TIMING_MMC_HS400);
+               bool need_slow_clkh = host->mmc->ios.timing == MMC_TIMING_MMC_HS400;
                clkh_shift = use_4tap && need_slow_clkh ? 1 : 2;
                ref_clk = priv->clkh;
        }
@@ -396,10 +396,10 @@ static void renesas_sdhi_hs400_complete(struct mmc_host *mmc)
                        SH_MOBILE_SDHI_SCC_TMPPORT2_HS400OSEL) |
                        sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_TMPPORT2));
 
-       /* Set the sampling clock selection range of HS400 mode */
        sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_DTCNTL,
                       SH_MOBILE_SDHI_SCC_DTCNTL_TAPEN |
-                      0x4 << SH_MOBILE_SDHI_SCC_DTCNTL_TAPNUM_SHIFT);
+                      sd_scc_read32(host, priv,
+                                    SH_MOBILE_SDHI_SCC_DTCNTL));
 
        /* Avoid bad TAP */
        if (bad_taps & BIT(priv->tap_set)) {
index 50c71e0ba5e4e8672d05e87103ad3b58d5272153..ff9f5b63c337ec59698fdf1b89383a8c0cf639b3 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/regulator/consumer.h>
 #include <linux/interconnect.h>
 #include <linux/pinctrl/consumer.h>
+#include <linux/reset.h>
 
 #include "sdhci-pltfm.h"
 #include "cqhci.h"
@@ -2482,6 +2483,43 @@ static inline void sdhci_msm_get_of_property(struct platform_device *pdev,
        of_property_read_u32(node, "qcom,dll-config", &msm_host->dll_config);
 }
 
+static int sdhci_msm_gcc_reset(struct device *dev, struct sdhci_host *host)
+{
+       struct reset_control *reset;
+       int ret = 0;
+
+       reset = reset_control_get_optional_exclusive(dev, NULL);
+       if (IS_ERR(reset))
+               return dev_err_probe(dev, PTR_ERR(reset),
+                               "unable to acquire core_reset\n");
+
+       if (!reset)
+               return ret;
+
+       ret = reset_control_assert(reset);
+       if (ret) {
+               reset_control_put(reset);
+               return dev_err_probe(dev, ret, "core_reset assert failed\n");
+       }
+
+       /*
+        * The hardware requirement for delay between assert/deassert
+        * is at least 3-4 sleep clock (32.7KHz) cycles, which comes to
+        * ~125us (4/32768). To be on the safe side add 200us delay.
+        */
+       usleep_range(200, 210);
+
+       ret = reset_control_deassert(reset);
+       if (ret) {
+               reset_control_put(reset);
+               return dev_err_probe(dev, ret, "core_reset deassert failed\n");
+       }
+
+       usleep_range(200, 210);
+       reset_control_put(reset);
+
+       return ret;
+}
 
 static int sdhci_msm_probe(struct platform_device *pdev)
 {
@@ -2529,6 +2567,10 @@ static int sdhci_msm_probe(struct platform_device *pdev)
 
        msm_host->saved_tuning_phase = INVALID_TUNING_PHASE;
 
+       ret = sdhci_msm_gcc_reset(&pdev->dev, host);
+       if (ret)
+               goto pltfm_free;
+
        /* Setup SDCC bus voter clock. */
        msm_host->bus_clk = devm_clk_get(&pdev->dev, "bus");
        if (!IS_ERR(msm_host->bus_clk)) {
index 666cee4c7f7c672de22a86668987d5359514756d..08e838400b526b42020fce0e869d568a2ddfd014 100644 (file)
@@ -241,16 +241,6 @@ static void xenon_voltage_switch(struct sdhci_host *host)
 {
        /* Wait for 5ms after set 1.8V signal enable bit */
        usleep_range(5000, 5500);
-
-       /*
-        * For some reason the controller's Host Control2 register reports
-        * the bit representing 1.8V signaling as 0 when read after it was
-        * written as 1. Subsequent read reports 1.
-        *
-        * Since this may cause some issues, do an empty read of the Host
-        * Control2 register here to circumvent this.
-        */
-       sdhci_readw(host, SDHCI_HOST_CONTROL2);
 }
 
 static unsigned int xenon_get_max_clock(struct sdhci_host *host)
index c62afd21269256bbc1f67e5e54d6acae92268a20..46f9e2923d869807f48d31ed56a0967175f91e2c 100644 (file)
@@ -377,8 +377,9 @@ static void sunxi_mmc_init_idma_des(struct sunxi_mmc_host *host,
                pdes[i].buf_addr_ptr1 =
                        cpu_to_le32(sg_dma_address(&data->sg[i]) >>
                                    host->cfg->idma_des_shift);
-               pdes[i].buf_addr_ptr2 = cpu_to_le32((u32)next_desc >>
-                                                   host->cfg->idma_des_shift);
+               pdes[i].buf_addr_ptr2 =
+                       cpu_to_le32(next_desc >>
+                                   host->cfg->idma_des_shift);
        }
 
        pdes[0].config |= cpu_to_le32(SDXC_IDMAC_DES0_FD);
index 64d2b093f114b6efdca91fc45ab9d19b7873fc7b..f73172111465501eb043fdca37e612f7655156f1 100644 (file)
@@ -377,7 +377,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
        blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
 
        if (tr->discard) {
-               blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq);
                blk_queue_max_discard_sectors(new->rq, UINT_MAX);
                new->rq->limits.discard_granularity = tr->blksize;
        }
index e7df3dac705e24715f5077a9fbf094a7ca394ea0..49ab3448b9b12deaccddd71ae52d677bc9c024ee 100644 (file)
@@ -43,6 +43,7 @@
 
 struct mtk_ecc_caps {
        u32 err_mask;
+       u32 err_shift;
        const u8 *ecc_strength;
        const u32 *ecc_regs;
        u8 num_ecc_strength;
@@ -76,7 +77,7 @@ static const u8 ecc_strength_mt2712[] = {
 };
 
 static const u8 ecc_strength_mt7622[] = {
-       4, 6, 8, 10, 12, 14, 16
+       4, 6, 8, 10, 12
 };
 
 enum mtk_ecc_regs {
@@ -221,7 +222,7 @@ void mtk_ecc_get_stats(struct mtk_ecc *ecc, struct mtk_ecc_stats *stats,
        for (i = 0; i < sectors; i++) {
                offset = (i >> 2) << 2;
                err = readl(ecc->regs + ECC_DECENUM0 + offset);
-               err = err >> ((i % 4) * 8);
+               err = err >> ((i % 4) * ecc->caps->err_shift);
                err &= ecc->caps->err_mask;
                if (err == ecc->caps->err_mask) {
                        /* uncorrectable errors */
@@ -449,6 +450,7 @@ EXPORT_SYMBOL(mtk_ecc_get_parity_bits);
 
 static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = {
        .err_mask = 0x3f,
+       .err_shift = 8,
        .ecc_strength = ecc_strength_mt2701,
        .ecc_regs = mt2701_ecc_regs,
        .num_ecc_strength = 20,
@@ -459,6 +461,7 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = {
 
 static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = {
        .err_mask = 0x7f,
+       .err_shift = 8,
        .ecc_strength = ecc_strength_mt2712,
        .ecc_regs = mt2712_ecc_regs,
        .num_ecc_strength = 23,
@@ -468,10 +471,11 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = {
 };
 
 static const struct mtk_ecc_caps mtk_ecc_caps_mt7622 = {
-       .err_mask = 0x3f,
+       .err_mask = 0x1f,
+       .err_shift = 5,
        .ecc_strength = ecc_strength_mt7622,
        .ecc_regs = mt7622_ecc_regs,
-       .num_ecc_strength = 7,
+       .num_ecc_strength = 5,
        .ecc_mode_shift = 4,
        .parity_bits = 13,
        .pg_irq_sel = 0,
index 1a77542c6d67c6e570beea4013b33a48eb81eacd..048b255faa769c123f022028209836365f7f1743 100644 (file)
@@ -2651,10 +2651,23 @@ static int qcom_nand_attach_chip(struct nand_chip *chip)
        ecc->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
 
        mtd_set_ooblayout(mtd, &qcom_nand_ooblayout_ops);
+       /* Free the initially allocated BAM transaction for reading the ONFI params */
+       if (nandc->props->is_bam)
+               free_bam_transaction(nandc);
 
        nandc->max_cwperpage = max_t(unsigned int, nandc->max_cwperpage,
                                     cwperpage);
 
+       /* Now allocate the BAM transaction based on updated max_cwperpage */
+       if (nandc->props->is_bam) {
+               nandc->bam_txn = alloc_bam_transaction(nandc);
+               if (!nandc->bam_txn) {
+                       dev_err(nandc->dev,
+                               "failed to allocate bam transaction\n");
+                       return -ENOMEM;
+               }
+       }
+
        /*
         * DATA_UD_BYTES varies based on whether the read/write command protects
         * spare data with ECC too. We protect spare data by default, so we set
@@ -2955,17 +2968,6 @@ static int qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc,
        if (ret)
                return ret;
 
-       if (nandc->props->is_bam) {
-               free_bam_transaction(nandc);
-               nandc->bam_txn = alloc_bam_transaction(nandc);
-               if (!nandc->bam_txn) {
-                       dev_err(nandc->dev,
-                               "failed to allocate bam transaction\n");
-                       nand_cleanup(chip);
-                       return -ENOMEM;
-               }
-       }
-
        ret = mtd_device_parse_register(mtd, probes, NULL, NULL, 0);
        if (ret)
                nand_cleanup(chip);
index b85b9c6fcc4249f58d36047c01175bfb1ab6f661..a278829469d610d837ccd8b59d335590108341f7 100644 (file)
@@ -384,7 +384,8 @@ static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf,
        dma_addr_t dma_addr;
        dma_cookie_t cookie;
        uint32_t reg;
-       int ret;
+       int ret = 0;
+       unsigned long time_left;
 
        if (dir == DMA_FROM_DEVICE) {
                chan = flctl->chan_fifo0_rx;
@@ -425,13 +426,14 @@ static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf,
                goto out;
        }
 
-       ret =
+       time_left =
        wait_for_completion_timeout(&flctl->dma_complete,
                                msecs_to_jiffies(3000));
 
-       if (ret <= 0) {
+       if (time_left == 0) {
                dmaengine_terminate_all(chan);
                dev_err(&flctl->pdev->dev, "wait_for_completion_timeout\n");
+               ret = -ETIMEDOUT;
        }
 
 out:
@@ -441,7 +443,7 @@ out:
 
        dma_unmap_single(chan->device->dev, dma_addr, len, dir);
 
-       /* ret > 0 is success */
+       /* ret == 0 is success */
        return ret;
 }
 
@@ -465,7 +467,7 @@ static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
 
        /* initiate DMA transfer */
        if (flctl->chan_fifo0_rx && rlen >= 32 &&
-               flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE) > 0)
+               !flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE))
                        goto convert;   /* DMA success */
 
        /* do polling transfer */
@@ -524,7 +526,7 @@ static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen,
 
        /* initiate DMA transfer */
        if (flctl->chan_fifo0_tx && rlen >= 32 &&
-               flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE) > 0)
+               !flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE))
                        return; /* DMA success */
 
        /* do polling transfer */
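
The sh_flctl rework fixes a common type confusion: wait_for_completion_timeout() returns an unsigned long (0 on timeout, otherwise the jiffies remaining), never a negative errno, so the old "ret <= 0" test and the "ret > 0 is success" convention were both wrong for it. The corrected idiom:

unsigned long time_left;
int ret = 0;

time_left = wait_for_completion_timeout(&done, msecs_to_jiffies(3000));
if (!time_left) {
        dmaengine_terminate_all(chan);
        ret = -ETIMEDOUT;       /* map the timeout onto a real errno */
}
return ret;                     /* 0 now means success */
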
index 15eddca7b4b6623ccb2b17d1d8f9a092ebd90ff5..38e152548126101b1c7a8f85014c55fed4d88de0 100644 (file)
@@ -4027,14 +4027,19 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const v
        return true;
 }
 
-static u32 bond_ip_hash(u32 hash, struct flow_keys *flow)
+static u32 bond_ip_hash(u32 hash, struct flow_keys *flow, int xmit_policy)
 {
        hash ^= (__force u32)flow_get_u32_dst(flow) ^
                (__force u32)flow_get_u32_src(flow);
        hash ^= (hash >> 16);
        hash ^= (hash >> 8);
+
        /* discard lowest hash bit to deal with the common even ports pattern */
-       return hash >> 1;
+       if (xmit_policy == BOND_XMIT_POLICY_LAYER34 ||
+               xmit_policy == BOND_XMIT_POLICY_ENCAP34)
+               return hash >> 1;
+
+       return hash;
 }
 
 /* Generate hash based on xmit policy. If @skb is given it is used to linearize
@@ -4064,7 +4069,7 @@ static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const voi
                        memcpy(&hash, &flow.ports.ports, sizeof(hash));
        }
 
-       return bond_ip_hash(hash, &flow);
+       return bond_ip_hash(hash, &flow, bond->params.xmit_policy);
 }
 
 /**
@@ -5259,7 +5264,7 @@ static u32 bond_sk_hash_l34(struct sock *sk)
        /* L4 */
        memcpy(&hash, &flow.ports.ports, sizeof(hash));
        /* L3 */
-       return bond_ip_hash(hash, &flow);
+       return bond_ip_hash(hash, &flow, BOND_XMIT_POLICY_LAYER34);
 }
 
 static struct net_device *__bond_sk_get_lower_dev(struct bonding *bond,
index d0c5a7a60dafb9416109261bcfea9782223faef2..5215bd9b2c80d04bcdbcfd98d9b9aad164eb932f 100644 (file)
@@ -241,13 +241,14 @@ struct grcan_device_config {
                .rxsize         = GRCAN_DEFAULT_BUFFER_SIZE,    \
                }
 
-#define GRCAN_TXBUG_SAFE_GRLIB_VERSION 0x4100
+#define GRCAN_TXBUG_SAFE_GRLIB_VERSION 4100
 #define GRLIB_VERSION_MASK             0xffff
 
 /* GRCAN private data structure */
 struct grcan_priv {
        struct can_priv can;    /* must be the first member */
        struct net_device *dev;
+       struct device *ofdev_dev;
        struct napi_struct napi;
 
        struct grcan_registers __iomem *regs;   /* ioremap'ed registers */
@@ -921,7 +922,7 @@ static void grcan_free_dma_buffers(struct net_device *dev)
        struct grcan_priv *priv = netdev_priv(dev);
        struct grcan_dma *dma = &priv->dma;
 
-       dma_free_coherent(&dev->dev, dma->base_size, dma->base_buf,
+       dma_free_coherent(priv->ofdev_dev, dma->base_size, dma->base_buf,
                          dma->base_handle);
        memset(dma, 0, sizeof(*dma));
 }
@@ -946,7 +947,7 @@ static int grcan_allocate_dma_buffers(struct net_device *dev,
 
        /* Extra GRCAN_BUFFER_ALIGNMENT to allow for alignment */
        dma->base_size = lsize + ssize + GRCAN_BUFFER_ALIGNMENT;
-       dma->base_buf = dma_alloc_coherent(&dev->dev,
+       dma->base_buf = dma_alloc_coherent(priv->ofdev_dev,
                                           dma->base_size,
                                           &dma->base_handle,
                                           GFP_KERNEL);
@@ -1102,8 +1103,10 @@ static int grcan_close(struct net_device *dev)
 
        priv->closing = true;
        if (priv->need_txbug_workaround) {
+               spin_unlock_irqrestore(&priv->lock, flags);
                del_timer_sync(&priv->hang_timer);
                del_timer_sync(&priv->rr_timer);
+               spin_lock_irqsave(&priv->lock, flags);
        }
        netif_stop_queue(dev);
        grcan_stop_hardware(dev);
@@ -1122,7 +1125,7 @@ static int grcan_close(struct net_device *dev)
        return 0;
 }
 
-static int grcan_transmit_catch_up(struct net_device *dev, int budget)
+static void grcan_transmit_catch_up(struct net_device *dev)
 {
        struct grcan_priv *priv = netdev_priv(dev);
        unsigned long flags;
@@ -1130,7 +1133,7 @@ static int grcan_transmit_catch_up(struct net_device *dev, int budget)
 
        spin_lock_irqsave(&priv->lock, flags);
 
-       work_done = catch_up_echo_skb(dev, budget, true);
+       work_done = catch_up_echo_skb(dev, -1, true);
        if (work_done) {
                if (!priv->resetting && !priv->closing &&
                    !(priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY))
@@ -1144,8 +1147,6 @@ static int grcan_transmit_catch_up(struct net_device *dev, int budget)
        }
 
        spin_unlock_irqrestore(&priv->lock, flags);
-
-       return work_done;
 }
 
 static int grcan_receive(struct net_device *dev, int budget)
@@ -1227,19 +1228,13 @@ static int grcan_poll(struct napi_struct *napi, int budget)
        struct net_device *dev = priv->dev;
        struct grcan_registers __iomem *regs = priv->regs;
        unsigned long flags;
-       int tx_work_done, rx_work_done;
-       int rx_budget = budget / 2;
-       int tx_budget = budget - rx_budget;
+       int work_done;
 
-       /* Half of the budget for receiving messages */
-       rx_work_done = grcan_receive(dev, rx_budget);
+       work_done = grcan_receive(dev, budget);
 
-       /* Half of the budget for transmitting messages as that can trigger echo
-        * frames being received
-        */
-       tx_work_done = grcan_transmit_catch_up(dev, tx_budget);
+       grcan_transmit_catch_up(dev);
 
-       if (rx_work_done < rx_budget && tx_work_done < tx_budget) {
+       if (work_done < budget) {
                napi_complete(napi);
 
                /* Guarantee no interference with a running reset that otherwise
@@ -1256,7 +1251,7 @@ static int grcan_poll(struct napi_struct *napi, int budget)
                spin_unlock_irqrestore(&priv->lock, flags);
        }
 
-       return rx_work_done + tx_work_done;
+       return work_done;
 }
 
 /* Work tx bug by waiting while for the risky situation to clear. If that fails,
@@ -1587,6 +1582,7 @@ static int grcan_setup_netdev(struct platform_device *ofdev,
        memcpy(&priv->config, &grcan_module_config,
               sizeof(struct grcan_device_config));
        priv->dev = dev;
+       priv->ofdev_dev = &ofdev->dev;
        priv->regs = base;
        priv->can.bittiming_const = &grcan_bittiming_const;
        priv->can.do_set_bittiming = grcan_set_bittiming;
@@ -1639,6 +1635,7 @@ exit_free_candev:
 static int grcan_probe(struct platform_device *ofdev)
 {
        struct device_node *np = ofdev->dev.of_node;
+       struct device_node *sysid_parent;
        u32 sysid, ambafreq;
        int irq, err;
        void __iomem *base;
@@ -1647,10 +1644,15 @@ static int grcan_probe(struct platform_device *ofdev)
        /* Compare GRLIB version number with the first that does not
         * have the tx bug (see start_xmit)
         */
-       err = of_property_read_u32(np, "systemid", &sysid);
-       if (!err && ((sysid & GRLIB_VERSION_MASK)
-                    >= GRCAN_TXBUG_SAFE_GRLIB_VERSION))
-               txbug = false;
+       sysid_parent = of_find_node_by_path("/ambapp0");
+       if (sysid_parent) {
+               of_node_get(sysid_parent);
+               err = of_property_read_u32(sysid_parent, "systemid", &sysid);
+               if (!err && ((sysid & GRLIB_VERSION_MASK) >=
+                            GRCAN_TXBUG_SAFE_GRLIB_VERSION))
+                       txbug = false;
+               of_node_put(sysid_parent);
+       }
 
        err = of_property_read_u32(np, "freq", &ambafreq);
        if (err) {
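
The grcan_close() hunk above applies the standard rule that del_timer_sync() must not be called while holding a lock that the timer callback itself takes: the sync wait spins until a running callback finishes, but the callback is blocked on the held lock. A minimal sketch of the safe pattern, assuming a timer whose callback takes the same spinlock (struct my_priv and hang_timer_fn are illustrative names, not from the driver):

    #include <linux/spinlock.h>
    #include <linux/timer.h>

    struct my_priv {
            spinlock_t lock;
            struct timer_list hang_timer;
    };

    static void hang_timer_fn(struct timer_list *t)
    {
            struct my_priv *p = from_timer(p, t, hang_timer);
            unsigned long flags;

            spin_lock_irqsave(&p->lock, flags);     /* callback takes the lock */
            /* ... handle the hang ... */
            spin_unlock_irqrestore(&p->lock, flags);
    }

    static void my_close(struct my_priv *p)
    {
            unsigned long flags;

            spin_lock_irqsave(&p->lock, flags);
            /* Calling del_timer_sync() here would deadlock: it waits for
             * hang_timer_fn(), which waits for p->lock, which we hold.
             */
            spin_unlock_irqrestore(&p->lock, flags);
            del_timer_sync(&p->hang_timer);         /* safe: lock dropped */
            spin_lock_irqsave(&p->lock, flags);
            /* ... continue teardown under the lock ... */
            spin_unlock_irqrestore(&p->lock, flags);
    }
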
index b3b5bc1c803b3c4fd6d0428b2372a64b82de85f0..088bb1bcf1efb6cd68867efdeeb6a37ad727be29 100644 (file)
@@ -1495,34 +1495,22 @@ static int m_can_dev_setup(struct m_can_classdev *cdev)
                err = can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO);
                if (err)
                        return err;
-               cdev->can.bittiming_const = cdev->bit_timing ?
-                       cdev->bit_timing : &m_can_bittiming_const_30X;
-
-               cdev->can.data_bittiming_const = cdev->data_timing ?
-                       cdev->data_timing :
-                       &m_can_data_bittiming_const_30X;
+               cdev->can.bittiming_const = &m_can_bittiming_const_30X;
+               cdev->can.data_bittiming_const = &m_can_data_bittiming_const_30X;
                break;
        case 31:
                /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.1.x */
                err = can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO);
                if (err)
                        return err;
-               cdev->can.bittiming_const = cdev->bit_timing ?
-                       cdev->bit_timing : &m_can_bittiming_const_31X;
-
-               cdev->can.data_bittiming_const = cdev->data_timing ?
-                       cdev->data_timing :
-                       &m_can_data_bittiming_const_31X;
+               cdev->can.bittiming_const = &m_can_bittiming_const_31X;
+               cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X;
                break;
        case 32:
        case 33:
                /* Support both MCAN version v3.2.x and v3.3.0 */
-               cdev->can.bittiming_const = cdev->bit_timing ?
-                       cdev->bit_timing : &m_can_bittiming_const_31X;
-
-               cdev->can.data_bittiming_const = cdev->data_timing ?
-                       cdev->data_timing :
-                       &m_can_data_bittiming_const_31X;
+               cdev->can.bittiming_const = &m_can_bittiming_const_31X;
+               cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X;
 
                cdev->can.ctrlmode_supported |=
                        (m_can_niso_supported(cdev) ?
index 2c5d40997168616ca1ab85d1235560c654f2dfe7..d18b515e6ccc76c33660ce9fd0bed098ebf18559 100644 (file)
@@ -85,9 +85,6 @@ struct m_can_classdev {
        struct sk_buff *tx_skb;
        struct phy *transceiver;
 
-       const struct can_bittiming_const *bit_timing;
-       const struct can_bittiming_const *data_timing;
-
        struct m_can_ops *ops;
 
        int version;
index b56a54d6c5a9c4d274ecc76b637fbaa02ff5cf84..8f184a852a0a7c7476eaa1cf231ab1e4d46a5dc0 100644 (file)
 
 #define M_CAN_PCI_MMIO_BAR             0
 
+#define M_CAN_CLOCK_FREQ_EHL           200000000
 #define CTL_CSR_INT_CTL_OFFSET         0x508
 
-struct m_can_pci_config {
-       const struct can_bittiming_const *bit_timing;
-       const struct can_bittiming_const *data_timing;
-       unsigned int clock_freq;
-};
-
 struct m_can_pci_priv {
        struct m_can_classdev cdev;
 
@@ -89,40 +84,9 @@ static struct m_can_ops m_can_pci_ops = {
        .read_fifo = iomap_read_fifo,
 };
 
-static const struct can_bittiming_const m_can_bittiming_const_ehl = {
-       .name = KBUILD_MODNAME,
-       .tseg1_min = 2,         /* Time segment 1 = prop_seg + phase_seg1 */
-       .tseg1_max = 64,
-       .tseg2_min = 1,         /* Time segment 2 = phase_seg2 */
-       .tseg2_max = 128,
-       .sjw_max = 128,
-       .brp_min = 1,
-       .brp_max = 512,
-       .brp_inc = 1,
-};
-
-static const struct can_bittiming_const m_can_data_bittiming_const_ehl = {
-       .name = KBUILD_MODNAME,
-       .tseg1_min = 2,         /* Time segment 1 = prop_seg + phase_seg1 */
-       .tseg1_max = 16,
-       .tseg2_min = 1,         /* Time segment 2 = phase_seg2 */
-       .tseg2_max = 8,
-       .sjw_max = 4,
-       .brp_min = 1,
-       .brp_max = 32,
-       .brp_inc = 1,
-};
-
-static const struct m_can_pci_config m_can_pci_ehl = {
-       .bit_timing = &m_can_bittiming_const_ehl,
-       .data_timing = &m_can_data_bittiming_const_ehl,
-       .clock_freq = 200000000,
-};
-
 static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
 {
        struct device *dev = &pci->dev;
-       const struct m_can_pci_config *cfg;
        struct m_can_classdev *mcan_class;
        struct m_can_pci_priv *priv;
        void __iomem *base;
@@ -150,8 +114,6 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
        if (!mcan_class)
                return -ENOMEM;
 
-       cfg = (const struct m_can_pci_config *)id->driver_data;
-
        priv = cdev_to_priv(mcan_class);
 
        priv->base = base;
@@ -163,9 +125,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
        mcan_class->dev = &pci->dev;
        mcan_class->net->irq = pci_irq_vector(pci, 0);
        mcan_class->pm_clock_support = 1;
-       mcan_class->bit_timing = cfg->bit_timing;
-       mcan_class->data_timing = cfg->data_timing;
-       mcan_class->can.clock.freq = cfg->clock_freq;
+       mcan_class->can.clock.freq = id->driver_data;
        mcan_class->ops = &m_can_pci_ops;
 
        pci_set_drvdata(pci, mcan_class);
@@ -218,8 +178,8 @@ static SIMPLE_DEV_PM_OPS(m_can_pci_pm_ops,
                         m_can_pci_suspend, m_can_pci_resume);
 
 static const struct pci_device_id m_can_pci_id_table[] = {
-       { PCI_VDEVICE(INTEL, 0x4bc1), (kernel_ulong_t)&m_can_pci_ehl, },
-       { PCI_VDEVICE(INTEL, 0x4bc2), (kernel_ulong_t)&m_can_pci_ehl, },
+       { PCI_VDEVICE(INTEL, 0x4bc1), M_CAN_CLOCK_FREQ_EHL, },
+       { PCI_VDEVICE(INTEL, 0x4bc2), M_CAN_CLOCK_FREQ_EHL, },
        {  }    /* Terminating Entry */
 };
 MODULE_DEVICE_TABLE(pci, m_can_pci_id_table);
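
The m_can_pci hunks replace a per-board config struct with a single scalar carried directly in the PCI ID table: driver_data is a kernel_ulong_t, so it can hold either a small integer or a cast pointer, and a driver with only one varying parameter does not need the indirection. A sketch of both conventions (names hypothetical):

    #include <linux/pci.h>

    #define MY_CLOCK_FREQ           200000000

    static const struct pci_device_id my_id_table[] = {
            /* Scalar payload: read back directly in probe. */
            { PCI_VDEVICE(INTEL, 0x4bc1), MY_CLOCK_FREQ },
            { }     /* Terminating entry */
    };

    static int my_probe(struct pci_dev *pdev, const struct pci_device_id *id)
    {
            u32 clock_freq = id->driver_data;       /* scalar convention */

            /* The pointer convention would instead store
             * (kernel_ulong_t)&my_cfg in the table and recover it with
             * (const struct my_cfg *)id->driver_data.
             */
            return clock_freq ? 0 : -EINVAL;
    }
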
index 77501f9c59159e4ad3870ba7b2cd9ef14108dc93..fbb32aa49b2412fcbfbb03c50cd1b7035df1fa07 100644 (file)
@@ -1354,46 +1354,25 @@ static void b53_phylink_get_caps(struct dsa_switch *ds, int port,
        config->legacy_pre_march2020 = false;
 }
 
-int b53_phylink_mac_link_state(struct dsa_switch *ds, int port,
-                              struct phylink_link_state *state)
+static struct phylink_pcs *b53_phylink_mac_select_pcs(struct dsa_switch *ds,
+                                                     int port,
+                                                     phy_interface_t interface)
 {
        struct b53_device *dev = ds->priv;
-       int ret = -EOPNOTSUPP;
 
-       if ((phy_interface_mode_is_8023z(state->interface) ||
-            state->interface == PHY_INTERFACE_MODE_SGMII) &&
-            dev->ops->serdes_link_state)
-               ret = dev->ops->serdes_link_state(dev, port, state);
+       if (!dev->ops->phylink_mac_select_pcs)
+               return NULL;
 
-       return ret;
+       return dev->ops->phylink_mac_select_pcs(dev, port, interface);
 }
-EXPORT_SYMBOL(b53_phylink_mac_link_state);
 
 void b53_phylink_mac_config(struct dsa_switch *ds, int port,
                            unsigned int mode,
                            const struct phylink_link_state *state)
 {
-       struct b53_device *dev = ds->priv;
-
-       if (mode == MLO_AN_PHY || mode == MLO_AN_FIXED)
-               return;
-
-       if ((phy_interface_mode_is_8023z(state->interface) ||
-            state->interface == PHY_INTERFACE_MODE_SGMII) &&
-            dev->ops->serdes_config)
-               dev->ops->serdes_config(dev, port, mode, state);
 }
 EXPORT_SYMBOL(b53_phylink_mac_config);
 
-void b53_phylink_mac_an_restart(struct dsa_switch *ds, int port)
-{
-       struct b53_device *dev = ds->priv;
-
-       if (dev->ops->serdes_an_restart)
-               dev->ops->serdes_an_restart(dev, port);
-}
-EXPORT_SYMBOL(b53_phylink_mac_an_restart);
-
 void b53_phylink_mac_link_down(struct dsa_switch *ds, int port,
                               unsigned int mode,
                               phy_interface_t interface)
@@ -2269,9 +2248,8 @@ static const struct dsa_switch_ops b53_switch_ops = {
        .phy_write              = b53_phy_write16,
        .adjust_link            = b53_adjust_link,
        .phylink_get_caps       = b53_phylink_get_caps,
-       .phylink_mac_link_state = b53_phylink_mac_link_state,
+       .phylink_mac_select_pcs = b53_phylink_mac_select_pcs,
        .phylink_mac_config     = b53_phylink_mac_config,
-       .phylink_mac_an_restart = b53_phylink_mac_an_restart,
        .phylink_mac_link_down  = b53_phylink_mac_link_down,
        .phylink_mac_link_up    = b53_phylink_mac_link_up,
        .port_enable            = b53_enable_port,
index 3085b6cc7d407e935b98a262d65d04853b9f591f..795cbffd5c2be43a468c46371fe8c2aa161ef507 100644 (file)
@@ -21,7 +21,7 @@
 
 #include <linux/kernel.h>
 #include <linux/mutex.h>
-#include <linux/phy.h>
+#include <linux/phylink.h>
 #include <linux/etherdevice.h>
 #include <net/dsa.h>
 
@@ -29,7 +29,6 @@
 
 struct b53_device;
 struct net_device;
-struct phylink_link_state;
 
 struct b53_io_ops {
        int (*read8)(struct b53_device *dev, u8 page, u8 reg, u8 *value);
@@ -48,13 +47,10 @@ struct b53_io_ops {
        void (*irq_disable)(struct b53_device *dev, int port);
        void (*phylink_get_caps)(struct b53_device *dev, int port,
                                 struct phylink_config *config);
+       struct phylink_pcs *(*phylink_mac_select_pcs)(struct b53_device *dev,
+                                                     int port,
+                                                     phy_interface_t interface);
        u8 (*serdes_map_lane)(struct b53_device *dev, int port);
-       int (*serdes_link_state)(struct b53_device *dev, int port,
-                                struct phylink_link_state *state);
-       void (*serdes_config)(struct b53_device *dev, int port,
-                             unsigned int mode,
-                             const struct phylink_link_state *state);
-       void (*serdes_an_restart)(struct b53_device *dev, int port);
        void (*serdes_link_set)(struct b53_device *dev, int port,
                                unsigned int mode, phy_interface_t interface,
                                bool link_up);
@@ -85,8 +81,15 @@ enum {
        BCM7278_DEVICE_ID = 0x7278,
 };
 
+struct b53_pcs {
+       struct phylink_pcs pcs;
+       struct b53_device *dev;
+       u8 lane;
+};
+
 #define B53_N_PORTS    9
 #define B53_N_PORTS_25 6
+#define B53_N_PCS      2
 
 struct b53_port {
        u16             vlan_ctl_mask;
@@ -143,6 +146,8 @@ struct b53_device {
        bool vlan_enabled;
        unsigned int num_ports;
        struct b53_port *ports;
+
+       struct b53_pcs pcs[B53_N_PCS];
 };
 
 #define b53_for_each_port(dev, i) \
@@ -336,12 +341,9 @@ int b53_br_flags(struct dsa_switch *ds, int port,
                 struct netlink_ext_ack *extack);
 int b53_setup_devlink_resources(struct dsa_switch *ds);
 void b53_port_event(struct dsa_switch *ds, int port);
-int b53_phylink_mac_link_state(struct dsa_switch *ds, int port,
-                              struct phylink_link_state *state);
 void b53_phylink_mac_config(struct dsa_switch *ds, int port,
                            unsigned int mode,
                            const struct phylink_link_state *state);
-void b53_phylink_mac_an_restart(struct dsa_switch *ds, int port);
 void b53_phylink_mac_link_down(struct dsa_switch *ds, int port,
                               unsigned int mode,
                               phy_interface_t interface);
index 555e5b3723215d5d69ef9a975f699b908250ad3f..0690210770ffec1d26a4366236c6d1bf50636e1e 100644 (file)
 #include "b53_serdes.h"
 #include "b53_regs.h"
 
+static inline struct b53_pcs *pcs_to_b53_pcs(struct phylink_pcs *pcs)
+{
+       return container_of(pcs, struct b53_pcs, pcs);
+}
+
 static void b53_serdes_write_blk(struct b53_device *dev, u8 offset, u16 block,
                                 u16 value)
 {
@@ -60,51 +65,47 @@ static u16 b53_serdes_read(struct b53_device *dev, u8 lane,
        return b53_serdes_read_blk(dev, offset, block);
 }
 
-void b53_serdes_config(struct b53_device *dev, int port, unsigned int mode,
-                      const struct phylink_link_state *state)
+static int b53_serdes_config(struct phylink_pcs *pcs, unsigned int mode,
+                            phy_interface_t interface,
+                            const unsigned long *advertising,
+                            bool permit_pause_to_mac)
 {
-       u8 lane = b53_serdes_map_lane(dev, port);
+       struct b53_device *dev = pcs_to_b53_pcs(pcs)->dev;
+       u8 lane = pcs_to_b53_pcs(pcs)->lane;
        u16 reg;
 
-       if (lane == B53_INVALID_LANE)
-               return;
-
        reg = b53_serdes_read(dev, lane, B53_SERDES_DIGITAL_CONTROL(1),
                              SERDES_DIGITAL_BLK);
-       if (state->interface == PHY_INTERFACE_MODE_1000BASEX)
+       if (interface == PHY_INTERFACE_MODE_1000BASEX)
                reg |= FIBER_MODE_1000X;
        else
                reg &= ~FIBER_MODE_1000X;
        b53_serdes_write(dev, lane, B53_SERDES_DIGITAL_CONTROL(1),
                         SERDES_DIGITAL_BLK, reg);
+
+       return 0;
 }
-EXPORT_SYMBOL(b53_serdes_config);
 
-void b53_serdes_an_restart(struct b53_device *dev, int port)
+static void b53_serdes_an_restart(struct phylink_pcs *pcs)
 {
-       u8 lane = b53_serdes_map_lane(dev, port);
+       struct b53_device *dev = pcs_to_b53_pcs(pcs)->dev;
+       u8 lane = pcs_to_b53_pcs(pcs)->lane;
        u16 reg;
 
-       if (lane == B53_INVALID_LANE)
-               return;
-
        reg = b53_serdes_read(dev, lane, B53_SERDES_MII_REG(MII_BMCR),
                              SERDES_MII_BLK);
        reg |= BMCR_ANRESTART;
        b53_serdes_write(dev, lane, B53_SERDES_MII_REG(MII_BMCR),
                         SERDES_MII_BLK, reg);
 }
-EXPORT_SYMBOL(b53_serdes_an_restart);
 
-int b53_serdes_link_state(struct b53_device *dev, int port,
-                         struct phylink_link_state *state)
+static void b53_serdes_get_state(struct phylink_pcs *pcs,
+                                 struct phylink_link_state *state)
 {
-       u8 lane = b53_serdes_map_lane(dev, port);
+       struct b53_device *dev = pcs_to_b53_pcs(pcs)->dev;
+       u8 lane = pcs_to_b53_pcs(pcs)->lane;
        u16 dig, bmsr;
 
-       if (lane == B53_INVALID_LANE)
-               return 1;
-
        dig = b53_serdes_read(dev, lane, B53_SERDES_DIGITAL_STATUS,
                              SERDES_DIGITAL_BLK);
        bmsr = b53_serdes_read(dev, lane, B53_SERDES_MII_REG(MII_BMSR),
@@ -133,10 +134,7 @@ int b53_serdes_link_state(struct b53_device *dev, int port,
                state->pause |= MLO_PAUSE_RX;
        if (dig & PAUSE_RESOLUTION_TX_SIDE)
                state->pause |= MLO_PAUSE_TX;
-
-       return 0;
 }
-EXPORT_SYMBOL(b53_serdes_link_state);
 
 void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode,
                         phy_interface_t interface, bool link_up)
@@ -158,6 +156,12 @@ void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode,
 }
 EXPORT_SYMBOL(b53_serdes_link_set);
 
+static const struct phylink_pcs_ops b53_pcs_ops = {
+       .pcs_get_state = b53_serdes_get_state,
+       .pcs_config = b53_serdes_config,
+       .pcs_an_restart = b53_serdes_an_restart,
+};
+
 void b53_serdes_phylink_get_caps(struct b53_device *dev, int port,
                                 struct phylink_config *config)
 {
@@ -187,9 +191,28 @@ void b53_serdes_phylink_get_caps(struct b53_device *dev, int port,
 }
 EXPORT_SYMBOL(b53_serdes_phylink_get_caps);
 
+struct phylink_pcs *b53_serdes_phylink_mac_select_pcs(struct b53_device *dev,
+                                                     int port,
+                                                     phy_interface_t interface)
+{
+       u8 lane = b53_serdes_map_lane(dev, port);
+
+       if (lane == B53_INVALID_LANE || lane >= B53_N_PCS ||
+           !dev->pcs[lane].dev)
+               return NULL;
+
+       if (!phy_interface_mode_is_8023z(interface) &&
+           interface != PHY_INTERFACE_MODE_SGMII)
+               return NULL;
+
+       return &dev->pcs[lane].pcs;
+}
+EXPORT_SYMBOL(b53_serdes_phylink_mac_select_pcs);
+
 int b53_serdes_init(struct b53_device *dev, int port)
 {
        u8 lane = b53_serdes_map_lane(dev, port);
+       struct b53_pcs *pcs;
        u16 id0, msb, lsb;
 
        if (lane == B53_INVALID_LANE)
@@ -212,6 +235,11 @@ int b53_serdes_init(struct b53_device *dev, int port)
                 (id0 >> SERDES_ID0_REV_NUM_SHIFT) & SERDES_ID0_REV_NUM_MASK,
                 (u32)msb << 16 | lsb);
 
+       pcs = &dev->pcs[lane];
+       pcs->dev = dev;
+       pcs->lane = lane;
+       pcs->pcs.ops = &b53_pcs_ops;
+
        return 0;
 }
 EXPORT_SYMBOL(b53_serdes_init);
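
The b53 PCS conversion relies on the kernel's standard embedding idiom: the generic struct phylink_pcs is placed inside a driver-private wrapper, and each pcs_ops callback recovers the wrapper with container_of(). A distilled sketch of the idiom (my_pcs and my_dev are illustrative names):

    #include <linux/phylink.h>

    struct my_dev;  /* driver-private device state */

    struct my_pcs {
            struct phylink_pcs pcs; /* embedded generic object */
            struct my_dev *dev;
            u8 lane;
    };

    static struct my_pcs *pcs_to_my_pcs(struct phylink_pcs *pcs)
    {
            /* Recover the wrapper from a pointer to its embedded member;
             * no extra back-pointer needs to be stored.
             */
            return container_of(pcs, struct my_pcs, pcs);
    }

    static void my_pcs_get_state(struct phylink_pcs *pcs,
                                 struct phylink_link_state *state)
    {
            struct my_pcs *mp = pcs_to_my_pcs(pcs);

            /* ... read hardware via mp->dev / mp->lane and fill *state ... */
    }
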
index f47d5caa75576a8d2f10328d477668fdf2a34411..ef81f5da5f81fec0007b3b2987fa6c2786844043 100644 (file)
@@ -107,14 +107,11 @@ static inline u8 b53_serdes_map_lane(struct b53_device *dev, int port)
        return dev->ops->serdes_map_lane(dev, port);
 }
 
-int b53_serdes_get_link(struct b53_device *dev, int port);
-int b53_serdes_link_state(struct b53_device *dev, int port,
-                         struct phylink_link_state *state);
-void b53_serdes_config(struct b53_device *dev, int port, unsigned int mode,
-                      const struct phylink_link_state *state);
-void b53_serdes_an_restart(struct b53_device *dev, int port);
 void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode,
                         phy_interface_t interface, bool link_up);
+struct phylink_pcs *b53_serdes_phylink_mac_select_pcs(struct b53_device *dev,
+                                                     int port,
+                                                     phy_interface_t interface);
 void b53_serdes_phylink_get_caps(struct b53_device *dev, int port,
                                 struct phylink_config *config);
 #if IS_ENABLED(CONFIG_B53_SERDES)
index c51b716657db3b75cdd8f31cec76cc5bfd20a761..da0b889880f6af57dd4ed4189e06efb68ee003b5 100644 (file)
@@ -491,10 +491,8 @@ static const struct b53_io_ops b53_srab_ops = {
        .irq_disable = b53_srab_irq_disable,
        .phylink_get_caps = b53_srab_phylink_get_caps,
 #if IS_ENABLED(CONFIG_B53_SERDES)
+       .phylink_mac_select_pcs = b53_serdes_phylink_mac_select_pcs,
        .serdes_map_lane = b53_srab_serdes_map_lane,
-       .serdes_link_state = b53_serdes_link_state,
-       .serdes_config = b53_serdes_config,
-       .serdes_an_restart = b53_serdes_an_restart,
        .serdes_link_set = b53_serdes_link_set,
 #endif
 };
index cf82b1fa972529494eaaa525140eb85e766d983a..87e81c636339f9720260807521f917ff57f74fb9 100644 (file)
@@ -809,6 +809,9 @@ static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port,
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        u32 reg, offset;
 
+       if (priv->wol_ports_mask & BIT(port))
+               return;
+
        if (port != core_readl(priv, CORE_IMP0_PRT_ID)) {
                if (priv->type == BCM4908_DEVICE_ID ||
                    priv->type == BCM7445_DEVICE_ID)
index a416240d001b71a554c1e1636ca764fa5cff1617..12c15da55664b9de63e2bef6e46477700289f0da 100644 (file)
@@ -1681,9 +1681,6 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port,
                break;
        case PHY_INTERFACE_MODE_RMII:
                miicfg |= GSWIP_MII_CFG_MODE_RMIIM;
-
-               /* Configure the RMII clock as output: */
-               miicfg |= GSWIP_MII_CFG_RMII_CLK;
                break;
        case PHY_INTERFACE_MODE_RGMII:
        case PHY_INTERFACE_MODE_RGMII_ID:
index 8222c8a6c5ec56969a7edd951a46215d0e0220f8..7310d19d1f06473b0157e76d43cf89b6c497eab0 100644 (file)
@@ -1021,14 +1021,32 @@ static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port,
                                   bool ingress, struct netlink_ext_ack *extack)
 {
        struct ksz_device *dev = ds->priv;
+       u8 data;
+       int p;
+
+       /* Limit to one sniffer port
+        * Check if any of the ports is already set for sniffing
+        * If yes, instruct the user to remove the previous entry & exit
+        */
+       for (p = 0; p < dev->port_cnt; p++) {
+               /* Skip the current sniffing port */
+               if (p == mirror->to_local_port)
+                       continue;
+
+               ksz_pread8(dev, p, P_MIRROR_CTRL, &data);
+
+               if (data & PORT_MIRROR_SNIFFER) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Sniffer port is already configured, delete existing rules & retry");
+                       return -EBUSY;
+               }
+       }
 
        if (ingress)
                ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, true);
        else
                ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, true);
 
-       ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_SNIFFER, false);
-
        /* configure mirror port */
        ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL,
                     PORT_MIRROR_SNIFFER, true);
@@ -1042,16 +1060,28 @@ static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port,
                                    struct dsa_mall_mirror_tc_entry *mirror)
 {
        struct ksz_device *dev = ds->priv;
+       bool in_use = false;
        u8 data;
+       int p;
 
        if (mirror->ingress)
                ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, false);
        else
                ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, false);
 
-       ksz_pread8(dev, port, P_MIRROR_CTRL, &data);
 
-       if (!(data & (PORT_MIRROR_RX | PORT_MIRROR_TX)))
+       /* Check if any of the ports is still referring to the sniffer port */
+       for (p = 0; p < dev->port_cnt; p++) {
+               ksz_pread8(dev, p, P_MIRROR_CTRL, &data);
+
+               if ((data & (PORT_MIRROR_RX | PORT_MIRROR_TX))) {
+                       in_use = true;
+                       break;
+               }
+       }
+
+       /* Delete sniffing if there are no other mirroring rules */
+       if (!in_use)
                ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL,
                             PORT_MIRROR_SNIFFER, false);
 }
index 19f0035d4410ff109021680cc32d44af25d2aad3..fe3cb26f4287ee039ba66f1054d41df7790b742a 100644 (file)
@@ -2229,6 +2229,7 @@ mt7530_setup(struct dsa_switch *ds)
                                ret = of_get_phy_mode(mac_np, &interface);
                                if (ret && ret != -ENODEV) {
                                        of_node_put(mac_np);
+                                       of_node_put(phy_node);
                                        return ret;
                                }
                                id = of_mdio_parse_addr(ds->dev, phy_node);
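
The one-line mt7530 fix is the usual OF reference-counting rule: a node returned by of_parse_phandle() (or any of_find_*() lookup) carries an elevated refcount, and every exit path, including early error returns, must drop it with of_node_put(). A minimal sketch, with a hypothetical helper:

    #include <linux/of.h>

    int do_something(struct device_node *np);   /* hypothetical */

    static int parse_phy(struct device_node *parent)
    {
            struct device_node *phy_node;
            int err;

            phy_node = of_parse_phandle(parent, "phy-handle", 0);
            if (!phy_node)
                    return -ENODEV;

            err = do_something(phy_node);
            if (err) {
                    of_node_put(phy_node);  /* drop the ref on errors too */
                    return err;
            }

            of_node_put(phy_node);
            return 0;
    }
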
index b49d05f0e11795718f2f4daf7810f54d218a5c0d..7a9f9ff6dedf38ca56d94159a42be1d20e99a2bf 100644 (file)
@@ -40,8 +40,9 @@ int mv88e6xxx_port_hidden_wait(struct mv88e6xxx_chip *chip)
 {
        int bit = __bf_shf(MV88E6XXX_PORT_RESERVED_1A_BUSY);
 
-       return mv88e6xxx_wait_bit(chip, MV88E6XXX_PORT_RESERVED_1A_CTRL_PORT,
-                                 MV88E6XXX_PORT_RESERVED_1A, bit, 0);
+       return mv88e6xxx_port_wait_bit(chip,
+                                      MV88E6XXX_PORT_RESERVED_1A_CTRL_PORT,
+                                      MV88E6XXX_PORT_RESERVED_1A, bit, 0);
 }
 
 int mv88e6xxx_port_hidden_read(struct mv88e6xxx_chip *chip, int block, int port,
index 413b0006e9a211fd8cf37f76f966c67545b47224..faccfb3f015836e76ba787e68a557eff3d22c834 100644 (file)
@@ -403,6 +403,7 @@ static int felix_update_trapping_destinations(struct dsa_switch *ds,
 {
        struct ocelot *ocelot = ds->priv;
        struct felix *felix = ocelot_to_felix(ocelot);
+       struct ocelot_vcap_block *block_vcap_is2;
        struct ocelot_vcap_filter *trap;
        enum ocelot_mask_mode mask_mode;
        unsigned long port_mask;
@@ -422,9 +423,13 @@ static int felix_update_trapping_destinations(struct dsa_switch *ds,
        /* We are sure that "cpu" was found, otherwise
         * dsa_tree_setup_default_cpu() would have failed earlier.
         */
+       block_vcap_is2 = &ocelot->block[VCAP_IS2];
 
        /* Make sure all traps are set up for that destination */
-       list_for_each_entry(trap, &ocelot->traps, trap_list) {
+       list_for_each_entry(trap, &block_vcap_is2->rules, list) {
+               if (!trap->is_trap)
+                       continue;
+
                /* Figure out the current trapping destination */
                if (using_tag_8021q) {
                        /* Redirect to the tag_8021q CPU port. If timestamps
@@ -670,6 +675,8 @@ static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu,
        struct ocelot *ocelot = ds->priv;
        struct felix *felix = ocelot_to_felix(ocelot);
        enum dsa_tag_protocol old_proto = felix->tag_proto;
+       bool cpu_port_active = false;
+       struct dsa_port *dp;
        int err;
 
        if (proto != DSA_TAG_PROTO_SEVILLE &&
@@ -677,6 +684,27 @@ static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu,
            proto != DSA_TAG_PROTO_OCELOT_8021Q)
                return -EPROTONOSUPPORT;
 
+       /* We don't support multiple CPU ports, yet the DT blob may have
+        * multiple CPU ports defined. The first CPU port is the active one,
+        * the others are inactive. In this case, DSA will call
+        * ->change_tag_protocol() multiple times, once per CPU port.
+        * Since we implement the tagging protocol change towards "ocelot" or
+        * "seville" as effectively initializing the NPI port, honoring every
+        * call would keep moving the NPI port to the last @cpu argument
+        * passed, which is an unused DSA CPU port and not the one that
+        * should actively pass traffic.
+        * Suppress DSA's calls on CPU ports that are inactive.
+        */
+       dsa_switch_for_each_user_port(dp, ds) {
+               if (dp->cpu_dp->index == cpu) {
+                       cpu_port_active = true;
+                       break;
+               }
+       }
+
+       if (!cpu_port_active)
+               return 0;
+
        felix_del_tag_protocol(ds, cpu, old_proto);
 
        err = felix_set_tag_protocol(ds, cpu, proto);
index 8d382b27e625737a9e2f75d1805755bef5319b71..52a8566071eddd8e63d79ce20febf879537a88b9 100644 (file)
@@ -2316,7 +2316,7 @@ static int felix_pci_probe(struct pci_dev *pdev,
 
        err = dsa_register_switch(ds);
        if (err) {
-               dev_err(&pdev->dev, "Failed to register DSA switch: %d\n", err);
+               dev_err_probe(&pdev->dev, err, "Failed to register DSA switch\n");
                goto err_register_ds;
        }
 
index 1aa79735355f13030328377f0f92369bff70f2e4..060165a85fb7d74015100b38975aea84afd60fe4 100644 (file)
@@ -9,34 +9,46 @@ menuconfig NET_DSA_REALTEK
        help
          Select to enable support for Realtek Ethernet switch chips.
 
+         Note that at least one interface driver must be enabled for the
+         subdrivers to be loaded. Moreover, an interface driver cannot achieve
+         anything without at least one subdriver enabled.
+
+if NET_DSA_REALTEK
+
 config NET_DSA_REALTEK_MDIO
-       tristate "Realtek MDIO connected switch driver"
-       depends on NET_DSA_REALTEK
+       tristate "Realtek MDIO interface driver"
        depends on OF
+       depends on NET_DSA_REALTEK_RTL8365MB || NET_DSA_REALTEK_RTL8366RB
+       depends on NET_DSA_REALTEK_RTL8365MB || !NET_DSA_REALTEK_RTL8365MB
+       depends on NET_DSA_REALTEK_RTL8366RB || !NET_DSA_REALTEK_RTL8366RB
        help
          Select to enable support for registering switches configured
          through MDIO.
 
 config NET_DSA_REALTEK_SMI
-       tristate "Realtek SMI connected switch driver"
-       depends on NET_DSA_REALTEK
+       tristate "Realtek SMI interface driver"
        depends on OF
+       depends on NET_DSA_REALTEK_RTL8365MB || NET_DSA_REALTEK_RTL8366RB
+       depends on NET_DSA_REALTEK_RTL8365MB || !NET_DSA_REALTEK_RTL8365MB
+       depends on NET_DSA_REALTEK_RTL8366RB || !NET_DSA_REALTEK_RTL8366RB
        help
          Select to enable support for registering switches connected
          through SMI.
 
 config NET_DSA_REALTEK_RTL8365MB
        tristate "Realtek RTL8365MB switch subdriver"
-       depends on NET_DSA_REALTEK
-       depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO
+       imply NET_DSA_REALTEK_SMI
+       imply NET_DSA_REALTEK_MDIO
        select NET_DSA_TAG_RTL8_4
        help
          Select to enable support for Realtek RTL8365MB-VC and RTL8367S.
 
 config NET_DSA_REALTEK_RTL8366RB
        tristate "Realtek RTL8366RB switch subdriver"
-       depends on NET_DSA_REALTEK
-       depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO
+       imply NET_DSA_REALTEK_SMI
+       imply NET_DSA_REALTEK_MDIO
        select NET_DSA_TAG_RTL4_A
        help
-         Select to enable support for Realtek RTL8366RB
+         Select to enable support for Realtek RTL8366RB.
+
+endif
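
The repeated `depends on FOO || !FOO` lines above look tautological but are a deliberate Kconfig idiom for tristate symbols: when FOO=m the expression evaluates to m, capping the interface driver at m as well, so a built-in (=y) interface driver can never reference symbols exported by a modular subdriver; when FOO is y or n the expression is y and imposes no restriction. Together with the first `depends on` line (at least one subdriver selected) and the `imply` hints on the subdrivers, this keeps every buildable combination link-consistent.
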
index 31e1f100e48e98887ca2c4d2f333e014ec73829f..c58f49d558d2415ab69c0a77b43c10a15b57eca9 100644 (file)
@@ -267,7 +267,6 @@ static const struct of_device_id realtek_mdio_of_match[] = {
 #endif
 #if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB)
        { .compatible = "realtek,rtl8365mb", .data = &rtl8365mb_variant, },
-       { .compatible = "realtek,rtl8367s", .data = &rtl8365mb_variant, },
 #endif
        { /* sentinel */ },
 };
index 2243d3da55b29c002443f38e2f38b8111a3cb74e..45992f79ec8d48da869218970ee96c4bf9546be7 100644 (file)
@@ -546,20 +546,11 @@ static const struct of_device_id realtek_smi_of_match[] = {
                .data = &rtl8366rb_variant,
        },
 #endif
-       {
-               /* FIXME: add support for RTL8366S and more */
-               .compatible = "realtek,rtl8366s",
-               .data = NULL,
-       },
 #if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB)
        {
                .compatible = "realtek,rtl8365mb",
                .data = &rtl8365mb_variant,
        },
-       {
-               .compatible = "realtek,rtl8367s",
-               .data = &rtl8365mb_variant,
-       },
 #endif
        { /* sentinel */ },
 };
index bd4cb9d7c35d45eda668f9fb32c8770ab12f1923..827993022386c785dcc0bfbdc3b06bf4e27a69fa 100644 (file)
@@ -35,15 +35,6 @@ source "drivers/net/ethernet/aquantia/Kconfig"
 source "drivers/net/ethernet/arc/Kconfig"
 source "drivers/net/ethernet/asix/Kconfig"
 source "drivers/net/ethernet/atheros/Kconfig"
-source "drivers/net/ethernet/broadcom/Kconfig"
-source "drivers/net/ethernet/brocade/Kconfig"
-source "drivers/net/ethernet/cadence/Kconfig"
-source "drivers/net/ethernet/calxeda/Kconfig"
-source "drivers/net/ethernet/cavium/Kconfig"
-source "drivers/net/ethernet/chelsio/Kconfig"
-source "drivers/net/ethernet/cirrus/Kconfig"
-source "drivers/net/ethernet/cisco/Kconfig"
-source "drivers/net/ethernet/cortina/Kconfig"
 
 config CX_ECAT
        tristate "Beckhoff CX5020 EtherCAT master support"
@@ -57,6 +48,14 @@ config CX_ECAT
          To compile this driver as a module, choose M here. The module
          will be called ec_bhf.
 
+source "drivers/net/ethernet/broadcom/Kconfig"
+source "drivers/net/ethernet/cadence/Kconfig"
+source "drivers/net/ethernet/calxeda/Kconfig"
+source "drivers/net/ethernet/cavium/Kconfig"
+source "drivers/net/ethernet/chelsio/Kconfig"
+source "drivers/net/ethernet/cirrus/Kconfig"
+source "drivers/net/ethernet/cisco/Kconfig"
+source "drivers/net/ethernet/cortina/Kconfig"
 source "drivers/net/ethernet/davicom/Kconfig"
 
 config DNET
@@ -85,7 +84,6 @@ source "drivers/net/ethernet/huawei/Kconfig"
 source "drivers/net/ethernet/i825xx/Kconfig"
 source "drivers/net/ethernet/ibm/Kconfig"
 source "drivers/net/ethernet/intel/Kconfig"
-source "drivers/net/ethernet/microsoft/Kconfig"
 source "drivers/net/ethernet/xscale/Kconfig"
 
 config JME
@@ -128,8 +126,9 @@ source "drivers/net/ethernet/mediatek/Kconfig"
 source "drivers/net/ethernet/mellanox/Kconfig"
 source "drivers/net/ethernet/micrel/Kconfig"
 source "drivers/net/ethernet/microchip/Kconfig"
-source "drivers/net/ethernet/moxa/Kconfig"
 source "drivers/net/ethernet/mscc/Kconfig"
+source "drivers/net/ethernet/microsoft/Kconfig"
+source "drivers/net/ethernet/moxa/Kconfig"
 source "drivers/net/ethernet/myricom/Kconfig"
 
 config FEALNX
@@ -141,10 +140,10 @@ config FEALNX
          Say Y here to support the Myson MTD-800 family of PCI-based Ethernet
          cards. <http://www.myson.com.tw/>
 
+source "drivers/net/ethernet/ni/Kconfig"
 source "drivers/net/ethernet/natsemi/Kconfig"
 source "drivers/net/ethernet/neterion/Kconfig"
 source "drivers/net/ethernet/netronome/Kconfig"
-source "drivers/net/ethernet/ni/Kconfig"
 source "drivers/net/ethernet/8390/Kconfig"
 source "drivers/net/ethernet/nvidia/Kconfig"
 source "drivers/net/ethernet/nxp/Kconfig"
@@ -164,6 +163,7 @@ source "drivers/net/ethernet/packetengines/Kconfig"
 source "drivers/net/ethernet/pasemi/Kconfig"
 source "drivers/net/ethernet/pensando/Kconfig"
 source "drivers/net/ethernet/qlogic/Kconfig"
+source "drivers/net/ethernet/brocade/Kconfig"
 source "drivers/net/ethernet/qualcomm/Kconfig"
 source "drivers/net/ethernet/rdc/Kconfig"
 source "drivers/net/ethernet/realtek/Kconfig"
@@ -171,10 +171,10 @@ source "drivers/net/ethernet/renesas/Kconfig"
 source "drivers/net/ethernet/rocker/Kconfig"
 source "drivers/net/ethernet/samsung/Kconfig"
 source "drivers/net/ethernet/seeq/Kconfig"
-source "drivers/net/ethernet/sfc/Kconfig"
 source "drivers/net/ethernet/sgi/Kconfig"
 source "drivers/net/ethernet/silan/Kconfig"
 source "drivers/net/ethernet/sis/Kconfig"
+source "drivers/net/ethernet/sfc/Kconfig"
 source "drivers/net/ethernet/smsc/Kconfig"
 source "drivers/net/ethernet/socionext/Kconfig"
 source "drivers/net/ethernet/stmicro/Kconfig"
index 33f1a1377588bda47db7a76c94421a3f84d3ac4f..24d715c28a355793e6fcb0b8c94877c887b2d1ca 100644 (file)
@@ -486,8 +486,8 @@ int aq_nic_start(struct aq_nic_s *self)
        if (err < 0)
                goto err_exit;
 
-       for (i = 0U, aq_vec = self->aq_vec[0];
-               self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) {
+       for (i = 0U; self->aq_vecs > i; ++i) {
+               aq_vec = self->aq_vec[i];
                err = aq_vec_start(aq_vec);
                if (err < 0)
                        goto err_exit;
@@ -517,8 +517,8 @@ int aq_nic_start(struct aq_nic_s *self)
                mod_timer(&self->polling_timer, jiffies +
                          AQ_CFG_POLLING_TIMER_INTERVAL);
        } else {
-               for (i = 0U, aq_vec = self->aq_vec[0];
-                       self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) {
+               for (i = 0U; self->aq_vecs > i; ++i) {
+                       aq_vec = self->aq_vec[i];
                        err = aq_pci_func_alloc_irq(self, i, self->ndev->name,
                                                    aq_vec_isr, aq_vec,
                                                    aq_vec_get_affinity_mask(aq_vec));
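
These aquantia loop rewrites (here and in aq_vec.c below) fix a subtle out-of-bounds read: in the old shape `for (i = 0U, v = a[0]; n > i; ++i, v = a[i])`, the increment clause loads a[i] one final time with i == n, one past the last valid vector, before the condition stops the loop. Hoisting the load into the body means only indexes below n are ever dereferenced. A self-contained plain-C illustration (array and names hypothetical):

    #include <stdio.h>

    int main(void)
    {
            int a[4] = { 1, 2, 3, 4 };
            unsigned int n = 4, i;
            int v;

            /* Buggy shape: the increment clause "v = a[i]" runs once more
             * with i == 4 and reads a[4], one past the end:
             *
             *   for (i = 0, v = a[0]; n > i; ++i, v = a[i])
             *           printf("%d\n", v);
             */

            /* Fixed shape: the load happens only while i < n. */
            for (i = 0; n > i; ++i) {
                    v = a[i];
                    printf("%d\n", v);
            }
            return 0;
    }
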
index 797a95142d1f44dbc0ed454e2daf955ec8fc90ea..831833911a52562a5bbb4f14020b9ad2d5c8f1ea 100644 (file)
@@ -444,7 +444,7 @@ err_exit:
 
 static int aq_pm_freeze(struct device *dev)
 {
-       return aq_suspend_common(dev, false);
+       return aq_suspend_common(dev, true);
 }
 
 static int aq_pm_suspend_poweroff(struct device *dev)
@@ -454,7 +454,7 @@ static int aq_pm_suspend_poweroff(struct device *dev)
 
 static int aq_pm_thaw(struct device *dev)
 {
-       return atl_resume_common(dev, false);
+       return atl_resume_common(dev, true);
 }
 
 static int aq_pm_resume_restore(struct device *dev)
index 77e76c9efd32f70fa509ad9f2f4a3f7fe8acc1b0..8201ce7adb7777eea0390f615fe7b26b7ed06c3c 100644 (file)
@@ -346,7 +346,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                     int budget)
 {
        struct net_device *ndev = aq_nic_get_ndev(self->aq_nic);
-       bool is_rsc_completed = true;
        int err = 0;
 
        for (; (self->sw_head != self->hw_head) && budget;
@@ -364,12 +363,17 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                        continue;
 
                if (!buff->is_eop) {
+                       unsigned int frag_cnt = 0U;
                        buff_ = buff;
                        do {
+                               bool is_rsc_completed = true;
+
                                if (buff_->next >= self->size) {
                                        err = -EIO;
                                        goto err_exit;
                                }
+
+                               frag_cnt++;
                                next_ = buff_->next,
                                buff_ = &self->buff_ring[next_];
                                is_rsc_completed =
@@ -377,18 +381,17 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                                                            next_,
                                                            self->hw_head);
 
-                               if (unlikely(!is_rsc_completed))
-                                       break;
+                               if (unlikely(!is_rsc_completed) ||
+                                               frag_cnt > MAX_SKB_FRAGS) {
+                                       err = 0;
+                                       goto err_exit;
+                               }
 
                                buff->is_error |= buff_->is_error;
                                buff->is_cso_err |= buff_->is_cso_err;
 
                        } while (!buff_->is_eop);
 
-                       if (!is_rsc_completed) {
-                               err = 0;
-                               goto err_exit;
-                       }
                        if (buff->is_error ||
                            (buff->is_lro && buff->is_cso_err)) {
                                buff_ = buff;
@@ -446,7 +449,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                       ALIGN(hdr_len, sizeof(long)));
 
                if (buff->len - hdr_len > 0) {
-                       skb_add_rx_frag(skb, 0, buff->rxdata.page,
+                       skb_add_rx_frag(skb, i++, buff->rxdata.page,
                                        buff->rxdata.pg_off + hdr_len,
                                        buff->len - hdr_len,
                                        AQ_CFG_RX_FRAME_MAX);
@@ -455,7 +458,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
 
                if (!buff->is_eop) {
                        buff_ = buff;
-                       i = 1U;
                        do {
                                next_ = buff_->next;
                                buff_ = &self->buff_ring[next_];
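
The reworked RSC loop above bounds the descriptor-chain walk: besides checking hardware completion per descriptor, it counts fragments and drops the packet once the chain would exceed MAX_SKB_FRAGS, since the later skb_add_rx_frag() calls (note the new `i++`) must never outrun the skb fragment array. The guard reduced to its essentials (types hypothetical; 17 stands in for MAX_SKB_FRAGS, whose value is config-dependent):

    #define MAX_FRAGS 17        /* stand-in for MAX_SKB_FRAGS */

    struct desc { const struct desc *next; int is_eop; };

    /* Returns the fragment count, or -1 if the chain is longer than an
     * skb could hold and the packet must be dropped.
     */
    static int walk_chain(const struct desc *d)
    {
            int frag_cnt = 0;

            while (!d->is_eop) {
                    if (++frag_cnt > MAX_FRAGS)
                            return -1;
                    d = d->next;
            }
            return frag_cnt;
    }
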
index f4774cf051c9780cfc434450673bb82d175e2736..6ab1f3212d2463a53ac35fc41f1aa3706d376066 100644 (file)
@@ -43,8 +43,8 @@ static int aq_vec_poll(struct napi_struct *napi, int budget)
        if (!self) {
                err = -EINVAL;
        } else {
-               for (i = 0U, ring = self->ring[0];
-                       self->tx_rings > i; ++i, ring = self->ring[i]) {
+               for (i = 0U; self->tx_rings > i; ++i) {
+                       ring = self->ring[i];
                        u64_stats_update_begin(&ring[AQ_VEC_RX_ID].stats.rx.syncp);
                        ring[AQ_VEC_RX_ID].stats.rx.polls++;
                        u64_stats_update_end(&ring[AQ_VEC_RX_ID].stats.rx.syncp);
@@ -182,8 +182,8 @@ int aq_vec_init(struct aq_vec_s *self, const struct aq_hw_ops *aq_hw_ops,
        self->aq_hw_ops = aq_hw_ops;
        self->aq_hw = aq_hw;
 
-       for (i = 0U, ring = self->ring[0];
-               self->tx_rings > i; ++i, ring = self->ring[i]) {
+       for (i = 0U; self->tx_rings > i; ++i) {
+               ring = self->ring[i];
                err = aq_ring_init(&ring[AQ_VEC_TX_ID], ATL_RING_TX);
                if (err < 0)
                        goto err_exit;
@@ -224,8 +224,8 @@ int aq_vec_start(struct aq_vec_s *self)
        unsigned int i = 0U;
        int err = 0;
 
-       for (i = 0U, ring = self->ring[0];
-               self->tx_rings > i; ++i, ring = self->ring[i]) {
+       for (i = 0U; self->tx_rings > i; ++i) {
+               ring = self->ring[i];
                err = self->aq_hw_ops->hw_ring_tx_start(self->aq_hw,
                                                        &ring[AQ_VEC_TX_ID]);
                if (err < 0)
@@ -248,8 +248,8 @@ void aq_vec_stop(struct aq_vec_s *self)
        struct aq_ring_s *ring = NULL;
        unsigned int i = 0U;
 
-       for (i = 0U, ring = self->ring[0];
-               self->tx_rings > i; ++i, ring = self->ring[i]) {
+       for (i = 0U; self->tx_rings > i; ++i) {
+               ring = self->ring[i];
                self->aq_hw_ops->hw_ring_tx_stop(self->aq_hw,
                                                 &ring[AQ_VEC_TX_ID]);
 
@@ -268,8 +268,8 @@ void aq_vec_deinit(struct aq_vec_s *self)
        if (!self)
                goto err_exit;
 
-       for (i = 0U, ring = self->ring[0];
-               self->tx_rings > i; ++i, ring = self->ring[i]) {
+       for (i = 0U; self->tx_rings > i; ++i) {
+               ring = self->ring[i];
                aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]);
                aq_ring_rx_deinit(&ring[AQ_VEC_RX_ID]);
        }
@@ -297,8 +297,8 @@ void aq_vec_ring_free(struct aq_vec_s *self)
        if (!self)
                goto err_exit;
 
-       for (i = 0U, ring = self->ring[0];
-               self->tx_rings > i; ++i, ring = self->ring[i]) {
+       for (i = 0U; self->tx_rings > i; ++i) {
+               ring = self->ring[i];
                aq_ring_free(&ring[AQ_VEC_TX_ID]);
                if (i < self->rx_rings)
                        aq_ring_free(&ring[AQ_VEC_RX_ID]);
index d875ce3ec759bbd0e557ce97f6c7f9cef96af8b4..15ede7285fb5d1140cd0afec5fcfbd85d25ef965 100644 (file)
@@ -889,6 +889,13 @@ int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self,
                err = -ENXIO;
                goto err_exit;
        }
+
+       /* Validate that the new hw_head_ is reasonable. */
+       if (hw_head_ >= ring->size) {
+               err = -ENXIO;
+               goto err_exit;
+       }
+
        ring->hw_head = hw_head_;
        err = aq_hw_err_from_flags(self);
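
The check added above is defensive validation of device-supplied state: hw_head_ is read back from the NIC, and a corrupt or malicious device must not be able to steer ring->hw_head out of bounds. The pattern in isolation:

    #include <errno.h>

    struct ring { unsigned int size; unsigned int hw_head; };

    /* Accept a head index reported by hardware only if it actually
     * indexes into the ring.
     */
    static int update_head(struct ring *r, unsigned int hw_head)
    {
            if (hw_head >= r->size)
                    return -ENXIO;
            r->hw_head = hw_head;
            return 0;
    }
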
 
index 60dde29974bfea53f8092fda078f348d36c6a6ef..df51be3cbe06906550c627c3265657fc1828a92a 100644 (file)
@@ -2585,8 +2585,10 @@ static int bcm_sysport_probe(struct platform_device *pdev)
                device_set_wakeup_capable(&pdev->dev, 1);
 
        priv->wol_clk = devm_clk_get_optional(&pdev->dev, "sw_sysportwol");
-       if (IS_ERR(priv->wol_clk))
-               return PTR_ERR(priv->wol_clk);
+       if (IS_ERR(priv->wol_clk)) {
+               ret = PTR_ERR(priv->wol_clk);
+               goto err_deregister_fixed_link;
+       }
 
        /* Set the needed headroom once and for all */
        BUILD_BUG_ON(sizeof(struct bcm_tsb) != 8);
index c19b072f3a2375c8bf2bfccd856172644b0da7ff..962253db25b820ee70fd8578cc90488a6beb7513 100644 (file)
@@ -14153,10 +14153,6 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp)
 
        /* Stop Tx */
        bnx2x_tx_disable(bp);
-       /* Delete all NAPI objects */
-       bnx2x_del_all_napi(bp);
-       if (CNIC_LOADED(bp))
-               bnx2x_del_all_napi_cnic(bp);
        netdev_reset_tc(bp->dev);
 
        del_timer_sync(&bp->timer);
@@ -14261,6 +14257,11 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
                bnx2x_drain_tx_queues(bp);
                bnx2x_send_unload_req(bp, UNLOAD_RECOVERY);
                bnx2x_netif_stop(bp, 1);
+               bnx2x_del_all_napi(bp);
+
+               if (CNIC_LOADED(bp))
+                       bnx2x_del_all_napi_cnic(bp);
+
                bnx2x_free_irq(bp);
 
                /* Report UNLOAD_DONE to MCP */
index 1c28495875cfcaf1ff8968ca60f460cdf1fd2f8a..1d69fe0737a1c25d2ca5872b58c908454770866c 100644 (file)
@@ -2707,6 +2707,10 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
                        u32 idx = le32_to_cpu(nqcmp->cq_handle_low);
                        struct bnxt_cp_ring_info *cpr2;
 
+                       /* No more budget for RX work */
+                       if (budget && work_done >= budget && idx == BNXT_RX_HDL)
+                               break;
+
                        cpr2 = cpr->cp_ring_arr[idx];
                        work_done += __bnxt_poll_work(bp, cpr2,
                                                      budget - work_done);
@@ -3253,6 +3257,7 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
                }
                qidx = bp->tc_to_qidx[j];
                ring->queue_id = bp->q_info[qidx].queue_id;
+               spin_lock_init(&txr->xdp_tx_lock);
                if (i < bp->tx_nr_rings_xdp)
                        continue;
                if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1))
@@ -10338,6 +10343,12 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
        if (irq_re_init)
                udp_tunnel_nic_reset_ntf(bp->dev);
 
+       if (bp->tx_nr_rings_xdp < num_possible_cpus()) {
+               if (!static_key_enabled(&bnxt_xdp_locking_key))
+                       static_branch_enable(&bnxt_xdp_locking_key);
+       } else if (static_key_enabled(&bnxt_xdp_locking_key)) {
+               static_branch_disable(&bnxt_xdp_locking_key);
+       }
        set_bit(BNXT_STATE_OPEN, &bp->state);
        bnxt_enable_int(bp);
        /* Enable TX queues */
@@ -10976,7 +10987,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp)
 
        if (bp->flags & BNXT_FLAG_CHIP_P5)
                return bnxt_rfs_supported(bp);
-       if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp))
+       if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings)
                return false;
 
        vnics = 1 + bp->rx_nr_rings;
@@ -13227,10 +13238,9 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp)
                goto init_dflt_ring_err;
 
        bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
-       if (bnxt_rfs_supported(bp) && bnxt_rfs_capable(bp)) {
-               bp->flags |= BNXT_FLAG_RFS;
-               bp->dev->features |= NETIF_F_NTUPLE;
-       }
+
+       bnxt_set_dflt_rfs(bp);
+
 init_dflt_ring_err:
        bnxt_ulp_irq_restart(bp, rc);
        return rc;
index 61aa3e8c59527469df1c2f72544e5d122e1b24a3..98453a78cbd0408ce994677d31da782e0d49307c 100644 (file)
@@ -593,7 +593,8 @@ struct nqe_cn {
 #define BNXT_MAX_MTU           9500
 #define BNXT_MAX_PAGE_MODE_MTU \
        ((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN -       \
-        XDP_PACKET_HEADROOM)
+        XDP_PACKET_HEADROOM - \
+        SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info)))
 
 #define BNXT_MIN_PKT_SIZE      52
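
The BNXT_MAX_PAGE_MODE_MTU change reserves tail room for the struct skb_shared_info that buffer-to-skb conversion places at the end of an XDP page, on top of the existing head room. As a rough worked example with typical x86-64 values (PAGE_SIZE 4096, VLAN_ETH_HLEN 18, NET_IP_ALIGN 0 on x86, XDP_PACKET_HEADROOM 256, and SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) around 320, the last figure varying by kernel version and config):

    4096 - 18 - 0 - 256 - 320 = 3502

so the page-mode MTU shrinks by the aligned shared-info size relative to the old formula, keeping XDP frames from overlapping the shared-info area.
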
 
@@ -800,6 +801,8 @@ struct bnxt_tx_ring_info {
        u32                     dev_state;
 
        struct bnxt_ring_struct tx_ring_struct;
+       /* Synchronize simultaneous xdp_xmit on the same ring */
+       spinlock_t              xdp_tx_lock;
 };
 
 #define BNXT_LEGACY_COAL_CMPL_PARAMS                                   \
index 9c2ad5e67a5d894078932c397cb37cf265b7743b..00f2f80c007332911cec143635c7589f474dcfc8 100644 (file)
@@ -846,13 +846,6 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
        if (rc)
                return rc;
 
-       if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
-               bnxt_ptp_timecounter_init(bp, false);
-               rc = bnxt_ptp_init_rtc(bp, phc_cfg);
-               if (rc)
-                       goto out;
-       }
-
        if (ptp->ptp_clock && bnxt_pps_config_ok(bp))
                return 0;
 
@@ -861,8 +854,14 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
        atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS);
        spin_lock_init(&ptp->ptp_lock);
 
-       if (!(bp->fw_cap & BNXT_FW_CAP_PTP_RTC))
+       if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+               bnxt_ptp_timecounter_init(bp, false);
+               rc = bnxt_ptp_init_rtc(bp, phc_cfg);
+               if (rc)
+                       goto out;
+       } else {
                bnxt_ptp_timecounter_init(bp, true);
+       }
 
        ptp->ptp_info = bnxt_ptp_caps;
        if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) {
index 52fad0fdeacf31d3cd6fbb15e0106b9721e18eeb..03b1d6c04504856a43c4c5649f3fb2a8be765c34 100644 (file)
@@ -20,6 +20,8 @@
 #include "bnxt.h"
 #include "bnxt_xdp.h"
 
+DEFINE_STATIC_KEY_FALSE(bnxt_xdp_locking_key);
+
 struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
                                   struct bnxt_tx_ring_info *txr,
                                   dma_addr_t mapping, u32 len)
@@ -227,11 +229,16 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
        ring = smp_processor_id() % bp->tx_nr_rings_xdp;
        txr = &bp->tx_ring[ring];
 
+       if (READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING)
+               return -EINVAL;
+
+       if (static_branch_unlikely(&bnxt_xdp_locking_key))
+               spin_lock(&txr->xdp_tx_lock);
+
        for (i = 0; i < num_frames; i++) {
                struct xdp_frame *xdp = frames[i];
 
-               if (!txr || !bnxt_tx_avail(bp, txr) ||
-                   !(bp->bnapi[ring]->flags & BNXT_NAPI_FLAG_XDP))
+               if (!bnxt_tx_avail(bp, txr))
                        break;
 
                mapping = dma_map_single(&pdev->dev, xdp->data, xdp->len,
@@ -250,6 +257,9 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
                bnxt_db_write(bp, &txr->tx_db, txr->tx_prod);
        }
 
+       if (static_branch_unlikely(&bnxt_xdp_locking_key))
+               spin_unlock(&txr->xdp_tx_lock);
+
        return nxmit;
 }
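
The locking added to bnxt_xdp_xmit() is gated by a static branch rather than a plain flag: the spinlock is only needed when XDP TX rings can be shared between CPUs (fewer rings than possible CPUs, as tested in __bnxt_open_nic() above), and while the key is disabled the static_branch_unlikely() sites are runtime-patched no-ops, so the uncontended case pays no per-packet test. The gating pattern in isolation (names abbreviated from the driver; ring setup elided):

    #include <linux/jump_label.h>
    #include <linux/spinlock.h>

    DEFINE_STATIC_KEY_FALSE(xdp_locking_key);

    static DEFINE_SPINLOCK(xdp_tx_lock);

    static void xdp_xmit_maybe_locked(void)
    {
            if (static_branch_unlikely(&xdp_locking_key))
                    spin_lock(&xdp_tx_lock);

            /* ... post TX descriptors ... */

            if (static_branch_unlikely(&xdp_locking_key))
                    spin_unlock(&xdp_tx_lock);
    }

    /* At ring setup time, roughly:
     *   if (rings < num_possible_cpus())
     *           static_branch_enable(&xdp_locking_key);
     *   else
     *           static_branch_disable(&xdp_locking_key);
     */
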
 
index 0df40c3beb05024b68116679b6194173f73e1524..067bb5e821f542bbc24e51cd7f303667f1a8ade5 100644 (file)
@@ -10,6 +10,8 @@
 #ifndef BNXT_XDP_H
 #define BNXT_XDP_H
 
+DECLARE_STATIC_KEY_FALSE(bnxt_xdp_locking_key);
+
 struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
                                   struct bnxt_tx_ring_info *txr,
                                   dma_addr_t mapping, u32 len);
index 2dd79af9411bbc2042b043601d5a89dbbe4df68b..e87e46c47387ed5235a84cc2451631fb76ab398f 100644 (file)
@@ -76,7 +76,7 @@ static inline void bcmgenet_writel(u32 value, void __iomem *offset)
        if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
                __raw_writel(value, offset);
        else
-               writel(value, offset);
+               writel_relaxed(value, offset);
 }
 
 static inline u32 bcmgenet_readl(void __iomem *offset)
@@ -84,7 +84,7 @@ static inline u32 bcmgenet_readl(void __iomem *offset)
        if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
                return __raw_readl(offset);
        else
-               return readl(offset);
+               return readl_relaxed(offset);
 }
 
 static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv,
@@ -2035,6 +2035,11 @@ static struct sk_buff *bcmgenet_add_tsb(struct net_device *dev,
        return skb;
 }
 
+static void bcmgenet_hide_tsb(struct sk_buff *skb)
+{
+       __skb_pull(skb, sizeof(struct status_64));
+}
+
 static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct bcmgenet_priv *priv = netdev_priv(dev);
@@ -2141,6 +2146,8 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
        }
 
        GENET_CB(skb)->last_cb = tx_cb_ptr;
+
+       bcmgenet_hide_tsb(skb);
        skb_tx_timestamp(skb);
 
        /* Decrement total BD count and advance our write pointer */
@@ -3992,6 +3999,10 @@ static int bcmgenet_probe(struct platform_device *pdev)
                goto err;
        }
        priv->wol_irq = platform_get_irq_optional(pdev, 2);
+       if (priv->wol_irq == -EPROBE_DEFER) {
+               err = priv->wol_irq;
+               goto err;
+       }
 
        priv->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(priv->base)) {
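
Two distinct fixes in bcmgenet: the switch to writel_relaxed()/readl_relaxed() drops the memory barriers the non-relaxed MMIO accessors imply; the relaxed forms are still ordered with respect to the same device but not against normal memory, which is acceptable for register accesses that do not need to synchronize with DMA buffers and saves barrier cost on every register touch. The platform_get_irq_optional() hunk applies the companion rule that an -EPROBE_DEFER return must be propagated to the driver core rather than silently treated as "no IRQ".
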
index 800d5ced580003c5eaff710ccf9ec8daf1dd68b1..61284baa0496ecef02de133435964e88debab07c 100644 (file)
@@ -1219,7 +1219,6 @@ static void gem_rx_refill(struct macb_queue *queue)
                /* Make hw descriptor updates visible to CPU */
                rmb();
 
-               queue->rx_prepared_head++;
                desc = macb_rx_desc(queue, entry);
 
                if (!queue->rx_skbuff[entry]) {
@@ -1258,6 +1257,7 @@ static void gem_rx_refill(struct macb_queue *queue)
                        dma_wmb();
                        desc->addr &= ~MACB_BIT(RX_USED);
                }
+               queue->rx_prepared_head++;
        }
 
        /* Make descriptor updates visible to hardware */
@@ -1658,6 +1658,7 @@ static void macb_tx_restart(struct macb_queue *queue)
        unsigned int head = queue->tx_head;
        unsigned int tail = queue->tx_tail;
        struct macb *bp = queue->bp;
+       unsigned int head_idx, tbqp;
 
        if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
                queue_writel(queue, ISR, MACB_BIT(TXUBR));
@@ -1665,6 +1666,13 @@ static void macb_tx_restart(struct macb_queue *queue)
        if (head == tail)
                return;
 
+       tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp);
+       tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp));
+       head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, head));
+
+       if (tbqp == head_idx)
+               return;
+
        macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
 }
 
index f2f1ce81fd9ccd8a365be7ab649070291db62818..0ec65ec634df57c39f7d8dde8a8f30d4c4551b53 100644 (file)
@@ -59,7 +59,7 @@ struct nicpf {
 
        /* MSI-X */
        u8                      num_vec;
-       bool                    irq_allocated[NIC_PF_MSIX_VECTORS];
+       unsigned int            irq_allocated[NIC_PF_MSIX_VECTORS];
        char                    irq_name[NIC_PF_MSIX_VECTORS][20];
 };
 
@@ -1150,7 +1150,7 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
        u64 intr;
        u8  vf;
 
-       if (irq == pci_irq_vector(nic->pdev, NIC_PF_INTR_ID_MBOX0))
+       if (irq == nic->irq_allocated[NIC_PF_INTR_ID_MBOX0])
                mbx = 0;
        else
                mbx = 1;
@@ -1176,14 +1176,14 @@ static void nic_free_all_interrupts(struct nicpf *nic)
 
        for (irq = 0; irq < nic->num_vec; irq++) {
                if (nic->irq_allocated[irq])
-                       free_irq(pci_irq_vector(nic->pdev, irq), nic);
-               nic->irq_allocated[irq] = false;
+                       free_irq(nic->irq_allocated[irq], nic);
+               nic->irq_allocated[irq] = 0;
        }
 }
 
 static int nic_register_interrupts(struct nicpf *nic)
 {
-       int i, ret;
+       int i, ret, irq;
        nic->num_vec = pci_msix_vec_count(nic->pdev);
 
        /* Enable MSI-X */
@@ -1201,13 +1201,13 @@ static int nic_register_interrupts(struct nicpf *nic)
                sprintf(nic->irq_name[i],
                        "NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0));
 
-               ret = request_irq(pci_irq_vector(nic->pdev, i),
-                                 nic_mbx_intr_handler, 0,
+               irq = pci_irq_vector(nic->pdev, i);
+               ret = request_irq(irq, nic_mbx_intr_handler, 0,
                                  nic->irq_name[i], nic);
                if (ret)
                        goto fail;
 
-               nic->irq_allocated[i] = true;
+               nic->irq_allocated[i] = irq;
        }
 
        /* Enable mailbox interrupt */
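
The thunder changes above record the Linux IRQ number returned by pci_irq_vector() at request time and reuse it for both the handler comparison and free_irq(). A sketch of the same pairing, with hypothetical names:

#include <linux/interrupt.h>
#include <linux/pci.h>

#define NR_VEC	4		/* stands in for NIC_PF_MSIX_VECTORS */

struct foo_nic {		/* hypothetical driver state */
	struct pci_dev *pdev;
	unsigned int irq_allocated[NR_VEC];	/* 0 = not requested */
};

static int foo_request_vectors(struct foo_nic *nic, irq_handler_t handler)
{
	int i, irq, ret;

	for (i = 0; i < NR_VEC; i++) {
		irq = pci_irq_vector(nic->pdev, i);
		if (irq < 0)
			return irq;
		ret = request_irq(irq, handler, 0, "foo", nic);
		if (ret)
			return ret;
		nic->irq_allocated[i] = irq;	/* remember the real number */
	}
	return 0;
}

static void foo_free_vectors(struct foo_nic *nic)
{
	int i;

	for (i = 0; i < NR_VEC; i++) {
		if (nic->irq_allocated[i])
			free_irq(nic->irq_allocated[i], nic);
		nic->irq_allocated[i] = 0;
	}
}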
index e7b4e3ed056c725cb832302c2efec6d0416a283d..8d719f82854a9d3acb582ba3b8af13479d29771d 100644 (file)
@@ -2793,14 +2793,14 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p)
                goto out;
        na = ret;
 
-       memcpy(p->id, vpd + id, min_t(int, id_len, ID_LEN));
+       memcpy(p->id, vpd + id, min_t(unsigned int, id_len, ID_LEN));
        strim(p->id);
-       memcpy(p->sn, vpd + sn, min_t(int, sn_len, SERNUM_LEN));
+       memcpy(p->sn, vpd + sn, min_t(unsigned int, sn_len, SERNUM_LEN));
        strim(p->sn);
-       memcpy(p->pn, vpd + pn, min_t(int, pn_len, PN_LEN));
+       memcpy(p->pn, vpd + pn, min_t(unsigned int, pn_len, PN_LEN));
        strim(p->pn);
-       memcpy(p->na, vpd + na, min_t(int, na_len, MACADDR_LEN));
-       strim((char *)p->na);
+       memcpy(p->na, vpd + na, min_t(unsigned int, na_len, MACADDR_LEN));
+       strim(p->na);
 
 out:
        vfree(vpd);
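
The cxgb4 hunk above switches min_t() to an unsigned comparison: a corrupt VPD length that is huge as unsigned reads as negative as int, so a signed min() would let the bogus value through to memcpy(). A one-function illustration (hypothetical helper):

#include <linux/minmax.h>

/* Clamp a length read from hardware. Comparing as unsigned avoids the
 * wrap the hunk above fixes: 0x80000010 reinterpreted as int is
 * negative and would win a signed min(). */
static unsigned int clamp_len(unsigned int len, unsigned int max)
{
	return min_t(unsigned int, len, max);
}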
index 79df5a72877b83b4a01429c0e929ff86aa706e08..0040dcaab9455f7cac35286e0cb507ab6a1bed2b 100644 (file)
@@ -1399,8 +1399,10 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        /* alloc_etherdev ensures aligned and zeroed private structures */
        dev = alloc_etherdev (sizeof (*tp));
-       if (!dev)
+       if (!dev) {
+               pci_disable_device(pdev);
                return -ENOMEM;
+       }
 
        SET_NETDEV_DEV(dev, &pdev->dev);
        if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) {
@@ -1785,6 +1787,7 @@ err_out_free_res:
 
 err_out_free_netdev:
        free_netdev (dev);
+       pci_disable_device(pdev);
        return -ENODEV;
 }
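
The tulip hunks above make every failure path after pci_enable_device() call pci_disable_device(), including the new early alloc_etherdev() failure. A sketch of the pairing, with error labels unwinding in reverse order (hypothetical probe):

#include <linux/etherdevice.h>
#include <linux/pci.h>

struct foo_priv { int dummy; };	/* hypothetical private data */

static int foo_probe(struct pci_dev *pdev)
{
	struct net_device *dev;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	dev = alloc_etherdev(sizeof(struct foo_priv));
	if (!dev) {
		err = -ENOMEM;
		goto err_disable;	/* must undo pci_enable_device() */
	}

	err = register_netdev(dev);
	if (err)
		goto err_free;

	return 0;

err_free:
	free_netdev(dev);
err_disable:
	pci_disable_device(pdev);	/* labels unwind in reverse order */
	return err;
}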
 
index d5356db7539a43773e9496c29dc707995aaa00d2..5231818943c6e12f8cad1dd065129a538acf83c8 100644 (file)
@@ -1835,11 +1835,6 @@ static int ftgmac100_probe(struct platform_device *pdev)
                priv->rxdes0_edorr_mask = BIT(30);
                priv->txdes0_edotr_mask = BIT(30);
                priv->is_aspeed = true;
-               /* Disable ast2600 problematic HW arbitration */
-               if (of_device_is_compatible(np, "aspeed,ast2600-mac")) {
-                       iowrite32(FTGMAC100_TM_DEFAULT,
-                                 priv->base + FTGMAC100_OFFSET_TM);
-               }
        } else {
                priv->rxdes0_edorr_mask = BIT(15);
                priv->txdes0_edotr_mask = BIT(15);
@@ -1911,6 +1906,11 @@ static int ftgmac100_probe(struct platform_device *pdev)
                err = ftgmac100_setup_clk(priv);
                if (err)
                        goto err_phy_connect;
+
+               /* Disable ast2600 problematic HW arbitration */
+               if (of_device_is_compatible(np, "aspeed,ast2600-mac"))
+                       iowrite32(FTGMAC100_TM_DEFAULT,
+                                 priv->base + FTGMAC100_OFFSET_TM);
        }
 
        /* Default ring sizes */
@@ -1928,6 +1928,11 @@ static int ftgmac100_probe(struct platform_device *pdev)
        /* AST2400  doesn't have working HW checksum generation */
        if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac")))
                netdev->hw_features &= ~NETIF_F_HW_CSUM;
+
+       /* AST2600 tx checksum with NCSI is broken */
+       if (priv->use_ncsi && of_device_is_compatible(np, "aspeed,ast2600-mac"))
+               netdev->hw_features &= ~NETIF_F_HW_CSUM;
+
        if (np && of_get_property(np, "no-hw-checksum", NULL))
                netdev->hw_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM);
        netdev->features |= netdev->hw_features;
index 763d2c7b5fb1a78225ad757e1aee7578ab310c36..5750f9a56393a038c3e50eb0502218f7696b33d0 100644 (file)
@@ -489,11 +489,15 @@ static int dpaa_get_ts_info(struct net_device *net_dev,
        info->phc_index = -1;
 
        fman_node = of_get_parent(mac_node);
-       if (fman_node)
+       if (fman_node) {
                ptp_node = of_parse_phandle(fman_node, "ptimer-handle", 0);
+               of_node_put(fman_node);
+       }
 
-       if (ptp_node)
+       if (ptp_node) {
                ptp_dev = of_find_device_by_node(ptp_node);
+               of_node_put(ptp_node);
+       }
 
        if (ptp_dev)
                ptp = platform_get_drvdata(ptp_dev);
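
The dpaa hunk above applies the usual OF refcount rule: of_get_parent() and of_parse_phandle() each return a node with an elevated refcount that must be dropped with of_node_put() once the node is no longer needed. A sketch under that assumption:

#include <linux/of.h>
#include <linux/of_platform.h>

static struct platform_device *find_ptp_dev(struct device_node *mac_node)
{
	struct device_node *fman, *ptp = NULL;
	struct platform_device *pdev = NULL;

	fman = of_get_parent(mac_node);		/* takes a reference */
	if (fman) {
		ptp = of_parse_phandle(fman, "ptimer-handle", 0);
		of_node_put(fman);		/* done with the parent */
	}
	if (ptp) {
		pdev = of_find_device_by_node(ptp);
		of_node_put(ptp);		/* pdev holds its own ref */
	}
	return pdev;
}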
index 5f5f8c53c4a0f0d9653865373cb09c7b61e3bc96..c8cb541572ffe6494df4e97b849bbb6bcffdd874 100644 (file)
@@ -167,7 +167,7 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev)
        base = of_iomap(node, 0);
        if (!base) {
                err = -ENOMEM;
-               goto err_close;
+               goto err_put;
        }
 
        err = fsl_mc_allocate_irqs(mc_dev);
@@ -210,6 +210,8 @@ err_free_mc_irq:
        fsl_mc_free_irqs(mc_dev);
 err_unmap:
        iounmap(base);
+err_put:
+       of_node_put(node);
 err_close:
        dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle);
 err_free_mcp:
index 79afb1d7289b1c99c1cd845b777e50213f97c7c7..9182631856d58ee49d50d4d89ae061fbefba77e1 100644 (file)
@@ -297,10 +297,6 @@ int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data)
        if (tc < 0 || tc >= priv->num_tx_rings)
                return -EINVAL;
 
-       /* Do not support TXSTART and TX CSUM offload simutaniously */
-       if (ndev->features & NETIF_F_CSUM_MASK)
-               return -EBUSY;
-
        /* TSD and Qbv are mutually exclusive in hardware */
        if (enetc_rd(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET) & ENETC_QBV_TGE)
                return -EBUSY;
index 11227f51404cf6483320fd34d5dd525ce34e9387..9f33ec838b525d4d267a5a8921c9bb97778c1280 100644 (file)
@@ -3731,7 +3731,7 @@ static int fec_enet_init_stop_mode(struct fec_enet_private *fep,
                                         ARRAY_SIZE(out_val));
        if (ret) {
                dev_dbg(&fep->pdev->dev, "no stop mode property\n");
-               return ret;
+               goto out;
        }
 
        fep->stop_gpr.gpr = syscon_node_to_regmap(gpr_np);
index 5d7aef73df61004ef7faeb0ebeacc8e8638c30e0..fb5120d90f26ac271c09876d59f6b8a4d87e5662 100644 (file)
@@ -586,8 +586,8 @@ static int fun_get_dev_limits(struct fun_dev *fdev)
        /* Calculate the max QID based on SQ/CQ/doorbell counts.
         * SQ/CQ doorbells alternate.
         */
-       num_dbs = (pci_resource_len(pdev, 0) - NVME_REG_DBS) /
-                 (fdev->db_stride * 4);
+       num_dbs = (pci_resource_len(pdev, 0) - NVME_REG_DBS) >>
+                 (2 + NVME_CAP_STRIDE(fdev->cap_reg));
        fdev->max_qid = min3(cq_count, sq_count, num_dbs / 2) - 1;
        fdev->kern_end_qid = fdev->max_qid + 1;
        return 0;
index 7edf8569514ccdc49c11073926ebeba315435e8e..928d934cb21a5af1a0fbdc144a12dbee5e4e1960 100644 (file)
@@ -1065,19 +1065,23 @@ int hns_mac_init(struct dsaf_device *dsaf_dev)
        device_for_each_child_node(dsaf_dev->dev, child) {
                ret = fwnode_property_read_u32(child, "reg", &port_id);
                if (ret) {
+                       fwnode_handle_put(child);
                        dev_err(dsaf_dev->dev,
                                "get reg fail, ret=%d!\n", ret);
                        return ret;
                }
                if (port_id >= max_port_num) {
+                       fwnode_handle_put(child);
                        dev_err(dsaf_dev->dev,
                                "reg(%u) out of range!\n", port_id);
                        return -EINVAL;
                }
                mac_cb = devm_kzalloc(dsaf_dev->dev, sizeof(*mac_cb),
                                      GFP_KERNEL);
-               if (!mac_cb)
+               if (!mac_cb) {
+                       fwnode_handle_put(child);
                        return -ENOMEM;
+               }
                mac_cb->fw_port = child;
                mac_cb->mac_id = (u8)port_id;
                dsaf_dev->mac_cb[port_id] = mac_cb;
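
The hns hunks above drop the child fwnode reference on every early return, since device_for_each_child_node() holds a reference on the current child for the duration of the iteration. A sketch of the rule (hypothetical scan function):

#include <linux/errno.h>
#include <linux/property.h>

static int foo_scan_ports(struct device *dev, u32 max_port)
{
	struct fwnode_handle *child;
	u32 port;
	int ret;

	device_for_each_child_node(dev, child) {
		ret = fwnode_property_read_u32(child, "reg", &port);
		if (ret) {
			fwnode_handle_put(child);	/* early exit: drop ref */
			return ret;
		}
		if (port >= max_port) {
			fwnode_handle_put(child);
			return -EINVAL;
		}
	}
	return 0;	/* a completed loop has already dropped the ref */
}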
index 0c60f41fca8a6f25446131feaa47ca04e294c837..f3c9395d8351cb31108973e1867332e00026c6ac 100644 (file)
@@ -75,7 +75,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle,
                ret = hclge_comm_cmd_send(hw, &desc, 1);
                if (ret) {
                        dev_err(&hw->cmq.csq.pdev->dev,
-                               "failed to get tqp stat, ret = %d, tx = %u.\n",
+                               "failed to get tqp stat, ret = %d, rx = %u.\n",
                                ret, i);
                        return ret;
                }
@@ -89,7 +89,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle,
                ret = hclge_comm_cmd_send(hw, &desc, 1);
                if (ret) {
                        dev_err(&hw->cmq.csq.pdev->dev,
-                               "failed to get tqp stat, ret = %d, rx = %u.\n",
+                               "failed to get tqp stat, ret = %d, tx = %u.\n",
                                ret, i);
                        return ret;
                }
index 44d9b560b3374f8f2ad8d7b0b1596e1b4f7bce50..93aeb615191d9064aee2be7e1f21c0003fb75322 100644 (file)
@@ -562,12 +562,12 @@ static void hns3_dbg_tx_spare_info(struct hns3_enet_ring *ring, char *buf,
 
        for (i = 0; i < ring_num; i++) {
                j = 0;
-               sprintf(result[j++], "%8u", i);
-               sprintf(result[j++], "%9u", ring->tx_copybreak);
-               sprintf(result[j++], "%3u", tx_spare->len);
-               sprintf(result[j++], "%3u", tx_spare->next_to_use);
-               sprintf(result[j++], "%3u", tx_spare->next_to_clean);
-               sprintf(result[j++], "%3u", tx_spare->last_to_clean);
+               sprintf(result[j++], "%u", i);
+               sprintf(result[j++], "%u", ring->tx_copybreak);
+               sprintf(result[j++], "%u", tx_spare->len);
+               sprintf(result[j++], "%u", tx_spare->next_to_use);
+               sprintf(result[j++], "%u", tx_spare->next_to_clean);
+               sprintf(result[j++], "%u", tx_spare->last_to_clean);
                sprintf(result[j++], "%pad", &tx_spare->dma);
                hns3_dbg_fill_content(content, sizeof(content),
                                      tx_spare_info_items,
@@ -598,35 +598,35 @@ static void hns3_dump_rx_queue_info(struct hns3_enet_ring *ring,
        u32 base_add_l, base_add_h;
        u32 j = 0;
 
-       sprintf(result[j++], "%8u", index);
+       sprintf(result[j++], "%u", index);
 
-       sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_RX_RING_BD_NUM_REG));
 
-       sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_RX_RING_BD_LEN_REG));
 
-       sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_RX_RING_TAIL_REG));
 
-       sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_RX_RING_HEAD_REG));
 
-       sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_RX_RING_FBDNUM_REG));
 
-       sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_RX_RING_PKTNUM_RECORD_REG));
-       sprintf(result[j++], "%9u", ring->rx_copybreak);
+       sprintf(result[j++], "%u", ring->rx_copybreak);
 
-       sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_EN_REG) ? "on" : "off");
 
        if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev))
-               sprintf(result[j++], "%10s", readl_relaxed(ring->tqp->io_base +
+               sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base +
                        HNS3_RING_RX_EN_REG) ? "on" : "off");
        else
-               sprintf(result[j++], "%10s", "NA");
+               sprintf(result[j++], "%s", "NA");
 
        base_add_h = readl_relaxed(ring->tqp->io_base +
                                        HNS3_RING_RX_RING_BASEADDR_H_REG);
@@ -700,36 +700,36 @@ static void hns3_dump_tx_queue_info(struct hns3_enet_ring *ring,
        u32 base_add_l, base_add_h;
        u32 j = 0;
 
-       sprintf(result[j++], "%8u", index);
-       sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%u", index);
+       sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_TX_RING_BD_NUM_REG));
 
-       sprintf(result[j++], "%2u", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_TX_RING_TC_REG));
 
-       sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_TX_RING_TAIL_REG));
 
-       sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_TX_RING_HEAD_REG));
 
-       sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_TX_RING_FBDNUM_REG));
 
-       sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_TX_RING_OFFSET_REG));
 
-       sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_TX_RING_PKTNUM_RECORD_REG));
 
-       sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base +
+       sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base +
                HNS3_RING_EN_REG) ? "on" : "off");
 
        if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev))
-               sprintf(result[j++], "%10s", readl_relaxed(ring->tqp->io_base +
+               sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base +
                        HNS3_RING_TX_EN_REG) ? "on" : "off");
        else
-               sprintf(result[j++], "%10s", "NA");
+               sprintf(result[j++], "%s", "NA");
 
        base_add_h = readl_relaxed(ring->tqp->io_base +
                                        HNS3_RING_TX_RING_BASEADDR_H_REG);
@@ -848,15 +848,15 @@ static void hns3_dump_rx_bd_info(struct hns3_nic_priv *priv,
 {
        unsigned int j = 0;
 
-       sprintf(result[j++], "%5d", idx);
+       sprintf(result[j++], "%d", idx);
        sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.l234_info));
-       sprintf(result[j++], "%7u", le16_to_cpu(desc->rx.pkt_len));
-       sprintf(result[j++], "%4u", le16_to_cpu(desc->rx.size));
+       sprintf(result[j++], "%u", le16_to_cpu(desc->rx.pkt_len));
+       sprintf(result[j++], "%u", le16_to_cpu(desc->rx.size));
        sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.rss_hash));
-       sprintf(result[j++], "%5u", le16_to_cpu(desc->rx.fd_id));
-       sprintf(result[j++], "%8u", le16_to_cpu(desc->rx.vlan_tag));
-       sprintf(result[j++], "%15u", le16_to_cpu(desc->rx.o_dm_vlan_id_fb));
-       sprintf(result[j++], "%11u", le16_to_cpu(desc->rx.ot_vlan_tag));
+       sprintf(result[j++], "%u", le16_to_cpu(desc->rx.fd_id));
+       sprintf(result[j++], "%u", le16_to_cpu(desc->rx.vlan_tag));
+       sprintf(result[j++], "%u", le16_to_cpu(desc->rx.o_dm_vlan_id_fb));
+       sprintf(result[j++], "%u", le16_to_cpu(desc->rx.ot_vlan_tag));
        sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.bd_base_info));
        if (test_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state)) {
                u32 ol_info = le32_to_cpu(desc->rx.ol_info);
@@ -930,19 +930,19 @@ static void hns3_dump_tx_bd_info(struct hns3_nic_priv *priv,
 {
        unsigned int j = 0;
 
-       sprintf(result[j++], "%6d", idx);
+       sprintf(result[j++], "%d", idx);
        sprintf(result[j++], "%#llx", le64_to_cpu(desc->addr));
-       sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.vlan_tag));
-       sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.send_size));
+       sprintf(result[j++], "%u", le16_to_cpu(desc->tx.vlan_tag));
+       sprintf(result[j++], "%u", le16_to_cpu(desc->tx.send_size));
        sprintf(result[j++], "%#x",
                le32_to_cpu(desc->tx.type_cs_vlan_tso_len));
-       sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.outer_vlan_tag));
-       sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.tv));
-       sprintf(result[j++], "%10u",
+       sprintf(result[j++], "%u", le16_to_cpu(desc->tx.outer_vlan_tag));
+       sprintf(result[j++], "%u", le16_to_cpu(desc->tx.tv));
+       sprintf(result[j++], "%u",
                le32_to_cpu(desc->tx.ol_type_vlan_len_msec));
        sprintf(result[j++], "%#x", le32_to_cpu(desc->tx.paylen_ol4cs));
        sprintf(result[j++], "%#x", le16_to_cpu(desc->tx.bdtp_fe_sc_vld_ra_ri));
-       sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.mss_hw_csum));
+       sprintf(result[j++], "%u", le16_to_cpu(desc->tx.mss_hw_csum));
 }
 
 static int hns3_dbg_tx_bd_info(struct hns3_dbg_data *d, char *buf, int len)
index 14dc12c2155d5f2247b466b575b328eb175214f4..a3ee7875d6a7aa4b47c4feb7c7c668022361a582 100644 (file)
@@ -5203,6 +5203,13 @@ static void hns3_state_init(struct hnae3_handle *handle)
                set_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state);
 }
 
+static void hns3_state_uninit(struct hnae3_handle *handle)
+{
+       struct hns3_nic_priv *priv = handle->priv;
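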
+
+       clear_bit(HNS3_NIC_STATE_INITED, &priv->state);
+}
+
 static int hns3_client_init(struct hnae3_handle *handle)
 {
        struct pci_dev *pdev = handle->pdev;
@@ -5320,7 +5327,9 @@ static int hns3_client_init(struct hnae3_handle *handle)
        return ret;
 
 out_reg_netdev_fail:
+       hns3_state_uninit(handle);
        hns3_dbg_uninit(handle);
+       hns3_client_stop(handle);
 out_client_start:
        hns3_free_rx_cpu_rmap(netdev);
        hns3_nic_uninit_irq(priv);
index 6799d16de34b9490f60fe67c94a2619e329e0ca4..7998ca617a92e8c25be2f778fa822729cdbcfecf 100644 (file)
@@ -94,6 +94,13 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
        enum hclge_comm_cmd_status status;
        struct hclge_desc desc;
 
+       if (msg_len > HCLGE_MBX_MAX_MSG_SIZE) {
+               dev_err(&hdev->pdev->dev,
+                       "msg data length(=%u) exceeds maximum(=%u)\n",
+                       msg_len, HCLGE_MBX_MAX_MSG_SIZE);
+               return -EMSGSIZE;
+       }
+
        resp_pf_to_vf = (struct hclge_mbx_pf_to_vf_cmd *)desc.data;
 
        hclge_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_PF_TO_VF, false);
@@ -176,7 +183,7 @@ static int hclge_get_ring_chain_from_mbx(
        ring_num = req->msg.ring_num;
 
        if (ring_num > HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM)
-               return -ENOMEM;
+               return -EINVAL;
 
        for (i = 0; i < ring_num; i++) {
                if (req->msg.param[i].tqp_index >= vport->nic.kinfo.rss_size) {
@@ -587,9 +594,9 @@ static int hclge_set_vf_mtu(struct hclge_vport *vport,
        return hclge_set_vport_mtu(vport, mtu);
 }
 
-static void hclge_get_queue_id_in_pf(struct hclge_vport *vport,
-                                    struct hclge_mbx_vf_to_pf_cmd *mbx_req,
-                                    struct hclge_respond_to_vf_msg *resp_msg)
+static int hclge_get_queue_id_in_pf(struct hclge_vport *vport,
+                                   struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+                                   struct hclge_respond_to_vf_msg *resp_msg)
 {
        struct hnae3_handle *handle = &vport->nic;
        struct hclge_dev *hdev = vport->back;
@@ -599,17 +606,18 @@ static void hclge_get_queue_id_in_pf(struct hclge_vport *vport,
        if (queue_id >= handle->kinfo.num_tqps) {
                dev_err(&hdev->pdev->dev, "Invalid queue id(%u) from VF %u\n",
                        queue_id, mbx_req->mbx_src_vfid);
-               return;
+               return -EINVAL;
        }
 
        qid_in_pf = hclge_covert_handle_qid_global(&vport->nic, queue_id);
        memcpy(resp_msg->data, &qid_in_pf, sizeof(qid_in_pf));
        resp_msg->len = sizeof(qid_in_pf);
+       return 0;
 }
 
-static void hclge_get_rss_key(struct hclge_vport *vport,
-                             struct hclge_mbx_vf_to_pf_cmd *mbx_req,
-                             struct hclge_respond_to_vf_msg *resp_msg)
+static int hclge_get_rss_key(struct hclge_vport *vport,
+                            struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+                            struct hclge_respond_to_vf_msg *resp_msg)
 {
 #define HCLGE_RSS_MBX_RESP_LEN 8
        struct hclge_dev *hdev = vport->back;
@@ -627,13 +635,14 @@ static void hclge_get_rss_key(struct hclge_vport *vport,
                dev_warn(&hdev->pdev->dev,
                         "failed to get the rss hash key, the index(%u) invalid !\n",
                         index);
-               return;
+               return -EINVAL;
        }
 
        memcpy(resp_msg->data,
               &rss_cfg->rss_hash_key[index * HCLGE_RSS_MBX_RESP_LEN],
               HCLGE_RSS_MBX_RESP_LEN);
        resp_msg->len = HCLGE_RSS_MBX_RESP_LEN;
+       return 0;
 }
 
 static void hclge_link_fail_parse(struct hclge_dev *hdev, u8 link_fail_code)
@@ -809,10 +818,10 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
                                        "VF fail(%d) to set mtu\n", ret);
                        break;
                case HCLGE_MBX_GET_QID_IN_PF:
-                       hclge_get_queue_id_in_pf(vport, req, &resp_msg);
+                       ret = hclge_get_queue_id_in_pf(vport, req, &resp_msg);
                        break;
                case HCLGE_MBX_GET_RSS_KEY:
-                       hclge_get_rss_key(vport, req, &resp_msg);
+                       ret = hclge_get_rss_key(vport, req, &resp_msg);
                        break;
                case HCLGE_MBX_GET_LINK_MODE:
                        hclge_get_link_mode(vport, req);
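
The first hclge hunk above rejects oversized mailbox payloads before they are copied into the fixed-size descriptor, returning -EMSGSIZE rather than overrunning the buffer. A minimal sketch with hypothetical sizes:

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>

#define MBX_MAX_MSG_SIZE 14	/* stands in for HCLGE_MBX_MAX_MSG_SIZE */

static int mbx_copy_msg(u8 *dst, const u8 *msg, u16 msg_len)
{
	if (msg_len > MBX_MAX_MSG_SIZE)
		return -EMSGSIZE;	/* refuse before touching dst */

	memcpy(dst, msg, msg_len);
	return 0;
}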
index 2d9b06d7caadb96fe7dacdb66e179434c06dc379..f7dc7d825f63787b5c905e1b5fb1d9170073d61a 100644 (file)
@@ -771,7 +771,7 @@ struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
        /* If we only have one page, we still need to get the shadow wqe
         * when the wqe rolls over the page
         */
-       if (curr_pg != end_pg || MASKED_WQE_IDX(wq, end_prod_idx) < *prod_idx) {
+       if (curr_pg != end_pg || end_prod_idx < *prod_idx) {
                void *shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size];
 
                copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *prod_idx);
@@ -841,7 +841,10 @@ struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size,
 
        *cons_idx = curr_cons_idx;
 
-       if (curr_pg != end_pg) {
+       /* If we only have one page, we still need to get the shadow wqe
+        * when the wqe rolls over the page
+        */
+       if (curr_pg != end_pg || end_cons_idx < curr_cons_idx) {
                void *shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size];
 
                copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *cons_idx);
index 77683909ca3d5b6c4c50e9eb4e5cdc4fe0c14814..5c5931dba51d78f01e131aff42e13eb82157a575 100644 (file)
@@ -3210,13 +3210,8 @@ static void ibmvnic_get_ringparam(struct net_device *netdev,
 {
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
-       if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) {
-               ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
-               ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
-       } else {
-               ring->rx_max_pending = IBMVNIC_MAX_QUEUE_SZ;
-               ring->tx_max_pending = IBMVNIC_MAX_QUEUE_SZ;
-       }
+       ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
+       ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
        ring->rx_mini_max_pending = 0;
        ring->rx_jumbo_max_pending = 0;
        ring->rx_pending = adapter->req_rx_add_entries_per_subcrq;
@@ -3231,23 +3226,21 @@ static int ibmvnic_set_ringparam(struct net_device *netdev,
                                 struct netlink_ext_ack *extack)
 {
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-       int ret;
 
-       ret = 0;
+       if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq  ||
+           ring->tx_pending > adapter->max_tx_entries_per_subcrq) {
+               netdev_err(netdev, "Invalid request.\n");
+               netdev_err(netdev, "Max tx buffers = %llu\n",
+                          adapter->max_rx_add_entries_per_subcrq);
+               netdev_err(netdev, "Max rx buffers = %llu\n",
+                          adapter->max_tx_entries_per_subcrq);
+               return -EINVAL;
+       }
+
        adapter->desired.rx_entries = ring->rx_pending;
        adapter->desired.tx_entries = ring->tx_pending;
 
-       ret = wait_for_reset(adapter);
-
-       if (!ret &&
-           (adapter->req_rx_add_entries_per_subcrq != ring->rx_pending ||
-            adapter->req_tx_entries_per_subcrq != ring->tx_pending))
-               netdev_info(netdev,
-                           "Could not match full ringsize request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n",
-                           ring->rx_pending, ring->tx_pending,
-                           adapter->req_rx_add_entries_per_subcrq,
-                           adapter->req_tx_entries_per_subcrq);
-       return ret;
+       return wait_for_reset(adapter);
 }
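
The ibmvnic hunks above replace the "request, then report what you actually got" flow with up-front validation against the device maxima, failing with -EINVAL before any reset is triggered. A sketch of the shape, with hypothetical adapter fields:

#include <linux/errno.h>
#include <linux/ethtool.h>

struct foo_adapter {			/* hypothetical adapter fields */
	u64 max_rx_entries, max_tx_entries;
	u32 desired_rx, desired_tx;
};

static int foo_wait_for_reset(struct foo_adapter *a)
{
	return 0;	/* stands in for the driver's reset-and-wait */
}

static int foo_set_ringparam(struct foo_adapter *a,
			     const struct ethtool_ringparam *ring)
{
	/* Validate before committing anything, so a bad request never
	 * triggers a reset. */
	if (ring->rx_pending > a->max_rx_entries ||
	    ring->tx_pending > a->max_tx_entries)
		return -EINVAL;

	a->desired_rx = ring->rx_pending;
	a->desired_tx = ring->tx_pending;
	return foo_wait_for_reset(a);
}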
 
 static void ibmvnic_get_channels(struct net_device *netdev,
@@ -3255,14 +3248,8 @@ static void ibmvnic_get_channels(struct net_device *netdev,
 {
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
-       if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) {
-               channels->max_rx = adapter->max_rx_queues;
-               channels->max_tx = adapter->max_tx_queues;
-       } else {
-               channels->max_rx = IBMVNIC_MAX_QUEUES;
-               channels->max_tx = IBMVNIC_MAX_QUEUES;
-       }
-
+       channels->max_rx = adapter->max_rx_queues;
+       channels->max_tx = adapter->max_tx_queues;
        channels->max_other = 0;
        channels->max_combined = 0;
        channels->rx_count = adapter->req_rx_queues;
@@ -3275,22 +3262,11 @@ static int ibmvnic_set_channels(struct net_device *netdev,
                                struct ethtool_channels *channels)
 {
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-       int ret;
 
-       ret = 0;
        adapter->desired.rx_queues = channels->rx_count;
        adapter->desired.tx_queues = channels->tx_count;
 
-       ret = wait_for_reset(adapter);
-
-       if (!ret &&
-           (adapter->req_rx_queues != channels->rx_count ||
-            adapter->req_tx_queues != channels->tx_count))
-               netdev_info(netdev,
-                           "Could not match full channels request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n",
-                           channels->rx_count, channels->tx_count,
-                           adapter->req_rx_queues, adapter->req_tx_queues);
-       return ret;
+       return wait_for_reset(adapter);
 }
 
 static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -3298,43 +3274,32 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
        struct ibmvnic_adapter *adapter = netdev_priv(dev);
        int i;
 
-       switch (stringset) {
-       case ETH_SS_STATS:
-               for (i = 0; i < ARRAY_SIZE(ibmvnic_stats);
-                               i++, data += ETH_GSTRING_LEN)
-                       memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
+       if (stringset != ETH_SS_STATS)
+               return;
 
-               for (i = 0; i < adapter->req_tx_queues; i++) {
-                       snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
-                       data += ETH_GSTRING_LEN;
+       for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN)
+               memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
 
-                       snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
-                       data += ETH_GSTRING_LEN;
+       for (i = 0; i < adapter->req_tx_queues; i++) {
+               snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
+               data += ETH_GSTRING_LEN;
 
-                       snprintf(data, ETH_GSTRING_LEN,
-                                "tx%d_dropped_packets", i);
-                       data += ETH_GSTRING_LEN;
-               }
+               snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
+               data += ETH_GSTRING_LEN;
 
-               for (i = 0; i < adapter->req_rx_queues; i++) {
-                       snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i);
-                       data += ETH_GSTRING_LEN;
+               snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i);
+               data += ETH_GSTRING_LEN;
+       }
 
-                       snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i);
-                       data += ETH_GSTRING_LEN;
+       for (i = 0; i < adapter->req_rx_queues; i++) {
+               snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i);
+               data += ETH_GSTRING_LEN;
 
-                       snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i);
-                       data += ETH_GSTRING_LEN;
-               }
-               break;
+               snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i);
+               data += ETH_GSTRING_LEN;
 
-       case ETH_SS_PRIV_FLAGS:
-               for (i = 0; i < ARRAY_SIZE(ibmvnic_priv_flags); i++)
-                       strcpy(data + i * ETH_GSTRING_LEN,
-                              ibmvnic_priv_flags[i]);
-               break;
-       default:
-               return;
+               snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i);
+               data += ETH_GSTRING_LEN;
        }
 }
 
@@ -3347,8 +3312,6 @@ static int ibmvnic_get_sset_count(struct net_device *dev, int sset)
                return ARRAY_SIZE(ibmvnic_stats) +
                       adapter->req_tx_queues * NUM_TX_STATS +
                       adapter->req_rx_queues * NUM_RX_STATS;
-       case ETH_SS_PRIV_FLAGS:
-               return ARRAY_SIZE(ibmvnic_priv_flags);
        default:
                return -EOPNOTSUPP;
        }
@@ -3401,26 +3364,6 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
        }
 }
 
-static u32 ibmvnic_get_priv_flags(struct net_device *netdev)
-{
-       struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-
-       return adapter->priv_flags;
-}
-
-static int ibmvnic_set_priv_flags(struct net_device *netdev, u32 flags)
-{
-       struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-       bool which_maxes = !!(flags & IBMVNIC_USE_SERVER_MAXES);
-
-       if (which_maxes)
-               adapter->priv_flags |= IBMVNIC_USE_SERVER_MAXES;
-       else
-               adapter->priv_flags &= ~IBMVNIC_USE_SERVER_MAXES;
-
-       return 0;
-}
-
 static const struct ethtool_ops ibmvnic_ethtool_ops = {
        .get_drvinfo            = ibmvnic_get_drvinfo,
        .get_msglevel           = ibmvnic_get_msglevel,
@@ -3434,8 +3377,6 @@ static const struct ethtool_ops ibmvnic_ethtool_ops = {
        .get_sset_count         = ibmvnic_get_sset_count,
        .get_ethtool_stats      = ibmvnic_get_ethtool_stats,
        .get_link_ksettings     = ibmvnic_get_link_ksettings,
-       .get_priv_flags         = ibmvnic_get_priv_flags,
-       .set_priv_flags         = ibmvnic_set_priv_flags,
 };
 
 /* Routines for managing CRQs/sCRQs  */
index 8f5cefb932dd1e48717a3b2fc9cb2f8e0b3fc9f4..1310c861bf834e91a09a5303100e587b96a64d31 100644 (file)
 
 #define IBMVNIC_RESET_DELAY 100
 
-static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = {
-#define IBMVNIC_USE_SERVER_MAXES 0x1
-       "use-server-maxes"
-};
-
 struct ibmvnic_login_buffer {
        __be32 len;
        __be32 version;
@@ -883,7 +878,6 @@ struct ibmvnic_adapter {
        struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl;
        dma_addr_t ip_offload_ctrl_tok;
        u32 msg_enable;
-       u32 priv_flags;
 
        /* Vital Product Data (VPD) */
        struct ibmvnic_vpd *vpd;
index d60e2016d03c6116062ca882b2d5142db26cebca..e6c8e6d5234f81d000ce63258a8b31df5d6d9d98 100644 (file)
@@ -1009,8 +1009,8 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)
 {
        u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) |
            link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND;
-       u16 max_ltr_enc_d = 0;  /* maximum LTR decoded by platform */
-       u16 lat_enc_d = 0;      /* latency decoded */
+       u32 max_ltr_enc_d = 0;  /* maximum LTR decoded by platform */
+       u32 lat_enc_d = 0;      /* latency decoded */
        u16 lat_enc = 0;        /* latency encoded */
 
        if (link) {
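
The e1000e hunk above widens the decoded-latency locals from u16 to u32 because the decoded LTR latency is value << (5 * scale) and does not fit in 16 bits; a truncated value can make the max_ltr comparison pass when it should not. A sketch of the decode, assuming the standard PCIe LTR field layout and using u64 to hold the full range:

#include <linux/types.h>

/* Decode an LTR latency field: 10-bit value, 3-bit scale, latency =
 * value << (5 * scale). From scale 2 upward the result already
 * exceeds 16 bits (e.g. 1023 << 15 = 33521664), so a u16 silently
 * truncates it. */
static u64 decode_ltr(u16 enc)
{
	u64 value = enc & 0x3ff;		/* bits 9:0  */
	u32 scale = (enc >> 10) & 0x7;		/* bits 12:10 */

	return value << (5 * scale);
}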
index 6778df2177a114a63114b9709610d3bdddc73778..98871f01499469332b03b38a278e89d1532ccff0 100644 (file)
@@ -7549,42 +7549,43 @@ static void i40e_free_macvlan_channels(struct i40e_vsi *vsi)
 static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev,
                            struct i40e_fwd_adapter *fwd)
 {
+       struct i40e_channel *ch = NULL, *ch_tmp, *iter;
        int ret = 0, num_tc = 1,  i, aq_err;
-       struct i40e_channel *ch, *ch_tmp;
        struct i40e_pf *pf = vsi->back;
        struct i40e_hw *hw = &pf->hw;
 
-       if (list_empty(&vsi->macvlan_list))
-               return -EINVAL;
-
        /* Go through the list and find an available channel */
-       list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
-               if (!i40e_is_channel_macvlan(ch)) {
-                       ch->fwd = fwd;
+       list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) {
+               if (!i40e_is_channel_macvlan(iter)) {
+                       iter->fwd = fwd;
                        /* record configuration for macvlan interface in vdev */
                        for (i = 0; i < num_tc; i++)
                                netdev_bind_sb_channel_queue(vsi->netdev, vdev,
                                                             i,
-                                                            ch->num_queue_pairs,
-                                                            ch->base_queue);
-                       for (i = 0; i < ch->num_queue_pairs; i++) {
+                                                            iter->num_queue_pairs,
+                                                            iter->base_queue);
+                       for (i = 0; i < iter->num_queue_pairs; i++) {
                                struct i40e_ring *tx_ring, *rx_ring;
                                u16 pf_q;
 
-                               pf_q = ch->base_queue + i;
+                               pf_q = iter->base_queue + i;
 
                                /* Get to TX ring ptr */
                                tx_ring = vsi->tx_rings[pf_q];
-                               tx_ring->ch = ch;
+                               tx_ring->ch = iter;
 
                                /* Get the RX ring ptr */
                                rx_ring = vsi->rx_rings[pf_q];
-                               rx_ring->ch = ch;
+                               rx_ring->ch = iter;
                        }
+                       ch = iter;
                        break;
                }
        }
 
+       if (!ch)
+               return -EINVAL;
+
        /* Guarantee all rings are updated before we update the
         * MAC address filter.
         */
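
The i40e hunk above follows the list-iterator rule: the cursor of list_for_each_entry_safe() is never meaningful after the loop ends, so the match is recorded in a separate pointer that stays NULL when nothing is found. A sketch:

#include <linux/list.h>
#include <linux/types.h>

struct chan {			/* hypothetical channel entry */
	struct list_head list;
	bool busy;
};

static struct chan *find_free_chan(struct list_head *head)
{
	struct chan *found = NULL, *iter;

	list_for_each_entry(iter, head, list) {
		if (!iter->busy) {
			found = iter;	/* record the match */
			break;
		}
	}
	/* NULL means "nothing free"; the cursor itself is never a
	 * valid pointer once the list has been exhausted. */
	return found;
}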
index 190590d32fafca1069c75533edf234d4de23bf97..7dfcf78b57fb54dbe67a69ecf1ac8be9acf8b339 100644 (file)
@@ -2871,7 +2871,6 @@ continue_reset:
        running = adapter->state == __IAVF_RUNNING;
 
        if (running) {
-               netdev->flags &= ~IFF_UP;
                netif_carrier_off(netdev);
                netif_tx_stop_all_queues(netdev);
                adapter->link_up = false;
@@ -2988,7 +2987,7 @@ continue_reset:
                 * to __IAVF_RUNNING
                 */
                iavf_up_complete(adapter);
-               netdev->flags |= IFF_UP;
+
                iavf_irq_enable(adapter, true);
        } else {
                iavf_change_state(adapter, __IAVF_DOWN);
@@ -3004,10 +3003,8 @@ continue_reset:
 reset_err:
        mutex_unlock(&adapter->client_lock);
        mutex_unlock(&adapter->crit_lock);
-       if (running) {
+       if (running)
                iavf_change_state(adapter, __IAVF_RUNNING);
-               netdev->flags |= IFF_UP;
-       }
        dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
        iavf_close(netdev);
 }
index d4f1874df7d0b168c18816e1cd8d3d2ef53ed712..a895e3a8e988c842f47e55341fa97f07f5c2d81e 100644 (file)
@@ -301,7 +301,6 @@ enum ice_vsi_state {
        ICE_VSI_NETDEV_REGISTERED,
        ICE_VSI_UMAC_FLTR_CHANGED,
        ICE_VSI_MMAC_FLTR_CHANGED,
-       ICE_VSI_VLAN_FLTR_CHANGED,
        ICE_VSI_PROMISC_CHANGED,
        ICE_VSI_STATE_NBITS             /* must be last */
 };
@@ -541,6 +540,7 @@ struct ice_pf {
        struct mutex avail_q_mutex;     /* protects access to avail_[rx|tx]qs */
        struct mutex sw_mutex;          /* lock for protecting VSI alloc flow */
        struct mutex tc_mutex;          /* lock to protect TC changes */
+       struct mutex adev_mutex;        /* lock to protect aux device access */
        u32 msg_enable;
        struct ice_ptp ptp;
        struct tty_driver *ice_gnss_tty_driver;
@@ -672,7 +672,7 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
 
 static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
 {
-       return !!vsi->xdp_prog;
+       return !!READ_ONCE(vsi->xdp_prog);
 }
 
 static inline void ice_set_ring_xdp(struct ice_tx_ring *ring)
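
The ice.h hunk above wraps the xdp_prog test in READ_ONCE() because the pointer is written from another context; marked accesses keep the compiler from tearing or re-reading it. A sketch of the pairing, with hypothetical fields:

#include <linux/compiler.h>
#include <linux/types.h>

struct bpf_prog;		/* opaque here */

struct foo_vsi {		/* hypothetical fields */
	struct bpf_prog *xdp_prog;
};

static bool foo_xdp_enabled(const struct foo_vsi *v)
{
	return !!READ_ONCE(v->xdp_prog);
}

static void foo_set_xdp(struct foo_vsi *v, struct bpf_prog *prog)
{
	WRITE_ONCE(v->xdp_prog, prog);	/* pairs with READ_ONCE() above */
}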
index 5daade32ea62585c1219568677f82d0cfda39d11..fba178e0760097f70acbcb5288a9d6be38fe4e7d 100644 (file)
@@ -577,7 +577,7 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi)
 {
        struct net_device *netdev;
 
-       if (!vsi || vsi->type != ICE_VSI_PF || !vsi->arfs_fltr_list)
+       if (!vsi || vsi->type != ICE_VSI_PF)
                return;
 
        netdev = vsi->netdev;
@@ -599,7 +599,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
        int base_idx, i;
 
        if (!vsi || vsi->type != ICE_VSI_PF)
-               return -EINVAL;
+               return 0;
 
        pf = vsi->back;
        netdev = vsi->netdev;
@@ -636,7 +636,6 @@ void ice_remove_arfs(struct ice_pf *pf)
        if (!pf_vsi)
                return;
 
-       ice_free_cpu_rx_rmap(pf_vsi);
        ice_clear_arfs(pf_vsi);
 }
 
@@ -653,9 +652,5 @@ void ice_rebuild_arfs(struct ice_pf *pf)
                return;
 
        ice_remove_arfs(pf);
-       if (ice_set_cpu_rx_rmap(pf_vsi)) {
-               dev_err(ice_pf_to_dev(pf), "Failed to rebuild aRFS\n");
-               return;
-       }
        ice_init_arfs(pf_vsi);
 }
index 9a84d746a6c4ed9786c372770a4e66189f96dd78..6a463b242c7df2c93eb57cddc8aec5b1d10049ad 100644 (file)
@@ -361,7 +361,8 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev)
        np = netdev_priv(netdev);
        vsi = np->vsi;
 
-       if (ice_is_reset_in_progress(vsi->back->state))
+       if (ice_is_reset_in_progress(vsi->back->state) ||
+           test_bit(ICE_VF_DIS, vsi->back->state))
                return NETDEV_TX_BUSY;
 
        repr = ice_netdev_to_repr(netdev);
index bd58d9d2e56537993ea06e2df6bd72875d31bd2b..6a413331572b6a95e663d5820416e390e0c91539 100644 (file)
@@ -52,7 +52,7 @@ static inline void ice_eswitch_update_repr(struct ice_vsi *vsi) { }
 
 static inline int ice_eswitch_configure(struct ice_pf *pf)
 {
-       return -EOPNOTSUPP;
+       return 0;
 }
 
 static inline int ice_eswitch_rebuild(struct ice_pf *pf)
index af57eb114966309902cddc7db90d4056ad28e626..85a94483c2edca248436ed2bbc0674478c0ddc88 100644 (file)
@@ -58,7 +58,16 @@ int
 ice_fltr_set_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi,
                              u8 promisc_mask)
 {
-       return ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, false);
+       struct ice_pf *pf = hw->back;
+       int result;
+
+       result = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, false);
+       if (result)
+               dev_err(ice_pf_to_dev(pf),
+                       "Error setting promisc mode on VSI %i (rc=%d)\n",
+                       vsi->vsi_num, result);
+
+       return result;
 }
 
 /**
@@ -73,7 +82,16 @@ int
 ice_fltr_clear_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi,
                                u8 promisc_mask)
 {
-       return ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, true);
+       struct ice_pf *pf = hw->back;
+       int result;
+
+       result = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, true);
+       if (result)
+               dev_err(ice_pf_to_dev(pf),
+                       "Error clearing promisc mode on VSI %i (rc=%d)\n",
+                       vsi->vsi_num, result);
+
+       return result;
 }
 
 /**
@@ -87,7 +105,16 @@ int
 ice_fltr_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
                           u16 vid)
 {
-       return ice_clear_vsi_promisc(hw, vsi_handle, promisc_mask, vid);
+       struct ice_pf *pf = hw->back;
+       int result;
+
+       result = ice_clear_vsi_promisc(hw, vsi_handle, promisc_mask, vid);
+       if (result)
+               dev_err(ice_pf_to_dev(pf),
+                       "Error clearing promisc mode on VSI %i for VID %u (rc=%d)\n",
+                       ice_get_hw_vsi_num(hw, vsi_handle), vid, result);
+
+       return result;
 }
 
 /**
@@ -101,7 +128,16 @@ int
 ice_fltr_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
                         u16 vid)
 {
-       return ice_set_vsi_promisc(hw, vsi_handle, promisc_mask, vid);
+       struct ice_pf *pf = hw->back;
+       int result;
+
+       result = ice_set_vsi_promisc(hw, vsi_handle, promisc_mask, vid);
+       if (result)
+               dev_err(ice_pf_to_dev(pf),
+                       "Error setting promisc mode on VSI %i for VID %u (rc=%d)\n",
+                       ice_get_hw_vsi_num(hw, vsi_handle), vid, result);
+
+       return result;
 }
 
 /**
index 25a436d342c29094e2f72410d30737ae36af6753..3e3b2ed4cd5d9ec50ba46f5c14f18702505a9bcb 100644 (file)
@@ -37,14 +37,17 @@ void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event)
        if (WARN_ON_ONCE(!in_task()))
                return;
 
+       mutex_lock(&pf->adev_mutex);
        if (!pf->adev)
-               return;
+               goto finish;
 
        device_lock(&pf->adev->dev);
        iadrv = ice_get_auxiliary_drv(pf);
        if (iadrv && iadrv->event_handler)
                iadrv->event_handler(pf, event);
        device_unlock(&pf->adev->dev);
+finish:
+       mutex_unlock(&pf->adev_mutex);
 }
 
 /**
@@ -290,7 +293,6 @@ int ice_plug_aux_dev(struct ice_pf *pf)
                return -ENOMEM;
 
        adev = &iadev->adev;
-       pf->adev = adev;
        iadev->pf = pf;
 
        adev->id = pf->aux_idx;
@@ -300,18 +302,20 @@ int ice_plug_aux_dev(struct ice_pf *pf)
 
        ret = auxiliary_device_init(adev);
        if (ret) {
-               pf->adev = NULL;
                kfree(iadev);
                return ret;
        }
 
        ret = auxiliary_device_add(adev);
        if (ret) {
-               pf->adev = NULL;
                auxiliary_device_uninit(adev);
                return ret;
        }
 
+       mutex_lock(&pf->adev_mutex);
+       pf->adev = adev;
+       mutex_unlock(&pf->adev_mutex);
+
        return 0;
 }
 
@@ -320,12 +324,17 @@ int ice_plug_aux_dev(struct ice_pf *pf)
  */
 void ice_unplug_aux_dev(struct ice_pf *pf)
 {
-       if (!pf->adev)
-               return;
+       struct auxiliary_device *adev;
 
-       auxiliary_device_delete(pf->adev);
-       auxiliary_device_uninit(pf->adev);
+       mutex_lock(&pf->adev_mutex);
+       adev = pf->adev;
        pf->adev = NULL;
+       mutex_unlock(&pf->adev_mutex);
+
+       if (adev) {
+               auxiliary_device_delete(adev);
+               auxiliary_device_uninit(adev);
+       }
 }
 
 /**
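
The ice IDC hunks above publish pf->adev only after the auxiliary device is fully added, and clear the pointer under adev_mutex before tearing the device down, so event delivery never sees a half-constructed or dying device. A sketch of the publish/unpublish pattern (hypothetical globals):

#include <linux/auxiliary_bus.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(adev_mutex);	/* hypothetical stand-ins */
static struct auxiliary_device *published_adev;

static void foo_publish(struct auxiliary_device *adev)
{
	/* Called only after auxiliary_device_add() succeeded, so
	 * readers never see a half-initialized device. */
	mutex_lock(&adev_mutex);
	published_adev = adev;
	mutex_unlock(&adev_mutex);
}

static void foo_unplug(void)
{
	struct auxiliary_device *adev;

	mutex_lock(&adev_mutex);
	adev = published_adev;
	published_adev = NULL;		/* hide it before destroying it */
	mutex_unlock(&adev_mutex);

	if (adev) {
		auxiliary_device_delete(adev);
		auxiliary_device_uninit(adev);
	}
}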
index b897926f817d191b83e7a75a180358fb70de50d8..454e01ae09b970638c93243a9a1d8c9a5cec2696 100644 (file)
@@ -1480,6 +1480,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
                ring->tx_tstamps = &pf->ptp.port.tx;
                ring->dev = dev;
                ring->count = vsi->num_tx_desc;
+               ring->txq_teid = ICE_INVAL_TEID;
                if (dvm_ena)
                        ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG2;
                else
@@ -2688,6 +2689,8 @@ void ice_vsi_free_irq(struct ice_vsi *vsi)
                return;
 
        vsi->irqs_ready = false;
+       ice_free_cpu_rx_rmap(vsi);
+
        ice_for_each_q_vector(vsi, i) {
                u16 vector = i + base;
                int irq_num;
@@ -2701,7 +2704,8 @@ void ice_vsi_free_irq(struct ice_vsi *vsi)
                        continue;
 
                /* clear the affinity notifier in the IRQ descriptor */
-               irq_set_affinity_notifier(irq_num, NULL);
+               if (!IS_ENABLED(CONFIG_RFS_ACCEL))
+                       irq_set_affinity_notifier(irq_num, NULL);
 
                /* clear the affinity_mask in the IRQ descriptor */
                irq_set_affinity_hint(irq_num, NULL);
@@ -2983,6 +2987,8 @@ int ice_vsi_release(struct ice_vsi *vsi)
                }
        }
 
+       if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi))
+               ice_clear_dflt_vsi(pf->first_sw);
        ice_fltr_remove_all(vsi);
        ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
        err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx);
@@ -3037,8 +3043,8 @@ ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi,
        ice_for_each_q_vector(vsi, i) {
                struct ice_q_vector *q_vector = vsi->q_vectors[i];
 
-               coalesce[i].itr_tx = q_vector->tx.itr_setting;
-               coalesce[i].itr_rx = q_vector->rx.itr_setting;
+               coalesce[i].itr_tx = q_vector->tx.itr_settings;
+               coalesce[i].itr_rx = q_vector->rx.itr_settings;
                coalesce[i].intrl = q_vector->intrl;
 
                if (i < vsi->num_txq)
@@ -3094,21 +3100,21 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
                 */
                if (i < vsi->alloc_rxq && coalesce[i].rx_valid) {
                        rc = &vsi->q_vectors[i]->rx;
-                       rc->itr_setting = coalesce[i].itr_rx;
+                       rc->itr_settings = coalesce[i].itr_rx;
                        ice_write_itr(rc, rc->itr_setting);
                } else if (i < vsi->alloc_rxq) {
                        rc = &vsi->q_vectors[i]->rx;
-                       rc->itr_setting = coalesce[0].itr_rx;
+                       rc->itr_settings = coalesce[0].itr_rx;
                        ice_write_itr(rc, rc->itr_setting);
                }
 
                if (i < vsi->alloc_txq && coalesce[i].tx_valid) {
                        rc = &vsi->q_vectors[i]->tx;
-                       rc->itr_setting = coalesce[i].itr_tx;
+                       rc->itr_settings = coalesce[i].itr_tx;
                        ice_write_itr(rc, rc->itr_setting);
                } else if (i < vsi->alloc_txq) {
                        rc = &vsi->q_vectors[i]->tx;
-                       rc->itr_setting = coalesce[0].itr_tx;
+                       rc->itr_settings = coalesce[0].itr_tx;
                        ice_write_itr(rc, rc->itr_setting);
                }
 
@@ -3122,12 +3128,12 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
        for (; i < vsi->num_q_vectors; i++) {
                /* transmit */
                rc = &vsi->q_vectors[i]->tx;
-               rc->itr_setting = coalesce[0].itr_tx;
+               rc->itr_settings = coalesce[0].itr_tx;
                ice_write_itr(rc, rc->itr_setting);
 
                /* receive */
                rc = &vsi->q_vectors[i]->rx;
-               rc->itr_setting = coalesce[0].itr_rx;
+               rc->itr_settings = coalesce[0].itr_rx;
                ice_write_itr(rc, rc->itr_setting);
 
                vsi->q_vectors[i]->intrl = coalesce[0].intrl;
index b588d79956310d872eedb4906a7ccabeca1d73ca..963a5f40e071b5bb68e15e357e500fc54948051d 100644 (file)
@@ -243,8 +243,7 @@ static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
 static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
 {
        return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) ||
-              test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state) ||
-              test_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
+              test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
 }
 
 /**
@@ -260,10 +259,15 @@ static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m)
        if (vsi->type != ICE_VSI_PF)
                return 0;
 
-       if (ice_vsi_has_non_zero_vlans(vsi))
-               status = ice_fltr_set_vlan_vsi_promisc(&vsi->back->hw, vsi, promisc_m);
-       else
-               status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, 0);
+       if (ice_vsi_has_non_zero_vlans(vsi)) {
+               promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
+               status = ice_fltr_set_vlan_vsi_promisc(&vsi->back->hw, vsi,
+                                                      promisc_m);
+       } else {
+               status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
+                                                 promisc_m, 0);
+       }
+
        return status;
 }
 
@@ -280,10 +284,15 @@ static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
        if (vsi->type != ICE_VSI_PF)
                return 0;
 
-       if (ice_vsi_has_non_zero_vlans(vsi))
-               status = ice_fltr_clear_vlan_vsi_promisc(&vsi->back->hw, vsi, promisc_m);
-       else
-               status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, 0);
+       if (ice_vsi_has_non_zero_vlans(vsi)) {
+               promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
+               status = ice_fltr_clear_vlan_vsi_promisc(&vsi->back->hw, vsi,
+                                                        promisc_m);
+       } else {
+               status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+                                                   promisc_m, 0);
+       }
+
        return status;
 }
 
@@ -302,7 +311,6 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
        struct ice_pf *pf = vsi->back;
        struct ice_hw *hw = &pf->hw;
        u32 changed_flags = 0;
-       u8 promisc_m;
        int err;
 
        if (!vsi->netdev)
@@ -320,7 +328,6 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
        if (ice_vsi_fltr_changed(vsi)) {
                clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
                clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
-               clear_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
 
                /* grab the netdev's addr_list_lock */
                netif_addr_lock_bh(netdev);
@@ -369,29 +376,15 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
        /* check for changes in promiscuous modes */
        if (changed_flags & IFF_ALLMULTI) {
                if (vsi->current_netdev_flags & IFF_ALLMULTI) {
-                       if (ice_vsi_has_non_zero_vlans(vsi))
-                               promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
-                       else
-                               promisc_m = ICE_MCAST_PROMISC_BITS;
-
-                       err = ice_set_promisc(vsi, promisc_m);
+                       err = ice_set_promisc(vsi, ICE_MCAST_PROMISC_BITS);
                        if (err) {
-                               netdev_err(netdev, "Error setting Multicast promiscuous mode on VSI %i\n",
-                                          vsi->vsi_num);
                                vsi->current_netdev_flags &= ~IFF_ALLMULTI;
                                goto out_promisc;
                        }
                } else {
                        /* !(vsi->current_netdev_flags & IFF_ALLMULTI) */
-                       if (ice_vsi_has_non_zero_vlans(vsi))
-                               promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
-                       else
-                               promisc_m = ICE_MCAST_PROMISC_BITS;
-
-                       err = ice_clear_promisc(vsi, promisc_m);
+                       err = ice_clear_promisc(vsi, ICE_MCAST_PROMISC_BITS);
                        if (err) {
-                               netdev_err(netdev, "Error clearing Multicast promiscuous mode on VSI %i\n",
-                                          vsi->vsi_num);
                                vsi->current_netdev_flags |= IFF_ALLMULTI;
                                goto out_promisc;
                        }
@@ -2517,6 +2510,13 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
                irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
        }
 
+       err = ice_set_cpu_rx_rmap(vsi);
+       if (err) {
+               netdev_err(vsi->netdev, "Failed to setup CPU RMAP on VSI %u: %pe\n",
+                          vsi->vsi_num, ERR_PTR(err));
+               goto free_q_irqs;
+       }
+
        vsi->irqs_ready = true;
        return 0;
 
@@ -2569,7 +2569,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
                spin_lock_init(&xdp_ring->tx_lock);
                for (j = 0; j < xdp_ring->count; j++) {
                        tx_desc = ICE_TX_DESC(xdp_ring, j);
-                       tx_desc->cmd_type_offset_bsz = cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE);
+                       tx_desc->cmd_type_offset_bsz = 0;
                }
        }
 
@@ -2765,8 +2765,10 @@ free_qmap:
 
        ice_for_each_xdp_txq(vsi, i)
                if (vsi->xdp_rings[i]) {
-                       if (vsi->xdp_rings[i]->desc)
+                       if (vsi->xdp_rings[i]->desc) {
+                               synchronize_rcu();
                                ice_free_tx_ring(vsi->xdp_rings[i]);
+                       }
                        kfree_rcu(vsi->xdp_rings[i], rcu);
                        vsi->xdp_rings[i] = NULL;
                }
@@ -3488,6 +3490,20 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
        if (!vid)
                return 0;
 
+       while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
+               usleep_range(1000, 2000);
+
+       /* Add multicast promisc rule for the VLAN ID to be added if
+        * all-multicast is currently enabled.
+        */
+       if (vsi->current_netdev_flags & IFF_ALLMULTI) {
+               ret = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
+                                              ICE_MCAST_VLAN_PROMISC_BITS,
+                                              vid);
+               if (ret)
+                       goto finish;
+       }
+
        vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
 
        /* Add a switch rule for this VLAN ID so its corresponding VLAN tagged
@@ -3495,8 +3511,23 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
         */
        vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
        ret = vlan_ops->add_vlan(vsi, &vlan);
-       if (!ret)
-               set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
+       if (ret)
+               goto finish;
+
+       /* If all-multicast is currently enabled and this VLAN ID is only one
+        * besides VLAN-0 we have to update look-up type of multicast promisc
+        * rule for VLAN-0 from ICE_SW_LKUP_PROMISC to ICE_SW_LKUP_PROMISC_VLAN.
+        */
+       if ((vsi->current_netdev_flags & IFF_ALLMULTI) &&
+           ice_vsi_num_non_zero_vlans(vsi) == 1) {
+               ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+                                          ICE_MCAST_PROMISC_BITS, 0);
+               ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
+                                        ICE_MCAST_VLAN_PROMISC_BITS, 0);
+       }
+
+finish:
+       clear_bit(ICE_CFG_BUSY, vsi->state);
 
        return ret;
 }
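
The VLAN hunks above serialize ice_vlan_rx_add_vid()/ice_vlan_rx_kill_vid() against the filter-sync path with an ICE_CFG_BUSY bit taken via test_and_set_bit(). A sketch of that bit-lock idiom (hypothetical state word and callback):

#include <linux/bitops.h>
#include <linux/delay.h>

#define CFG_BUSY 0		/* stands in for ICE_CFG_BUSY */

static int with_cfg_lock(unsigned long *state, int (*op)(void *), void *arg)
{
	int ret;

	/* test_and_set_bit() is atomic, so only one caller at a time
	 * proceeds; everyone else sleeps briefly and retries. */
	while (test_and_set_bit(CFG_BUSY, state))
		usleep_range(1000, 2000);

	ret = op(arg);

	clear_bit(CFG_BUSY, state);
	return ret;
}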
@@ -3522,6 +3553,9 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
        if (!vid)
                return 0;
 
+       while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
+               usleep_range(1000, 2000);
+
        vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
 
        /* Make sure VLAN delete is successful before updating VLAN
@@ -3530,10 +3564,33 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
        vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
        ret = vlan_ops->del_vlan(vsi, &vlan);
        if (ret)
-               return ret;
+               goto finish;
 
-       set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
-       return 0;
+       /* Remove multicast promisc rule for the removed VLAN ID if
+        * all-multicast is enabled.
+        */
+       if (vsi->current_netdev_flags & IFF_ALLMULTI)
+               ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+                                          ICE_MCAST_VLAN_PROMISC_BITS, vid);
+
+       if (!ice_vsi_has_non_zero_vlans(vsi)) {
+               /* Update look-up type of multicast promisc rule for VLAN 0
+                * from ICE_SW_LKUP_PROMISC_VLAN to ICE_SW_LKUP_PROMISC when
+                * all-multicast is enabled and VLAN 0 is the only VLAN rule.
+                */
+               if (vsi->current_netdev_flags & IFF_ALLMULTI) {
+                       ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+                                                  ICE_MCAST_VLAN_PROMISC_BITS,
+                                                  0);
+                       ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
+                                                ICE_MCAST_PROMISC_BITS, 0);
+               }
+       }
+
+finish:
+       clear_bit(ICE_CFG_BUSY, vsi->state);
+
+       return ret;
 }
 
 /**
@@ -3642,20 +3699,12 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
         */
        ice_napi_add(vsi);
 
-       status = ice_set_cpu_rx_rmap(vsi);
-       if (status) {
-               dev_err(dev, "Failed to set CPU Rx map VSI %d error %d\n",
-                       vsi->vsi_num, status);
-               goto unroll_napi_add;
-       }
        status = ice_init_mac_fltr(pf);
        if (status)
-               goto free_cpu_rx_map;
+               goto unroll_napi_add;
 
        return 0;
 
-free_cpu_rx_map:
-       ice_free_cpu_rx_rmap(vsi);
 unroll_napi_add:
        ice_tc_indir_block_unregister(vsi);
 unroll_cfg_netdev:
@@ -3720,6 +3769,7 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf)
 static void ice_deinit_pf(struct ice_pf *pf)
 {
        ice_service_task_stop(pf);
+       mutex_destroy(&pf->adev_mutex);
        mutex_destroy(&pf->sw_mutex);
        mutex_destroy(&pf->tc_mutex);
        mutex_destroy(&pf->avail_q_mutex);
@@ -3798,6 +3848,7 @@ static int ice_init_pf(struct ice_pf *pf)
 
        mutex_init(&pf->sw_mutex);
        mutex_init(&pf->tc_mutex);
+       mutex_init(&pf->adev_mutex);
 
        INIT_HLIST_HEAD(&pf->aq_wait_list);
        spin_lock_init(&pf->aq_wait_lock);
@@ -5117,7 +5168,6 @@ static int __maybe_unused ice_suspend(struct device *dev)
                        continue;
                ice_vsi_free_q_vectors(pf->vsi[v]);
        }
-       ice_free_cpu_rx_rmap(ice_get_main_vsi(pf));
        ice_clear_interrupt_scheme(pf);
 
        pci_save_state(pdev);
@@ -5475,16 +5525,19 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi)
 
        /* Add filter for new MAC. If filter exists, return success */
        err = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI);
-       if (err == -EEXIST)
+       if (err == -EEXIST) {
                /* Although this MAC filter is already present in hardware it's
                 * possible in some cases (e.g. bonding) that dev_addr was
                 * modified outside of the driver and needs to be restored back
                 * to this value.
                 */
                netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac);
-       else if (err)
+
+               return 0;
+       } else if (err) {
                /* error if the new filter addition failed */
                err = -EADDRNOTAVAIL;
+       }
 
 err_update_filters:
        if (err) {
@@ -6119,9 +6172,10 @@ static int ice_up_complete(struct ice_vsi *vsi)
                        ice_ptp_link_change(pf, pf->hw.pf_id, true);
        }
 
-       /* clear this now, and the first stats read will be used as baseline */
-       vsi->stat_offsets_loaded = false;
-
+       /* Perform an initial read of the statistics registers now to
+        * set the baseline so counters are ready when the interface is up.
+        */
+       ice_update_eth_stats(vsi);
        ice_service_task_schedule(pf);
 
        return 0;
@@ -6878,12 +6932,15 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 
        dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type);
 
+#define ICE_EMP_RESET_SLEEP_MS 5000
        if (reset_type == ICE_RESET_EMPR) {
                /* If an EMP reset has occurred, any previously pending flash
                 * update will have completed. We no longer know whether or
                 * not the NVM update EMP reset is restricted.
                 */
                pf->fw_emp_reset_disabled = false;
+
+               msleep(ICE_EMP_RESET_SLEEP_MS);
        }
 
        err = ice_init_all_ctrlq(hw);
index 4eb0599714f4348d5e7b84314f5902d8a76543dd..13cdb5ea594d2f2e22650f397441ed7916e47952 100644 (file)
@@ -641,6 +641,7 @@ ice_get_orom_civd_data(struct ice_hw *hw, enum ice_bank_select bank,
        status = ice_read_flash_module(hw, bank, ICE_SR_1ST_OROM_BANK_PTR, 0,
                                       orom_data, hw->flash.banks.orom_size);
        if (status) {
+               vfree(orom_data);
                ice_debug(hw, ICE_DBG_NVM, "Unable to read Option ROM data\n");
                return status;
        }
index a1cd33273ca49e1fbba6159cf2b127b55c224338..662947c882e8b1c84439eec7310a55c4893395ea 100644 (file)
@@ -500,12 +500,19 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
  * This function must be called periodically to ensure that the cached value
  * is never more than 2 seconds old. It must also be called whenever the PHC
  * time has been changed.
+ *
+ * Return:
+ * * 0 - OK, successfully updated
+ * * -EAGAIN - PF was busy, need to reschedule the update
  */
-static void ice_ptp_update_cached_phctime(struct ice_pf *pf)
+static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
 {
        u64 systime;
        int i;
 
+       if (test_and_set_bit(ICE_CFG_BUSY, pf->state))
+               return -EAGAIN;
+
        /* Read the current PHC time */
        systime = ice_ptp_read_src_clk_reg(pf, NULL);
 
@@ -528,6 +535,9 @@ static void ice_ptp_update_cached_phctime(struct ice_pf *pf)
                        WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
                }
        }
+       clear_bit(ICE_CFG_BUSY, pf->state);
+
+       return 0;
 }
 
 /**
@@ -2287,6 +2297,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
 
 /**
  * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped
+ * @hw: pointer to the hw struct
  * @tx: PTP Tx tracker to clean up
  *
  * Loop through the Tx timestamp requests and see if any of them have been
@@ -2295,7 +2306,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
  * timestamp will never be captured. This might happen if the packet gets
  * discarded before it reaches the PHY timestamping block.
  */
-static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx)
+static void ice_ptp_tx_tstamp_cleanup(struct ice_hw *hw, struct ice_ptp_tx *tx)
 {
        u8 idx;
 
@@ -2304,11 +2315,16 @@ static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx)
 
        for_each_set_bit(idx, tx->in_use, tx->len) {
                struct sk_buff *skb;
+               u64 raw_tstamp;
 
                /* Check if this SKB has been waiting for too long */
                if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ))
                        continue;
 
+               /* Read tstamp to be able to use this register again */
+               ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset,
+                                   &raw_tstamp);
+
                spin_lock(&tx->lock);
                skb = tx->tstamps[idx].skb;
                tx->tstamps[idx].skb = NULL;
@@ -2324,17 +2340,18 @@ static void ice_ptp_periodic_work(struct kthread_work *work)
 {
        struct ice_ptp *ptp = container_of(work, struct ice_ptp, work.work);
        struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp);
+       int err;
 
        if (!test_bit(ICE_FLAG_PTP, pf->flags))
                return;
 
-       ice_ptp_update_cached_phctime(pf);
+       err = ice_ptp_update_cached_phctime(pf);
 
-       ice_ptp_tx_tstamp_cleanup(&pf->ptp.port.tx);
+       ice_ptp_tx_tstamp_cleanup(&pf->hw, &pf->ptp.port.tx);
 
-       /* Run twice a second */
+       /* Run twice a second or reschedule if phc update failed */
        kthread_queue_delayed_work(ptp->kworker, &ptp->work,
-                                  msecs_to_jiffies(500));
+                                  msecs_to_jiffies(err ? 10 : 500));
 }
 
 /**
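
The two PTP hunks above make the cached-PHC update fallible (it bails with -EAGAIN when the ICE_CFG_BUSY bit is held) and reschedule the periodic work much sooner after a miss. A sketch of that busy-skip-and-fast-retry loop in plain C11 (all names hypothetical):

#include <stdatomic.h>
#include <stdbool.h>
#include <unistd.h>

static atomic_bool cfg_busy;

/* Skip the update when someone else holds the config bit (-EAGAIN analogue). */
static int update_cached_phc_time(void)
{
        if (atomic_exchange(&cfg_busy, true))
                return -1;
        /* ... read the PHC, update per-ring cached timestamps ... */
        atomic_store(&cfg_busy, false);
        return 0;
}

int main(void)
{
        for (int i = 0; i < 10; i++) {
                int err = update_cached_phc_time();
                /* run twice a second normally; retry in 10 ms after a busy miss */
                usleep((err ? 10 : 500) * 1000);
        }
        return 0;
}
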
index 8915a9d39e36264bb5e70a9581eec357b6b11245..0c438219f7a39856623a00d116d5cdec351fbda2 100644 (file)
@@ -1046,8 +1046,8 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
 
        if (!num_vfs) {
                if (!pci_vfs_assigned(pdev)) {
-                       ice_mbx_deinit_snapshot(&pf->hw);
                        ice_free_vfs(pf);
+                       ice_mbx_deinit_snapshot(&pf->hw);
                        if (pf->lag)
                                ice_enable_lag(pf->lag);
                        return 0;
index cead3eb149bd5eb4b7bc21a6ca748fb0cbc9caf8..ffb3f6a589da4ce201907521a2b8eb792375387d 100644 (file)
@@ -384,9 +384,14 @@ struct ice_ring_container {
        /* this matches the maximum number of ITR bits, but in usec
         * values, so it is shifted left one bit (bit zero is ignored)
         */
-       u16 itr_setting:13;
-       u16 itr_reserved:2;
-       u16 itr_mode:1;
+       union {
+               struct {
+                       u16 itr_setting:13;
+                       u16 itr_reserved:2;
+                       u16 itr_mode:1;
+               };
+               u16 itr_settings;
+       };
        enum ice_container_type type;
 };
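
The anonymous union added above lets callers snapshot and restore all three ITR fields through the single itr_settings view instead of copying each bitfield. A standalone illustration (a sketch: uint16_t bitfields and their layout are implementation-defined, which is acceptable here because save and restore happen under the same ABI):

#include <stdint.h>
#include <stdio.h>

struct ring_container {
        union {
                struct {
                        uint16_t itr_setting : 13;
                        uint16_t itr_reserved : 2;
                        uint16_t itr_mode : 1;
                };
                uint16_t itr_settings;  /* whole-register view */
        };
};

int main(void)
{
        struct ring_container rc = { .itr_settings = 0 };

        rc.itr_setting = 50;
        rc.itr_mode = 1;

        uint16_t saved = rc.itr_settings;   /* snapshot everything at once */
        rc.itr_settings = 0;                /* e.g. reset during reconfigure */
        rc.itr_settings = saved;            /* restore in one 16-bit store */

        printf("setting=%u mode=%u\n",
               (unsigned)rc.itr_setting, (unsigned)rc.itr_mode);
        return 0;
}
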
 
index 3f1a63815bac97601cb8d47bf81da546b0946def..2889e050a4c9384e0e0a1f814c25a1808ab23d6b 100644 (file)
@@ -1307,13 +1307,52 @@ error_param:
                                     NULL, 0);
 }
 
+/**
+ * ice_vf_vsi_dis_single_txq - disable a single Tx queue
+ * @vf: VF to disable queue for
+ * @vsi: VSI for the VF
+ * @q_id: VF relative (0-based) queue ID
+ *
+ * Attempt to disable the Tx queue passed in. If the Tx queue was successfully
+ * disabled, clear the q_id bit in the enabled-queues bitmap and return
+ * success. Otherwise return an error.
+ */
+static int
+ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id)
+{
+       struct ice_txq_meta txq_meta = { 0 };
+       struct ice_tx_ring *ring;
+       int err;
+
+       if (!test_bit(q_id, vf->txq_ena))
+               dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n",
+                       q_id, vsi->vsi_num);
+
+       ring = vsi->tx_rings[q_id];
+       if (!ring)
+               return -EINVAL;
+
+       ice_fill_txq_meta(vsi, ring, &txq_meta);
+
+       err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta);
+       if (err) {
+               dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n",
+                       q_id, vsi->vsi_num);
+               return err;
+       }
+
+       /* Clear enabled queues flag */
+       clear_bit(q_id, vf->txq_ena);
+
+       return 0;
+}
+
 /**
  * ice_vc_dis_qs_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
  *
- * called from the VF to disable all or specific
- * queue(s)
+ * called from the VF to disable all or specific queue(s)
  */
 static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
 {
@@ -1350,30 +1389,15 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
                q_map = vqs->tx_queues;
 
                for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
-                       struct ice_tx_ring *ring = vsi->tx_rings[vf_q_id];
-                       struct ice_txq_meta txq_meta = { 0 };
-
                        if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
                                v_ret = VIRTCHNL_STATUS_ERR_PARAM;
                                goto error_param;
                        }
 
-                       /* Skip queue if not enabled */
-                       if (!test_bit(vf_q_id, vf->txq_ena))
-                               continue;
-
-                       ice_fill_txq_meta(vsi, ring, &txq_meta);
-
-                       if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id,
-                                                ring, &txq_meta)) {
-                               dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n",
-                                       vf_q_id, vsi->vsi_num);
+                       if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) {
                                v_ret = VIRTCHNL_STATUS_ERR_PARAM;
                                goto error_param;
                        }
-
-                       /* Clear enabled queues flag */
-                       clear_bit(vf_q_id, vf->txq_ena);
                }
        }
 
@@ -1622,6 +1646,14 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
                if (qpi->txq.ring_len > 0) {
                        vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr;
                        vsi->tx_rings[i]->count = qpi->txq.ring_len;
+
+                       /* Disable any existing queue first */
+                       if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) {
+                               v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+                               goto error_param;
+                       }
+
+                       /* Configure a queue with the requested settings */
                        if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) {
                                v_ret = VIRTCHNL_STATUS_ERR_PARAM;
                                goto error_param;
@@ -3625,6 +3657,8 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
                return;
        }
 
+       mutex_lock(&vf->cfg_lock);
+
        /* Check if VF is disabled. */
        if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) {
                err = -EPERM;
@@ -3642,32 +3676,20 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
                        err = -EINVAL;
        }
 
-       if (!ice_vc_is_opcode_allowed(vf, v_opcode)) {
-               ice_vc_send_msg_to_vf(vf, v_opcode,
-                                     VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL,
-                                     0);
-               ice_put_vf(vf);
-               return;
-       }
-
 error_handler:
        if (err) {
                ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM,
                                      NULL, 0);
                dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n",
                        vf_id, v_opcode, msglen, err);
-               ice_put_vf(vf);
-               return;
+               goto finish;
        }
 
-       /* VF is being configured in another context that triggers a VFR, so no
-        * need to process this message
-        */
-       if (!mutex_trylock(&vf->cfg_lock)) {
-               dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n",
-                        vf->vf_id);
-               ice_put_vf(vf);
-               return;
+       if (!ice_vc_is_opcode_allowed(vf, v_opcode)) {
+               ice_vc_send_msg_to_vf(vf, v_opcode,
+                                     VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL,
+                                     0);
+               goto finish;
        }
 
        switch (v_opcode) {
@@ -3780,6 +3802,7 @@ error_handler:
                         vf_id, v_opcode, err);
        }
 
+finish:
        mutex_unlock(&vf->cfg_lock);
        ice_put_vf(vf);
 }
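
The virtchnl hunks above replace the mutex_trylock-and-bail pattern with an unconditional mutex_lock taken before any state checks, so every early exit funnels through one unlock at a single label. A compact pthread rendering of that single-exit shape (a sketch with hypothetical names):

#include <errno.h>
#include <pthread.h>

static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER;
static int vf_disabled;

/* Take the lock once up front; every early-exit branch goes through the
 * same "finish" label, so lock/unlock can no longer get out of balance. */
static int process_vf_msg(int opcode)
{
        int err = 0;

        pthread_mutex_lock(&cfg_lock);

        if (vf_disabled) {
                err = -EPERM;
                goto finish;
        }
        if (opcode < 0) {               /* stand-in for opcode validation */
                err = -EINVAL;
                goto finish;
        }
        /* ... dispatch the opcode under the lock ... */

finish:
        pthread_mutex_unlock(&cfg_lock);
        return err;
}

int main(void) { return process_vf_msg(1) ? 1 : 0; }
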
index dfbcaf08520eeda8df9d4357e23d55cf7b6dc1be..9dd38f66705946f507fd2db2d70af209abf86930 100644 (file)
@@ -41,8 +41,10 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
 static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
 {
        ice_clean_tx_ring(vsi->tx_rings[q_idx]);
-       if (ice_is_xdp_ena_vsi(vsi))
+       if (ice_is_xdp_ena_vsi(vsi)) {
+               synchronize_rcu();
                ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
+       }
        ice_clean_rx_ring(vsi->rx_rings[q_idx]);
 }
 
@@ -413,8 +415,8 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
  */
 static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
 {
+       u32 nb_buffs_extra = 0, nb_buffs = 0;
        union ice_32b_rx_flex_desc *rx_desc;
-       u32 nb_buffs_extra = 0, nb_buffs;
        u16 ntu = rx_ring->next_to_use;
        u16 total_count = count;
        struct xdp_buff **xdp;
@@ -426,6 +428,10 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
                nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp,
                                                   rx_desc,
                                                   rx_ring->count - ntu);
+               if (nb_buffs_extra != rx_ring->count - ntu) {
+                       ntu += nb_buffs_extra;
+                       goto exit;
+               }
                rx_desc = ICE_RX_DESC(rx_ring, 0);
                xdp = ice_xdp_buf(rx_ring, 0);
                ntu = 0;
@@ -439,6 +445,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
        if (ntu == rx_ring->count)
                ntu = 0;
 
+exit:
        if (rx_ring->next_to_use != ntu)
                ice_release_rx_desc(rx_ring, ntu);
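
The __ice_alloc_rx_bufs_zc hunk above fills the ring in two segments (tail up to the ring end, then the wrapped head); if the tail segment comes up short it now publishes the partial fill and bails instead of continuing from index 0 with a stale position. A self-contained model of that control flow (a sketch; the filler and sizes are hypothetical):

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define RING_SIZE 64u

/* Hypothetical filler: returns how many of the requested slots it filled. */
static uint32_t fill_descs(uint32_t at, uint32_t want)
{
        (void)at;
        return want;            /* pretend the buffer pool never runs dry */
}

static bool alloc_rx_bufs(uint32_t *next_to_use, uint16_t count)
{
        uint32_t ntu = *next_to_use;
        uint32_t total = count, done = 0, extra = 0;

        if (ntu + count >= RING_SIZE) {
                extra = fill_descs(ntu, RING_SIZE - ntu);
                if (extra != RING_SIZE - ntu) {
                        ntu += extra;   /* partial tail fill: publish and bail */
                        goto exit;
                }
                count -= extra;
                ntu = 0;                /* wrap only after a full tail fill */
        }

        done = fill_descs(ntu, count);
        ntu += done;
        if (ntu == RING_SIZE)
                ntu = 0;

exit:
        if (*next_to_use != ntu)
                *next_to_use = ntu;     /* analogue of ice_release_rx_desc() */
        return extra + done == total;
}

int main(void)
{
        uint32_t ntu = 60;
        printf("filled=%d ntu=%u\n", alloc_rx_bufs(&ntu, 8), ntu);
        return 0;
}
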
 
@@ -918,7 +925,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
        struct ice_vsi *vsi = np->vsi;
        struct ice_tx_ring *ring;
 
-       if (test_bit(ICE_DOWN, vsi->state))
+       if (test_bit(ICE_VSI_DOWN, vsi->state))
                return -ENETDOWN;
 
        if (!ice_is_xdp_ena_vsi(vsi))
index 34b33b21e0dcdc507827a4ce4524865cf4fd6232..68be2976f539f723f3909e3f7f8a3ce7a0fa929b 100644 (file)
@@ -5505,7 +5505,8 @@ static void igb_watchdog_task(struct work_struct *work)
                                break;
                        }
 
-                       if (adapter->link_speed != SPEED_1000)
+                       if (adapter->link_speed != SPEED_1000 ||
+                           !hw->phy.ops.read_reg)
                                goto no_wait;
 
                        /* wait for Remote receiver status OK */
index 66ea566488d12bc7d5245e2d2dab959c9b3d3ab5..59d5c467ea6e356abecd381fe3420beac0521f5d 100644 (file)
@@ -156,8 +156,15 @@ void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask)
 {
        u32 swfw_sync;
 
-       while (igc_get_hw_semaphore_i225(hw))
-               ; /* Empty */
+       /* Releasing the resource requires first getting the HW semaphore.
+        * If we fail to get the semaphore, there is nothing we can do,
+        * except log an error and quit. We are not allowed to hang here
+        * indefinitely, as it may cause a denial of service or a system crash.
+        */
+       if (igc_get_hw_semaphore_i225(hw)) {
+               hw_dbg("Failed to release SW_FW_SYNC.\n");
+               return;
+       }
 
        swfw_sync = rd32(IGC_SW_FW_SYNC);
        swfw_sync &= ~mask;
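
The igc hunk above stops spinning forever when the HW semaphore cannot be taken during release: failing loudly is preferable to hanging the caller. A trivial sketch of that bounded-release shape (hw_semaphore_get()/hw_semaphore_put() are hypothetical stand-ins for the i225 helpers):

#include <stdbool.h>
#include <stdio.h>

static unsigned int sw_fw_sync;     /* stand-in for the IGC_SW_FW_SYNC register */

static bool hw_semaphore_get(void) { return true; }   /* may time out and fail */
static void hw_semaphore_put(void) { }

static void release_swfw_sync(unsigned int mask)
{
        /* Bail out instead of spinning forever: an unresponsive device
         * must not be allowed to stall the whole driver. */
        if (!hw_semaphore_get()) {
                fprintf(stderr, "failed to release SW_FW_SYNC\n");
                return;
        }
        sw_fw_sync &= ~mask;
        hw_semaphore_put();
}

int main(void) { release_swfw_sync(0x2); return 0; }
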
index 40dbf4b43234546634a3f8f04243fc651ea1f2e7..6961f65d36b9a41793b028b8e580507f31818cf2 100644 (file)
@@ -581,7 +581,7 @@ static s32 igc_read_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 *data)
         * the lower time out
         */
        for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
-               usleep_range(500, 1000);
+               udelay(50);
                mdic = rd32(IGC_MDIC);
                if (mdic & IGC_MDIC_READY)
                        break;
@@ -638,7 +638,7 @@ static s32 igc_write_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 data)
         * the lower time out
         */
        for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
-               usleep_range(500, 1000);
+               udelay(50);
                mdic = rd32(IGC_MDIC);
                if (mdic & IGC_MDIC_READY)
                        break;
index 0d6e3215e98f57571e78d1a14541c0764c29c523..653e9f1e35b5c7023d26a78a021cab81a078319f 100644 (file)
@@ -992,6 +992,17 @@ static void igc_ptp_time_restore(struct igc_adapter *adapter)
        igc_ptp_write_i225(adapter, &ts);
 }
 
+static void igc_ptm_stop(struct igc_adapter *adapter)
+{
+       struct igc_hw *hw = &adapter->hw;
+       u32 ctrl;
+
+       ctrl = rd32(IGC_PTM_CTRL);
+       ctrl &= ~IGC_PTM_CTRL_EN;
+
+       wr32(IGC_PTM_CTRL, ctrl);
+}
+
 /**
  * igc_ptp_suspend - Disable PTP work items and prepare for suspend
  * @adapter: Board private structure
@@ -1009,8 +1020,10 @@ void igc_ptp_suspend(struct igc_adapter *adapter)
        adapter->ptp_tx_skb = NULL;
        clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
 
-       if (pci_device_is_present(adapter->pdev))
+       if (pci_device_is_present(adapter->pdev)) {
                igc_ptp_time_save(adapter);
+               igc_ptm_stop(adapter);
+       }
 }
 
 /**
index e596e1a9fc75788213bb24d620ce58d920af1643..69d11ff7677d6fc78641cc025934146cb3be50b2 100644 (file)
@@ -903,7 +903,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
        /* Tx IPsec offload doesn't seem to work on this
         * device, so block these requests for now.
         */
-       if (!(sam->flags & XFRM_OFFLOAD_INBOUND)) {
+       sam->flags = sam->flags & ~XFRM_OFFLOAD_IPV6;
+       if (sam->flags != XFRM_OFFLOAD_INBOUND) {
                err = -EOPNOTSUPP;
                goto err_out;
        }
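
The ixgbe hunk above masks out the address-family flag and then requires the remaining flags to equal exactly "inbound": any other bit, present or future, is rejected, which is stricter than the old "has the inbound bit set" test. A small model with hypothetical flag values:

#include <stdbool.h>
#include <stdio.h>

#define OFFLOAD_INBOUND 0x1u    /* hypothetical values for illustration */
#define OFFLOAD_IPV6    0x2u

static bool sa_offload_supported(unsigned int flags)
{
        flags &= ~OFFLOAD_IPV6;             /* address family doesn't matter */
        return flags == OFFLOAD_INBOUND;    /* everything else must be clear */
}

int main(void)
{
        printf("%d %d %d\n",
               sa_offload_supported(OFFLOAD_INBOUND),                  /* 1 */
               sa_offload_supported(OFFLOAD_INBOUND | OFFLOAD_IPV6),   /* 1 */
               sa_offload_supported(OFFLOAD_INBOUND | 0x8));           /* 0 */
        return 0;
}
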
index 5f9ab1842d4933b3be290fbac8093bd4bbb77f37..c188014906497ee088c28c5c52a7bfa5464583d1 100644 (file)
@@ -2751,7 +2751,7 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev,
        }
 
        ret = of_get_mac_address(pnp, ppd.mac_addr);
-       if (ret)
+       if (ret == -EPROBE_DEFER)
                return ret;
 
        mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size);
index 3ad10c793308e6ee9c5a49a3bd6bb943c1bcd9b1..66298e2235c912de5556634819f67099329e62a8 100644 (file)
@@ -395,7 +395,7 @@ static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
        static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
        int i, k;
 
-       memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(ppe->foe_table));
+       memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
 
        if (!IS_ENABLED(CONFIG_SOC_MT7621))
                return;
index 32d83421226a2c9c81d9d93269e5f2c4c5653552..5897940a418b6f4f1eb7e3519e66acf4a97e69a3 100644 (file)
@@ -26,6 +26,7 @@ int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *r, u32 ana_rgc3)
                        break;
 
                ss->regmap[i] = syscon_node_to_regmap(np);
+               of_node_put(np);
                if (IS_ERR(ss->regmap[i]))
                        return PTR_ERR(ss->regmap[i]);
        }
index 538adab6878b5b087ce74b34eb9b0136b79eb1db..c5b560a8b026edebf029961dbe6332353a5ba7b8 100644 (file)
@@ -31,6 +31,7 @@ static const char *const mlx5_rsc_sgmt_name[] = {
 struct mlx5_rsc_dump {
        u32 pdn;
        u32 mkey;
+       u32 number_of_menu_items;
        u16 fw_segment_type[MLX5_SGMT_TYPE_NUM];
 };
 
@@ -50,21 +51,37 @@ static int mlx5_rsc_dump_sgmt_get_by_name(char *name)
        return -EINVAL;
 }
 
-static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page)
+#define MLX5_RSC_DUMP_MENU_HEADER_SIZE (MLX5_ST_SZ_BYTES(resource_dump_info_segment) + \
+                                       MLX5_ST_SZ_BYTES(resource_dump_command_segment) + \
+                                       MLX5_ST_SZ_BYTES(resource_dump_menu_segment))
+
+static int mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page,
+                                       int read_size, int start_idx)
 {
        void *data = page_address(page);
        enum mlx5_sgmt_type sgmt_idx;
        int num_of_items;
        char *sgmt_name;
        void *member;
+       int size = 0;
        void *menu;
        int i;
 
-       menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu);
-       num_of_items = MLX5_GET(resource_dump_menu_segment, menu, num_of_records);
+       if (!start_idx) {
+               menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu);
+               rsc_dump->number_of_menu_items = MLX5_GET(resource_dump_menu_segment, menu,
+                                                         num_of_records);
+               size = MLX5_RSC_DUMP_MENU_HEADER_SIZE;
+               data += size;
+       }
+       num_of_items = rsc_dump->number_of_menu_items;
+
+       for (i = 0; start_idx + i < num_of_items; i++) {
+               size += MLX5_ST_SZ_BYTES(resource_dump_menu_record);
+               if (size >= read_size)
+                       return start_idx + i;
 
-       for (i = 0; i < num_of_items; i++) {
-               member = MLX5_ADDR_OF(resource_dump_menu_segment, menu, record[i]);
+               member = data + MLX5_ST_SZ_BYTES(resource_dump_menu_record) * i;
                sgmt_name =  MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name);
                sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name);
                if (sgmt_idx == -EINVAL)
@@ -72,6 +89,7 @@ static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct
                rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record,
                                                               member, segment_type);
        }
+       return 0;
 }
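
The rsc_dump hunks above teach the menu parser to consume records across multiple page-sized reads: the record count is learned from the header of the first chunk, and the function returns the index to resume from (0 when done). A self-contained model with hypothetical header and record sizes:

#include <stdio.h>

#define HDR_SIZE 64     /* hypothetical menu header size */
#define REC_SIZE 32     /* hypothetical fixed record size */

/* Parse as many whole records as this read contains; return the index to
 * resume from on the next read, or 0 when the menu is exhausted. */
static int read_menu_chunk(const char *data, int read_size, int start_idx,
                           int *num_items)
{
        int size = 0;

        if (start_idx == 0) {
                *num_items = 10;        /* would be parsed from the header */
                size = HDR_SIZE;
                data += HDR_SIZE;
        }

        for (int i = 0; start_idx + i < *num_items; i++) {
                size += REC_SIZE;
                if (size >= read_size)
                        return start_idx + i;   /* record i spills past this read */
                /* ... decode the record at data + REC_SIZE * i ... */
        }
        return 0;
}

int main(void)
{
        char page[256] = { 0 };
        int n = 0, idx = 0;

        do {
                idx = read_menu_chunk(page, sizeof(page), idx, &n);
                printf("resume at %d of %d\n", idx, n);
        } while (idx > 0);
        return 0;
}
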
 
 static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd,
@@ -168,6 +186,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev)
        struct mlx5_rsc_dump_cmd *cmd = NULL;
        struct mlx5_rsc_key key = {};
        struct page *page;
+       int start_idx = 0;
        int size;
        int err;
 
@@ -189,7 +208,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev)
                if (err < 0)
                        goto destroy_cmd;
 
-               mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page);
+               start_idx = mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page, size, start_idx);
 
        } while (err > 0);
 
index 673f1c82d38155b4c71fa7cbae95f8c5bdbafae8..c9d5d8d93994d3ba38692b038680eebeb4232ab7 100644 (file)
@@ -309,8 +309,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
                if (err)
                        return err;
 
-               err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, port_buff_cell_sz,
-                                         xoff, &port_buffer, &update_buffer);
+               err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff,
+                                         port_buff_cell_sz, &port_buffer, &update_buffer);
                if (err)
                        return err;
        }
index af37a8d247a16947133c084a25d10c83dfe74fde..2755c25ba324adfe726e4c5ab83718de964643b0 100644 (file)
@@ -145,8 +145,7 @@ mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
 
        flow_action_for_each(i, act, flow_action) {
                tc_act = mlx5e_tc_act_get(act->id, ns_type);
-               if (!tc_act || !tc_act->post_parse ||
-                   !tc_act->can_offload(parse_state, act, i, attr))
+               if (!tc_act || !tc_act->post_parse)
                        continue;
 
                err = tc_act->post_parse(parse_state, priv, attr);
index b9d38fe807df51ff63b6aa86506c236e6dc60f94..a829c94289c10fb95587cc45bcb6903e4e3d5465 100644 (file)
@@ -45,12 +45,41 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
        if (mlx5e_is_eswitch_flow(parse_state->flow))
                attr->esw_attr->split_count = attr->esw_attr->out_count;
 
-       if (!clear_action) {
+       if (clear_action) {
+               parse_state->ct_clear = true;
+       } else {
                attr->flags |= MLX5_ATTR_FLAG_CT;
                flow_flag_set(parse_state->flow, CT);
                parse_state->ct = true;
        }
-       parse_state->ct_clear = clear_action;
+
+       return 0;
+}
+
+static int
+tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
+                    struct mlx5e_priv *priv,
+                    struct mlx5_flow_attr *attr)
+{
+       struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts;
+       int err;
+
+       /* If a ct action exists, we can ignore previous ct_clear actions */
+       if (parse_state->ct)
+               return 0;
+
+       if (parse_state->ct_clear) {
+               err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts);
+               if (err) {
+                       NL_SET_ERR_MSG_MOD(parse_state->extack,
+                                          "Failed to set registers for ct clear");
+                       return err;
+               }
+               attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+               /* Prevent handling of additional, redundant clear actions */
+               parse_state->ct_clear = false;
+       }
 
        return 0;
 }
@@ -70,5 +99,6 @@ struct mlx5e_tc_act mlx5e_tc_act_ct = {
        .can_offload = tc_act_can_offload_ct,
        .parse_action = tc_act_parse_ct,
        .is_multi_table_act = tc_act_is_multi_table_act_ct,
+       .post_parse = tc_act_post_parse_ct,
 };
 
index 59988e24b70410ef013fe58f621cdc074d0a9900..bec9ed0103a939d23dc57666ea40cd80e57ce3ad 100644 (file)
@@ -23,7 +23,7 @@ struct mlx5_ct_fs_smfs_matcher {
 };
 
 struct mlx5_ct_fs_smfs_matchers {
-       struct mlx5_ct_fs_smfs_matcher smfs_matchers[4];
+       struct mlx5_ct_fs_smfs_matcher smfs_matchers[6];
        struct list_head used;
 };
 
@@ -44,7 +44,8 @@ struct mlx5_ct_fs_smfs_rule {
 };
 
 static inline void
-mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp)
+mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp,
+                         bool gre)
 {
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
 
@@ -77,7 +78,7 @@ mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bo
                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
                         ntohs(MLX5_CT_TCP_FLAGS_MASK));
-       } else {
+       } else if (!gre) {
                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport);
                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport);
        }
@@ -87,7 +88,7 @@ mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bo
 
 static struct mlx5dr_matcher *
 mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4,
-                              bool tcp, u32 priority)
+                              bool tcp, bool gre, u32 priority)
 {
        struct mlx5dr_matcher *dr_matcher;
        struct mlx5_flow_spec *spec;
@@ -96,7 +97,7 @@ mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl,
        if (!spec)
                return ERR_PTR(-ENOMEM);
 
-       mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp);
+       mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp, gre);
        spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS;
 
        dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec);
@@ -108,7 +109,7 @@ mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl,
 }
 
 static struct mlx5_ct_fs_smfs_matcher *
-mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp)
+mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp, bool gre)
 {
        struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
        struct mlx5_ct_fs_smfs_matcher *m, *smfs_matcher;
@@ -119,7 +120,7 @@ mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp
        int prio;
 
        matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers;
-       smfs_matcher = &matchers->smfs_matchers[ipv4 * 2 + tcp];
+       smfs_matcher = &matchers->smfs_matchers[ipv4 * 3 + tcp * 2 + gre];
 
        if (refcount_inc_not_zero(&smfs_matcher->ref))
                return smfs_matcher;
@@ -145,11 +146,11 @@ mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp
        }
 
        tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl;
-       dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, prio);
+       dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio);
        if (IS_ERR(dr_matcher)) {
                netdev_warn(fs->netdev,
-                           "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d), err: %ld\n",
-                           nat, ipv4, tcp, PTR_ERR(dr_matcher));
+                           "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n",
+                           nat, ipv4, tcp, gre, PTR_ERR(dr_matcher));
 
                smfs_matcher = ERR_CAST(dr_matcher);
                goto out_unlock;
@@ -222,16 +223,17 @@ mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs)
 static inline bool
 mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys)
 {
-#define DISSECTOR_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name)
-       const u32 basic_keys = DISSECTOR_BIT(BASIC) | DISSECTOR_BIT(CONTROL) |
-                              DISSECTOR_BIT(PORTS) | DISSECTOR_BIT(META);
-       const u32 ipv4_tcp = basic_keys | DISSECTOR_BIT(IPV4_ADDRS) | DISSECTOR_BIT(TCP);
-       const u32 ipv4_udp = basic_keys | DISSECTOR_BIT(IPV4_ADDRS);
-       const u32 ipv6_tcp = basic_keys | DISSECTOR_BIT(IPV6_ADDRS) | DISSECTOR_BIT(TCP);
-       const u32 ipv6_udp = basic_keys | DISSECTOR_BIT(IPV6_ADDRS);
+#define DISS_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name)
+       const u32 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | DISS_BIT(META);
+       const u32 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+       const u32 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+       const u32 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS);
+       const u32 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS);
+       const u32 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS);
+       const u32 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS);
 
        return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp ||
-               used_keys == ipv6_udp);
+               used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre);
 }
 
 static bool
@@ -254,20 +256,24 @@ mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *f
        flow_rule_match_control(flow_rule, &control);
        flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs);
        flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs);
-       flow_rule_match_ports(flow_rule, &ports);
-       flow_rule_match_tcp(flow_rule, &tcp);
+       if (basic.key->ip_proto != IPPROTO_GRE)
+               flow_rule_match_ports(flow_rule, &ports);
+       if (basic.key->ip_proto == IPPROTO_TCP)
+               flow_rule_match_tcp(flow_rule, &tcp);
 
        if (basic.mask->n_proto != htons(0xFFFF) ||
            (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) ||
            basic.mask->ip_proto != 0xFF ||
-           (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP)) {
+           (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP &&
+            basic.key->ip_proto != IPPROTO_GRE)) {
                ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)",
                       ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto),
                       basic.key->ip_proto, basic.mask->ip_proto);
                return false;
        }
 
-       if (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF)) {
+       if (basic.key->ip_proto != IPPROTO_GRE &&
+           (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) {
                ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)",
                       ports.mask->src, ports.mask->dst);
                return false;
@@ -291,7 +297,7 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
        struct mlx5dr_action *actions[5];
        struct mlx5dr_rule *rule;
        int num_actions = 0, err;
-       bool nat, tcp, ipv4;
+       bool nat, tcp, ipv4, gre;
 
        if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule))
                return ERR_PTR(-EOPNOTSUPP);
@@ -314,15 +320,17 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
        ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4;
        tcp = MLX5_GET(fte_match_param, spec->match_value,
                       outer_headers.ip_protocol) == IPPROTO_TCP;
+       gre = MLX5_GET(fte_match_param, spec->match_value,
+                      outer_headers.ip_protocol) == IPPROTO_GRE;
 
-       smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp);
+       smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp, gre);
        if (IS_ERR(smfs_matcher)) {
                err = PTR_ERR(smfs_matcher);
                goto err_matcher;
        }
 
        rule = mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions,
-                                    MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT);
+                                    spec->flow_context.flow_source);
        if (!rule) {
                err = -EINVAL;
                goto err_create;
index e49f51124c749853c79514f9108e301abf4c205b..ab4b0f3ee2a0a1cc1ec69c86181e24410b41c1b8 100644 (file)
@@ -582,6 +582,12 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
        return 0;
 }
 
+int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+                                struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+       return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
+}
+
 static int
 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
                                   char *modact)
@@ -1410,9 +1416,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
                        const struct flow_action_entry *act,
                        struct netlink_ext_ack *extack)
 {
-       bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
-       int err;
-
        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct action isn't available");
@@ -1423,17 +1426,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
        attr->ct_attr.ct_action = act->ct.action;
        attr->ct_attr.nf_ft = act->ct.flow_table;
 
-       if (!clear_action)
-               goto out;
-
-       err = mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
-       if (err) {
-               NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear");
-               return err;
-       }
-       attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-
-out:
        return 0;
 }
 
@@ -1749,6 +1741,8 @@ mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
 static void
 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
 {
+       struct mlx5e_priv *priv;
+
        if (!refcount_dec_and_test(&ft->refcount))
                return;
 
@@ -1758,6 +1752,8 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
        rhashtable_free_and_destroy(&ft->ct_entries_ht,
                                    mlx5_tc_ct_flush_ft_entry,
                                    ct_priv);
+       priv = netdev_priv(ct_priv->netdev);
+       flush_workqueue(priv->wq);
        mlx5_tc_ct_free_pre_ct_tables(ft);
        mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
        kfree(ft);
index 36d3652bf82973f44c9dba4b870c6aa2e83abcae..00a3ba862afb7cba79e482050c31b6db7d9e9d44 100644 (file)
@@ -129,6 +129,10 @@ bool
 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
                         struct sk_buff *skb, u8 zone_restore_id);
 
+int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+                            struct mlx5e_tc_mod_hdr_acts *mod_acts);
+
 #else /* CONFIG_MLX5_TC_CT */
 
 static inline struct mlx5_tc_ct_priv *
@@ -170,6 +174,13 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
        return 0;
 }
 
+static inline int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+                            struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+       return -EOPNOTSUPP;
+}
+
 static inline int
 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
                        struct mlx5_flow_attr *attr,
index 378fc8e3bd9753daa90ed2d4015349b07387909c..d87bbb0be7c86fa5f11ba87c3b079b701ffc5b03 100644 (file)
@@ -713,6 +713,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
                              struct net_device *filter_dev)
 {
        struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_int_port *int_port;
        TC_TUN_ROUTE_ATTR_INIT(attr);
        u16 vport_num;
@@ -747,7 +748,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
                esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value,
                                                      misc_parameters.vxlan_vni);
                esw_attr->rx_tun_attr->decap_vport = vport_num;
-       } else if (netif_is_ovs_master(attr.route_dev)) {
+       } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) {
                int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
                                                 attr.route_dev->ifindex,
                                                 MLX5E_TC_INT_PORT_INGRESS);
index a55b066746cb763dddd2b4577b47342856ea0319..857840ab1e91885d7a22810fc9b86505663f0299 100644 (file)
@@ -14,19 +14,26 @@ static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget)
        bool busy = false;
        int work_done = 0;
 
+       rcu_read_lock();
+
        ch_stats->poll++;
 
        work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
        busy |= work_done == budget;
        busy |= rq->post_wqes(rq);
 
-       if (busy)
-               return budget;
+       if (busy) {
+               work_done = budget;
+               goto out;
+       }
 
        if (unlikely(!napi_complete_done(napi, work_done)))
-               return work_done;
+               goto out;
 
        mlx5e_cq_arm(&rq->cq);
+
+out:
+       rcu_read_unlock();
        return work_done;
 }
 
index d659fe07d46452b563fc0fdc2ac59821fb04e495..8ead2c82a52aaa41706fb26f3b3b8727d9db8d6a 100644 (file)
@@ -1200,6 +1200,16 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv)
                return err;
        WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state);
 
+       if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_PCP && priv->dcbx.dscp_app_cnt) {
+               /*
+                * Align the driver state with the register state.
+                * Temporary state change is required to enable the app list reset.
+                */
+               priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_DSCP;
+               mlx5e_dcbnl_delete_app(priv);
+               priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP;
+       }
+
        mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params,
                                                   priv->dcbx_dp.trust_state);
 
index 2f1dedc721d1e8b0344c6981227fbdc095340786..fa229998606c2ac49111257922fffea11191c250 100644 (file)
@@ -3864,6 +3864,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev
        if (netdev->features & NETIF_F_NTUPLE)
                netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
 
+       features &= ~NETIF_F_GRO_HW;
+       if (netdev->features & NETIF_F_GRO_HW)
+               netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n");
+
        return features;
 }
 
@@ -3896,6 +3900,25 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
                }
        }
 
+       if (params->xdp_prog) {
+               if (features & NETIF_F_LRO) {
+                       netdev_warn(netdev, "LRO is incompatible with XDP\n");
+                       features &= ~NETIF_F_LRO;
+               }
+               if (features & NETIF_F_GRO_HW) {
+                       netdev_warn(netdev, "HW GRO is incompatible with XDP\n");
+                       features &= ~NETIF_F_GRO_HW;
+               }
+       }
+
+       if (priv->xsk.refcnt) {
+               if (features & NETIF_F_GRO_HW) {
+                       netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n",
+                                   priv->xsk.refcnt);
+                       features &= ~NETIF_F_GRO_HW;
+               }
+       }
+
        if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
                features &= ~NETIF_F_RXHASH;
                if (netdev->features & NETIF_F_RXHASH)
@@ -4850,10 +4873,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
        netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
 
-       if (!!MLX5_CAP_GEN(mdev, shampo) &&
-           mlx5e_check_fragmented_striding_rq_cap(mdev))
-               netdev->hw_features    |= NETIF_F_GRO_HW;
-
        if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
                netdev->hw_enc_features |= NETIF_F_HW_CSUM;
                netdev->hw_enc_features |= NETIF_F_TSO;
index e3fc15ae7bb106a055dad73466790accb04a6798..ac0f73074f7ab1d53de229a2952939c8e2b77995 100644 (file)
@@ -2459,6 +2459,17 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                                 match.key->vlan_priority);
 
                        *match_level = MLX5_MATCH_L2;
+
+                       if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
+                           match.mask->vlan_eth_type &&
+                           MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
+                                                   ft_field_support.outer_second_vid,
+                                                   fs_type)) {
+                               MLX5_SET(fte_match_set_misc, misc_c,
+                                        outer_second_cvlan_tag, 1);
+                               spec->match_criteria_enable |=
+                                       MLX5_MATCH_MISC_PARAMETERS;
+                       }
                }
        } else if (*match_level != MLX5_MATCH_NONE) {
                /* cvlan_tag enabled in match criteria and
index 3f63df127091268599c6469a83914ad0ebaec68f..3b151332e2f8944b026fa99602d3c87ccf832958 100644 (file)
@@ -139,7 +139,7 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
                if (mlx5_esw_indir_table_decap_vport(attr))
                        vport = mlx5_esw_indir_table_decap_vport(attr);
 
-               if (esw_attr->int_port)
+               if (attr && !attr->chain && esw_attr->int_port)
                        metadata =
                                mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port);
                else
index 816d991f762106496ae504765dafa39d8eda4fd8..3ad67e6b5586d143bae5773fdd1f2467f3a58b22 100644 (file)
@@ -2663,28 +2663,6 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
        clean_tree(&root_ns->ns.node);
 }
 
-void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
-{
-       struct mlx5_flow_steering *steering = dev->priv.steering;
-
-       cleanup_root_ns(steering->root_ns);
-       cleanup_root_ns(steering->fdb_root_ns);
-       steering->fdb_root_ns = NULL;
-       kfree(steering->fdb_sub_ns);
-       steering->fdb_sub_ns = NULL;
-       cleanup_root_ns(steering->port_sel_root_ns);
-       cleanup_root_ns(steering->sniffer_rx_root_ns);
-       cleanup_root_ns(steering->sniffer_tx_root_ns);
-       cleanup_root_ns(steering->rdma_rx_root_ns);
-       cleanup_root_ns(steering->rdma_tx_root_ns);
-       cleanup_root_ns(steering->egress_root_ns);
-       mlx5_cleanup_fc_stats(dev);
-       kmem_cache_destroy(steering->ftes_cache);
-       kmem_cache_destroy(steering->fgs_cache);
-       mlx5_ft_pool_destroy(dev);
-       kfree(steering);
-}
-
 static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
 {
        struct fs_prio *prio;
@@ -3086,42 +3064,27 @@ cleanup:
        return err;
 }
 
-int mlx5_init_fs(struct mlx5_core_dev *dev)
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
 {
-       struct mlx5_flow_steering *steering;
-       int err = 0;
-
-       err = mlx5_init_fc_stats(dev);
-       if (err)
-               return err;
-
-       err = mlx5_ft_pool_init(dev);
-       if (err)
-               return err;
-
-       steering = kzalloc(sizeof(*steering), GFP_KERNEL);
-       if (!steering) {
-               err = -ENOMEM;
-               goto err;
-       }
-
-       steering->dev = dev;
-       dev->priv.steering = steering;
+       struct mlx5_flow_steering *steering = dev->priv.steering;
 
-       if (mlx5_fs_dr_is_supported(dev))
-               steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
-       else
-               steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
+       cleanup_root_ns(steering->root_ns);
+       cleanup_root_ns(steering->fdb_root_ns);
+       steering->fdb_root_ns = NULL;
+       kfree(steering->fdb_sub_ns);
+       steering->fdb_sub_ns = NULL;
+       cleanup_root_ns(steering->port_sel_root_ns);
+       cleanup_root_ns(steering->sniffer_rx_root_ns);
+       cleanup_root_ns(steering->sniffer_tx_root_ns);
+       cleanup_root_ns(steering->rdma_rx_root_ns);
+       cleanup_root_ns(steering->rdma_tx_root_ns);
+       cleanup_root_ns(steering->egress_root_ns);
+}
 
-       steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
-                                               sizeof(struct mlx5_flow_group), 0,
-                                               0, NULL);
-       steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
-                                                0, NULL);
-       if (!steering->ftes_cache || !steering->fgs_cache) {
-               err = -ENOMEM;
-               goto err;
-       }
+int mlx5_fs_core_init(struct mlx5_core_dev *dev)
+{
+       struct mlx5_flow_steering *steering = dev->priv.steering;
+       int err = 0;
 
        if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
              (MLX5_CAP_GEN(dev, nic_flow_table))) ||
@@ -3180,8 +3143,64 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
        }
 
        return 0;
+
+err:
+       mlx5_fs_core_cleanup(dev);
+       return err;
+}
+
+void mlx5_fs_core_free(struct mlx5_core_dev *dev)
+{
+       struct mlx5_flow_steering *steering = dev->priv.steering;
+
+       kmem_cache_destroy(steering->ftes_cache);
+       kmem_cache_destroy(steering->fgs_cache);
+       kfree(steering);
+       mlx5_ft_pool_destroy(dev);
+       mlx5_cleanup_fc_stats(dev);
+}
+
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev)
+{
+       struct mlx5_flow_steering *steering;
+       int err = 0;
+
+       err = mlx5_init_fc_stats(dev);
+       if (err)
+               return err;
+
+       err = mlx5_ft_pool_init(dev);
+       if (err)
+               goto err;
+
+       steering = kzalloc(sizeof(*steering), GFP_KERNEL);
+       if (!steering) {
+               err = -ENOMEM;
+               goto err;
+       }
+
+       steering->dev = dev;
+       dev->priv.steering = steering;
+
+       if (mlx5_fs_dr_is_supported(dev))
+               steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
+       else
+               steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
+
+       steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
+                                               sizeof(struct mlx5_flow_group), 0,
+                                               0, NULL);
+       steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
+                                                0, NULL);
+       if (!steering->ftes_cache || !steering->fgs_cache) {
+               err = -ENOMEM;
+               goto err;
+       }
+
+       return 0;
+
 err:
-       mlx5_cleanup_fs(dev);
+       mlx5_fs_core_free(dev);
        return err;
 }
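
The flow-steering hunks above split the old mlx5_init_fs/mlx5_cleanup_fs pair into alloc/free (memory, pools, and caches, run once per device lifetime) and init/cleanup (the root namespaces, re-runnable across reloads). A skeleton of that two-phase lifecycle with hypothetical types:

#include <stdlib.h>

struct steering { int *fgs_cache; };
struct device   { struct steering *steering; };

static int fs_core_alloc(struct device *d)      /* once per device lifetime */
{
        d->steering = calloc(1, sizeof(*d->steering));
        if (!d->steering)
                return -1;
        d->steering->fgs_cache = calloc(64, sizeof(int));
        if (!d->steering->fgs_cache)
                return -1;              /* caller unwinds with fs_core_free() */
        return 0;
}

static void fs_core_free(struct device *d)
{
        if (!d->steering)
                return;
        free(d->steering->fgs_cache);
        free(d->steering);
        d->steering = NULL;
}

static int  fs_core_init(struct device *d)    { (void)d; return 0; } /* per reload */
static void fs_core_cleanup(struct device *d) { (void)d; }

int main(void)
{
        struct device d = { 0 };

        if (fs_core_alloc(&d)) {
                fs_core_free(&d);
                return 1;
        }
        if (fs_core_init(&d)) {
                fs_core_free(&d);
                return 1;
        }
        fs_core_cleanup(&d);    /* a reload would re-run init/cleanup only */
        fs_core_free(&d);
        return 0;
}
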
 
index c488a7c5b07e9f6e7ea3570061397ec50fe9250f..3f20523e514fd0aee50ac3079ea3bcf70bb2bf5e 100644 (file)
@@ -298,8 +298,10 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
 int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
                                 enum mlx5_flow_steering_mode mode);
 
-int mlx5_init_fs(struct mlx5_core_dev *dev);
-void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev);
+void mlx5_fs_core_free(struct mlx5_core_dev *dev);
+int mlx5_fs_core_init(struct mlx5_core_dev *dev);
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev);
 
 int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports);
 void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
index 4aa22dce9b77ff62c4d4c456d92800f4d12e2f7f..81eb67fb95b04a1157dbe9765154907f4726f0aa 100644 (file)
@@ -8,7 +8,8 @@
 enum {
        MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
        MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
-       MLX5_FW_RESET_FLAGS_PENDING_COMP
+       MLX5_FW_RESET_FLAGS_PENDING_COMP,
+       MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
 };
 
 struct mlx5_fw_reset {
@@ -155,6 +156,28 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
        }
 }
 
+static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
+{
+       struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+       del_timer_sync(&fw_reset->timer);
+}
+
+static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
+{
+       struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+       if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+               mlx5_core_warn(dev, "Reset request was already cleared\n");
+               return -EALREADY;
+       }
+
+       mlx5_stop_sync_reset_poll(dev);
+       if (poll_health)
+               mlx5_start_health_poll(dev);
+       return 0;
+}
+
 static void mlx5_sync_reset_reload_work(struct work_struct *work)
 {
        struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
@@ -162,6 +185,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work)
        struct mlx5_core_dev *dev = fw_reset->dev;
        int err;
 
+       mlx5_sync_reset_clear_reset_requested(dev, false);
        mlx5_enter_error_state(dev, true);
        mlx5_unload_one(dev);
        err = mlx5_health_wait_pci_up(dev);
@@ -171,23 +195,6 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work)
        mlx5_fw_reset_complete_reload(dev);
 }
 
-static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
-{
-       struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
-
-       del_timer_sync(&fw_reset->timer);
-}
-
-static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
-{
-       struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
-
-       mlx5_stop_sync_reset_poll(dev);
-       clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
-       if (poll_health)
-               mlx5_start_health_poll(dev);
-}
-
 #define MLX5_RESET_POLL_INTERVAL       (HZ / 10)
 static void poll_sync_reset(struct timer_list *t)
 {
@@ -202,8 +209,10 @@ static void poll_sync_reset(struct timer_list *t)
 
        if (fatal_error) {
                mlx5_core_warn(dev, "Got Device Reset\n");
-               mlx5_sync_reset_clear_reset_requested(dev, false);
-               queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+               if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+                       queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+               else
+                       mlx5_core_err(dev, "Device is being removed, dropping new reset work\n");
                return;
        }
 
@@ -229,13 +238,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev)
        return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
 }
 
-static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
+static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
 {
        struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
 
+       if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+               mlx5_core_warn(dev, "Reset request was already set\n");
+               return -EALREADY;
+       }
        mlx5_stop_health_poll(dev, true);
-       set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
        mlx5_start_sync_reset_poll(dev);
+       return 0;
 }
 
 static void mlx5_fw_live_patch_event(struct work_struct *work)
@@ -264,7 +277,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
                               err ? "Failed" : "Sent");
                return;
        }
-       mlx5_sync_reset_set_reset_requested(dev);
+       if (mlx5_sync_reset_set_reset_requested(dev))
+               return;
+
        err = mlx5_fw_reset_set_reset_sync_ack(dev);
        if (err)
                mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
@@ -362,7 +377,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
        struct mlx5_core_dev *dev = fw_reset->dev;
        int err;
 
-       mlx5_sync_reset_clear_reset_requested(dev, false);
+       if (mlx5_sync_reset_clear_reset_requested(dev, false))
+               return;
 
        mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
 
@@ -391,10 +407,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
                                                      reset_abort_work);
        struct mlx5_core_dev *dev = fw_reset->dev;
 
-       if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
+       if (mlx5_sync_reset_clear_reset_requested(dev, true))
                return;
-
-       mlx5_sync_reset_clear_reset_requested(dev, true);
        mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
 }
 
@@ -423,9 +437,12 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti
        struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
        struct mlx5_eqe *eqe = data;
 
+       if (test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+               return NOTIFY_DONE;
+
        switch (eqe->sub_type) {
        case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT:
-                       queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
+               queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
                break;
        case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT:
                mlx5_sync_reset_events_handle(fw_reset, eqe);
@@ -469,6 +486,18 @@ void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev)
        mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb);
 }
 
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
+{
+       struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+       set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags);
+       cancel_work_sync(&fw_reset->fw_live_patch_work);
+       cancel_work_sync(&fw_reset->reset_request_work);
+       cancel_work_sync(&fw_reset->reset_reload_work);
+       cancel_work_sync(&fw_reset->reset_now_work);
+       cancel_work_sync(&fw_reset->reset_abort_work);
+}
+
 int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
 {
        struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL);
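
The rework above makes the requested/cleared transitions exclusive through test_and_set_bit()/test_and_clear_bit(), so a duplicate event returns -EALREADY instead of running twice, and mlx5_drain_fw_reset() fences off new events before flushing in-flight work. The same guard pattern, stripped down (the ex_* names are illustrative, not mlx5 code):

    #include <linux/bitops.h>
    #include <linux/workqueue.h>

    enum { EX_RESET_REQUESTED, EX_DROP_NEW_REQUESTS };

    struct ex_state {
            unsigned long flags;
            struct work_struct reset_work;
    };

    /* Only the first caller wins; repeated or racing events are no-ops. */
    static int ex_set_requested(struct ex_state *st)
    {
            if (test_and_set_bit(EX_RESET_REQUESTED, &st->flags))
                    return -EALREADY;
            return 0;
    }

    static int ex_clear_requested(struct ex_state *st)
    {
            if (!test_and_clear_bit(EX_RESET_REQUESTED, &st->flags))
                    return -EALREADY;
            return 0;
    }

    /* On removal: refuse new events first, then flush what is in flight. */
    static void ex_drain(struct ex_state *st)
    {
            set_bit(EX_DROP_NEW_REQUESTS, &st->flags);
            cancel_work_sync(&st->reset_work);
    }
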
index 694fc7cb268457e460374c7d014fc530f524b549..dc141c7e641a307b9579c1509a650538c7371c06 100644 (file)
@@ -16,6 +16,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
 int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
 void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev);
 void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev);
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev);
 int mlx5_fw_reset_init(struct mlx5_core_dev *dev);
 void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev);
 
index 4a6ec15ef0460231d9d6f4ad18558235b0a191aa..d6c3e6dfd71fc0576f71f88158047b1358f33c7f 100644 (file)
@@ -100,6 +100,14 @@ static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
        flush_workqueue(mp->wq);
 }
 
+static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
+{
+       mp->fib.mfi = fi;
+       mp->fib.priority = fi->fib_priority;
+       mp->fib.dst = dst;
+       mp->fib.dst_len = dst_len;
+}
+
 struct mlx5_fib_event_work {
        struct work_struct work;
        struct mlx5_lag *ldev;
@@ -110,10 +118,10 @@ struct mlx5_fib_event_work {
        };
 };
 
-static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
-                                    unsigned long event,
-                                    struct fib_info *fi)
+static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
+                                    struct fib_entry_notifier_info *fen_info)
 {
+       struct fib_info *fi = fen_info->fi;
        struct lag_mp *mp = &ldev->lag_mp;
        struct fib_nh *fib_nh0, *fib_nh1;
        unsigned int nhs;
@@ -121,13 +129,15 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
        /* Handle delete event */
        if (event == FIB_EVENT_ENTRY_DEL) {
                /* stop track */
-               if (mp->mfi == fi)
-                       mp->mfi = NULL;
+               if (mp->fib.mfi == fi)
+                       mp->fib.mfi = NULL;
                return;
        }
 
        /* Handle multipath entry with lower priority value */
-       if (mp->mfi && mp->mfi != fi && fi->fib_priority >= mp->mfi->fib_priority)
+       if (mp->fib.mfi && mp->fib.mfi != fi &&
+           (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
+           fi->fib_priority >= mp->fib.priority)
                return;
 
        /* Handle add/replace event */
@@ -143,9 +153,9 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
 
                        i++;
                        mlx5_lag_set_port_affinity(ldev, i);
+                       mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
                }
 
-               mp->mfi = fi;
                return;
        }
 
@@ -165,7 +175,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
        }
 
        /* First time we see multipath route */
-       if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
+       if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
                struct lag_tracker tracker;
 
                tracker = ldev->tracker;
@@ -173,7 +183,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
        }
 
        mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
-       mp->mfi = fi;
+       mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
 }
 
 static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
@@ -184,7 +194,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
        struct lag_mp *mp = &ldev->lag_mp;
 
        /* Check the nh event is related to the route */
-       if (!mp->mfi || mp->mfi != fi)
+       if (!mp->fib.mfi || mp->fib.mfi != fi)
                return;
 
        /* nh added/removed */
@@ -214,7 +224,7 @@ static void mlx5_lag_fib_update(struct work_struct *work)
        case FIB_EVENT_ENTRY_REPLACE:
        case FIB_EVENT_ENTRY_DEL:
                mlx5_lag_fib_route_event(ldev, fib_work->event,
-                                        fib_work->fen_info.fi);
+                                        &fib_work->fen_info);
                fib_info_put(fib_work->fen_info.fi);
                break;
        case FIB_EVENT_NH_ADD:
@@ -313,7 +323,7 @@ void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
        /* Clear mfi, as it might become stale when a route delete event
         * has been missed, see mlx5_lag_fib_route_event().
         */
-       ldev->lag_mp.mfi = NULL;
+       ldev->lag_mp.fib.mfi = NULL;
 }
 
 int mlx5_lag_mp_init(struct mlx5_lag *ldev)
@@ -324,7 +334,7 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev)
        /* always clear mfi, as it might become stale when a route delete event
         * has been missed
         */
-       mp->mfi = NULL;
+       mp->fib.mfi = NULL;
 
        if (mp->fib_nb.notifier_call)
                return 0;
@@ -354,5 +364,5 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
        unregister_fib_notifier(&init_net, &mp->fib_nb);
        destroy_workqueue(mp->wq);
        mp->fib_nb.notifier_call = NULL;
-       mp->mfi = NULL;
+       mp->fib.mfi = NULL;
 }
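
Caching dst/dst_len/priority lets the multipath code tell a replace of the tracked prefix apart from an unrelated, lower-priority route, which the bare fib_info pointer could not. The comparison this enables, in an illustrative form (struct and helper names are shortened stand-ins):

    #include <linux/types.h>

    struct ex_fib_cache {
            const void *mfi;
            u32 priority;
            u32 dst;
            int dst_len;
    };

    /* Mirror of the new skip condition: process the event unless it is a
     * different route, for a different prefix, with no better priority.
     */
    static bool ex_fib_event_relevant(const struct ex_fib_cache *c,
                                      const void *fi, u32 dst, int dst_len,
                                      u32 priority)
    {
            if (!c->mfi || c->mfi == fi)
                    return true;            /* nothing cached, or same route */
            if (c->dst == dst && c->dst_len == dst_len)
                    return true;            /* same prefix: allow replace */
            return priority < c->priority;  /* strictly better priority only */
    }
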
index 57af962cad298c5326d36cfd3ddeaaeee65df54d..056a066da604b2a6c5f0650111e41e2f48afe703 100644 (file)
@@ -15,7 +15,12 @@ enum mlx5_lag_port_affinity {
 
 struct lag_mp {
        struct notifier_block     fib_nb;
-       struct fib_info           *mfi; /* used in tracking fib events */
+       struct {
+               const void        *mfi; /* used in tracking fib events */
+               u32               priority;
+               u32               dst;
+               int               dst_len;
+       } fib;
        struct workqueue_struct   *wq;
 };
 
index a6592f9c3c05fc8d2c38f9ea491e3360e9527e80..5be322528279a3c13ba4255d41538a73aa6f1df2 100644 (file)
@@ -505,7 +505,7 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev)
        struct ttc_params ttc_params = {};
 
        mlx5_lag_set_inner_ttc_params(ldev, &ttc_params);
-       port_sel->inner.ttc = mlx5_create_ttc_table(dev, &ttc_params);
+       port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params);
        if (IS_ERR(port_sel->inner.ttc))
                return PTR_ERR(port_sel->inner.ttc);
 
index b63dec24747ab618ebe2e478bbb0219afc6a5003..b78f2ba25c19bf69821e6708f42ee5b173e04ef3 100644 (file)
@@ -408,6 +408,8 @@ static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev,
        for (tt = 0; tt < MLX5_NUM_TT; tt++) {
                struct mlx5_ttc_rule *rule = &rules[tt];
 
+               if (test_bit(tt, params->ignore_dests))
+                       continue;
                rule->rule = mlx5_generate_inner_ttc_rule(dev, ft,
                                                          &params->dests[tt],
                                                          ttc_rules[tt].etype,
index 2589e39eb9c72604df2c6afa29d88002dfb8a46d..ef196cb764e2a3edaf640c29e928954f1c1d1475 100644 (file)
@@ -938,6 +938,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
                goto err_sf_table_cleanup;
        }
 
+       err = mlx5_fs_core_alloc(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to alloc flow steering\n");
+               goto err_fs;
+       }
+
        dev->dm = mlx5_dm_create(dev);
        if (IS_ERR(dev->dm))
                mlx5_core_warn(dev, "Failed to init device memory %d\n", err);
@@ -948,6 +954,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 
        return 0;
 
+err_fs:
+       mlx5_sf_table_cleanup(dev);
 err_sf_table_cleanup:
        mlx5_sf_hw_table_cleanup(dev);
 err_sf_hw_table_cleanup:
@@ -985,6 +993,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
        mlx5_hv_vhca_destroy(dev->hv_vhca);
        mlx5_fw_tracer_destroy(dev->tracer);
        mlx5_dm_cleanup(dev);
+       mlx5_fs_core_free(dev);
        mlx5_sf_table_cleanup(dev);
        mlx5_sf_hw_table_cleanup(dev);
        mlx5_vhca_event_cleanup(dev);
@@ -1191,7 +1200,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
                goto err_tls_start;
        }
 
-       err = mlx5_init_fs(dev);
+       err = mlx5_fs_core_init(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to init flow steering\n");
                goto err_fs;
@@ -1236,7 +1245,7 @@ err_ec:
 err_vhca:
        mlx5_vhca_event_stop(dev);
 err_set_hca:
-       mlx5_cleanup_fs(dev);
+       mlx5_fs_core_cleanup(dev);
 err_fs:
        mlx5_accel_tls_cleanup(dev);
 err_tls_start:
@@ -1265,7 +1274,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
        mlx5_ec_cleanup(dev);
        mlx5_sf_hw_table_destroy(dev);
        mlx5_vhca_event_stop(dev);
-       mlx5_cleanup_fs(dev);
+       mlx5_fs_core_cleanup(dev);
        mlx5_accel_ipsec_cleanup(dev);
        mlx5_accel_tls_cleanup(dev);
        mlx5_fpga_device_stop(dev);
@@ -1618,6 +1627,10 @@ static void remove_one(struct pci_dev *pdev)
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
        struct devlink *devlink = priv_to_devlink(dev);
 
+       /* mlx5_drain_fw_reset() uses devlink APIs, hence we must drain
+        * fw_reset before unregistering the devlink.
+        */
+       mlx5_drain_fw_reset(dev);
        devlink_unregister(devlink);
        mlx5_sriov_disable(pdev);
        mlx5_crdump_disable(dev);
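
The new err_fs label in mlx5_init_once() extends the usual kernel unwind ladder: every successful step gains a label that undoes it, and a failure jumps to the label that unwinds everything initialized so far, in reverse order. Schematically (step_a/step_b/step_c are stand-ins, not mlx5 functions):

    static int step_a_init(void) { return 0; }
    static void step_a_cleanup(void) { }
    static int step_b_init(void) { return 0; }
    static void step_b_cleanup(void) { }
    static int step_c_init(void) { return 0; }

    static int example_init_once(void)
    {
            int err;

            err = step_a_init();
            if (err)
                    return err;

            err = step_b_init();
            if (err)
                    goto err_a;

            err = step_c_init();
            if (err)
                    goto err_b;

            return 0;

    err_b:
            step_b_cleanup();
    err_a:
            step_a_cleanup();
            return err;
    }
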
index 850937cd8bf9cd6eccafdacd4c2d845115e2af07..1383550f44c1297e25159b0cdb7fc6f81a947d51 100644 (file)
@@ -530,6 +530,37 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
        return 0;
 }
 
+static void dr_action_modify_ttl_adjust(struct mlx5dr_domain *dmn,
+                                       struct mlx5dr_ste_actions_attr *attr,
+                                       bool rx_rule,
+                                       bool *recalc_cs_required)
+{
+       *recalc_cs_required = false;
+
+       /* if device supports csum recalculation - no adjustment needed */
+       if (mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps))
+               return;
+
+       /* no adjustment needed on TX rules */
+       if (!rx_rule)
+               return;
+
+       if (!MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify)) {
+               /* Ignore the modify TTL action.
+                * It is always kept as last HW action.
+                */
+               attr->modify_actions--;
+               return;
+       }
+
+       if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB)
+               /* Due to a HW bug on some devices, modifying TTL on RX flows
+                * will cause an incorrect checksum calculation. In such cases
+                * we will use a FW table to recalculate the checksum.
+                */
+               *recalc_cs_required = true;
+}
+
 static void dr_action_print_sequence(struct mlx5dr_domain *dmn,
                                     struct mlx5dr_action *actions[],
                                     int last_idx)
@@ -650,8 +681,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
                case DR_ACTION_TYP_MODIFY_HDR:
                        attr.modify_index = action->rewrite->index;
                        attr.modify_actions = action->rewrite->num_of_actions;
-                       recalc_cs_required = action->rewrite->modify_ttl &&
-                                            !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps);
+                       if (action->rewrite->modify_ttl)
+                               dr_action_modify_ttl_adjust(dmn, &attr, rx_rule,
+                                                           &recalc_cs_required);
                        break;
                case DR_ACTION_TYP_L2_TO_TNL_L2:
                case DR_ACTION_TYP_L2_TO_TNL_L3:
@@ -732,12 +764,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
        *new_hw_ste_arr_sz = nic_matcher->num_of_builders;
        last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1);
 
-       /* Due to a HW bug in some devices, modifying TTL on RX flows will
-        * cause an incorrect checksum calculation. In this case we will
-        * use a FW table to recalculate.
-        */
-       if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB &&
-           rx_rule && recalc_cs_required && dest_action) {
+       if (recalc_cs_required && dest_action) {
                ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr);
                if (ret) {
                        mlx5dr_err(dmn,
@@ -842,7 +869,8 @@ struct mlx5dr_action *
 mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
                                   struct mlx5dr_action_dest *dests,
                                   u32 num_of_dests,
-                                  bool ignore_flow_level)
+                                  bool ignore_flow_level,
+                                  u32 flow_source)
 {
        struct mlx5dr_cmd_flow_destination_hw_info *hw_dests;
        struct mlx5dr_action **ref_actions;
@@ -914,7 +942,8 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
                                      reformat_req,
                                      &action->dest_tbl->fw_tbl.id,
                                      &action->dest_tbl->fw_tbl.group_id,
-                                     ignore_flow_level);
+                                     ignore_flow_level,
+                                     flow_source);
        if (ret)
                goto free_action;
 
@@ -1556,12 +1585,6 @@ dr_action_modify_check_is_ttl_modify(const void *sw_action)
        return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL;
 }
 
-static bool dr_action_modify_ttl_ignore(struct mlx5dr_domain *dmn)
-{
-       return !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps) &&
-              !MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify);
-}
-
 static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
                                            u32 max_hw_actions,
                                            u32 num_sw_actions,
@@ -1573,6 +1596,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
        const struct mlx5dr_ste_action_modify_field *hw_dst_action_info;
        const struct mlx5dr_ste_action_modify_field *hw_src_action_info;
        struct mlx5dr_domain *dmn = action->rewrite->dmn;
+       __be64 *modify_ttl_sw_action = NULL;
        int ret, i, hw_idx = 0;
        __be64 *sw_action;
        __be64 hw_action;
@@ -1585,8 +1609,14 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
        action->rewrite->allow_rx = 1;
        action->rewrite->allow_tx = 1;
 
-       for (i = 0; i < num_sw_actions; i++) {
-               sw_action = &sw_actions[i];
+       for (i = 0; i < num_sw_actions || modify_ttl_sw_action; i++) {
+               /* modify TTL is handled separately, as a last action */
+               if (i == num_sw_actions) {
+                       sw_action = modify_ttl_sw_action;
+                       modify_ttl_sw_action = NULL;
+               } else {
+                       sw_action = &sw_actions[i];
+               }
 
                ret = dr_action_modify_check_field_limitation(action,
                                                              sw_action);
@@ -1595,10 +1625,9 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
 
                if (!(*modify_ttl) &&
                    dr_action_modify_check_is_ttl_modify(sw_action)) {
-                       if (dr_action_modify_ttl_ignore(dmn))
-                               continue;
-
+                       modify_ttl_sw_action = sw_action;
                        *modify_ttl = true;
+                       continue;
                }
 
                /* Convert SW action to HW action */
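
The rewritten conversion loop defers the modify-TTL action by stashing its pointer and processing it after the main list, via the `i < num_sw_actions || modify_ttl_sw_action` loop condition; keeping it as the last HW action is what lets dr_action_modify_ttl_adjust() simply drop it by decrementing modify_actions. The core of that pattern, reduced to plain C (emit() and the negative-value marker are illustrative):

    #include <stdbool.h>

    static void convert_with_deferred_last(const int *actions, int num,
                                           void (*emit)(int))
    {
            const int *deferred = NULL;
            bool seen_special = false;
            const int *cur;
            int i;

            for (i = 0; i < num || deferred; i++) {
                    if (i == num) {                 /* main list done */
                            cur = deferred;
                            deferred = NULL;
                    } else {
                            cur = &actions[i];
                    }

                    if (!seen_special && *cur < 0) {
                            deferred = cur;         /* hold back until last */
                            seen_special = true;
                            continue;
                    }
                    emit(*cur);
            }
    }
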
index 68a4c32d5f34c535f557cd199fb85dc011302527..f05ef0cd54baca456a72d063f0da3fa01843af30 100644 (file)
@@ -104,7 +104,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
                            bool reformat_req,
                            u32 *tbl_id,
                            u32 *group_id,
-                           bool ignore_flow_level)
+                           bool ignore_flow_level,
+                           u32 flow_source)
 {
        struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
        struct mlx5dr_cmd_fte_info fte_info = {};
@@ -139,6 +140,7 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
        fte_info.val = val;
        fte_info.dest_arr = dest;
        fte_info.ignore_flow_level = ignore_flow_level;
+       fte_info.flow_context.flow_source = flow_source;
 
        ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info);
        if (ret) {
index 5a322335f2043d570302fd810ca7f5a06737d80e..2010d4ac651909e1ff2d5d8a7ca2a485ae13e50c 100644 (file)
@@ -420,7 +420,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
         * encapsulation. The reason for that is that we support
         * modify headers for outer headers only
         */
-       if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+       if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) {
                dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
                dr_ste_v0_set_rewrite_actions(last_ste,
                                              attr->modify_actions,
@@ -513,7 +513,7 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
                }
        }
 
-       if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+       if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) {
                if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT)
                        dr_ste_v0_arr_init_next(&last_ste,
                                                added_stes,
index 46866a5fc5ca3b402d282546a33eabf4a85f3717..98320e3945adb3028b4ccb9cb379b6b0097ba577 100644 (file)
@@ -1461,7 +1461,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
                            bool reformat_req,
                            u32 *tbl_id,
                            u32 *group_id,
-                           bool ignore_flow_level);
+                           bool ignore_flow_level,
+                           u32 flow_source);
 void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id,
                              u32 group_id);
 #endif  /* _DR_TYPES_H_ */
index 045b0cf90063b1b1b45081465102c9038eb9ace9..728f8188258929dcc900f42049575e7d8215898c 100644 (file)
@@ -520,6 +520,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
        } else if (num_term_actions > 1) {
                bool ignore_flow_level =
                        !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL);
+               u32 flow_source = fte->flow_context.flow_source;
 
                if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
                    fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
@@ -529,7 +530,8 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
                tmp_action = mlx5dr_action_create_mult_dest_tbl(domain,
                                                                term_actions,
                                                                num_term_actions,
-                                                               ignore_flow_level);
+                                                               ignore_flow_level,
+                                                               flow_source);
                if (!tmp_action) {
                        err = -EOPNOTSUPP;
                        goto free_actions;
index ec5cbec0d4553d22eb2c90ccb4954498e6a325fd..7626c85643b1f171d46f11a5b9d7f696fc847dd6 100644 (file)
@@ -99,7 +99,8 @@ struct mlx5dr_action *
 mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
                                   struct mlx5dr_action_dest *dests,
                                   u32 num_of_dests,
-                                  bool ignore_flow_level);
+                                  bool ignore_flow_level,
+                                  u32 flow_source);
 
 struct mlx5dr_action *mlx5dr_action_create_drop(void);
 
index 939b692ffc335ecad1265a002ae674926cf6a37a..ce843ea9146466d402a97f4e249aec548bcd06ed 100644 (file)
@@ -650,6 +650,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
        return 0;
 
 errout:
+       mutex_destroy(&mlxsw_i2c->cmd.lock);
        i2c_set_clientdata(client, NULL);
 
        return err;
index 01cf5a6a26bd323aa583f17684f739da10cbe660..a2ee695a3f17856d23afff3979d842ff11ad38d9 100644 (file)
@@ -568,10 +568,8 @@ static int
 mlxsw_sp2_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp,
                                 struct mlxsw_sp_ipip_entry *ipip_entry)
 {
-       struct __ip6_tnl_parm parms6;
-
-       parms6 = mlxsw_sp_ipip_netdev_parms6(ipip_entry->ol_dev);
-       return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, &parms6.raddr,
+       return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp,
+                                                &ipip_entry->parms.daddr.addr6,
                                                 &ipip_entry->dip_kvdl_index);
 }
 
@@ -579,10 +577,7 @@ static void
 mlxsw_sp2_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp,
                                   const struct mlxsw_sp_ipip_entry *ipip_entry)
 {
-       struct __ip6_tnl_parm parms6;
-
-       parms6 = mlxsw_sp_ipip_netdev_parms6(ipip_entry->ol_dev);
-       mlxsw_sp_ipv6_addr_put(mlxsw_sp, &parms6.raddr);
+       mlxsw_sp_ipv6_addr_put(mlxsw_sp, &ipip_entry->parms.daddr.addr6);
 }
 
 static const struct mlxsw_sp_ipip_ops mlxsw_sp2_ipip_gre6_ops = {
index b73466470f75b15810b8f538ed4b79c818f2a3db..fe663b0ab7086a807f752ecbf98eb7b19bada5b6 100644 (file)
@@ -423,7 +423,7 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
 
        parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
        ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
-                           0, 0, parms.link, tun->fwmark, 0);
+                           0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0);
 
        rt = ip_route_output_key(tun->net, &fl4);
        if (IS_ERR(rt))
index 93df3049cdc05ee766dc51db0bf1d844141688e8..830363bafcce7742effbcca0e30e06955892faef 100644 (file)
@@ -28,6 +28,7 @@ config KS8842
 config KS8851
        tristate "Micrel KS8851 SPI"
        depends on SPI
+       depends on PTP_1588_CLOCK_OPTIONAL
        select MII
        select CRC32
        select EEPROM_93CX6
@@ -39,6 +40,7 @@ config KS8851
 config KS8851_MLL
        tristate "Micrel KS8851 MLL"
        depends on HAS_IOMEM
+       depends on PTP_1588_CLOCK_OPTIONAL
        select MII
        select CRC32
        select EEPROM_93CX6
index ce5970bdcc6a0791242a6663ae8b1e81f7897ad3..005e56ea5da1204c98e12778aa791b39b56b9dc9 100644 (file)
@@ -346,7 +346,8 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
 
                        lan966x_mac_process_raw_entry(&raw_entries[column],
                                                      mac, &vid, &dest_idx);
-                       WARN_ON(dest_idx > lan966x->num_phys_ports);
+                       if (WARN_ON(dest_idx >= lan966x->num_phys_ports))
+                               continue;
 
                        /* If the entry in SW is found, then there is nothing
                         * to do
@@ -392,7 +393,8 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
 
                lan966x_mac_process_raw_entry(&raw_entries[column],
                                              mac, &vid, &dest_idx);
-               WARN_ON(dest_idx > lan966x->num_phys_ports);
+               if (WARN_ON(dest_idx >= lan966x->num_phys_ports))
+                       continue;
 
                mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx);
                if (!mac_entry)
index 1f8c67f0261bf8583ef2bd3da425162188034de5..05f6dcc9dfd5295a51807073095b9ec3bdbfafc3 100644 (file)
@@ -103,6 +103,24 @@ static int lan966x_create_targets(struct platform_device *pdev,
        return 0;
 }
 
+static bool lan966x_port_unique_address(struct net_device *dev)
+{
+       struct lan966x_port *port = netdev_priv(dev);
+       struct lan966x *lan966x = port->lan966x;
+       int p;
+
+       for (p = 0; p < lan966x->num_phys_ports; ++p) {
+               port = lan966x->ports[p];
+               if (!port || port->dev == dev)
+                       continue;
+
+               if (ether_addr_equal(dev->dev_addr, port->dev->dev_addr))
+                       return false;
+       }
+
+       return true;
+}
+
 static int lan966x_port_set_mac_address(struct net_device *dev, void *p)
 {
        struct lan966x_port *port = netdev_priv(dev);
@@ -110,16 +128,26 @@ static int lan966x_port_set_mac_address(struct net_device *dev, void *p)
        const struct sockaddr *addr = p;
        int ret;
 
+       if (ether_addr_equal(addr->sa_data, dev->dev_addr))
+               return 0;
+
        /* Learn the new net device MAC address in the mac table. */
        ret = lan966x_mac_cpu_learn(lan966x, addr->sa_data, HOST_PVID);
        if (ret)
                return ret;
 
+       /* If there is another port with the same address as the dev, then don't
+        * delete it from the MAC table
+        */
+       if (!lan966x_port_unique_address(dev))
+               goto out;
+
        /* Then forget the previous one. */
        ret = lan966x_mac_cpu_forget(lan966x, dev->dev_addr, HOST_PVID);
        if (ret)
                return ret;
 
+out:
        eth_hw_addr_set(dev, addr->sa_data);
        return ret;
 }
@@ -446,6 +474,12 @@ static bool lan966x_hw_offload(struct lan966x *lan966x, u32 port,
                     ANA_CPU_FWD_CFG_MLD_REDIR_ENA)))
                return true;
 
+       if (eth_type_vlan(skb->protocol)) {
+               skb = skb_vlan_untag(skb);
+               if (unlikely(!skb))
+                       return false;
+       }
+
        if (skb->protocol == htons(ETH_P_IP) &&
            ip_hdr(skb)->protocol == IPPROTO_IGMP)
                return false;
@@ -665,6 +699,9 @@ static void lan966x_cleanup_ports(struct lan966x *lan966x)
                disable_irq(lan966x->ana_irq);
                lan966x->ana_irq = -ENXIO;
        }
+
+       if (lan966x->ptp_irq)
+               devm_free_irq(lan966x->dev, lan966x->ptp_irq, lan966x);
 }
 
 static int lan966x_probe_port(struct lan966x *lan966x, u32 p,
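
The set_mac_address path above is a make-before-break update: learn the new address first so the port never becomes unreachable, then forget the old entry only when no sibling port still uses it. The ordering, in outline (struct ex_port and the ex_* hooks are illustrative stand-ins for the lan966x MAC-table calls):

    #include <linux/etherdevice.h>

    struct ex_port {
            u8 addr[ETH_ALEN];
    };

    static int ex_learn(const u8 *addr);
    static int ex_forget(const u8 *addr);
    static bool ex_old_addr_unique(const struct ex_port *p);

    static int ex_set_addr(struct ex_port *p, const u8 *new_addr)
    {
            int err;

            if (ether_addr_equal(new_addr, p->addr))
                    return 0;                       /* nothing to do */

            err = ex_learn(new_addr);               /* 1. install new entry */
            if (err)
                    return err;

            if (!ex_old_addr_unique(p))             /* 2. shared? keep it */
                    goto out;

            err = ex_forget(p->addr);               /* 3. drop old entry */
            if (err)
                    return err;
    out:
            memcpy(p->addr, new_addr, ETH_ALEN);
            return 0;
    }
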
index ae782778d6dd44739a3493f65c906522f372bc4a..0a1041da43842181a16b1df3f23d3be433eb500e 100644 (file)
@@ -29,10 +29,10 @@ enum {
 
 static u64 lan966x_ptp_get_nominal_value(void)
 {
-       u64 res = 0x304d2df1;
-
-       res <<= 32;
-       return res;
+       /* This is the default value by which the time of day is increased on
+        * each system clock cycle. It is in 5.59 fixed-point nanosecond format.
+        */
+       return 0x304d4873ecade305;
 }
 
 int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr)
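
For reference, the new constant decodes cleanly as a 5.59 fixed-point increment: dividing by 2^59 gives roughly 6.0377 ns per system clock cycle, which implies a reference clock of about 165.625 MHz (my inference from the constant, not stated in the patch). A standalone arithmetic check, not driver code:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* 5.59 fixed point: 5 integer bits, 59 fractional bits */
            uint64_t inc = 0x304d4873ecade305ULL;
            double ns = (double)inc / (double)(1ULL << 59);

            /* prints ~6.0377358491 ns/tick and ~165.625 MHz */
            printf("%.10f ns/tick = %.3f MHz\n", ns, 1000.0 / ns);
            return 0;
    }
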
index e3555c94294dfe26eece08b8cc2b2205b3175ad7..df2bee6785598ac0fb5b2eade1a99d2223e9da79 100644 (file)
@@ -322,8 +322,7 @@ static int lan966x_port_prechangeupper(struct net_device *dev,
 
        if (netif_is_bridge_master(info->upper_dev) && !info->linking)
                switchdev_bridge_port_unoffload(port->dev, port,
-                                               &lan966x_switchdev_nb,
-                                               &lan966x_switchdev_blocking_nb);
+                                               NULL, NULL);
 
        return NOTIFY_DONE;
 }
index e443bd8b2d09acd2294d5f9a8735bdf63dd185a1..20ceac81a2c2c3b19f0f969f5dc277f0e8a73789 100644 (file)
@@ -551,7 +551,7 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
        struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1];
        struct ocelot_port *ocelot_port = ocelot->ports[port];
        struct ocelot_vcap_filter *filter;
-       int err;
+       int err = 0;
        u32 val;
 
        list_for_each_entry(filter, &block->rules, list) {
@@ -570,7 +570,7 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
        if (vlan_aware)
                err = ocelot_del_vlan_unaware_pvid(ocelot, port,
                                                   ocelot_port->bridge);
-       else
+       else if (ocelot_port->bridge)
                err = ocelot_add_vlan_unaware_pvid(ocelot, port,
                                                   ocelot_port->bridge);
        if (err)
@@ -629,6 +629,13 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid,
 {
        int err;
 
+       /* Ignore VID 0 added to our RX filter by the 8021q module, since
+        * that collides with OCELOT_STANDALONE_PVID and changes it from
+        * egress-untagged to egress-tagged.
+        */
+       if (!vid)
+               return 0;
+
        err = ocelot_vlan_member_add(ocelot, port, vid, untagged);
        if (err)
                return err;
@@ -651,6 +658,9 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid)
        bool del_pvid = false;
        int err;
 
+       if (!vid)
+               return 0;
+
        if (ocelot_port->pvid_vlan && ocelot_port->pvid_vlan->vid == vid)
                del_pvid = true;
 
@@ -1612,7 +1622,7 @@ int ocelot_trap_add(struct ocelot *ocelot, int port,
                trap->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY;
                trap->action.port_mask = 0;
                trap->take_ts = take_ts;
-               list_add_tail(&trap->trap_list, &ocelot->traps);
+               trap->is_trap = true;
                new = true;
        }
 
@@ -1624,10 +1634,8 @@ int ocelot_trap_add(struct ocelot *ocelot, int port,
                err = ocelot_vcap_filter_replace(ocelot, trap);
        if (err) {
                trap->ingress_port_mask &= ~BIT(port);
-               if (!trap->ingress_port_mask) {
-                       list_del(&trap->trap_list);
+               if (!trap->ingress_port_mask)
                        kfree(trap);
-               }
                return err;
        }
 
@@ -1647,11 +1655,8 @@ int ocelot_trap_del(struct ocelot *ocelot, int port, unsigned long cookie)
                return 0;
 
        trap->ingress_port_mask &= ~BIT(port);
-       if (!trap->ingress_port_mask) {
-               list_del(&trap->trap_list);
-
+       if (!trap->ingress_port_mask)
                return ocelot_vcap_filter_del(ocelot, trap);
-       }
 
        return ocelot_vcap_filter_replace(ocelot, trap);
 }
@@ -2859,6 +2864,8 @@ static void ocelot_port_set_mcast_flood(struct ocelot *ocelot, int port,
                val = BIT(port);
 
        ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_MC);
+       ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_MCIPV4);
+       ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_MCIPV6);
 }
 
 static void ocelot_port_set_bcast_flood(struct ocelot *ocelot, int port,
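
The trap rework above replaces trap_list membership with an is_trap flag and shares a single VCAP filter among ports by reference-counting it through ingress_port_mask bits: the filter is installed when the first port sets its bit and freed when the last bit clears. A condensed sketch (the ex_* hooks are illustrative):

    #include <linux/bits.h>
    #include <linux/types.h>

    struct ex_trap {
            unsigned long ingress_port_mask;
            bool is_trap;
    };

    static int ex_install(struct ex_trap *t);
    static int ex_replace(struct ex_trap *t);
    static int ex_uninstall(struct ex_trap *t);

    static int ex_trap_add(struct ex_trap *t, int port)
    {
            bool first = !t->ingress_port_mask;

            t->ingress_port_mask |= BIT(port);
            t->is_trap = true;
            return first ? ex_install(t) : ex_replace(t);
    }

    static int ex_trap_del(struct ex_trap *t, int port)
    {
            t->ingress_port_mask &= ~BIT(port);
            if (!t->ingress_port_mask)
                    return ex_uninstall(t); /* last port gone: delete */
            return ex_replace(t);           /* just shrink the mask */
    }
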
index 03b5e59d033e43d7166ada44495ace28de3b6fcc..51cf241ff7d07a289bdceebe0fea5baadfc1da2e 100644 (file)
@@ -280,9 +280,10 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port,
                        filter->type = OCELOT_VCAP_FILTER_OFFLOAD;
                        break;
                case FLOW_ACTION_TRAP:
-                       if (filter->block_id != VCAP_IS2) {
+                       if (filter->block_id != VCAP_IS2 ||
+                           filter->lookup != 0) {
                                NL_SET_ERR_MSG_MOD(extack,
-                                                  "Trap action can only be offloaded to VCAP IS2");
+                                                  "Trap action can only be offloaded to VCAP IS2 lookup 0");
                                return -EOPNOTSUPP;
                        }
                        if (filter->goto_target != -1) {
@@ -295,7 +296,7 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port,
                        filter->action.cpu_copy_ena = true;
                        filter->action.cpu_qu_num = 0;
                        filter->type = OCELOT_VCAP_FILTER_OFFLOAD;
-                       list_add_tail(&filter->trap_list, &ocelot->traps);
+                       filter->is_trap = true;
                        break;
                case FLOW_ACTION_POLICE:
                        if (filter->block_id == PSFP_BLOCK_ID) {
@@ -878,8 +879,6 @@ int ocelot_cls_flower_replace(struct ocelot *ocelot, int port,
 
        ret = ocelot_flower_parse(ocelot, port, ingress, f, filter);
        if (ret) {
-               if (!list_empty(&filter->trap_list))
-                       list_del(&filter->trap_list);
                kfree(filter);
                return ret;
        }
index c8701ac955a8ff927bb6ef75641d78db0f0deb36..eeb4cc07dd16f407604fb3790bafaf2886cc09cf 100644 (file)
@@ -374,7 +374,6 @@ static void is2_entry_set(struct ocelot *ocelot, int ix,
                         OCELOT_VCAP_BIT_0);
        vcap_key_set(vcap, &data, VCAP_IS2_HK_IGR_PORT_MASK, 0,
                     ~filter->ingress_port_mask);
-       vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_FIRST, OCELOT_VCAP_BIT_ANY);
        vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_HOST_MATCH,
                         OCELOT_VCAP_BIT_ANY);
        vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L2_MC, filter->dmac_mc);
@@ -1217,6 +1216,8 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot,
                struct ocelot_vcap_filter *tmp;
 
                tmp = ocelot_vcap_block_find_filter_by_index(block, i);
+               /* Read back the filter's counters before moving it */
+               vcap_entry_get(ocelot, i - 1, tmp);
                vcap_entry_set(ocelot, i, tmp);
        }
 
@@ -1250,7 +1251,11 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot,
        struct ocelot_vcap_filter del_filter;
        int i, index;
 
+       /* Need to inherit the block_id so that vcap_entry_set()
+        * does not get confused and knows where to install it.
+        */
        memset(&del_filter, 0, sizeof(del_filter));
+       del_filter.block_id = filter->block_id;
 
        /* Gets index of the filter */
        index = ocelot_vcap_block_get_filter_index(block, filter);
@@ -1265,6 +1270,8 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot,
                struct ocelot_vcap_filter *tmp;
 
                tmp = ocelot_vcap_block_find_filter_by_index(block, i);
+               /* Read back the filter's counters before moving it */
+               vcap_entry_get(ocelot, i + 1, tmp);
                vcap_entry_set(ocelot, i, tmp);
        }
 
index 50ac3ee2577a2999bd952a41469f92409fd7015d..21d2645885cef45bfe16542ef4a520a803793367 100644 (file)
@@ -2903,11 +2903,9 @@ static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb,
                status = myri10ge_xmit(curr, dev);
                if (status != 0) {
                        dev_kfree_skb_any(curr);
-                       if (segs != NULL) {
-                               curr = segs;
-                               segs = next;
+                       skb_list_walk_safe(next, curr, next) {
                                curr->next = NULL;
-                               dev_kfree_skb_any(segs);
+                               dev_kfree_skb_any(curr);
                        }
                        goto drop;
                }
index 6ffc62c411655c0ba7a4e693db339ab36e9944b9..0a7a757494bc5f8448a03e4a82ad10aff462d0f5 100644 (file)
@@ -256,7 +256,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        err = ionic_map_bars(ionic);
        if (err)
-               goto err_out_pci_disable_device;
+               goto err_out_pci_release_regions;
 
        /* Configure the device */
        err = ionic_setup(ionic);
@@ -360,6 +360,7 @@ err_out_teardown:
 
 err_out_unmap_bars:
        ionic_unmap_bars(ionic);
+err_out_pci_release_regions:
        pci_release_regions(pdev);
 err_out_pci_disable_device:
        pci_disable_device(pdev);
index e3edca187ddfaf68e03961ab9f4e23eeca7c43c1..5250d1d1e49ca188e4bb2bb1db69d4537b9ea4c5 100644 (file)
@@ -489,7 +489,7 @@ struct split_type_defs {
 
 #define STATIC_DEBUG_LINE_DWORDS       9
 
-#define NUM_COMMON_GLOBAL_PARAMS       11
+#define NUM_COMMON_GLOBAL_PARAMS       10
 
 #define MAX_RECURSION_DEPTH            10
 
index b242000a77fd8db322672e541df074be9b5ce9ef..b7cc36589f592e995e3a12bb80bc7c8e3af7dc42 100644 (file)
@@ -748,6 +748,9 @@ qede_build_skb(struct qede_rx_queue *rxq,
        buf = page_address(bd->data) + bd->page_offset;
        skb = build_skb(buf, rxq->rx_buf_seg_size);
 
+       if (unlikely(!skb))
+               return NULL;
+
        skb_reserve(skb, pad);
        skb_put(skb, len);
 
index b30589a135c248254cf3fa2eda98e871b34781cc..06f4d9a9e9388634ca3d08e3911efa4cd09e4d25 100644 (file)
@@ -3614,7 +3614,8 @@ static void ql_reset_work(struct work_struct *work)
                qdev->mem_map_registers;
        unsigned long hw_flags;
 
-       if (test_bit((QL_RESET_PER_SCSI | QL_RESET_START), &qdev->flags)) {
+       if (test_bit(QL_RESET_PER_SCSI, &qdev->flags) ||
+           test_bit(QL_RESET_START, &qdev->flags)) {
                clear_bit(QL_LINK_MASTER, &qdev->flags);
 
                /*
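
The qla3xxx change fixes a classic pitfall: test_bit() takes a bit number, not a mask, so `QL_RESET_PER_SCSI | QL_RESET_START` ORs two bit numbers into a third, unrelated one and tests the wrong flag. A self-contained demonstration (the bit values are made up for illustration):

    #include <stdio.h>

    #define QL_RESET_START    3     /* illustrative bit numbers */
    #define QL_RESET_PER_SCSI 4

    int main(void)
    {
            unsigned long flags = 1UL << QL_RESET_START;

            /* Wrong: 3 | 4 == 7, so this tests bit 7, which is clear. */
            int wrong = !!(flags & (1UL << (QL_RESET_PER_SCSI | QL_RESET_START)));

            /* Right: test each bit number on its own. */
            int right = !!(flags & (1UL << QL_RESET_PER_SCSI)) ||
                        !!(flags & (1UL << QL_RESET_START));

            printf("wrong=%d right=%d\n", wrong, right); /* wrong=0 right=1 */
            return 0;
    }
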
index 50d535981a35f02f90bfb58f2faf522176d49f90..f8edb3f1b73ad7c174588a1a9702086252db1fc4 100644 (file)
@@ -3579,6 +3579,11 @@ static int efx_ef10_mtd_probe(struct efx_nic *efx)
                n_parts++;
        }
 
+       if (!n_parts) {
+               kfree(parts);
+               return 0;
+       }
+
        rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts));
 fail:
        if (rc)
index f9064532beb66af039c120dd6cbdd57b92376b37..40df910aa1401c266f03aa7b42173ad2663481fc 100644 (file)
@@ -786,9 +786,90 @@ void efx_remove_channels(struct efx_nic *efx)
        kfree(efx->xdp_tx_queues);
 }
 
+static int efx_set_xdp_tx_queue(struct efx_nic *efx, int xdp_queue_number,
+                               struct efx_tx_queue *tx_queue)
+{
+       if (xdp_queue_number >= efx->xdp_tx_queue_count)
+               return -EINVAL;
+
+       netif_dbg(efx, drv, efx->net_dev,
+                 "Channel %u TXQ %u is XDP %u, HW %u\n",
+                 tx_queue->channel->channel, tx_queue->label,
+                 xdp_queue_number, tx_queue->queue);
+       efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
+       return 0;
+}
+
+static void efx_set_xdp_channels(struct efx_nic *efx)
+{
+       struct efx_tx_queue *tx_queue;
+       struct efx_channel *channel;
+       unsigned int next_queue = 0;
+       int xdp_queue_number = 0;
+       int rc;
+
+       /* We need to mark which channels really have RX and TX
+        * queues, and adjust the TX queue numbers if we have separate
+        * RX-only and TX-only channels.
+        */
+       efx_for_each_channel(channel, efx) {
+               if (channel->channel < efx->tx_channel_offset)
+                       continue;
+
+               if (efx_channel_is_xdp_tx(channel)) {
+                       efx_for_each_channel_tx_queue(tx_queue, channel) {
+                               tx_queue->queue = next_queue++;
+                               rc = efx_set_xdp_tx_queue(efx, xdp_queue_number,
+                                                         tx_queue);
+                               if (rc == 0)
+                                       xdp_queue_number++;
+                       }
+               } else {
+                       efx_for_each_channel_tx_queue(tx_queue, channel) {
+                               tx_queue->queue = next_queue++;
+                               netif_dbg(efx, drv, efx->net_dev,
+                                         "Channel %u TXQ %u is HW %u\n",
+                                         channel->channel, tx_queue->label,
+                                         tx_queue->queue);
+                       }
+
+                       /* If XDP is borrowing queues from net stack, it must
+                        * use the queue with no csum offload, which is the
+                        * first one of the channel
+                        * (note: tx_queue_by_type is not initialized yet)
+                        */
+                       if (efx->xdp_txq_queues_mode ==
+                           EFX_XDP_TX_QUEUES_BORROWED) {
+                               tx_queue = &channel->tx_queue[0];
+                               rc = efx_set_xdp_tx_queue(efx, xdp_queue_number,
+                                                         tx_queue);
+                               if (rc == 0)
+                                       xdp_queue_number++;
+                       }
+               }
+       }
+       WARN_ON(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DEDICATED &&
+               xdp_queue_number != efx->xdp_tx_queue_count);
+       WARN_ON(efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED &&
+               xdp_queue_number > efx->xdp_tx_queue_count);
+
+       /* If we have more CPUs than assigned XDP TX queues, assign the
+        * already existing queues to the remaining CPUs
+        */
+       next_queue = 0;
+       while (xdp_queue_number < efx->xdp_tx_queue_count) {
+               tx_queue = efx->xdp_tx_queues[next_queue++];
+               rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue);
+               if (rc == 0)
+                       xdp_queue_number++;
+       }
+}
+
 int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
 {
-       struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
+       struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel,
+                          *ptp_channel = efx_ptp_channel(efx);
+       struct efx_ptp_data *ptp_data = efx->ptp_data;
        unsigned int i, next_buffer_table = 0;
        u32 old_rxq_entries, old_txq_entries;
        int rc, rc2;
@@ -857,7 +938,9 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
                efx_init_napi_channel(efx->channel[i]);
        }
 
+       efx_set_xdp_channels(efx);
 out:
+       efx->ptp_data = NULL;
        /* Destroy unused channel structures */
        for (i = 0; i < efx->n_channels; i++) {
                channel = other_channel[i];
@@ -868,6 +951,7 @@ out:
                }
        }
 
+       efx->ptp_data = ptp_data;
        rc2 = efx_soft_enable_interrupts(efx);
        if (rc2) {
                rc = rc ? rc : rc2;
@@ -886,29 +970,13 @@ rollback:
        efx->txq_entries = old_txq_entries;
        for (i = 0; i < efx->n_channels; i++)
                swap(efx->channel[i], other_channel[i]);
+       efx_ptp_update_channel(efx, ptp_channel);
        goto out;
 }
 
-static inline int
-efx_set_xdp_tx_queue(struct efx_nic *efx, int xdp_queue_number,
-                    struct efx_tx_queue *tx_queue)
-{
-       if (xdp_queue_number >= efx->xdp_tx_queue_count)
-               return -EINVAL;
-
-       netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n",
-                 tx_queue->channel->channel, tx_queue->label,
-                 xdp_queue_number, tx_queue->queue);
-       efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
-       return 0;
-}
-
 int efx_set_channels(struct efx_nic *efx)
 {
-       struct efx_tx_queue *tx_queue;
        struct efx_channel *channel;
-       unsigned int next_queue = 0;
-       int xdp_queue_number;
        int rc;
 
        efx->tx_channel_offset =
@@ -926,61 +994,14 @@ int efx_set_channels(struct efx_nic *efx)
                        return -ENOMEM;
        }
 
-       /* We need to mark which channels really have RX and TX
-        * queues, and adjust the TX queue numbers if we have separate
-        * RX-only and TX-only channels.
-        */
-       xdp_queue_number = 0;
        efx_for_each_channel(channel, efx) {
                if (channel->channel < efx->n_rx_channels)
                        channel->rx_queue.core_index = channel->channel;
                else
                        channel->rx_queue.core_index = -1;
-
-               if (channel->channel >= efx->tx_channel_offset) {
-                       if (efx_channel_is_xdp_tx(channel)) {
-                               efx_for_each_channel_tx_queue(tx_queue, channel) {
-                                       tx_queue->queue = next_queue++;
-                                       rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue);
-                                       if (rc == 0)
-                                               xdp_queue_number++;
-                               }
-                       } else {
-                               efx_for_each_channel_tx_queue(tx_queue, channel) {
-                                       tx_queue->queue = next_queue++;
-                                       netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is HW %u\n",
-                                                 channel->channel, tx_queue->label,
-                                                 tx_queue->queue);
-                               }
-
-                               /* If XDP is borrowing queues from net stack, it must use the queue
-                                * with no csum offload, which is the first one of the channel
-                                * (note: channel->tx_queue_by_type is not initialized yet)
-                                */
-                               if (efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_BORROWED) {
-                                       tx_queue = &channel->tx_queue[0];
-                                       rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue);
-                                       if (rc == 0)
-                                               xdp_queue_number++;
-                               }
-                       }
-               }
        }
-       WARN_ON(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DEDICATED &&
-               xdp_queue_number != efx->xdp_tx_queue_count);
-       WARN_ON(efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED &&
-               xdp_queue_number > efx->xdp_tx_queue_count);
 
-       /* If we have more CPUs than assigned XDP TX queues, assign the already
-        * existing queues to the exceeding CPUs
-        */
-       next_queue = 0;
-       while (xdp_queue_number < efx->xdp_tx_queue_count) {
-               tx_queue = efx->xdp_tx_queues[next_queue++];
-               rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue);
-               if (rc == 0)
-                       xdp_queue_number++;
-       }
+       efx_set_xdp_channels(efx);
 
        rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
        if (rc)
@@ -1124,7 +1145,7 @@ void efx_start_channels(struct efx_nic *efx)
        struct efx_rx_queue *rx_queue;
        struct efx_channel *channel;
 
-       efx_for_each_channel(channel, efx) {
+       efx_for_each_channel_rev(channel, efx) {
                efx_for_each_channel_tx_queue(tx_queue, channel) {
                        efx_init_tx_queue(tx_queue);
                        atomic_inc(&efx->active_queues);
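
Besides extracting efx_set_xdp_channels(), the realloc path now hides efx->ptp_data while the superseded channel copies are torn down, so their cleanup cannot free live PTP state, and restores it (plus the PTP channel on rollback) afterwards. The hide/restore shape, generically (ex_* names are illustrative):

    struct ex_dev {
            void *shared;
    };

    static void ex_destroy_old_copies(struct ex_dev *dev);

    static void ex_teardown_stale(struct ex_dev *dev)
    {
            void *saved = dev->shared;

            dev->shared = NULL;             /* copies see no shared state */
            ex_destroy_old_copies(dev);
            dev->shared = saved;            /* survivors get it back */
    }
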
index f0ef515e2ade51d3715473616032edadad6ec2f4..4625f85acab2ea9b2016a89a0ca146dab13a52bb 100644 (file)
@@ -45,6 +45,7 @@
 #include "farch_regs.h"
 #include "tx.h"
 #include "nic.h" /* indirectly includes ptp.h */
+#include "efx_channels.h"
 
 /* Maximum number of events expected to make up a PTP event */
 #define        MAX_EVENT_FRAGS                 3
@@ -541,6 +542,12 @@ struct efx_channel *efx_ptp_channel(struct efx_nic *efx)
        return efx->ptp_data ? efx->ptp_data->channel : NULL;
 }
 
+void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel)
+{
+       if (efx->ptp_data)
+               efx->ptp_data->channel = channel;
+}
+
 static u32 last_sync_timestamp_major(struct efx_nic *efx)
 {
        struct efx_channel *channel = efx_ptp_channel(efx);
@@ -1443,6 +1450,11 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel)
        int rc = 0;
        unsigned int pos;
 
+       if (efx->ptp_data) {
+               efx->ptp_data->channel = channel;
+               return 0;
+       }
+
        ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL);
        efx->ptp_data = ptp;
        if (!efx->ptp_data)
@@ -2176,7 +2188,7 @@ static const struct efx_channel_type efx_ptp_channel_type = {
        .pre_probe              = efx_ptp_probe_channel,
        .post_remove            = efx_ptp_remove_channel,
        .get_name               = efx_ptp_get_channel_name,
-       /* no copy operation; there is no need to reallocate this channel */
+       .copy                   = efx_copy_channel,
        .receive_skb            = efx_ptp_rx,
        .want_txqs              = efx_ptp_want_txqs,
        .keep_eventq            = false,
index 9855e8c9e544d7a90c1fd6c375d8911d1d8d9672..7b1ef7002b3f047c6d049e8b8625979b4138aea5 100644 (file)
@@ -16,6 +16,7 @@ struct ethtool_ts_info;
 int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel);
 void efx_ptp_defer_probe_with_channel(struct efx_nic *efx);
 struct efx_channel *efx_ptp_channel(struct efx_nic *efx);
+void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel);
 void efx_ptp_remove(struct efx_nic *efx);
 int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr);
 int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr);
index 1b22c7be0088e74194e4a556d3ee2341aec4b5fd..fa8b9aacca112169f20ef409d83b60602da28a3e 100644 (file)
@@ -150,6 +150,9 @@ static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue)
        struct efx_nic *efx = rx_queue->efx;
        int i;
 
+       if (unlikely(!rx_queue->page_ring))
+               return;
+
        /* Unmap and release the pages in the recycle ring. Remove the ring. */
        for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
                struct page *page = rx_queue->page_ring[i];
index d16e031e95f44d09c4fcc37456ef68bf23e8aa6c..6983799e1c05d6533a2840ec3be8e5a650268c34 100644 (file)
@@ -443,6 +443,9 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
        if (unlikely(!tx_queue))
                return -EINVAL;
 
+       if (!tx_queue->initialised)
+               return -EINVAL;
+
        if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED)
                HARD_TX_LOCK(efx->net_dev, tx_queue->core_txq, cpu);
 
index d530cde2b86480f40a110c0327e887e19a5952e4..9bc8281b7f5bdd3d95924c6f8294d39202424a27 100644 (file)
@@ -101,6 +101,8 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
        netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
                  "shutting down TX queue %d\n", tx_queue->queue);
 
+       tx_queue->initialised = false;
+
        if (!tx_queue->buffer)
                return;
 
index 7a50ba00f8ae30dc8ed6a2e0d54145e4259af94e..c854efdf1f25fe372850e20eeb588428f74d5df3 100644 (file)
@@ -2431,7 +2431,7 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
        if (irq == -EPROBE_DEFER) {
                retval = -EPROBE_DEFER;
                goto out_0;
-       } else if (irq <= 0) {
+       } else if (irq < 0) {
                pr_warn("Could not allocate irq resource\n");
                retval = -ENODEV;
                goto out_0;
index cd478d2cd871ae46640bff2b0dd06eba384de777..00f6d347eaf75b33ec43c60aea8aca1fcaebdc7a 100644 (file)
 #define TSE_PCS_USE_SGMII_ENA                          BIT(0)
 #define TSE_PCS_IF_USE_SGMII                           0x03
 
-#define SGMII_ADAPTER_CTRL_REG                         0x00
-#define SGMII_ADAPTER_DISABLE                          0x0001
-#define SGMII_ADAPTER_ENABLE                           0x0000
-
 #define AUTONEGO_LINK_TIMER                            20
 
 static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs)
@@ -202,12 +198,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
                           unsigned int speed)
 {
        void __iomem *tse_pcs_base = pcs->tse_pcs_base;
-       void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base;
        u32 val;
 
-       writew(SGMII_ADAPTER_ENABLE,
-              sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
-
        pcs->autoneg = phy_dev->autoneg;
 
        if (phy_dev->autoneg == AUTONEG_ENABLE) {
index 442812c0a4bdccb6d93eb6e754b7800df8fcad7e..694ac25ef426ba2e6a9848b6881b9e3386d32ef6 100644 (file)
 #include <linux/phy.h>
 #include <linux/timer.h>
 
+#define SGMII_ADAPTER_CTRL_REG         0x00
+#define SGMII_ADAPTER_ENABLE           0x0000
+#define SGMII_ADAPTER_DISABLE          0x0001
+
 struct tse_pcs {
        struct device *dev;
        void __iomem *tse_pcs_base;
index 63754a9c4ba72447cbcef5e79961381eca6a2be6..0b0be0898ac573390d9661a5d8bc1b14a6c85f8b 100644 (file)
@@ -454,6 +454,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
        plat->has_gmac4 = 1;
        plat->force_sf_dma_mode = 0;
        plat->tso_en = 1;
+       plat->sph_disable = 1;
 
        /* Multiplying factor to the clk_eee_i clock time
         * period to make it closer to 100 ns. This value
index ecf759ee1c9f5e21e2519c416e3d1323cdc0e474..017dbbda0c1c45a9b19e01214dda57beb60a3de0 100644 (file)
@@ -205,7 +205,7 @@ static const struct pci_device_id loongson_dwmac_id_table[] = {
 };
 MODULE_DEVICE_TABLE(pci, loongson_dwmac_id_table);
 
-struct pci_driver loongson_dwmac_driver = {
+static struct pci_driver loongson_dwmac_driver = {
        .name = "dwmac-loongson-pci",
        .id_table = loongson_dwmac_id_table,
        .probe = loongson_dwmac_probe,
index b7c2579c963b68a72190a9b43f60dd852614858b..6b447d8f0bd8a3fb25dc3076d8ec5f455512f640 100644 (file)
@@ -18,9 +18,6 @@
 
 #include "altr_tse_pcs.h"
 
-#define SGMII_ADAPTER_CTRL_REG                          0x00
-#define SGMII_ADAPTER_DISABLE                           0x0001
-
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1
 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2
@@ -62,14 +59,13 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
 {
        struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv;
        void __iomem *splitter_base = dwmac->splitter_base;
-       void __iomem *tse_pcs_base = dwmac->pcs.tse_pcs_base;
        void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base;
        struct device *dev = dwmac->dev;
        struct net_device *ndev = dev_get_drvdata(dev);
        struct phy_device *phy_dev = ndev->phydev;
        u32 val;
 
-       if ((tse_pcs_base) && (sgmii_adapter_base))
+       if (sgmii_adapter_base)
                writew(SGMII_ADAPTER_DISABLE,
                       sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
 
@@ -93,8 +89,11 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
                writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG);
        }
 
-       if (tse_pcs_base && sgmii_adapter_base)
+       if (phy_dev && sgmii_adapter_base) {
+               writew(SGMII_ADAPTER_ENABLE,
+                      sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
                tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed);
+       }
 }
 
 static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev)
index f86cc83003f2dccfdb91f7a6c63875623e83b7f0..f834472599f75cceded3275d15edca72203959cf 100644 (file)
@@ -907,6 +907,7 @@ static int sun8i_dwmac_register_mdio_mux(struct stmmac_priv *priv)
 
        ret = mdio_mux_init(priv->device, mdio_mux, mdio_mux_syscon_switch_fn,
                            &gmac->mux_handle, priv, priv->mii);
+       of_node_put(mdio_mux);
        return ret;
 }
 
index 22fea0f67245c7f90d478c61cdaaaa63ac51ceb6..92d32940aff00660663e709fd8b7196500e96e16 100644 (file)
@@ -71,9 +71,9 @@ static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec)
        writel(value, ioaddr + PTP_TCR);
 
        /* wait for present system time initialize to complete */
-       return readl_poll_timeout(ioaddr + PTP_TCR, value,
+       return readl_poll_timeout_atomic(ioaddr + PTP_TCR, value,
                                 !(value & PTP_TCR_TSINIT),
-                                10000, 100000);
+                                10, 100000);
 }
 
 static int config_addend(void __iomem *ioaddr, u32 addend)
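
    init_systime() can run in atomic context, where the sleeping
    readl_poll_timeout() (it uses usleep_range() between reads) is not
    allowed; the _atomic variant busy-waits with udelay() instead, and the
    10 us poll interval keeps that busy-wait short. The two macros side by
    side, as a sketch:

        /* process context only: may sleep between polls */
        ret = readl_poll_timeout(ioaddr + PTP_TCR, value,
                                 !(value & PTP_TCR_TSINIT), 10000, 100000);

        /* atomic context (e.g. under a spinlock): busy-waits with udelay() */
        ret = readl_poll_timeout_atomic(ioaddr + PTP_TCR, value,
                                        !(value & PTP_TCR_TSINIT), 10, 100000);
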
index 4a4b3651ab3e2aa12539dde380ca06f9a2868b83..2525a80353b70a68dda4a0c1f846d2378c5f92b7 100644 (file)
@@ -7021,7 +7021,7 @@ int stmmac_dvr_probe(struct device *device,
                dev_info(priv->device, "TSO feature enabled\n");
        }
 
-       if (priv->dma_cap.sphen) {
+       if (priv->dma_cap.sphen && !priv->plat->sph_disable) {
                ndev->hw_features |= NETIF_F_GRO;
                priv->sph_cap = true;
                priv->sph = priv->sph_cap;
index fcf17d8a0494b7a464e2e704bf049d44738640fe..644bb54f5f0204bc117e112810603eccef1a1b7b 100644 (file)
@@ -181,7 +181,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
                return -ENOMEM;
 
        /* Enable pci device */
-       ret = pci_enable_device(pdev);
+       ret = pcim_enable_device(pdev);
        if (ret) {
                dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n",
                        __func__);
@@ -241,8 +241,6 @@ static void stmmac_pci_remove(struct pci_dev *pdev)
                pcim_iounmap_regions(pdev, BIT(i));
                break;
        }
-
-       pci_disable_device(pdev);
 }
 
 static int __maybe_unused stmmac_pci_suspend(struct device *dev)
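
    Switching to the device-managed pcim_enable_device() moves the disable
    into devres: the PCI core disables the device automatically when the
    driver detaches, which is why the matching pci_disable_device() call can
    simply be deleted from stmmac_pci_remove(). The pairing in sketch form,
    with demo_probe/demo_remove as illustrative names:

        static int demo_probe(struct pci_dev *pdev,
                              const struct pci_device_id *id)
        {
                int ret;

                ret = pcim_enable_device(pdev); /* undone on detach by devres */
                if (ret)
                        return ret;
                /* ... map BARs, register netdev ... */
                return 0;
        }

        static void demo_remove(struct pci_dev *pdev)
        {
                /* no pci_disable_device() here: devres handles it */
        }
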
index 5d29f336315b79d87844af9f4c6fa3f71803eecd..11e1055e8260f49962ca88271337eb022b3fa3c5 100644 (file)
@@ -431,8 +431,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
        plat->phylink_node = np;
 
        /* Get max speed of operation from device tree */
-       if (of_property_read_u32(np, "max-speed", &plat->max_speed))
-               plat->max_speed = -1;
+       of_property_read_u32(np, "max-speed", &plat->max_speed);
 
        plat->bus_id = of_alias_get_id(np, "ethernet");
        if (plat->bus_id < 0)
index bd4b1528cf99258d52ea70864d89b411f79e4a36..79e850fe4621c0a9e466f0bb4bb816f267df3658 100644 (file)
@@ -1246,8 +1246,10 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw)
        data->slave_data = devm_kcalloc(dev, CPSW_SLAVE_PORTS_NUM,
                                        sizeof(struct cpsw_slave_data),
                                        GFP_KERNEL);
-       if (!data->slave_data)
+       if (!data->slave_data) {
+               of_node_put(tmp_node);
                return -ENOMEM;
+       }
 
        /* Populate all the child nodes here...
         */
@@ -1341,6 +1343,7 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw)
 
 err_node_put:
        of_node_put(port_np);
+       of_node_put(tmp_node);
        return ret;
 }
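
    Both cpsw hunks plug device-node refcount leaks: tmp_node comes back from
    the DT lookup with its refcount raised, and every exit path, including the
    new -ENOMEM one, has to drop it. The shape of the pattern, with
    illustrative names (demo_alloc is not a driver function):

        static int demo_parse(struct device_node *parent)
        {
                struct device_node *np;
                int ret = 0;

                np = of_get_child_by_name(parent, "ethernet-ports");
                if (!np)
                        return -ENOENT;

                if (demo_alloc() < 0) {
                        ret = -ENOMEM;
                        goto out;       /* error paths drop the ref too */
                }
                /* ... walk and register children ... */
        out:
                of_node_put(np);        /* balances of_get_child_by_name() */
                return ret;
        }
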
 
index 0f9c88dd1a4a4035c19728a0bc4c189dc8616b8d..d5c1e5c4a50858905ebe1bdafd67a0e678f849c4 100644 (file)
@@ -433,8 +433,6 @@ struct axienet_local {
        struct net_device *ndev;
        struct device *dev;
 
-       struct device_node *phy_node;
-
        struct phylink *phylink;
        struct phylink_config phylink_config;
 
index c7eb05e4a6bf40f20cdae60e3444f4ac83dd9771..d6fc3f7acdf0d666b4a4e4e7bf6593d6f621f776 100644 (file)
@@ -2064,25 +2064,33 @@ static int axienet_probe(struct platform_device *pdev)
        if (ret)
                goto cleanup_clk;
 
-       lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
-       if (lp->phy_node) {
-               ret = axienet_mdio_setup(lp);
-               if (ret)
-                       dev_warn(&pdev->dev,
-                                "error registering MDIO bus: %d\n", ret);
-       }
+       ret = axienet_mdio_setup(lp);
+       if (ret)
+               dev_warn(&pdev->dev,
+                        "error registering MDIO bus: %d\n", ret);
+
        if (lp->phy_mode == PHY_INTERFACE_MODE_SGMII ||
            lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX) {
-               if (!lp->phy_node) {
-                       dev_err(&pdev->dev, "phy-handle required for 1000BaseX/SGMII\n");
+               np = of_parse_phandle(pdev->dev.of_node, "pcs-handle", 0);
+               if (!np) {
+                       /* Deprecated: Always use "pcs-handle" for pcs_phy.
+                        * Falling back to "phy-handle" here is only for
+                        * backward compatibility with old device trees.
+                        */
+                       np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+               }
+               if (!np) {
+                       dev_err(&pdev->dev, "pcs-handle (preferred) or phy-handle required for 1000BaseX/SGMII\n");
                        ret = -EINVAL;
                        goto cleanup_mdio;
                }
-               lp->pcs_phy = of_mdio_find_device(lp->phy_node);
+               lp->pcs_phy = of_mdio_find_device(np);
                if (!lp->pcs_phy) {
                        ret = -EPROBE_DEFER;
+                       of_node_put(np);
                        goto cleanup_mdio;
                }
+               of_node_put(np);
                lp->pcs.ops = &axienet_pcs_ops;
                lp->pcs.poll = true;
        }
@@ -2125,8 +2133,6 @@ cleanup_mdio:
                put_device(&lp->pcs_phy->dev);
        if (lp->mii_bus)
                axienet_mdio_teardown(lp);
-       of_node_put(lp->phy_node);
-
 cleanup_clk:
        clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks);
        clk_disable_unprepare(lp->axi_clk);
@@ -2155,9 +2161,6 @@ static int axienet_remove(struct platform_device *pdev)
        clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks);
        clk_disable_unprepare(lp->axi_clk);
 
-       of_node_put(lp->phy_node);
-       lp->phy_node = NULL;
-
        free_netdev(ndev);
 
        return 0;
index 57a24f62e353d95cad3b791eae49e3eda9b4cb77..d770b3ac3f74fb2fdb7f691de0f989c7daba9f11 100644 (file)
@@ -823,10 +823,10 @@ static int xemaclite_mdio_write(struct mii_bus *bus, int phy_id, int reg,
 static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
 {
        struct mii_bus *bus;
-       int rc;
        struct resource res;
        struct device_node *np = of_get_parent(lp->phy_node);
        struct device_node *npp;
+       int rc, ret;
 
        /* Don't register the MDIO bus if the phy_node or its parent node
         * can't be found.
@@ -836,8 +836,14 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
                return -ENODEV;
        }
        npp = of_get_parent(np);
-
-       of_address_to_resource(npp, 0, &res);
+       ret = of_address_to_resource(npp, 0, &res);
+       of_node_put(npp);
+       if (ret) {
+               dev_err(dev, "%s resource error!\n",
+                       dev->of_node->full_name);
+               of_node_put(np);
+               return ret;
+       }
        if (lp->ndev->mem_start != res.start) {
                struct phy_device *phydev;
                phydev = of_phy_find_device(lp->phy_node);
@@ -846,6 +852,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
                                 "MDIO of the phy is not registered yet\n");
                else
                        put_device(&phydev->mdio.dev);
+               of_node_put(np);
                return 0;
        }
 
@@ -858,6 +865,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
        bus = mdiobus_alloc();
        if (!bus) {
                dev_err(dev, "Failed to allocate mdiobus\n");
+               of_node_put(np);
                return -ENOMEM;
        }
 
@@ -870,6 +878,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
        bus->parent = dev;
 
        rc = of_mdiobus_register(bus, np);
+       of_node_put(np);
        if (rc) {
                dev_err(dev, "Failed to register mdio bus.\n");
                goto err_register;
@@ -926,8 +935,6 @@ static int xemaclite_open(struct net_device *dev)
        xemaclite_disable_interrupts(lp);
 
        if (lp->phy_node) {
-               u32 bmcr;
-
                lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node,
                                             xemaclite_adjust_link, 0,
                                             PHY_INTERFACE_MODE_MII);
@@ -938,19 +945,6 @@ static int xemaclite_open(struct net_device *dev)
 
                /* EmacLite doesn't support giga-bit speeds */
                phy_set_max_speed(lp->phy_dev, SPEED_100);
-
-               /* Don't advertise 1000BASE-T Full/Half duplex speeds */
-               phy_write(lp->phy_dev, MII_CTRL1000, 0);
-
-               /* Advertise only 10 and 100mbps full/half duplex speeds */
-               phy_write(lp->phy_dev, MII_ADVERTISE, ADVERTISE_ALL |
-                         ADVERTISE_CSMA);
-
-               /* Restart auto negotiation */
-               bmcr = phy_read(lp->phy_dev, MII_BMCR);
-               bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
-               phy_write(lp->phy_dev, MII_BMCR, bmcr);
-
                phy_start(lp->phy_dev);
        }
 
index 16105292b140bbc44c97cd9370c4f6db163dea2d..74e845fa2e07ed8e2430c57c2d24b830be361be8 100644 (file)
@@ -1355,7 +1355,9 @@ static int rr_close(struct net_device *dev)
 
        rrpriv->fw_running = 0;
 
+       spin_unlock_irqrestore(&rrpriv->lock, flags);
        del_timer_sync(&rrpriv->timer);
+       spin_lock_irqsave(&rrpriv->lock, flags);
 
        writel(0, &regs->TxPi);
        writel(0, &regs->IpRxPi);
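
    del_timer_sync() waits for a running timer callback to finish. If that
    callback takes rrpriv->lock, calling del_timer_sync() with the lock held
    can deadlock: the callback spins on the lock while del_timer_sync() waits
    on the callback. Dropping the lock around the call, as the hunk does, is
    the standard escape:

        spin_lock_irqsave(&priv->lock, flags);
        /* ... quiesce the hardware ... */

        spin_unlock_irqrestore(&priv->lock, flags);
        del_timer_sync(&priv->timer);   /* callback may take priv->lock */
        spin_lock_irqsave(&priv->lock, flags);

        /* ... finish teardown under the lock ... */
        spin_unlock_irqrestore(&priv->lock, flags);
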
index bc981043cc808287608b271f7b0cf8a421ed3966..a701178a1d139a98ecf643d0c7d133fe2f99020a 100644 (file)
@@ -1367,9 +1367,10 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
        struct gsi_event *event_done;
        struct gsi_event *event;
        struct gsi_trans *trans;
+       u32 trans_count = 0;
        u32 byte_count = 0;
-       u32 old_index;
        u32 event_avail;
+       u32 old_index;
 
        trans_info = &channel->trans_info;
 
@@ -1390,6 +1391,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
        do {
                trans->len = __le16_to_cpu(event->len);
                byte_count += trans->len;
+               trans_count++;
 
                /* Move on to the next event and transaction */
                if (--event_avail)
@@ -1401,7 +1403,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
 
        /* We record RX bytes when they are received */
        channel->byte_count += byte_count;
-       channel->trans_count++;
+       channel->trans_count += trans_count;
 }
 
 /* Initialize a ring, including allocating DMA memory for its entries */
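
    One event-ring update can complete several transactions, so bumping
    trans_count once per update undercounted. A worked example with made-up
    numbers, for an update covering 5 transactions of 1500 bytes each:

        /* old: byte_count += 7500; trans_count += 1;   (undercount) */
        /* new: byte_count += 7500; trans_count += 5; */
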
index 888e94278a84fa1b6724560cc420b7558779cd59..cea7b2e2ce969831c9a351c6d2b30bd0edf23b0d 100644 (file)
@@ -1150,13 +1150,12 @@ static void ipa_endpoint_skb_copy(struct ipa_endpoint *endpoint,
                return;
 
        skb = __dev_alloc_skb(len, GFP_ATOMIC);
-       if (!skb)
-               return;
-
-       /* Copy the data into the socket buffer and receive it */
-       skb_put(skb, len);
-       memcpy(skb->data, data, len);
-       skb->truesize += extra;
+       if (skb) {
+               /* Copy the data into the socket buffer and receive it */
+               skb_put(skb, len);
+               memcpy(skb->data, data, len);
+               skb->truesize += extra;
+       }
 
        ipa_modem_skb_rx(endpoint->netdev, skb);
 }
index 90f3aec55b365735051400f322cabc42e77155d8..ec010cf2e816a9f46336a66a3d52f4bb827c20fd 100644 (file)
@@ -125,7 +125,7 @@ static void ipa_qmi_indication(struct ipa_qmi *ipa_qmi)
  */
 static void ipa_qmi_ready(struct ipa_qmi *ipa_qmi)
 {
-       struct ipa *ipa = container_of(ipa_qmi, struct ipa, qmi);
+       struct ipa *ipa;
        int ret;
 
        /* We aren't ready until the modem and microcontroller are */
index 069e8824c264adbb712ed47d24eafd3de43571fe..b00bc8173abea8478954adc5d632e10cb8da7aef 100644 (file)
@@ -460,8 +460,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
                        return RX_HANDLER_CONSUMED;
                *pskb = skb;
                eth = eth_hdr(skb);
-               if (macvlan_forward_source(skb, port, eth->h_source))
+               if (macvlan_forward_source(skb, port, eth->h_source)) {
+                       kfree_skb(skb);
                        return RX_HANDLER_CONSUMED;
+               }
                src = macvlan_hash_lookup(port, eth->h_source);
                if (src && src->mode != MACVLAN_MODE_VEPA &&
                    src->mode != MACVLAN_MODE_BRIDGE) {
@@ -480,8 +482,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
                return RX_HANDLER_PASS;
        }
 
-       if (macvlan_forward_source(skb, port, eth->h_source))
+       if (macvlan_forward_source(skb, port, eth->h_source)) {
+               kfree_skb(skb);
                return RX_HANDLER_CONSUMED;
+       }
        if (macvlan_passthru(port))
                vlan = list_first_or_null_rcu(&port->vlans,
                                              struct macvlan_dev, list);
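
    RX_HANDLER_CONSUMED tells the networking core that the handler now owns
    the skb and that the core must not touch it again; returning it without
    freeing, as both call sites did before this fix, leaks one skb per dropped
    source-mode frame. The contract in miniature (demo_should_drop is an
    illustrative name):

        static rx_handler_result_t demo_handle_frame(struct sk_buff **pskb)
        {
                struct sk_buff *skb = *pskb;

                if (demo_should_drop(skb)) {
                        kfree_skb(skb);         /* CONSUMED == we own it */
                        return RX_HANDLER_CONSUMED;
                }
                return RX_HANDLER_PASS;         /* core keeps processing skb */
        }
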
index baf7afac7857e308025f7256f9b5ed4428a5e5b2..53846c6b56ca2fc3b631d875e2aa02156140ddc4 100644 (file)
@@ -553,7 +553,7 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev,
        hdr->source_slave = ((llsrc << 1) & 0xff) | 0x01;
        mhdr->ver = 0x01;
 
-       return 0;
+       return sizeof(struct mctp_i2c_hdr);
 }
 
 static int mctp_i2c_tx_thread(void *data)
index 1becb1a731f67555fc44e099bd90ee432086203c..1c1584fca63277a0d6b286e055a72678fa9677fd 100644 (file)
@@ -43,6 +43,11 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
        int rc;
 
        rc = fwnode_irq_get(child, 0);
+       /* Don't wait forever if the IRQ provider doesn't become available,
+        * just fall back to poll mode
+        */
+       if (rc == -EPROBE_DEFER)
+               rc = driver_deferred_probe_check_state(&phy->mdio.dev);
        if (rc == -EPROBE_DEFER)
                return rc;
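
    driver_deferred_probe_check_state() reports whether deferred probing can
    still make progress; once it stops returning -EPROBE_DEFER (for example
    after the deferred-probe timeout), the code falls through and registers
    the PHY with polling instead of blocking forever on a missing interrupt
    provider. A simplified sketch of the flow (the PHY_POLL fallback is an
    assumption standing in for the function's default-IRQ handling):

        rc = fwnode_irq_get(child, 0);
        if (rc == -EPROBE_DEFER)
                /* can deferred probe still make progress? */
                rc = driver_deferred_probe_check_state(&phy->mdio.dev);
        if (rc == -EPROBE_DEFER)
                return rc;              /* yes: retry the whole probe later */

        if (rc > 0)
                phy->irq = rc;          /* got a valid interrupt line */
        else
                phy->irq = PHY_POLL;    /* no: fall back to polling */
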
 
index c483ba67c21f193747a539df87ea23002150b944..582969751b4cfecdf76ed1f556a4b0b777f20257 100644 (file)
@@ -102,6 +102,9 @@ static int mscc_miim_read(struct mii_bus *bus, int mii_id, int regnum)
        u32 val;
        int ret;
 
+       if (regnum & MII_ADDR_C45)
+               return -EOPNOTSUPP;
+
        ret = mscc_miim_wait_pending(bus);
        if (ret)
                goto out;
@@ -145,6 +148,9 @@ static int mscc_miim_write(struct mii_bus *bus, int mii_id,
        struct mscc_miim_dev *miim = bus->priv;
        int ret;
 
+       if (regnum & MII_ADDR_C45)
+               return -EOPNOTSUPP;
+
        ret = mscc_miim_wait_pending(bus);
        if (ret < 0)
                goto out;
index 6dcbf987d61b5fc93567b4a2b4d39a3b4f9840ce..8b444a8eb6b55601d0b3e04be8465b54f49abeb2 100644 (file)
@@ -115,7 +115,7 @@ static int bcm6368_mdiomux_probe(struct platform_device *pdev)
        md->mii_bus = devm_mdiobus_alloc(&pdev->dev);
        if (!md->mii_bus) {
                dev_err(&pdev->dev, "mdiomux bus alloc failed\n");
-               return ENOMEM;
+               return -ENOMEM;
        }
 
        bus = md->mii_bus;
index b6fea119fe137e978aebd20cef4bd3870294a2bb..2b7d0720720b6b2aa38d28139e6db013909beb99 100644 (file)
@@ -880,7 +880,7 @@ static int mv3310_read_status_copper(struct phy_device *phydev)
 
        cssr1 = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_PCS_CSSR1);
        if (cssr1 < 0)
-               return val;
+               return cssr1;
 
        /* If the link settings are not resolved, mark the link down */
        if (!(cssr1 & MV_PCS_CSSR1_RESOLVED)) {
index 19b11e896460f46f1fdf89b0c4392e7d83f4e436..cd9aa353b653f62a4735c0a51bdf4db2d06986ca 100644 (file)
 #define PTP_TIMESTAMP_EN_PDREQ_                        BIT(2)
 #define PTP_TIMESTAMP_EN_PDRES_                        BIT(3)
 
-#define PTP_RX_LATENCY_1000                    0x0224
-#define PTP_TX_LATENCY_1000                    0x0225
-
-#define PTP_RX_LATENCY_100                     0x0222
-#define PTP_TX_LATENCY_100                     0x0223
-
-#define PTP_RX_LATENCY_10                      0x0220
-#define PTP_TX_LATENCY_10                      0x0221
-
 #define PTP_TX_PARSE_L2_ADDR_EN                        0x0284
 #define PTP_RX_PARSE_L2_ADDR_EN                        0x0244
 
@@ -268,15 +259,6 @@ struct lan8814_ptp_rx_ts {
        u16 seq_id;
 };
 
-struct kszphy_latencies {
-       u16 rx_10;
-       u16 tx_10;
-       u16 rx_100;
-       u16 tx_100;
-       u16 rx_1000;
-       u16 tx_1000;
-};
-
 struct kszphy_ptp_priv {
        struct mii_timestamper mii_ts;
        struct phy_device *phydev;
@@ -296,7 +278,6 @@ struct kszphy_ptp_priv {
 
 struct kszphy_priv {
        struct kszphy_ptp_priv ptp_priv;
-       struct kszphy_latencies latencies;
        const struct kszphy_type *type;
        int led_mode;
        bool rmii_ref_clk_sel;
@@ -304,14 +285,6 @@ struct kszphy_priv {
        u64 stats[ARRAY_SIZE(kszphy_hw_stats)];
 };
 
-static struct kszphy_latencies lan8814_latencies = {
-       .rx_10          = 0x22AA,
-       .tx_10          = 0x2E4A,
-       .rx_100         = 0x092A,
-       .tx_100         = 0x02C1,
-       .rx_1000        = 0x01AD,
-       .tx_1000        = 0x00C9,
-};
 static const struct kszphy_type ksz8021_type = {
        .led_mode_reg           = MII_KSZPHY_CTRL_2,
        .has_broadcast_disable  = true,
@@ -1770,7 +1743,7 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev,
 
 static int lanphy_read_page_reg(struct phy_device *phydev, int page, u32 addr)
 {
-       u32 data;
+       int data;
 
        phy_lock_mdio_bus(phydev);
        __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page);
@@ -2471,8 +2444,7 @@ static int lan8804_config_init(struct phy_device *phydev)
 
 static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev)
 {
-       u16 tsu_irq_status;
-       int irq_status;
+       int irq_status, tsu_irq_status;
 
        irq_status = phy_read(phydev, LAN8814_INTS);
        if (irq_status > 0 && (irq_status & LAN8814_INT_LINK))
@@ -2618,55 +2590,6 @@ static int lan8814_ptp_probe_once(struct phy_device *phydev)
        return 0;
 }
 
-static int lan8814_read_status(struct phy_device *phydev)
-{
-       struct kszphy_priv *priv = phydev->priv;
-       struct kszphy_latencies *latencies = &priv->latencies;
-       int err;
-       int regval;
-
-       err = genphy_read_status(phydev);
-       if (err)
-               return err;
-
-       switch (phydev->speed) {
-       case SPEED_1000:
-               lanphy_write_page_reg(phydev, 5, PTP_RX_LATENCY_1000,
-                                     latencies->rx_1000);
-               lanphy_write_page_reg(phydev, 5, PTP_TX_LATENCY_1000,
-                                     latencies->tx_1000);
-               break;
-       case SPEED_100:
-               lanphy_write_page_reg(phydev, 5, PTP_RX_LATENCY_100,
-                                     latencies->rx_100);
-               lanphy_write_page_reg(phydev, 5, PTP_TX_LATENCY_100,
-                                     latencies->tx_100);
-               break;
-       case SPEED_10:
-               lanphy_write_page_reg(phydev, 5, PTP_RX_LATENCY_10,
-                                     latencies->rx_10);
-               lanphy_write_page_reg(phydev, 5, PTP_TX_LATENCY_10,
-                                     latencies->tx_10);
-               break;
-       default:
-               break;
-       }
-
-       /* Make sure the PHY is not broken. Read idle error count,
-        * and reset the PHY if it is maxed out.
-        */
-       regval = phy_read(phydev, MII_STAT1000);
-       if ((regval & 0xFF) == 0xFF) {
-               phy_init_hw(phydev);
-               phydev->link = 0;
-               if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
-                       phydev->drv->config_intr(phydev);
-               return genphy_config_aneg(phydev);
-       }
-
-       return 0;
-}
-
 static int lan8814_config_init(struct phy_device *phydev)
 {
        int val;
@@ -2690,30 +2613,8 @@ static int lan8814_config_init(struct phy_device *phydev)
        return 0;
 }
 
-static void lan8814_parse_latency(struct phy_device *phydev)
-{
-       const struct device_node *np = phydev->mdio.dev.of_node;
-       struct kszphy_priv *priv = phydev->priv;
-       struct kszphy_latencies *latency = &priv->latencies;
-       u32 val;
-
-       if (!of_property_read_u32(np, "lan8814,latency_rx_10", &val))
-               latency->rx_10 = val;
-       if (!of_property_read_u32(np, "lan8814,latency_tx_10", &val))
-               latency->tx_10 = val;
-       if (!of_property_read_u32(np, "lan8814,latency_rx_100", &val))
-               latency->rx_100 = val;
-       if (!of_property_read_u32(np, "lan8814,latency_tx_100", &val))
-               latency->tx_100 = val;
-       if (!of_property_read_u32(np, "lan8814,latency_rx_1000", &val))
-               latency->rx_1000 = val;
-       if (!of_property_read_u32(np, "lan8814,latency_tx_1000", &val))
-               latency->tx_1000 = val;
-}
-
 static int lan8814_probe(struct phy_device *phydev)
 {
-       const struct device_node *np = phydev->mdio.dev.of_node;
        struct kszphy_priv *priv;
        u16 addr;
        int err;
@@ -2724,13 +2625,10 @@ static int lan8814_probe(struct phy_device *phydev)
 
        priv->led_mode = -1;
 
-       priv->latencies = lan8814_latencies;
-
        phydev->priv = priv;
 
        if (!IS_ENABLED(CONFIG_PTP_1588_CLOCK) ||
-           !IS_ENABLED(CONFIG_NETWORK_PHY_TIMESTAMPING) ||
-           of_property_read_bool(np, "lan8814,ignore-ts"))
+           !IS_ENABLED(CONFIG_NETWORK_PHY_TIMESTAMPING))
                return 0;
 
        /* Strap-in value for PHY address, below register read gives starting
@@ -2746,7 +2644,6 @@ static int lan8814_probe(struct phy_device *phydev)
                        return err;
        }
 
-       lan8814_parse_latency(phydev);
        lan8814_ptp_init(phydev);
 
        return 0;
@@ -2759,6 +2656,7 @@ static struct phy_driver ksphy_driver[] = {
        .name           = "Micrel KS8737",
        /* PHY_BASIC_FEATURES */
        .driver_data    = &ks8737_type,
+       .probe          = kszphy_probe,
        .config_init    = kszphy_config_init,
        .config_intr    = kszphy_config_intr,
        .handle_interrupt = kszphy_handle_interrupt,
@@ -2884,8 +2782,8 @@ static struct phy_driver ksphy_driver[] = {
        .config_init    = ksz8061_config_init,
        .config_intr    = kszphy_config_intr,
        .handle_interrupt = kszphy_handle_interrupt,
-       .suspend        = kszphy_suspend,
-       .resume         = kszphy_resume,
+       .suspend        = genphy_suspend,
+       .resume         = genphy_resume,
 }, {
        .phy_id         = PHY_ID_KSZ9021,
        .phy_id_mask    = 0x000ffffe,
@@ -2928,7 +2826,7 @@ static struct phy_driver ksphy_driver[] = {
        .config_init    = lan8814_config_init,
        .probe          = lan8814_probe,
        .soft_reset     = genphy_soft_reset,
-       .read_status    = lan8814_read_status,
+       .read_status    = ksz9031_read_status,
        .get_sset_count = kszphy_get_sset_count,
        .get_strings    = kszphy_get_strings,
        .get_stats      = kszphy_get_stats,
index 389df3f4293c8bb0becdc30dc567e706dcbcc671..c2c0e361fd3d7af5eb379a08912df95caae2c27c 100644 (file)
@@ -706,7 +706,6 @@ static int lan87xx_read_status(struct phy_device *phydev)
 static int lan87xx_config_aneg(struct phy_device *phydev)
 {
        u16 ctl = 0;
-       int rc;
 
        switch (phydev->master_slave_set) {
        case MASTER_SLAVE_CFG_MASTER_FORCE:
@@ -722,11 +721,7 @@ static int lan87xx_config_aneg(struct phy_device *phydev)
                return -EOPNOTSUPP;
        }
 
-       rc = phy_modify_changed(phydev, MII_CTRL1000, CTL1000_AS_MASTER, ctl);
-       if (rc == 1)
-               rc = genphy_soft_reset(phydev);
-
-       return rc;
+       return phy_modify_changed(phydev, MII_CTRL1000, CTL1000_AS_MASTER, ctl);
 }
 
 static struct phy_driver microchip_t1_phy_driver[] = {
@@ -748,6 +743,7 @@ static struct phy_driver microchip_t1_phy_driver[] = {
        {
                PHY_ID_MATCH_MODEL(PHY_ID_LAN937X),
                .name           = "Microchip LAN937x T1",
+               .flags          = PHY_POLL_CABLE_TEST,
                .features       = PHY_BASIC_T1_FEATURES,
                .config_init    = lan87xx_config_init,
                .suspend        = genphy_suspend,
index beb2b66da13246db6f668ae65f2037a693b44ff1..f122026c4682674f7e7b0c1c0832972339483341 100644 (file)
@@ -970,8 +970,13 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
 {
        struct phy_device *phydev = phy_dat;
        struct phy_driver *drv = phydev->drv;
+       irqreturn_t ret;
 
-       return drv->handle_interrupt(phydev);
+       mutex_lock(&phydev->lock);
+       ret = drv->handle_interrupt(phydev);
+       mutex_unlock(&phydev->lock);
+
+       return ret;
 }
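
    Taking phydev->lock here is legal because phylib requests PHY interrupts
    as threaded IRQs, so phy_interrupt() runs in process context and may sleep
    on a mutex; the lock serializes drv->handle_interrupt() against the phylib
    state machine, which mutates the same phydev fields under that mutex. The
    request site looks roughly like this (a sketch of the
    phy_request_interrupt() path):

        err = request_threaded_irq(phydev->irq, NULL, phy_interrupt,
                                   IRQF_ONESHOT | IRQF_SHARED,
                                   phydev_name(phydev), phydev);
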
 
 /**
index 4dfb79807823dae1576cb9f8b14cd8fc14f0ec82..9a5d5a10560fb135d5b7a547795dfcdce6e94377 100644 (file)
@@ -250,6 +250,7 @@ struct sfp {
        struct sfp_eeprom_id id;
        unsigned int module_power_mW;
        unsigned int module_t_start_up;
+       bool tx_fault_ignore;
 
 #if IS_ENABLED(CONFIG_HWMON)
        struct sfp_diag diag;
@@ -1956,6 +1957,12 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report)
        else
                sfp->module_t_start_up = T_START_UP;
 
+       if (!memcmp(id.base.vendor_name, "HUAWEI          ", 16) &&
+           !memcmp(id.base.vendor_pn, "MA5671A         ", 16))
+               sfp->tx_fault_ignore = true;
+       else
+               sfp->tx_fault_ignore = false;
+
        return 0;
 }
 
@@ -2409,7 +2416,10 @@ static void sfp_check_state(struct sfp *sfp)
        mutex_lock(&sfp->st_mutex);
        state = sfp_get_state(sfp);
        changed = state ^ sfp->state;
-       changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT;
+       if (sfp->tx_fault_ignore)
+               changed &= SFP_F_PRESENT | SFP_F_LOS;
+       else
+               changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT;
 
        for (i = 0; i < GPIO_MAX; i++)
                if (changed & BIT(i))
index 3619520340b746faba44a5dd86ef2bea9665b178..e172743948ed777d4672b85f3584e1f03ba3bd53 100644 (file)
@@ -988,6 +988,7 @@ static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
        path->encap.proto = htons(ETH_P_PPP_SES);
        path->encap.id = be16_to_cpu(po->num);
        memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
+       memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
        path->dev = ctx->dev;
        ctx->dev = dev;
 
index 88396ff99f03f72313ffacfc9efd2a8a715bf657..6865d32270e5d0c4fc26e284e27182efb4cbc83e 100644 (file)
@@ -469,7 +469,7 @@ static void sl_tx_timeout(struct net_device *dev, unsigned int txqueue)
        spin_lock(&sl->lock);
 
        if (netif_queue_stopped(dev)) {
-               if (!netif_running(dev))
+               if (!netif_running(dev) || !sl->tty)
                        goto out;
 
                /* Maybe we should check the transmitter timeout here?
index 276a0e42ca8eaa85a96b366ab56f5abda3ff7a27..dbe4c0a4be2cd626824604e75848b4f9f2859a49 100644 (file)
@@ -1124,7 +1124,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
        /* NETIF_F_LLTX requires to do our own update of trans_start */
        queue = netdev_get_tx_queue(dev, txq);
-       queue->trans_start = jiffies;
+       txq_trans_cond_update(queue);
 
        /* Notify and wake up reader process */
        if (tfile->flags & TUN_FASYNC)
index ea06d10e1c21a60fd2c4ebc572d74479a8bbf712..ca409d450a29620e36a013637147fda6247eff92 100644 (file)
@@ -1102,10 +1102,15 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
        if (start_of_descs != desc_offset)
                goto err;
 
-       /* self check desc_offset from header*/
-       if (desc_offset >= skb_len)
+       /* Sanity-check desc_offset from the header and make sure that
+        * the bounds of the metadata array are inside the SKB
+        */
+       if (pkt_count * 2 + desc_offset >= skb_len)
                goto err;
 
+       /* Packets must not overlap the metadata array */
+       skb_trim(skb, desc_offset);
+
        if (pkt_count == 0)
                goto err;
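
    The strengthened check folds the descriptor-array size into the bound:
    with pkt_count entries starting at desc_offset, the whole array must lie
    inside the skb, and the skb_trim() that follows guarantees packet data
    cannot overlap it. The arithmetic as a standalone user-space check,
    assuming (as the multiplier in the hunk suggests) 2 bytes of metadata per
    packet:

        #include <assert.h>
        #include <stdbool.h>
        #include <stddef.h>

        #define DESC_SIZE 2     /* bytes of metadata per packet (assumed) */

        static bool metadata_fits(size_t skb_len, size_t desc_offset,
                                  size_t pkt_count)
        {
                /* array spans [desc_offset, desc_offset + pkt_count * DESC_SIZE) */
                return desc_offset + pkt_count * DESC_SIZE < skb_len;
        }

        int main(void)
        {
                assert(metadata_fits(1500, 1400, 10));  /* 1420 < 1500: ok */
                assert(!metadata_fits(1500, 1400, 60)); /* 1520 >= 1500: reject */
                return 0;
        }
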
 
index 1b5714926d816dbf83c59745f0d7bca9935b229c..eb0121a64d6d2ecbc9b868428e9a042094b10013 100644 (file)
@@ -320,7 +320,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 
        rcu_read_lock();
        rcv = rcu_dereference(priv->peer);
-       if (unlikely(!rcv)) {
+       if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) {
                kfree_skb(skb);
                goto drop;
        }
index 87838cbe38cf6cced970ea075b328c511fc0a6dd..cbba9d2e8f322155f1add23d85564c14fecfe5b2 100644 (file)
@@ -1005,6 +1005,24 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                         * xdp.data_meta were adjusted
                         */
                        len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
+
+                       /* Recalculate the headroom if xdp.data or
+                        * xdp.data_meta were adjusted. Note that offset
+                        * always points to the start of the bytes reserved
+                        * for the virtio_net header, which are followed by
+                        * xdp.data; that means offset is equal to the
+                        * headroom (when buf starts at the beginning of the
+                        * page, otherwise there is a base offset inside the
+                        * page), but it is used with a different starting
+                        * point (buf start) than xdp.data (buf start + vnet
+                        * hdr size). If the XDP program adjusted xdp.data or
+                        * data_meta, the headroom size has changed and so
+                        * has the offset; we can use data_hard_start, which
+                        * points at buf start + vnet hdr size, to calculate
+                        * the new headroom and use it later to compute buf
+                        * start in page_to_skb().
+                        */
+                       headroom = xdp.data - xdp.data_hard_start - metasize;
+
                        /* We can only create skb based on xdp_page. */
                        if (unlikely(xdp_page != page)) {
                                rcu_read_unlock();
@@ -1012,7 +1030,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                                head_skb = page_to_skb(vi, rq, xdp_page, offset,
                                                       len, PAGE_SIZE, false,
                                                       metasize,
-                                                      VIRTIO_XDP_HEADROOM);
+                                                      headroom);
                                return head_skb;
                        }
                        break;
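
    Since an XDP program may grow or shrink the packet with
    bpf_xdp_adjust_head()/bpf_xdp_adjust_meta(), the headroom captured before
    running the program can be stale; recomputing it from data_hard_start
    (which stays fixed at buf start + vnet hdr size) keeps page_to_skb()
    pointed at the right buffer start. A worked example with made-up
    addresses:

        #include <stdio.h>

        int main(void)
        {
                unsigned long data_hard_start = 0x1000; /* buf + vnet hdr (assumed) */
                unsigned long data = data_hard_start + 256; /* 256B initial headroom */
                unsigned long metasize = 0;

                data -= 64; /* as if bpf_xdp_adjust_head(xdp, -64) grew the packet */

                /* headroom = xdp.data - xdp.data_hard_start - metasize */
                printf("headroom = %lu\n",
                       data - data_hard_start - metasize);  /* prints 192 */
                return 0;
        }
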
index d9d90baac72a2a783f7f14411d462ab4cf937ff8..93e8d119d45f6b88f6237425a20497127bef2d1a 100644 (file)
@@ -589,6 +589,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
                                if (dma_mapping_error(&adapter->pdev->dev,
                                                      rbi->dma_addr)) {
                                        dev_kfree_skb_any(rbi->skb);
+                                       rbi->skb = NULL;
                                        rq->stats.rx_buf_alloc_failure++;
                                        break;
                                }
@@ -613,6 +614,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
                                if (dma_mapping_error(&adapter->pdev->dev,
                                                      rbi->dma_addr)) {
                                        put_page(rbi->page);
+                                       rbi->page = NULL;
                                        rq->stats.rx_buf_alloc_failure++;
                                        break;
                                }
@@ -1666,6 +1668,10 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
        u32 i, ring_idx;
        struct Vmxnet3_RxDesc *rxd;
 
+       /* ring has already been cleaned up */
+       if (!rq->rx_ring[0].base)
+               return;
+
        for (ring_idx = 0; ring_idx < 2; ring_idx++) {
                for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
 #ifdef __BIG_ENDIAN_BITFIELD
index 85e362461d71358a0ff685b9c9dd7792e438ced0..cfc30ce4c6e1acec8c152cfb7e292bda54b504ea 100644 (file)
@@ -1265,6 +1265,7 @@ static int vrf_prepare_mac_header(struct sk_buff *skb,
        eth = (struct ethhdr *)skb->data;
 
        skb_reset_mac_header(skb);
+       skb_reset_mac_len(skb);
 
        /* we set the ethernet destination and the source addresses to the
         * address of the VRF device.
@@ -1294,9 +1295,9 @@ static int vrf_prepare_mac_header(struct sk_buff *skb,
  */
 static int vrf_add_mac_header_if_unset(struct sk_buff *skb,
                                       struct net_device *vrf_dev,
-                                      u16 proto)
+                                      u16 proto, struct net_device *orig_dev)
 {
-       if (skb_mac_header_was_set(skb))
+       if (skb_mac_header_was_set(skb) && dev_has_header(orig_dev))
                return 0;
 
        return vrf_prepare_mac_header(skb, vrf_dev, proto);
@@ -1402,6 +1403,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
 
        /* if packet is NDISC then keep the ingress interface */
        if (!is_ndisc) {
+               struct net_device *orig_dev = skb->dev;
+
                vrf_rx_stats(vrf_dev, skb->len);
                skb->dev = vrf_dev;
                skb->skb_iif = vrf_dev->ifindex;
@@ -1410,7 +1413,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
                        int err;
 
                        err = vrf_add_mac_header_if_unset(skb, vrf_dev,
-                                                         ETH_P_IPV6);
+                                                         ETH_P_IPV6,
+                                                         orig_dev);
                        if (likely(!err)) {
                                skb_push(skb, skb->mac_len);
                                dev_queue_xmit_nit(skb, vrf_dev);
@@ -1440,6 +1444,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
 static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
                                  struct sk_buff *skb)
 {
+       struct net_device *orig_dev = skb->dev;
+
        skb->dev = vrf_dev;
        skb->skb_iif = vrf_dev->ifindex;
        IPCB(skb)->flags |= IPSKB_L3SLAVE;
@@ -1460,7 +1466,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
        if (!list_empty(&vrf_dev->ptype_all)) {
                int err;
 
-               err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP);
+               err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP,
+                                                 orig_dev);
                if (likely(!err)) {
                        skb_push(skb, skb->mac_len);
                        dev_queue_xmit_nit(skb, vrf_dev);
index de97ff98d36e949194b741f5e88b23255a07d43c..8a5e3a6d32d7ce0dec79e3061f6d356d014f47cf 100644 (file)
@@ -651,11 +651,11 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
 
        rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
        if (rd == NULL)
-               return -ENOBUFS;
+               return -ENOMEM;
 
        if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
                kfree(rd);
-               return -ENOBUFS;
+               return -ENOMEM;
        }
 
        rd->remote_ip = *ip;
index 23d2954d97475a551192fee9085a11281896a672..1e5672019922fc2b2d192427ea033123b6f58b9a 100644 (file)
@@ -349,7 +349,7 @@ static int __init cosa_init(void)
                }
        } else {
                cosa_major = register_chrdev(0, "cosa", &cosa_fops);
-               if (!cosa_major) {
+               if (cosa_major < 0) {
                        pr_warn("unable to register chardev\n");
                        err = -EIO;
                        goto out;
index 0fad1331303c0240c497e21a420c9340ab4153fb..aa9a7a5970fda6fbf51b9748f4386b6ce1542a56 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/if_arp.h>
 #include <linux/icmp.h>
 #include <linux/suspend.h>
+#include <net/dst_metadata.h>
 #include <net/icmp.h>
 #include <net/rtnetlink.h>
 #include <net/ip_tunnels.h>
@@ -167,7 +168,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
                goto err_peer;
        }
 
-       mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+       mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
        __skb_queue_head_init(&packets);
        if (!skb_is_gso(skb)) {
index 63e1c2d783c5fe29ca030ac1f2f497d658b5dac9..73693c66cef12182a4cadcb533a3cfcb6b8f5d9c 100644 (file)
@@ -1633,7 +1633,7 @@ static void ath10k_sdio_hif_power_down(struct ath10k *ar)
                return;
        }
 
-       ret = mmc_hw_reset(ar_sdio->func->card->host);
+       ret = mmc_hw_reset(ar_sdio->func->card);
        if (ret)
                ath10k_warn(ar, "unable to reset sdio: %d\n", ret);
 
index 71eb7d04c3bf22f320358a0962a720f557ec0c46..90a5df1fbdbd27219dd30f802b9e8768e9cb13ba 100644 (file)
@@ -1288,6 +1288,7 @@ static void ath11k_core_restart(struct work_struct *work)
 
                ieee80211_stop_queues(ar->hw);
                ath11k_mac_drain_tx(ar);
+               complete(&ar->completed_11d_scan);
                complete(&ar->scan.started);
                complete(&ar->scan.completed);
                complete(&ar->peer_assoc_done);
index c0228e91a596b1f486efe04f0d1cc7548675205f..b8634eddf49aa3c1fc6026f1795e48d904d1bf47 100644 (file)
@@ -38,6 +38,8 @@
 
 extern unsigned int ath11k_frame_mode;
 
+#define ATH11K_SCAN_TIMEOUT_HZ (20 * HZ)
+
 #define ATH11K_MON_TIMER_INTERVAL  10
 
 enum ath11k_supported_bw {
@@ -189,6 +191,12 @@ enum ath11k_scan_state {
        ATH11K_SCAN_ABORTING,
 };
 
+enum ath11k_11d_state {
+       ATH11K_11D_IDLE,
+       ATH11K_11D_PREPARING,
+       ATH11K_11D_RUNNING,
+};
+
 enum ath11k_dev_flags {
        ATH11K_CAC_RUNNING,
        ATH11K_FLAG_CORE_REGISTERED,
@@ -607,9 +615,8 @@ struct ath11k {
        bool dfs_block_radar_events;
        struct ath11k_thermal thermal;
        u32 vdev_id_11d_scan;
-       struct completion finish_11d_scan;
-       struct completion finish_11d_ch_list;
-       bool pending_11d;
+       struct completion completed_11d_scan;
+       enum ath11k_11d_state state_11d;
        bool regdom_set_by_user;
        int hw_rate_code;
        u8 twt_enabled;
index d5b83f90d27a1c1751f43855658b7c0d364de994..58ff761393db19803acbe8dd640c797c30bb6d68 100644 (file)
@@ -3136,6 +3136,20 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw,
                        arvif->do_not_send_tmpl = true;
                else
                        arvif->do_not_send_tmpl = false;
+
+               if (vif->bss_conf.he_support) {
+                       ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id,
+                                                           WMI_VDEV_PARAM_BA_MODE,
+                                                           WMI_BA_MODE_BUFFER_SIZE_256);
+                       if (ret)
+                               ath11k_warn(ar->ab,
+                                           "failed to set BA BUFFER SIZE 256 for vdev: %d\n",
+                                           arvif->vdev_id);
+                       else
+                               ath11k_dbg(ar->ab, ATH11K_DBG_MAC,
+                                          "Set BA BUFFER SIZE 256 for VDEV: %d\n",
+                                          arvif->vdev_id);
+               }
        }
 
        if (changed & (BSS_CHANGED_BEACON_INFO | BSS_CHANGED_BEACON)) {
@@ -3171,14 +3185,6 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw,
 
                if (arvif->is_up && vif->bss_conf.he_support &&
                    vif->bss_conf.he_oper.params) {
-                       ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id,
-                                                           WMI_VDEV_PARAM_BA_MODE,
-                                                           WMI_BA_MODE_BUFFER_SIZE_256);
-                       if (ret)
-                               ath11k_warn(ar->ab,
-                                           "failed to set BA BUFFER SIZE 256 for vdev: %d\n",
-                                           arvif->vdev_id);
-
                        param_id = WMI_VDEV_PARAM_HEOPS_0_31;
                        param_value = vif->bss_conf.he_oper.params;
                        ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id,
@@ -3595,26 +3601,6 @@ static int ath11k_mac_op_hw_scan(struct ieee80211_hw *hw,
        if (ret)
                goto exit;
 
-       /* Currently the pending_11d=true only happened 1 time while
-        * wlan interface up in ath11k_mac_11d_scan_start(), it is called by
-        * ath11k_mac_op_add_interface(), after wlan interface up,
-        * pending_11d=false always.
-        * If remove below wait, it always happened scan fail and lead connect
-        * fail while wlan interface up, because it has a 11d scan which is running
-        * in firmware, and lead this scan failed.
-        */
-       if (ar->pending_11d) {
-               long time_left;
-               unsigned long timeout = 5 * HZ;
-
-               if (ar->supports_6ghz)
-                       timeout += 5 * HZ;
-
-               time_left = wait_for_completion_timeout(&ar->finish_11d_ch_list, timeout);
-               ath11k_dbg(ar->ab, ATH11K_DBG_MAC,
-                          "mac wait 11d channel list time left %ld\n", time_left);
-       }
-
        memset(&arg, 0, sizeof(arg));
        ath11k_wmi_start_scan_init(ar, &arg);
        arg.vdev_id = arvif->vdev_id;
@@ -3680,6 +3666,10 @@ exit:
                kfree(arg.extraie.ptr);
 
        mutex_unlock(&ar->conf_mutex);
+
+       if (ar->state_11d == ATH11K_11D_PREPARING)
+               ath11k_mac_11d_scan_start(ar, arvif->vdev_id);
+
        return ret;
 }
 
@@ -5808,7 +5798,7 @@ static int ath11k_mac_op_start(struct ieee80211_hw *hw)
 
        /* TODO: Do we need to enable ANI? */
 
-       ath11k_reg_update_chan_list(ar);
+       ath11k_reg_update_chan_list(ar, false);
 
        ar->num_started_vdevs = 0;
        ar->num_created_vdevs = 0;
@@ -5875,6 +5865,11 @@ static void ath11k_mac_op_stop(struct ieee80211_hw *hw)
        cancel_work_sync(&ar->ab->update_11d_work);
        cancel_work_sync(&ar->ab->rfkill_work);
 
+       if (ar->state_11d == ATH11K_11D_PREPARING) {
+               ar->state_11d = ATH11K_11D_IDLE;
+               complete(&ar->completed_11d_scan);
+       }
+
        spin_lock_bh(&ar->data_lock);
        list_for_each_entry_safe(ppdu_stats, tmp, &ar->ppdu_stats_info, list) {
                list_del(&ppdu_stats->list);
@@ -6045,7 +6040,7 @@ static bool ath11k_mac_vif_ap_active_any(struct ath11k_base *ab)
        return false;
 }
 
-void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait)
+void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id)
 {
        struct wmi_11d_scan_start_params param;
        int ret;
@@ -6073,28 +6068,22 @@ void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait)
 
        ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "mac start 11d scan\n");
 
-       if (wait)
-               reinit_completion(&ar->finish_11d_scan);
-
        ret = ath11k_wmi_send_11d_scan_start_cmd(ar, &param);
        if (ret) {
                ath11k_warn(ar->ab, "failed to start 11d scan vdev %d ret: %d\n",
                            vdev_id, ret);
        } else {
                ar->vdev_id_11d_scan = vdev_id;
-               if (wait) {
-                       ar->pending_11d = true;
-                       ret = wait_for_completion_timeout(&ar->finish_11d_scan,
-                                                         5 * HZ);
-                       ath11k_dbg(ar->ab, ATH11K_DBG_MAC,
-                                  "mac 11d scan left time %d\n", ret);
-
-                       if (!ret)
-                               ar->pending_11d = false;
-               }
+               if (ar->state_11d == ATH11K_11D_PREPARING)
+                       ar->state_11d = ATH11K_11D_RUNNING;
        }
 
 fin:
+       if (ar->state_11d == ATH11K_11D_PREPARING) {
+               ar->state_11d = ATH11K_11D_IDLE;
+               complete(&ar->completed_11d_scan);
+       }
+
        mutex_unlock(&ar->ab->vdev_id_11d_lock);
 }
 
@@ -6117,12 +6106,15 @@ void ath11k_mac_11d_scan_stop(struct ath11k *ar)
                vdev_id = ar->vdev_id_11d_scan;
 
                ret = ath11k_wmi_send_11d_scan_stop_cmd(ar, vdev_id);
-               if (ret)
+               if (ret) {
                        ath11k_warn(ar->ab,
                                    "failed to stopt 11d scan vdev %d ret: %d\n",
                                    vdev_id, ret);
-               else
+               } else {
                        ar->vdev_id_11d_scan = ATH11K_11D_INVALID_VDEV_ID;
+                       ar->state_11d = ATH11K_11D_IDLE;
+                       complete(&ar->completed_11d_scan);
+               }
        }
        mutex_unlock(&ar->ab->vdev_id_11d_lock);
 }
@@ -6318,8 +6310,10 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw,
                        goto err_peer_del;
                }
 
-               ath11k_mac_11d_scan_start(ar, arvif->vdev_id, true);
-
+               if (test_bit(WMI_TLV_SERVICE_11D_OFFLOAD, ab->wmi_ab.svc_map)) {
+                       reinit_completion(&ar->completed_11d_scan);
+                       ar->state_11d = ATH11K_11D_PREPARING;
+               }
                break;
        case WMI_VDEV_TYPE_MONITOR:
                set_bit(ATH11K_FLAG_MONITOR_VDEV_CREATED, &ar->monitor_flags);
@@ -7184,7 +7178,7 @@ ath11k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw,
        }
 
        if (arvif->vdev_type == WMI_VDEV_TYPE_STA)
-               ath11k_mac_11d_scan_start(ar, arvif->vdev_id, false);
+               ath11k_mac_11d_scan_start(ar, arvif->vdev_id);
 
        mutex_unlock(&ar->conf_mutex);
 }
@@ -8665,8 +8659,7 @@ int ath11k_mac_allocate(struct ath11k_base *ab)
                ar->monitor_vdev_id = -1;
                clear_bit(ATH11K_FLAG_MONITOR_VDEV_CREATED, &ar->monitor_flags);
                ar->vdev_id_11d_scan = ATH11K_11D_INVALID_VDEV_ID;
-               init_completion(&ar->finish_11d_scan);
-               init_completion(&ar->finish_11d_ch_list);
+               init_completion(&ar->completed_11d_scan);
        }
 
        return 0;
index 0e6c870b09c887679eff6832f2efc1185ff69b5c..29b523af66dd2d731983001665ce1d767676e2ed 100644 (file)
@@ -130,7 +130,7 @@ extern const struct htt_rx_ring_tlv_filter ath11k_mac_mon_status_filter_default;
 #define ATH11K_SCAN_11D_INTERVAL               600000
 #define ATH11K_11D_INVALID_VDEV_ID             0xFFFF
 
-void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait);
+void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id);
 void ath11k_mac_11d_scan_stop(struct ath11k *ar);
 void ath11k_mac_11d_scan_stop_all(struct ath11k_base *ab);
 
index 81e11cde31d7b24c5470c94837ad88355cc30779..80a6977713932c18811f2258940cd0a6fc68c489 100644 (file)
@@ -102,7 +102,7 @@ ath11k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request)
        ar->regdom_set_by_user = true;
 }
 
-int ath11k_reg_update_chan_list(struct ath11k *ar)
+int ath11k_reg_update_chan_list(struct ath11k *ar, bool wait)
 {
        struct ieee80211_supported_band **bands;
        struct scan_chan_list_params *params;
@@ -111,7 +111,32 @@ int ath11k_reg_update_chan_list(struct ath11k *ar)
        struct channel_param *ch;
        enum nl80211_band band;
        int num_channels = 0;
-       int i, ret;
+       int i, ret, left;
+
+       if (wait && ar->state_11d != ATH11K_11D_IDLE) {
+               left = wait_for_completion_timeout(&ar->completed_11d_scan,
+                                                  ATH11K_SCAN_TIMEOUT_HZ);
+               if (!left) {
+                       ath11k_dbg(ar->ab, ATH11K_DBG_REG,
+                                  "failed to receive 11d scan complete: timed out\n");
+                       ar->state_11d = ATH11K_11D_IDLE;
+               }
+               ath11k_dbg(ar->ab, ATH11K_DBG_REG,
+                          "reg 11d scan wait left time %d\n", left);
+       }
+
+       if (wait &&
+           (ar->scan.state == ATH11K_SCAN_STARTING ||
+           ar->scan.state == ATH11K_SCAN_RUNNING)) {
+               left = wait_for_completion_timeout(&ar->scan.completed,
+                                                  ATH11K_SCAN_TIMEOUT_HZ);
+               if (!left)
+                       ath11k_dbg(ar->ab, ATH11K_DBG_REG,
+                                  "failed to receive hw scan complete: timed out\n");
+
+               ath11k_dbg(ar->ab, ATH11K_DBG_REG,
+                          "reg hw scan wait left time %d\n", left);
+       }
 
        bands = hw->wiphy->bands;
        for (band = 0; band < NUM_NL80211_BANDS; band++) {
@@ -193,11 +218,6 @@ int ath11k_reg_update_chan_list(struct ath11k *ar)
        ret = ath11k_wmi_send_scan_chan_list_cmd(ar, params);
        kfree(params);
 
-       if (ar->pending_11d) {
-               complete(&ar->finish_11d_ch_list);
-               ar->pending_11d = false;
-       }
-
        return ret;
 }
 
@@ -263,15 +283,8 @@ int ath11k_regd_update(struct ath11k *ar)
                goto err;
        }
 
-       if (ar->pending_11d)
-               complete(&ar->finish_11d_scan);
-
        rtnl_lock();
        wiphy_lock(ar->hw->wiphy);
-
-       if (ar->pending_11d)
-               reinit_completion(&ar->finish_11d_ch_list);
-
        ret = regulatory_set_wiphy_regd_sync(ar->hw->wiphy, regd_copy);
        wiphy_unlock(ar->hw->wiphy);
        rtnl_unlock();
@@ -282,7 +295,7 @@ int ath11k_regd_update(struct ath11k *ar)
                goto err;
 
        if (ar->state == ATH11K_STATE_ON) {
-               ret = ath11k_reg_update_chan_list(ar);
+               ret = ath11k_reg_update_chan_list(ar, true);
                if (ret)
                        goto err;
        }
index 5fb9dc03a74e82a2a76048ca35ee236b950f895b..2f284f26378d1f6df1c0afd957bf844b2cdd9126 100644 (file)
@@ -32,5 +32,5 @@ struct ieee80211_regdomain *
 ath11k_reg_build_regd(struct ath11k_base *ab,
                      struct cur_regulatory_info *reg_info, bool intersect);
 int ath11k_regd_update(struct ath11k *ar);
-int ath11k_reg_update_chan_list(struct ath11k *ar);
+int ath11k_reg_update_chan_list(struct ath11k *ar, bool wait);
 #endif
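
    Read together, the ath11k hunks replace the two completions and the
    pending_11d flag with a single completed_11d_scan completion driven by an
    explicit three-state machine, and move the waiting out of hw_scan into
    reg_update_chan_list(ar, wait=true). The transitions, as a sketch
    assembled from the hunks above:

        /* state_11d transitions:
         *
         *   IDLE       add_interface(STA) ------------------> PREPARING
         *   PREPARING  hw_scan done, 11d scan started ------> RUNNING
         *   PREPARING  start fails / interface stops -------> IDLE + complete()
         *   RUNNING    scan stop / 11d new-country event ---> IDLE + complete()
         *
         * reg_update_chan_list(ar, true) blocks on completed_11d_scan (and on
         * any in-flight hw scan) for up to ATH11K_SCAN_TIMEOUT_HZ before
         * pushing the new channel list to firmware.
         */
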
index b4f86c45d81f8a30d72d53a9060add05300ff220..2751fe8814df79e79ae4419f04e329702b64ce1c 100644 (file)
@@ -2015,7 +2015,10 @@ void ath11k_wmi_start_scan_init(struct ath11k *ar,
 {
        /* setup commonly used values */
        arg->scan_req_id = 1;
-       arg->scan_priority = WMI_SCAN_PRIORITY_LOW;
+       if (ar->state_11d == ATH11K_11D_PREPARING)
+               arg->scan_priority = WMI_SCAN_PRIORITY_MEDIUM;
+       else
+               arg->scan_priority = WMI_SCAN_PRIORITY_LOW;
        arg->dwell_time_active = 50;
        arg->dwell_time_active_2g = 0;
        arg->dwell_time_passive = 150;
@@ -6350,8 +6353,10 @@ static void ath11k_wmi_op_ep_tx_credits(struct ath11k_base *ab)
 static int ath11k_reg_11d_new_cc_event(struct ath11k_base *ab, struct sk_buff *skb)
 {
        const struct wmi_11d_new_cc_ev *ev;
+       struct ath11k *ar;
+       struct ath11k_pdev *pdev;
        const void **tb;
-       int ret;
+       int ret, i;
 
        tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
        if (IS_ERR(tb)) {
@@ -6377,6 +6382,13 @@ static int ath11k_reg_11d_new_cc_event(struct ath11k_base *ab, struct sk_buff *s
 
        kfree(tb);
 
+       for (i = 0; i < ab->num_radios; i++) {
+               pdev = &ab->pdevs[i];
+               ar = pdev->ar;
+               ar->state_11d = ATH11K_11D_IDLE;
+               complete(&ar->completed_11d_scan);
+       }
+
        queue_work(ab->workqueue, &ab->update_11d_work);
 
        return 0;
index 98090e40e1cf48a264755999bd5d29966cdfc4b9..e2791d45f5f595b3c0d218a9a4b060e6077e438a 100644 (file)
@@ -839,7 +839,7 @@ static bool ath9k_txq_list_has_key(struct list_head *txq_list, u32 keyix)
                        continue;
 
                txinfo = IEEE80211_SKB_CB(bf->bf_mpdu);
-               fi = (struct ath_frame_info *)&txinfo->rate_driver_data[0];
+               fi = (struct ath_frame_info *)&txinfo->status.status_driver_data[0];
                if (fi->keyix == keyix)
                        return true;
        }
index d0caf1de2bdec5f57c20f58c69962069fa955937..db83cc4ba810af9a04eb0009ce5992c515fb7775 100644 (file)
@@ -141,8 +141,8 @@ static struct ath_frame_info *get_frame_info(struct sk_buff *skb)
 {
        struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
        BUILD_BUG_ON(sizeof(struct ath_frame_info) >
-                    sizeof(tx_info->rate_driver_data));
-       return (struct ath_frame_info *) &tx_info->rate_driver_data[0];
+                    sizeof(tx_info->status.status_driver_data));
+       return (struct ath_frame_info *) &tx_info->status.status_driver_data[0];
 }
 
 static void ath_send_bar(struct ath_atx_tid *tid, u16 seqno)
@@ -2542,6 +2542,16 @@ skip_tx_complete:
        spin_unlock_irqrestore(&sc->tx.txbuflock, flags);
 }
 
+static void ath_clear_tx_status(struct ieee80211_tx_info *tx_info)
+{
+       void *ptr = &tx_info->status;
+
+       memset(ptr + sizeof(tx_info->status.rates), 0,
+              sizeof(tx_info->status) -
+              sizeof(tx_info->status.rates) -
+              sizeof(tx_info->status.status_driver_data));
+}
+
 static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf,
                             struct ath_tx_status *ts, int nframes, int nbad,
                             int txok)
@@ -2553,6 +2563,8 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf,
        struct ath_hw *ah = sc->sc_ah;
        u8 i, tx_rateindex;
 
+       ath_clear_tx_status(tx_info);
+
        if (txok)
                tx_info->status.ack_signal = ts->ts_rssi;
 
@@ -2567,6 +2579,13 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf,
        tx_info->status.ampdu_len = nframes;
        tx_info->status.ampdu_ack_len = nframes - nbad;
 
+       tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1;
+
+       for (i = tx_rateindex + 1; i < hw->max_rates; i++) {
+               tx_info->status.rates[i].count = 0;
+               tx_info->status.rates[i].idx = -1;
+       }
+
        if ((ts->ts_status & ATH9K_TXERR_FILT) == 0 &&
            (tx_info->flags & IEEE80211_TX_CTL_NO_ACK) == 0) {
                /*
@@ -2588,16 +2607,6 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf,
                        tx_info->status.rates[tx_rateindex].count =
                                hw->max_rate_tries;
        }
-
-       for (i = tx_rateindex + 1; i < hw->max_rates; i++) {
-               tx_info->status.rates[i].count = 0;
-               tx_info->status.rates[i].idx = -1;
-       }
-
-       tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1;
-
-       /* we report airtime in ath_tx_count_airtime(), don't report twice */
-       tx_info->status.tx_time = 0;
 }
 
 static void ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq)
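
ath_clear_tx_status() above leans on member order inside the status sub-struct: it wipes only the bytes between the leading rates[] array and the trailing status_driver_data[], which now carries ath9k's per-frame info. A toy illustration of the same idiom with a made-up userspace struct (names illustrative):

        #include <string.h>

        struct toy_status {
                int rates[4];           /* head: preserved */
                int ack_signal;         /* middle: cleared */
                int ampdu_len;          /* middle: cleared */
                char driver_data[24];   /* tail: preserved */
        };

        /* Zero only the middle; valid because rates[] is the first member
         * and driver_data[] is the last.
         */
        static void toy_clear_middle(struct toy_status *st)
        {
                char *p = (char *)st;

                memset(p + sizeof(st->rates), 0,
                       sizeof(*st) - sizeof(st->rates) - sizeof(st->driver_data));
        }
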
index ba3c159111d3155c0475385bfc43e063a303c286..212fbbe1cd7ec4c02460af013e2e32e8c85578b8 100644 (file)
@@ -557,7 +557,7 @@ enum brcmf_sdio_frmtype {
        BRCMF_SDIO_FT_SUB,
 };
 
-#define SDIOD_DRVSTR_KEY(chip, pmu)     (((chip) << 16) | (pmu))
+#define SDIOD_DRVSTR_KEY(chip, pmu)     (((unsigned int)(chip) << 16) | (pmu))
 
 /* SDIO Pad drive strength to select value mappings */
 struct sdiod_drive_str {
@@ -4165,7 +4165,7 @@ static int brcmf_sdio_bus_reset(struct device *dev)
 
        /* reset the adapter */
        sdio_claim_host(sdiodev->func1);
-       mmc_hw_reset(sdiodev->func1->card->host);
+       mmc_hw_reset(sdiodev->func1->card);
        sdio_release_host(sdiodev->func1);
 
        brcmf_bus_change_state(sdiodev->bus_if, BRCMF_BUS_DOWN);
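
The cast in SDIOD_DRVSTR_KEY() matters because a chip id can have bit 15 set (e.g. 43430 is 0xa9a6, used here purely as an illustration), and shifting that into bit 31 of a signed int is undefined behaviour. The same reasoning drives the 0xfU << 28 and 0xFFU shifts later in this series. A minimal before/after sketch:

        /* chip promotes to (signed) int before the shift. */
        unsigned int key_ub(unsigned short chip, unsigned short pmu)
        {
                return (chip << 16) | pmu;              /* UB when chip >= 0x8000 */
        }

        unsigned int key_ok(unsigned short chip, unsigned short pmu)
        {
                return ((unsigned int)chip << 16) | pmu;        /* well defined */
        }
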
index 866a33f49915f16b7a4f32187ba654e03b33fb26..3237d4b528b5d9ed74ce74ce7c1b3a237d848fc1 100644 (file)
@@ -371,7 +371,7 @@ void iwl_dbg_tlv_del_timers(struct iwl_trans *trans)
        struct iwl_dbg_tlv_timer_node *node, *tmp;
 
        list_for_each_entry_safe(node, tmp, timer_list, list) {
-               del_timer(&node->timer);
+               del_timer_sync(&node->timer);
                list_del(&node->list);
                kfree(node);
        }
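
del_timer() only deactivates a pending timer; the callback may still be mid-flight on another CPU when kfree(node) runs, which is the use-after-free del_timer_sync() closes by waiting for a running handler to finish. A condensed sketch of the safe teardown (struct name illustrative; note del_timer_sync() must not be called while holding a lock the timer handler takes):

        struct toy_node {
                struct timer_list timer;
                struct list_head list;
        };

        static void toy_teardown(struct toy_node *node)
        {
                del_timer_sync(&node->timer);   /* handler can no longer run */
                list_del(&node->list);
                kfree(node);                    /* now safe to free */
        }
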
index 28bfa7b7b73c09ab0e6cad98b9587fc7b4bea39f..e9ec63e0e395ba0615092cf7f28eb268604188c3 100644 (file)
@@ -2202,11 +2202,14 @@ mac80211_hwsim_sta_rc_update(struct ieee80211_hw *hw,
        if (!data->use_chanctx) {
                confbw = data->bw;
        } else {
-               struct ieee80211_chanctx_conf *chanctx_conf =
-                       rcu_dereference(vif->chanctx_conf);
+               struct ieee80211_chanctx_conf *chanctx_conf;
+
+               rcu_read_lock();
+               chanctx_conf = rcu_dereference(vif->chanctx_conf);
 
                if (!WARN_ON(!chanctx_conf))
                        confbw = chanctx_conf->def.width;
+               rcu_read_unlock();
        }
 
        WARN(bw > hwsim_get_chanwidth(confbw),
@@ -2475,11 +2478,13 @@ static void hw_scan_work(struct work_struct *work)
                        if (req->ie_len)
                                skb_put_data(probe, req->ie, req->ie_len);
 
+                       rcu_read_lock();
                        if (!ieee80211_tx_prepare_skb(hwsim->hw,
                                                      hwsim->hw_scan_vif,
                                                      probe,
                                                      hwsim->tmp_chan->band,
                                                      NULL)) {
+                               rcu_read_unlock();
                                kfree_skb(probe);
                                continue;
                        }
@@ -2487,6 +2492,7 @@ static void hw_scan_work(struct work_struct *work)
                        local_bh_disable();
                        mac80211_hwsim_tx_frame(hwsim->hw, probe,
                                                hwsim->tmp_chan);
+                       rcu_read_unlock();
                        local_bh_enable();
                }
        }
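
Both hwsim hunks enforce the same rule: rcu_dereference() is only legal inside an RCU read-side critical section, and the pointer must not be used past rcu_read_unlock(). A minimal sketch of the corrected pattern (local names illustrative; chanctx_conf is the mac80211 field used above):

        static u32 toy_read_width(struct ieee80211_vif *vif, u32 fallback)
        {
                struct ieee80211_chanctx_conf *conf;
                u32 width = fallback;

                rcu_read_lock();
                conf = rcu_dereference(vif->chanctx_conf);
                if (conf)
                        width = conf->def.width;        /* consume before unlock */
                rcu_read_unlock();

                return width;
        }
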
index bde9e4bbfffe79d3a4ab7d2beaa68223d75ad26a..4f3238d2a171a7d6a6c7bd92d782c29d13c53195 100644 (file)
@@ -2639,7 +2639,7 @@ static void mwifiex_sdio_card_reset_work(struct mwifiex_adapter *adapter)
 
        /* Run a HW reset of the SDIO interface. */
        sdio_claim_host(func);
-       ret = mmc_hw_reset(func->card->host);
+       ret = mmc_hw_reset(func->card);
        sdio_release_host(func);
 
        switch (ret) {
index 8a22ee5816748c5a9f306508b0d2e57735350e29..df85ebc6e1df07a7c2e48c30efa50cdeec9f993f 100644 (file)
@@ -80,7 +80,7 @@ mt76x2e_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        mt76_rmw_field(dev, 0x15a10, 0x1f << 16, 0x9);
 
        /* RG_SSUSB_G1_CDR_BIC_LTR = 0xf */
-       mt76_rmw_field(dev, 0x15a0c, 0xf << 28, 0xf);
+       mt76_rmw_field(dev, 0x15a0c, 0xfU << 28, 0xf);
 
        /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
        mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
index 72fc41ac83c0d0a0c9da1bba7b94b98ccaa2b0bf..9140b0163474438025236d76302a6e2614e0e817 100644 (file)
@@ -146,7 +146,7 @@ static int wl12xx_sdio_power_on(struct wl12xx_sdio_glue *glue)
         * To guarantee that the SDIO card is power cycled, as required to make
         * the FW programming succeed, let's do a brute-force HW reset.
         */
-       mmc_hw_reset(card->host);
+       mmc_hw_reset(card);
 
        sdio_enable_func(func);
        sdio_release_host(func);
index 2fcf545012b166cafb09dc13aa06031b6f124861..1a5284de4341b41d29a176c8231cd9d8665552f2 100644 (file)
@@ -183,6 +183,7 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv)
 {
        struct nci_dev *ndev = priv->ndev;
 
+       nci_unregister_device(ndev);
        if (priv->ndev->nfc_dev->fw_download_in_progress)
                nfcmrvl_fw_dnld_abort(priv);
 
@@ -191,7 +192,6 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv)
        if (gpio_is_valid(priv->config.reset_n_io))
                gpio_free(priv->config.reset_n_io);
 
-       nci_unregister_device(ndev);
        nci_free_device(ndev);
        kfree(priv);
 }
index a491db46e3bd468e0c835b6c706a1252ea110987..d9f6367b9993dd4b1880cce6625b8f6e2d985af1 100644 (file)
@@ -2787,13 +2787,14 @@ void pn53x_common_clean(struct pn533 *priv)
 {
        struct pn533_cmd *cmd, *n;
 
+       /* delete the timer before cleaning up the workqueue */
+       del_timer_sync(&priv->listen_timer);
+
        flush_delayed_work(&priv->poll_work);
        destroy_workqueue(priv->wq);
 
        skb_queue_purge(&priv->resp_q);
 
-       del_timer(&priv->listen_timer);
-
        list_for_each_entry_safe(cmd, n, &priv->cmd_queue, queue) {
                list_del(&cmd->queue);
                kfree(cmd);
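
The reorder matters because the listen timer can queue work: stopping the producer first guarantees the flush and destroy below see no late arrivals. The resulting shape, condensed (the priv layout here is illustrative):

        static void toy_clean(struct toy_priv *priv)
        {
                del_timer_sync(&priv->listen_timer);    /* no new work after this */
                flush_delayed_work(&priv->poll_work);   /* drain what is queued */
                destroy_workqueue(priv->wq);
        }
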
index 7d49eb34b348e73fc4552107ae21f1ad39beb9c2..4910543f00ff969515e10c2c8d5d1af378804d1d 100644 (file)
@@ -4,7 +4,6 @@
  * Copyright (c) 2022, Oracle and/or its affiliates
  */
 
-#include <linux/blkdev.h>
 #include "nvme.h"
 
 #ifdef CONFIG_NVME_VERBOSE_ERRORS
@@ -92,6 +91,7 @@ static const char * const nvme_statuses[] = {
        [NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected",
        [NVME_SC_CMD_INTERRUPTED] = "Command Interrupted",
        [NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error",
+       [NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY] = "Admin Command Media Not Ready",
        [NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set",
        [NVME_SC_LBA_RANGE] = "LBA Out of Range",
        [NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded",
@@ -155,10 +155,13 @@ static const char * const nvme_statuses[] = {
        [NVME_SC_COMPARE_FAILED] = "Compare Failure",
        [NVME_SC_ACCESS_DENIED] = "Access Denied",
        [NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block",
+       [NVME_SC_INTERNAL_PATH_ERROR] = "Internal Pathing Error",
        [NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss",
        [NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible",
        [NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition",
+       [NVME_SC_CTRL_PATH_ERROR] = "Controller Pathing Error",
        [NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error",
+       [NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command",
 };
 
 const unsigned char *nvme_get_error_status_str(u16 status)
index efb85c6d8e2d5d723e177116db1986c65f065ab0..72f7c955c7078548403c49471ead75e21c8c8ab4 100644 (file)
@@ -366,7 +366,7 @@ static inline void nvme_end_req(struct request *req)
 {
        blk_status_t status = nvme_error_status(nvme_req(req)->status);
 
-       if (unlikely(nvme_req(req)->status != NVME_SC_SUCCESS))
+       if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET)))
                nvme_log_error(req);
        nvme_end_req_zoned(req);
        nvme_trace_bio_complete(req);
@@ -1015,6 +1015,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                        goto out;
        }
 
+       req->rq_flags |= RQF_QUIET;
        ret = nvme_execute_rq(req, at_head);
        if (result && ret >= 0)
                *result = nvme_req(req)->result;
@@ -1206,6 +1207,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
 
        rq->timeout = ctrl->kato * HZ;
        rq->end_io_data = ctrl;
+       rq->rq_flags |= RQF_QUIET;
        blk_execute_rq_nowait(rq, false, nvme_keep_alive_end_io);
 }
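
RQF_QUIET gives submitters a per-request opt-out from the new nvme_log_error() reporting: nvme_end_req() logs only when the status is non-zero and the flag is clear. Condensed, the two halves of the contract look like this (the gate is the one added above; done_cb is an illustrative callback name):

        /* Submit side: internally generated or expected-to-fail commands. */
        req->rq_flags |= RQF_QUIET;
        blk_execute_rq_nowait(req, false, done_cb);

        /* Complete side: the gate from nvme_end_req(). */
        if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET)))
                nvme_log_error(req);
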
 
@@ -1287,6 +1289,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
                                 warn_str, cur->nidl);
                        return -1;
                }
+               if (ctrl->quirks & NVME_QUIRK_BOGUS_NID)
+                       return NVME_NIDT_EUI64_LEN;
                memcpy(ids->eui64, data + sizeof(*cur), NVME_NIDT_EUI64_LEN);
                return NVME_NIDT_EUI64_LEN;
        case NVME_NIDT_NGUID:
@@ -1295,6 +1299,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
                                 warn_str, cur->nidl);
                        return -1;
                }
+               if (ctrl->quirks & NVME_QUIRK_BOGUS_NID)
+                       return NVME_NIDT_NGUID_LEN;
                memcpy(ids->nguid, data + sizeof(*cur), NVME_NIDT_NGUID_LEN);
                return NVME_NIDT_NGUID_LEN;
        case NVME_NIDT_UUID:
@@ -1303,6 +1309,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
                                 warn_str, cur->nidl);
                        return -1;
                }
+               if (ctrl->quirks & NVME_QUIRK_BOGUS_NID)
+                       return NVME_NIDT_UUID_LEN;
                uuid_copy(&ids->uuid, data + sizeof(*cur));
                return NVME_NIDT_UUID_LEN;
        case NVME_NIDT_CSI:
@@ -1399,12 +1407,18 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
        if ((*id)->ncap == 0) /* namespace not allocated or attached */
                goto out_free_id;
 
-       if (ctrl->vs >= NVME_VS(1, 1, 0) &&
-           !memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
-               memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64));
-       if (ctrl->vs >= NVME_VS(1, 2, 0) &&
-           !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
-               memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid));
+
+       if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) {
+               dev_info(ctrl->device,
+                        "Ignoring bogus Namespace Identifiers\n");
+       } else {
+               if (ctrl->vs >= NVME_VS(1, 1, 0) &&
+                   !memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
+                       memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64));
+               if (ctrl->vs >= NVME_VS(1, 2, 0) &&
+                   !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
+                       memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid));
+       }
 
        return 0;
 
@@ -1413,6 +1427,32 @@ out_free_id:
        return error;
 }
 
+static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid,
+                       struct nvme_id_ns_cs_indep **id)
+{
+       struct nvme_command c = {
+               .identify.opcode        = nvme_admin_identify,
+               .identify.nsid          = cpu_to_le32(nsid),
+               .identify.cns           = NVME_ID_CNS_NS_CS_INDEP,
+       };
+       int ret;
+
+       *id = kmalloc(sizeof(**id), GFP_KERNEL);
+       if (!*id)
+               return -ENOMEM;
+
+       ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
+       if (ret) {
+               dev_warn(ctrl->device,
+                        "Identify namespace (CS independent) failed (%d)\n",
+                        ret);
+               kfree(*id);
+               return ret;
+       }
+
+       return 0;
+}
+
 static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
                unsigned int dword11, void *buffer, size_t buflen, u32 *result)
 {
@@ -1608,20 +1648,22 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
        u32 size = queue_logical_block_size(queue);
 
        if (ctrl->max_discard_sectors == 0) {
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
+               blk_queue_max_discard_sectors(queue, 0);
                return;
        }
 
        BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
                        NVME_DSM_MAX_RANGES);
 
-       queue->limits.discard_alignment = 0;
        queue->limits.discard_granularity = size;
 
        /* If discard is already enabled, don't reset queue limits */
-       if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
+       if (queue->limits.max_discard_sectors)
                return;
 
+       if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
+               ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
+
        blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
        blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
 
@@ -1758,7 +1800,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
                blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
        }
        blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1);
-       blk_queue_dma_alignment(q, 7);
+       blk_queue_dma_alignment(q, 3);
        blk_queue_write_cache(q, vwc, vwc);
 }
 
@@ -2087,10 +2129,9 @@ static const struct block_device_operations nvme_bdev_ops = {
        .pr_ops         = &nvme_pr_ops,
 };
 
-static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
+static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
 {
-       unsigned long timeout =
-               ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+       unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies;
        u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
        int ret;
 
@@ -2103,7 +2144,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
                usleep_range(1000, 2000);
                if (fatal_signal_pending(current))
                        return -EINTR;
-               if (time_after(jiffies, timeout)) {
+               if (time_after(jiffies, timeout_jiffies)) {
                        dev_err(ctrl->device,
                                "Device not ready; aborting %s, CSTS=0x%x\n",
                                enabled ? "initialisation" : "reset", csts);
@@ -2134,13 +2175,14 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
        if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
                msleep(NVME_QUIRK_DELAY_AMOUNT);
 
-       return nvme_wait_ready(ctrl, ctrl->cap, false);
+       return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false);
 }
 EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
 
 int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
 {
        unsigned dev_page_min;
+       u32 timeout;
        int ret;
 
        ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
@@ -2161,6 +2203,27 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
                ctrl->ctrl_config = NVME_CC_CSS_CSI;
        else
                ctrl->ctrl_config = NVME_CC_CSS_NVM;
+
+       if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
+               u32 crto;
+
+               ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
+               if (ret) {
+                       dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
+                               ret);
+                       return ret;
+               }
+
+               if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
+                       ctrl->ctrl_config |= NVME_CC_CRIME;
+                       timeout = NVME_CRTO_CRIMT(crto);
+               } else {
+                       timeout = NVME_CRTO_CRWMT(crto);
+               }
+       } else {
+               timeout = NVME_CAP_TIMEOUT(ctrl->cap);
+       }
+
        ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
        ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
        ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
@@ -2169,7 +2232,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
        ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
        if (ret)
                return ret;
-       return nvme_wait_ready(ctrl, ctrl->cap, true);
+       return nvme_wait_ready(ctrl, timeout, true);
 }
 EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
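
CAP.TO and the CRTO fields all count in 500 ms units, which is what the (timeout + 1) * HZ / 2 conversion in nvme_wait_ready() encodes. A worked example with an assumed register readback:

        /* Assume CRTO.CRIMT reads back as 29 (illustrative): the controller
         * wants (29 + 1) * 500 ms = 15 s after enabling with CC.CRIME set.
         */
        u32 timeout = 29;                                       /* from CRTO */
        unsigned long deadline = ((timeout + 1) * HZ / 2) + jiffies;   /* +15 s */
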
 
@@ -2881,8 +2944,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
 
        if (id->dmrl)
                ctrl->max_discard_segments = id->dmrl;
-       if (id->dmrsl)
-               ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl);
+       ctrl->dmrsl = le32_to_cpu(id->dmrsl);
        if (id->wzsl)
                ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
 
@@ -3067,10 +3129,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
        if (ret)
                return ret;
 
-       ret = nvme_init_non_mdts_limits(ctrl);
-       if (ret < 0)
-               return ret;
-
        ret = nvme_configure_apst(ctrl);
        if (ret < 0)
                return ret;
@@ -3133,6 +3191,7 @@ static const struct file_operations nvme_dev_fops = {
        .release        = nvme_dev_release,
        .unlocked_ioctl = nvme_dev_ioctl,
        .compat_ioctl   = compat_ptr_ioctl,
+       .uring_cmd      = nvme_dev_uring_cmd,
 };
 
 static ssize_t nvme_sysfs_reset(struct device *dev,
@@ -3686,6 +3745,7 @@ static const struct file_operations nvme_ns_chr_fops = {
        .release        = nvme_ns_chr_release,
        .unlocked_ioctl = nvme_ns_chr_ioctl,
        .compat_ioctl   = compat_ptr_ioctl,
+       .uring_cmd      = nvme_ns_chr_uring_cmd,
 };
 
 static int nvme_add_ns_cdev(struct nvme_ns *ns)
@@ -4077,11 +4137,26 @@ out:
 static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
        struct nvme_ns_ids ids = { };
+       struct nvme_id_ns_cs_indep *id;
        struct nvme_ns *ns;
+       bool ready = true;
 
        if (nvme_identify_ns_descs(ctrl, nsid, &ids))
                return;
 
+       /*
+        * Check if the namespace is ready. If it is not, ignore it; we will
+        * get an AEN once it becomes ready and then restart the scan.
+        */
+       if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) &&
+           !nvme_identify_ns_cs_indep(ctrl, nsid, &id)) {
+               ready = id->nstat & NVME_NSTAT_NRDY;
+               kfree(id);
+       }
+
+       if (!ready)
+               return;
+
        ns = nvme_find_get_ns(ctrl, nsid);
        if (ns) {
                nvme_validate_ns(ns, &ids);
@@ -4224,11 +4299,26 @@ static void nvme_scan_work(struct work_struct *work)
 {
        struct nvme_ctrl *ctrl =
                container_of(work, struct nvme_ctrl, scan_work);
+       int ret;
 
        /* No tagset on a live ctrl means IO queues could not be created */
        if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
                return;
 
+       /*
+        * The Identify Controller limits can change at controller reset due
+        * to a new firmware download, and although that is uncommon we cannot
+        * ignore the scenario. The controller's non-MDTS limits are reported
+        * in units of logical blocks, which depend on the attached namespace
+        * format. Hence re-read the limits at the time of ns allocation.
+        */
+       ret = nvme_init_non_mdts_limits(ctrl);
+       if (ret < 0) {
+               dev_warn(ctrl->device,
+                       "reading non-mdts-limits failed: %d\n", ret);
+               return;
+       }
+
        if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
                dev_info(ctrl->device, "rescanning namespaces.\n");
                nvme_clear_changed_ns_log(ctrl);
@@ -4826,6 +4916,8 @@ static inline void _nvme_check_size(void)
        BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
        BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
        BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
+       BUILD_BUG_ON(sizeof(struct nvme_id_ns_cs_indep) !=
+                       NVME_IDENTIFY_DATA_SIZE);
        BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
        BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE);
        BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
index 1e3a09cad96113971b78c8ecda5b4a5d7c4ffb8a..46d6e194ac2be5d886f8f9a47e28be69c1295326 100644 (file)
@@ -187,6 +187,14 @@ static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl)
        return ctrl->subsys->subnqn;
 }
 
+static inline void nvmf_complete_timed_out_request(struct request *rq)
+{
+       if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
+               nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
+               blk_mq_complete_request(rq);
+       }
+}
+
 int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
 int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
 int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
index 080f85f4105f3dc7a1d9215175727414ef9ed8f5..7ae72c7a211b975ecba3009cb337211dbacbee5d 100644 (file)
@@ -3831,6 +3831,9 @@ process_local_list:
        return count;
 }
 
+static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);
+
+#ifdef CONFIG_BLK_CGROUP_FC_APPID
 /* Parse the cgroup id from a buf and return the length of cgrpid */
 static int fc_parse_cgrpid(const char *buf, u64 *id)
 {
@@ -3854,12 +3857,10 @@ static int fc_parse_cgrpid(const char *buf, u64 *id)
 }
 
 /*
- * fc_update_appid: Parse and update the appid in the blkcg associated with
- * cgroupid.
- * @buf: buf contains both cgrpid and appid info
- * @count: size of the buffer
+ * Parse and update the appid in the blkcg associated with the cgroupid.
  */
-static int fc_update_appid(const char *buf, size_t count)
+static ssize_t fc_appid_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
 {
        u64 cgrp_id;
        int appid_len = 0;
@@ -3887,23 +3888,14 @@ static int fc_update_appid(const char *buf, size_t count)
                return ret;
        return count;
 }
-
-static ssize_t fc_appid_store(struct device *dev,
-               struct device_attribute *attr, const char *buf, size_t count)
-{
-       int ret  = 0;
-
-       ret = fc_update_appid(buf, count);
-       if (ret < 0)
-               return -EINVAL;
-       return count;
-}
-static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);
 static DEVICE_ATTR(appid_store, 0200, NULL, fc_appid_store);
+#endif /* CONFIG_BLK_CGROUP_FC_APPID */
 
 static struct attribute *nvme_fc_attrs[] = {
        &dev_attr_nvme_discovery.attr,
+#ifdef CONFIG_BLK_CGROUP_FC_APPID
        &dev_attr_appid_store.attr,
+#endif
        NULL
 };
 
index 554566371ffa49cdc5629bd27a8dc5dd25318ccb..096b1b47d750e93744fdd26622e52f0d639ff10e 100644 (file)
@@ -5,6 +5,7 @@
  */
 #include <linux/ptrace.h>      /* for force_successful_syscall_return */
 #include <linux/nvme_ioctl.h>
+#include <linux/io_uring.h>
 #include "nvme.h"
 
 /*
@@ -53,10 +54,21 @@ out:
        return ERR_PTR(ret);
 }
 
-static int nvme_submit_user_cmd(struct request_queue *q,
+static int nvme_finish_user_metadata(struct request *req, void __user *ubuf,
+               void *meta, unsigned len, int ret)
+{
+       if (!ret && req_op(req) == REQ_OP_DRV_IN &&
+           copy_to_user(ubuf, meta, len))
+               ret = -EFAULT;
+       kfree(meta);
+       return ret;
+}
+
+static struct request *nvme_alloc_user_request(struct request_queue *q,
                struct nvme_command *cmd, void __user *ubuffer,
                unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
-               u32 meta_seed, u64 *result, unsigned timeout, bool vec)
+               u32 meta_seed, void **metap, unsigned timeout, bool vec,
+               unsigned int rq_flags, blk_mq_req_flags_t blk_flags)
 {
        bool write = nvme_is_write(cmd);
        struct nvme_ns *ns = q->queuedata;
@@ -66,9 +78,9 @@ static int nvme_submit_user_cmd(struct request_queue *q,
        void *meta = NULL;
        int ret;
 
-       req = blk_mq_alloc_request(q, nvme_req_op(cmd), 0);
+       req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags);
        if (IS_ERR(req))
-               return PTR_ERR(req);
+               return req;
        nvme_init_request(req, cmd);
 
        if (timeout)
@@ -105,26 +117,50 @@ static int nvme_submit_user_cmd(struct request_queue *q,
                                goto out_unmap;
                        }
                        req->cmd_flags |= REQ_INTEGRITY;
+                       *metap = meta;
                }
        }
 
+       return req;
+
+out_unmap:
+       if (bio)
+               blk_rq_unmap_user(bio);
+out:
+       blk_mq_free_request(req);
+       return ERR_PTR(ret);
+}
+
+static int nvme_submit_user_cmd(struct request_queue *q,
+               struct nvme_command *cmd, void __user *ubuffer,
+               unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
+               u32 meta_seed, u64 *result, unsigned timeout, bool vec)
+{
+       struct request *req;
+       void *meta = NULL;
+       struct bio *bio;
+       int ret;
+
+       req = nvme_alloc_user_request(q, cmd, ubuffer, bufflen, meta_buffer,
+                       meta_len, meta_seed, &meta, timeout, vec, 0, 0);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
+
+       bio = req->bio;
+
        ret = nvme_execute_passthru_rq(req);
+
        if (result)
                *result = le64_to_cpu(nvme_req(req)->result.u64);
-       if (meta && !ret && !write) {
-               if (copy_to_user(meta_buffer, meta, meta_len))
-                       ret = -EFAULT;
-       }
-       kfree(meta);
- out_unmap:
+       if (meta)
+               ret = nvme_finish_user_metadata(req, meta_buffer, meta,
+                                               meta_len, ret);
        if (bio)
                blk_rq_unmap_user(bio);
- out:
        blk_mq_free_request(req);
        return ret;
 }
 
-
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
        struct nvme_user_io io;
@@ -296,6 +332,139 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
        return status;
 }
 
+struct nvme_uring_data {
+       __u64   metadata;
+       __u64   addr;
+       __u32   data_len;
+       __u32   metadata_len;
+       __u32   timeout_ms;
+};
+
+/*
+ * This overlays struct io_uring_cmd pdu.
+ * Expect build errors if this grows larger than that.
+ */
+struct nvme_uring_cmd_pdu {
+       union {
+               struct bio *bio;
+               struct request *req;
+       };
+       void *meta; /* kernel-resident buffer */
+       void __user *meta_buffer;
+       u32 meta_len;
+};
+
+static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
+               struct io_uring_cmd *ioucmd)
+{
+       return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
+}
+
+static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
+{
+       struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+       struct request *req = pdu->req;
+       struct bio *bio = req->bio;
+       int status;
+       u64 result;
+
+       if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+               status = -EINTR;
+       else
+               status = nvme_req(req)->status;
+
+       result = le64_to_cpu(nvme_req(req)->result.u64);
+
+       if (pdu->meta)
+               status = nvme_finish_user_metadata(req, pdu->meta_buffer,
+                                       pdu->meta, pdu->meta_len, status);
+       if (bio)
+               blk_rq_unmap_user(bio);
+       blk_mq_free_request(req);
+
+       io_uring_cmd_done(ioucmd, status, result);
+}
+
+static void nvme_uring_cmd_end_io(struct request *req, blk_status_t err)
+{
+       struct io_uring_cmd *ioucmd = req->end_io_data;
+       struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+       /* extract bio before reusing the same field for request */
+       struct bio *bio = pdu->bio;
+
+       pdu->req = req;
+       req->bio = bio;
+       /* this takes care of moving the rest of the completion work to task context */
+       io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb);
+}
+
+static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+               struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
+{
+       struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+       const struct nvme_uring_cmd *cmd = ioucmd->cmd;
+       struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
+       struct nvme_uring_data d;
+       struct nvme_command c;
+       struct request *req;
+       unsigned int rq_flags = 0;
+       blk_mq_req_flags_t blk_flags = 0;
+       void *meta = NULL;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
+       c.common.opcode = READ_ONCE(cmd->opcode);
+       c.common.flags = READ_ONCE(cmd->flags);
+       if (c.common.flags)
+               return -EINVAL;
+
+       c.common.command_id = 0;
+       c.common.nsid = cpu_to_le32(cmd->nsid);
+       if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid)))
+               return -EINVAL;
+
+       c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2));
+       c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3));
+       c.common.metadata = 0;
+       c.common.dptr.prp1 = c.common.dptr.prp2 = 0;
+       c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10));
+       c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11));
+       c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12));
+       c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13));
+       c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
+       c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
+
+       d.metadata = READ_ONCE(cmd->metadata);
+       d.addr = READ_ONCE(cmd->addr);
+       d.data_len = READ_ONCE(cmd->data_len);
+       d.metadata_len = READ_ONCE(cmd->metadata_len);
+       d.timeout_ms = READ_ONCE(cmd->timeout_ms);
+
+       if (issue_flags & IO_URING_F_NONBLOCK) {
+               rq_flags = REQ_NOWAIT;
+               blk_flags = BLK_MQ_REQ_NOWAIT;
+       }
+
+       req = nvme_alloc_user_request(q, &c, nvme_to_user_ptr(d.addr),
+                       d.data_len, nvme_to_user_ptr(d.metadata),
+                       d.metadata_len, 0, &meta, d.timeout_ms ?
+                       msecs_to_jiffies(d.timeout_ms) : 0, vec, rq_flags,
+                       blk_flags);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
+       req->end_io_data = ioucmd;
+
+       /* to free bio on completion, as req->bio will be null at that time */
+       pdu->bio = req->bio;
+       pdu->meta = meta;
+       pdu->meta_buffer = nvme_to_user_ptr(d.metadata);
+       pdu->meta_len = d.metadata_len;
+
+       blk_execute_rq_nowait(req, 0, nvme_uring_cmd_end_io);
+       return -EIOCBQUEUED;
+}
+
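
For context, a rough userspace sketch of driving the new NVME_URING_CMD_IO path; it assumes a liburing recent enough to expose the big-SQE setup flags, takes the NVMe Read opcode (0x02) and the zero-based NLB encoding from the spec, and trims error handling. The helper name, the /dev/ng0n1 example node, and the block-size handling are illustrative, not part of this series:

        #include <fcntl.h>
        #include <stdint.h>
        #include <string.h>
        #include <liburing.h>
        #include <linux/nvme_ioctl.h>   /* struct nvme_uring_cmd, NVME_URING_CMD_IO */

        static int toy_nvme_read_lba0(const char *chrdev, void *buf,
                                      unsigned int nblocks, unsigned int blk_size,
                                      unsigned int nsid)
        {
                struct io_uring_params p = {
                        .flags = IORING_SETUP_SQE128 | IORING_SETUP_CQE32,
                };
                struct io_uring ring;
                struct io_uring_sqe *sqe;
                struct io_uring_cqe *cqe;
                struct nvme_uring_cmd *cmd;
                int fd, ret;

                fd = open(chrdev, O_RDONLY);    /* e.g. /dev/ng0n1 */
                if (fd < 0)
                        return -1;
                if (io_uring_queue_init_params(4, &ring, &p))
                        return -1;

                sqe = io_uring_get_sqe(&ring);
                memset(sqe, 0, 128);            /* big SQE: clear all of it */
                sqe->opcode = IORING_OP_URING_CMD;
                sqe->fd = fd;
                sqe->cmd_op = NVME_URING_CMD_IO;

                cmd = (struct nvme_uring_cmd *)sqe->cmd;
                cmd->opcode = 0x02;             /* NVMe Read */
                cmd->nsid = nsid;
                cmd->addr = (uintptr_t)buf;     /* buf must hold nblocks * blk_size */
                cmd->data_len = nblocks * blk_size;
                cmd->cdw12 = nblocks - 1;       /* NLB is zero-based; SLBA stays 0 */

                io_uring_submit(&ring);
                ret = io_uring_wait_cqe(&ring, &cqe);
                if (!ret)
                        ret = cqe->res;         /* 0 on success */
                io_uring_cqe_seen(&ring, cqe);
                io_uring_queue_exit(&ring);
                return ret;
        }
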
 static bool is_ctrl_ioctl(unsigned int cmd)
 {
        if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
@@ -387,6 +556,53 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        return __nvme_ioctl(ns, cmd, (void __user *)arg);
 }
 
+static int nvme_uring_cmd_checks(unsigned int issue_flags)
+{
+       /* IOPOLL not supported yet */
+       if (issue_flags & IO_URING_F_IOPOLL)
+               return -EOPNOTSUPP;
+
+       /* NVMe passthrough requires big SQE/CQE support */
+       if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) !=
+           (IO_URING_F_SQE128|IO_URING_F_CQE32))
+               return -EOPNOTSUPP;
+       return 0;
+}
+
+static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
+                            unsigned int issue_flags)
+{
+       struct nvme_ctrl *ctrl = ns->ctrl;
+       int ret;
+
+       BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));
+
+       ret = nvme_uring_cmd_checks(issue_flags);
+       if (ret)
+               return ret;
+
+       switch (ioucmd->cmd_op) {
+       case NVME_URING_CMD_IO:
+               ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false);
+               break;
+       case NVME_URING_CMD_IO_VEC:
+               ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true);
+               break;
+       default:
+               ret = -ENOTTY;
+       }
+
+       return ret;
+}
+
+int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
+{
+       struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev,
+                       struct nvme_ns, cdev);
+
+       return nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
+}
+
 #ifdef CONFIG_NVME_MULTIPATH
 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
                void __user *argp, struct nvme_ns_head *head, int srcu_idx)
@@ -453,8 +669,46 @@ out_unlock:
        srcu_read_unlock(&head->srcu, srcu_idx);
        return ret;
 }
+
+int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
+               unsigned int issue_flags)
+{
+       struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
+       struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
+       int srcu_idx = srcu_read_lock(&head->srcu);
+       struct nvme_ns *ns = nvme_find_path(head);
+       int ret = -EINVAL;
+
+       if (ns)
+               ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
+       srcu_read_unlock(&head->srcu, srcu_idx);
+       return ret;
+}
 #endif /* CONFIG_NVME_MULTIPATH */
 
+int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
+{
+       struct nvme_ctrl *ctrl = ioucmd->file->private_data;
+       int ret;
+
+       ret = nvme_uring_cmd_checks(issue_flags);
+       if (ret)
+               return ret;
+
+       switch (ioucmd->cmd_op) {
+       case NVME_URING_CMD_ADMIN:
+               ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false);
+               break;
+       case NVME_URING_CMD_ADMIN_VEC:
+               ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true);
+               break;
+       default:
+               ret = -ENOTTY;
+       }
+
+       return ret;
+}
+
 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
 {
        struct nvme_ns *ns;
index d464fdf978fbaa449cd353f8b4972bfd4d62f29c..d3e2440d8abb059b44dcab8c32a184e7710f0778 100644 (file)
@@ -437,6 +437,7 @@ static const struct file_operations nvme_ns_head_chr_fops = {
        .release        = nvme_ns_head_chr_release,
        .unlocked_ioctl = nvme_ns_head_chr_ioctl,
        .compat_ioctl   = compat_ptr_ioctl,
+       .uring_cmd      = nvme_ns_head_chr_uring_cmd,
 };
 
 static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
index 1393bbf82d71e3542af7d8425075194ef51cd3dc..9b72b6ecf33c9cfc66a5d45c36225b15781773b7 100644 (file)
@@ -144,6 +144,11 @@ enum nvme_quirks {
         * encoding the generation sequence number.
         */
        NVME_QUIRK_SKIP_CID_GEN                 = (1 << 17),
+
+       /*
+        * Reports garbage in the namespace identifiers (eui64, nguid, uuid).
+        */
+       NVME_QUIRK_BOGUS_NID                    = (1 << 18),
 };
 
 /*
@@ -279,6 +284,7 @@ struct nvme_ctrl {
 #endif
        u16 crdt[3];
        u16 oncs;
+       u32 dmrsl;
        u16 oacs;
        u16 sqsize;
        u32 max_namespaces;
@@ -777,7 +783,12 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
                unsigned long arg);
 long nvme_dev_ioctl(struct file *file, unsigned int cmd,
                unsigned long arg);
+int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
+               unsigned int issue_flags);
+int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
+               unsigned int issue_flags);
 int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
+int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 
 extern const struct attribute_group *nvme_ns_id_attr_groups[];
 extern const struct pr_ops nvme_pr_ops;
index d817ca17463edfba84dc27b50c5652998bbf576f..5a98a7de09642d974f6c436182b0cc638005defc 100644 (file)
@@ -1439,6 +1439,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
        nvme_init_request(abort_req, &cmd);
 
        abort_req->end_io_data = NULL;
+       abort_req->rq_flags |= RQF_QUIET;
        blk_execute_rq_nowait(abort_req, false, abort_endio);
 
        /*
@@ -1775,6 +1776,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
                dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
                if (IS_ERR(dev->ctrl.admin_q)) {
                        blk_mq_free_tag_set(&dev->admin_tagset);
+                       dev->ctrl.admin_q = NULL;
                        return -ENOMEM;
                }
                if (!blk_get_queue(dev->ctrl.admin_q)) {
@@ -2486,6 +2488,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
        req->end_io_data = nvmeq;
 
        init_completion(&nvmeq->delete_done);
+       req->rq_flags |= RQF_QUIET;
        blk_execute_rq_nowait(req, false, opcode == nvme_admin_delete_cq ?
                        nvme_del_cq_end : nvme_del_queue_end);
        return 0;
@@ -2675,7 +2678,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
        struct pci_dev *pdev = to_pci_dev(dev->dev);
 
        mutex_lock(&dev->shutdown_lock);
-       if (pci_is_enabled(pdev)) {
+       if (pci_device_is_present(pdev) && pci_is_enabled(pdev)) {
                u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
                if (dev->ctrl.state == NVME_CTRL_LIVE ||
@@ -3409,7 +3412,10 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
        { PCI_VDEVICE(INTEL, 0x5845),   /* Qemu emulated controller */
                .driver_data = NVME_QUIRK_IDENTIFY_CNS |
-                               NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+                               NVME_QUIRK_DISABLE_WRITE_ZEROES |
+                               NVME_QUIRK_BOGUS_NID, },
+       { PCI_VDEVICE(REDHAT, 0x0010),  /* Qemu emulated controller */
+               .driver_data = NVME_QUIRK_BOGUS_NID, },
        { PCI_DEVICE(0x126f, 0x2263),   /* Silicon Motion unidentified */
                .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
        { PCI_DEVICE(0x1bb1, 0x0100),   /* Seagate Nytro Flash Storage */
@@ -3447,6 +3453,10 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
        { PCI_DEVICE(0x2646, 0x2263),   /* KINGSTON A2000 NVMe SSD  */
                .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
+       { PCI_DEVICE(0x1e4B, 0x1002),   /* MAXIO MAP1002 */
+               .driver_data = NVME_QUIRK_BOGUS_NID, },
+       { PCI_DEVICE(0x1e4B, 0x1202),   /* MAXIO MAP1202 */
+               .driver_data = NVME_QUIRK_BOGUS_NID, },
        { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
                .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
        { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
index d9f19d90131398f8ab5a12395eb041ff34c0fdd1..b87c8ae41d9be892ded04839c72eb2602466faa9 100644 (file)
@@ -2010,10 +2010,7 @@ static void nvme_rdma_complete_timed_out(struct request *rq)
        struct nvme_rdma_queue *queue = req->queue;
 
        nvme_rdma_stop_queue(queue);
-       if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
-               nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
-               blk_mq_complete_request(rq);
-       }
+       nvmf_complete_timed_out_request(rq);
 }
 
 static enum blk_eh_timer_return
index ad3a2bf2f1e9b49088dfebde5768f10fe029d675..bb67538d241b657dec9ee4bac33029920c493d9d 100644 (file)
@@ -2318,10 +2318,7 @@ static void nvme_tcp_complete_timed_out(struct request *rq)
        struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
 
        nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
-       if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
-               nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
-               blk_mq_complete_request(rq);
-       }
+       nvmf_complete_timed_out_request(rq);
 }
 
 static enum blk_eh_timer_return
index d886c2c59554f69fdc438aa4596e6a26c01584e9..27a72504d31ce1c81493963d85c1342e05c12c33 100644 (file)
@@ -360,7 +360,7 @@ static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
        ret = __blkdev_issue_discard(ns->bdev,
                        nvmet_lba_to_sect(ns, range->slba),
                        le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
-                       GFP_KERNEL, 0, bio);
+                       GFP_KERNEL, bio);
        if (ret && ret != -EOPNOTSUPP) {
                req->error_slba = le64_to_cpu(range->slba);
                return errno_to_nvme_status(req, ret);
index e34718b095504db20c5922112ce687081f5523bd..82b61acf7a72bd7a052b4db760c1731059bfa042 100644 (file)
@@ -34,8 +34,7 @@ static int validate_conv_zones_cb(struct blk_zone *z,
 
 bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
 {
-       struct request_queue *q = ns->bdev->bd_disk->queue;
-       u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q));
+       u8 zasl = nvmet_zasl(bdev_max_zone_append_sectors(ns->bdev));
        struct gendisk *bd_disk = ns->bdev->bd_disk;
        int ret;
 
index 6ab90891801d83e29cd5b2a4ccd3f524c134ad04..816028c0f6edb0bea64c980d3573143388224d67 100644 (file)
@@ -1550,6 +1550,11 @@ static const struct qcom_pcie_cfg sc7280_cfg = {
        .pipe_clk_need_muxing = true,
 };
 
+static const struct qcom_pcie_cfg sc8180x_cfg = {
+       .ops = &ops_1_9_0,
+       .has_tbu_clk = true,
+};
+
 static const struct dw_pcie_ops dw_pcie_ops = {
        .link_up = qcom_pcie_link_up,
        .start_link = qcom_pcie_start_link,
@@ -1656,7 +1661,7 @@ static const struct of_device_id qcom_pcie_match[] = {
        { .compatible = "qcom,pcie-qcs404", .data = &ipq4019_cfg },
        { .compatible = "qcom,pcie-sdm845", .data = &sdm845_cfg },
        { .compatible = "qcom,pcie-sm8250", .data = &sm8250_cfg },
-       { .compatible = "qcom,pcie-sc8180x", .data = &sm8250_cfg },
+       { .compatible = "qcom,pcie-sc8180x", .data = &sc8180x_cfg },
        { .compatible = "qcom,pcie-sm8450-pcie0", .data = &sm8450_pcie0_cfg },
        { .compatible = "qcom,pcie-sm8450-pcie1", .data = &sm8450_pcie1_cfg },
        { .compatible = "qcom,pcie-sc7280", .data = &sc7280_cfg },
index 09d9bf465d727b29461ed9e4e5e1870fe798f189..ffec82c8a523fe52c756bf30fe4e8e4cbdb52794 100644 (file)
@@ -272,7 +272,6 @@ struct advk_pcie {
                u32 actions;
        } wins[OB_WIN_COUNT];
        u8 wins_count;
-       int irq;
        struct irq_domain *rp_irq_domain;
        struct irq_domain *irq_domain;
        struct irq_chip irq_chip;
@@ -1570,26 +1569,21 @@ static void advk_pcie_handle_int(struct advk_pcie *pcie)
        }
 }
 
-static void advk_pcie_irq_handler(struct irq_desc *desc)
+static irqreturn_t advk_pcie_irq_handler(int irq, void *arg)
 {
-       struct advk_pcie *pcie = irq_desc_get_handler_data(desc);
-       struct irq_chip *chip = irq_desc_get_chip(desc);
-       u32 val, mask, status;
+       struct advk_pcie *pcie = arg;
+       u32 status;
 
-       chained_irq_enter(chip, desc);
+       status = advk_readl(pcie, HOST_CTRL_INT_STATUS_REG);
+       if (!(status & PCIE_IRQ_CORE_INT))
+               return IRQ_NONE;
 
-       val = advk_readl(pcie, HOST_CTRL_INT_STATUS_REG);
-       mask = advk_readl(pcie, HOST_CTRL_INT_MASK_REG);
-       status = val & ((~mask) & PCIE_IRQ_ALL_MASK);
+       advk_pcie_handle_int(pcie);
 
-       if (status & PCIE_IRQ_CORE_INT) {
-               advk_pcie_handle_int(pcie);
+       /* Clear interrupt */
+       advk_writel(pcie, PCIE_IRQ_CORE_INT, HOST_CTRL_INT_STATUS_REG);
 
-               /* Clear interrupt */
-               advk_writel(pcie, PCIE_IRQ_CORE_INT, HOST_CTRL_INT_STATUS_REG);
-       }
-
-       chained_irq_exit(chip, desc);
+       return IRQ_HANDLED;
 }
 
 static int advk_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
@@ -1669,7 +1663,7 @@ static int advk_pcie_probe(struct platform_device *pdev)
        struct advk_pcie *pcie;
        struct pci_host_bridge *bridge;
        struct resource_entry *entry;
-       int ret;
+       int ret, irq;
 
        bridge = devm_pci_alloc_host_bridge(dev, sizeof(struct advk_pcie));
        if (!bridge)
@@ -1755,9 +1749,17 @@ static int advk_pcie_probe(struct platform_device *pdev)
        if (IS_ERR(pcie->base))
                return PTR_ERR(pcie->base);
 
-       pcie->irq = platform_get_irq(pdev, 0);
-       if (pcie->irq < 0)
-               return pcie->irq;
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0)
+               return irq;
+
+       ret = devm_request_irq(dev, irq, advk_pcie_irq_handler,
+                              IRQF_SHARED | IRQF_NO_THREAD, "advk-pcie",
+                              pcie);
+       if (ret) {
+               dev_err(dev, "Failed to register interrupt\n");
+               return ret;
+       }
 
        pcie->reset_gpio = devm_gpiod_get_from_of_node(dev, dev->of_node,
                                                       "reset-gpios", 0,
@@ -1814,15 +1816,12 @@ static int advk_pcie_probe(struct platform_device *pdev)
                return ret;
        }
 
-       irq_set_chained_handler_and_data(pcie->irq, advk_pcie_irq_handler, pcie);
-
        bridge->sysdata = pcie;
        bridge->ops = &advk_pcie_ops;
        bridge->map_irq = advk_pcie_map_irq;
 
        ret = pci_host_probe(bridge);
        if (ret < 0) {
-               irq_set_chained_handler_and_data(pcie->irq, NULL, NULL);
                advk_pcie_remove_rp_irq_domain(pcie);
                advk_pcie_remove_msi_irq_domain(pcie);
                advk_pcie_remove_irq_domain(pcie);
@@ -1871,9 +1870,6 @@ static int advk_pcie_remove(struct platform_device *pdev)
        advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_REG);
        advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_STATUS_REG);
 
-       /* Remove IRQ handler */
-       irq_set_chained_handler_and_data(pcie->irq, NULL, NULL);
-
        /* Remove IRQ domains */
        advk_pcie_remove_rp_irq_domain(pcie);
        advk_pcie_remove_msi_irq_domain(pcie);
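
Moving from a chained handler to devm_request_irq() with IRQF_SHARED changes the handler's contract: it must probe its own status first and return IRQ_NONE when the line fired for another sharer, as advk_pcie_irq_handler() now does. The pattern in isolation (register and struct names illustrative):

        static irqreturn_t toy_irq(int irq, void *arg)
        {
                struct toy_pcie *pcie = arg;

                if (!(readl(pcie->base + TOY_INT_STATUS) & TOY_CORE_INT))
                        return IRQ_NONE;        /* not ours: let other sharers run */

                toy_handle_int(pcie);
                writel(TOY_CORE_INT, pcie->base + TOY_INT_STATUS);      /* ack */
                return IRQ_HANDLED;
        }
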
index 558b35aba610460e10303a3447e58ee4d28575a5..d270a204324e9cc556de9715b6d9470b28694583 100644 (file)
@@ -3407,6 +3407,15 @@ static int hv_pci_probe(struct hv_device *hdev,
        hbus->bridge->domain_nr = dom;
 #ifdef CONFIG_X86
        hbus->sysdata.domain = dom;
+#elif defined(CONFIG_ARM64)
+       /*
+        * Set the PCI bus parent to be the corresponding VMbus
+        * device. Then the VMbus device will be assigned as the
+        * ACPI companion in pcibios_root_bridge_prepare() and
+        * pci_dma_configure() will propagate device coherence
+        * information to devices created on the bus.
+        */
+       hbus->sysdata.parent = hdev->device.parent;
 #endif
 
        hbus->hdev = hdev;
index 9ecce435fb3f125abb5ba52455d4a6d2f40fdcdf..d25122fbe98ab57ec472aeb61b2d8f90c28a20ec 100644 (file)
@@ -2920,6 +2920,18 @@ static const struct dmi_system_id bridge_d3_blacklist[] = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
                        DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"),
                },
+       },
+       {
+               /*
+                * Downstream device is not accessible after putting a root port
+                * into D3cold and back into D0 on Elo i2.
+                */
+               .ident = "Elo i2",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Elo Touch Solutions"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Elo i2"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "RevB"),
+               },
        },
 #endif
        { }
index afdcb91601d2bbf93b5221673a67f503a05f679f..1e2d69453771d4aca267b0a64d4d0419c0dc820f 100644 (file)
@@ -187,7 +187,7 @@ source "drivers/perf/hisilicon/Kconfig"
 
 config MARVELL_CN10K_DDR_PMU
        tristate "Enable MARVELL CN10K DRAM Subsystem(DSS) PMU Support"
-       depends on ARM64 || (COMPILE_TEST && 64BIT)
+       depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT)
        help
          Enable perf support for Marvell DDR Performance monitoring
          event on CN10K platform.
index 9694370651fa8bc3bd19b91df020240f8144887e..59d3980b8ca2a201d3e1510468364104ec75da63 100644 (file)
@@ -400,6 +400,9 @@ validate_group(struct perf_event *event)
        if (!validate_event(event->pmu, &fake_pmu, leader))
                return -EINVAL;
 
+       if (event == leader)
+               return 0;
+
        for_each_sibling_event(sibling, leader) {
                if (!validate_event(event->pmu, &fake_pmu, sibling))
                        return -EINVAL;
@@ -489,12 +492,7 @@ __hw_perf_event_init(struct perf_event *event)
                local64_set(&hwc->period_left, hwc->sample_period);
        }
 
-       if (event->group_leader != event) {
-               if (validate_group(event) != 0)
-                       return -EINVAL;
-       }
-
-       return 0;
+       return validate_group(event);
 }
 
 static int armpmu_event_init(struct perf_event *event)
index 94ebc1ecace7cf342398e912d748c7fab33c3633..b1b2a55de77fc8c658f8e378ce21df435f1fa4be 100644 (file)
@@ -29,7 +29,7 @@
 #define CNTL_OVER_MASK         0xFFFFFFFE
 
 #define CNTL_CSV_SHIFT         24
-#define CNTL_CSV_MASK          (0xFF << CNTL_CSV_SHIFT)
+#define CNTL_CSV_MASK          (0xFFU << CNTL_CSV_SHIFT)
 
 #define EVENT_CYCLES_ID                0
 #define EVENT_CYCLES_COUNTER   0
index 7640491aab123c6e1dc71c1bca49d68bc31e268d..30234c261b05c33b3616a4c98114dee478f64bbc 100644 (file)
@@ -736,7 +736,7 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
 {
        u64 mpidr;
        int cpu_cluster_id;
-       struct cluster_pmu *cluster = NULL;
+       struct cluster_pmu *cluster;
 
        /*
         * This assumes that the cluster_id is in MPIDR[aff1] for
@@ -758,10 +758,10 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
                         cluster->cluster_id);
                cpumask_set_cpu(cpu, &cluster->cluster_cpus);
                *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster;
-               break;
+               return cluster;
        }
 
-       return cluster;
+       return NULL;
 }
 
 static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
index 5b471ab80fe289e6b66161292f589077b676672e..54d65a6f0fccfdfea01918be8403218d8ee4d393 100644 (file)
@@ -414,19 +414,19 @@ static int phy_g12a_usb3_pcie_probe(struct platform_device *pdev)
 
        ret = clk_prepare_enable(priv->clk_ref);
        if (ret)
-               goto err_disable_clk_ref;
+               return ret;
 
        priv->reset = devm_reset_control_array_get_exclusive(dev);
-       if (IS_ERR(priv->reset))
-               return PTR_ERR(priv->reset);
+       if (IS_ERR(priv->reset)) {
+               ret = PTR_ERR(priv->reset);
+               goto err_disable_clk_ref;
+       }
 
        priv->phy = devm_phy_create(dev, np, &phy_g12a_usb3_pcie_ops);
        if (IS_ERR(priv->phy)) {
                ret = PTR_ERR(priv->phy);
-               if (ret != -EPROBE_DEFER)
-                       dev_err(dev, "failed to create PHY\n");
-
-               return ret;
+               dev_err_probe(dev, ret, "failed to create PHY\n");
+               goto err_disable_clk_ref;
        }
 
        phy_set_drvdata(priv->phy, priv);
@@ -434,8 +434,12 @@ static int phy_g12a_usb3_pcie_probe(struct platform_device *pdev)
 
        phy_provider = devm_of_phy_provider_register(dev,
                                                     phy_g12a_usb3_pcie_xlate);
+       if (IS_ERR(phy_provider)) {
+               ret = PTR_ERR(phy_provider);
+               goto err_disable_clk_ref;
+       }
 
-       return PTR_ERR_OR_ZERO(phy_provider);
+       return 0;
 
 err_disable_clk_ref:
        clk_disable_unprepare(priv->clk_ref);
index 5172971f4c360a1ba73c073cec3a3bb82f0b5863..3cd4d51c247c3cf2950dee85ec0233bb5742c7f9 100644 (file)
@@ -629,7 +629,8 @@ idle:
 cleanup:
        if (error < 0)
                phy_mdm6600_device_power_off(ddata);
-
+       pm_runtime_disable(ddata->dev);
+       pm_runtime_dont_use_autosuspend(ddata->dev);
        return error;
 }
 
index 9ec234243f7c6fd29ca82a4b1c4464c58430340f..595adba5fb8f1351be2ee7791276ba732b7ec5e5 100644 (file)
@@ -187,6 +187,7 @@ static int exynos_sata_phy_probe(struct platform_device *pdev)
                return -EINVAL;
 
        sata_phy->client = of_find_i2c_device_by_node(node);
+       of_node_put(node);
        if (!sata_phy->client)
                return -EPROBE_DEFER;
 
@@ -195,20 +196,21 @@ static int exynos_sata_phy_probe(struct platform_device *pdev)
        sata_phy->phyclk = devm_clk_get(dev, "sata_phyctrl");
        if (IS_ERR(sata_phy->phyclk)) {
                dev_err(dev, "failed to get clk for PHY\n");
-               return PTR_ERR(sata_phy->phyclk);
+               ret = PTR_ERR(sata_phy->phyclk);
+               goto put_dev;
        }
 
        ret = clk_prepare_enable(sata_phy->phyclk);
        if (ret < 0) {
                dev_err(dev, "failed to enable source clk\n");
-               return ret;
+               goto put_dev;
        }
 
        sata_phy->phy = devm_phy_create(dev, NULL, &exynos_sata_phy_ops);
        if (IS_ERR(sata_phy->phy)) {
-               clk_disable_unprepare(sata_phy->phyclk);
                dev_err(dev, "failed to create PHY\n");
-               return PTR_ERR(sata_phy->phy);
+               ret = PTR_ERR(sata_phy->phy);
+               goto clk_disable;
        }
 
        phy_set_drvdata(sata_phy->phy, sata_phy);
@@ -216,11 +218,18 @@ static int exynos_sata_phy_probe(struct platform_device *pdev)
        phy_provider = devm_of_phy_provider_register(dev,
                                        of_phy_simple_xlate);
        if (IS_ERR(phy_provider)) {
-               clk_disable_unprepare(sata_phy->phyclk);
-               return PTR_ERR(phy_provider);
+               ret = PTR_ERR(phy_provider);
+               goto clk_disable;
        }
 
        return 0;
+
+clk_disable:
+       clk_disable_unprepare(sata_phy->phyclk);
+put_dev:
+       put_device(&sata_phy->client->dev);
+
+       return ret;
 }
 
 static const struct of_device_id exynos_sata_phy_of_match[] = {
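
The probe fixes in this and the neighbouring PHY drivers all converge on the same shape: unwind in exactly the reverse order of acquisition, and route every failure after a successful acquisition through the matching label. As a skeleton (acquire_*/release_* are placeholders for the clock, PHY, and provider steps above):

        static int toy_probe(struct device *dev)
        {
                int ret;

                ret = acquire_a(dev);           /* e.g. get + prepare a clock */
                if (ret)
                        return ret;

                ret = acquire_b(dev);           /* e.g. create a PHY */
                if (ret)
                        goto undo_a;

                ret = acquire_c(dev);           /* e.g. register a provider */
                if (ret)
                        goto undo_b;

                return 0;

        undo_b:
                release_b(dev);
        undo_a:
                release_a(dev);
                return ret;
        }
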
index c1211c4f863cadf650d838b4d7ec9997594cdee2..0be727bb9f79245f643ac90fab13494c4ca6299d 100644 (file)
@@ -838,7 +838,7 @@ static int serdes_am654_probe(struct platform_device *pdev)
 
 clk_err:
        of_clk_del_provider(node);
-
+       pm_runtime_disable(dev);
        return ret;
 }
 
index 3a505fe5715addae9a61ce079491a11ef163ce65..31a775877f6e3040df1b53b4b43f1e2571b009f8 100644 (file)
@@ -215,7 +215,7 @@ static int omap_usb2_enable_clocks(struct omap_usb *phy)
        return 0;
 
 err1:
-       clk_disable(phy->wkupclk);
+       clk_disable_unprepare(phy->wkupclk);
 
 err0:
        return ret;
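
This one-line omap-usb2 fix matters because clk_prepare_enable() bumps both the prepare and the enable counts; unwinding it with a bare clk_disable() leaves the clock permanently prepared. A sketch of the balanced pair, with placeholder clock names:

#include <linux/clk.h>

static int example_enable_clocks(struct clk *wkupclk, struct clk *optclk)
{
	int ret;

	ret = clk_prepare_enable(wkupclk);
	if (ret)
		return ret;

	ret = clk_prepare_enable(optclk);
	if (ret)
		goto err1;

	return 0;

err1:
	/* mirror clk_prepare_enable(); clk_disable() alone is unbalanced */
	clk_disable_unprepare(wkupclk);
	return ret;
}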
index 2cbc91e535d465bcf7e4772ca29f40438bed5357..f502c36f3be543071f96bac2851d2179ccad74a2 100644 (file)
@@ -696,6 +696,7 @@ static int ti_pipe3_get_sysctrl(struct ti_pipe3 *phy)
                }
 
                control_pdev = of_find_device_by_node(control_node);
+               of_node_put(control_node);
                if (!control_pdev) {
                        dev_err(dev, "Failed to get control device\n");
                        return -EINVAL;
index a0cdbcadf09e485ffbbf428500b2a81641dbd350..c3ab4b69ea680ff08a538cac8cad2a40ca8430f7 100644 (file)
@@ -155,7 +155,7 @@ static int tusb1210_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 }
 
 #ifdef CONFIG_POWER_SUPPLY
-const char * const tusb1210_chg_det_states[] = {
+static const char * const tusb1210_chg_det_states[] = {
        "CHG_DET_CONNECTING",
        "CHG_DET_START_DET",
        "CHG_DET_READ_DET",
@@ -537,12 +537,18 @@ static int tusb1210_probe(struct ulpi *ulpi)
        tusb1210_probe_charger_detect(tusb);
 
        tusb->phy = ulpi_phy_create(ulpi, &phy_ops);
-       if (IS_ERR(tusb->phy))
-               return PTR_ERR(tusb->phy);
+       if (IS_ERR(tusb->phy)) {
+               ret = PTR_ERR(tusb->phy);
+               goto err_remove_charger;
+       }
 
        phy_set_drvdata(tusb->phy, tusb);
        ulpi_set_drvdata(ulpi, tusb);
        return 0;
+
+err_remove_charger:
+       tusb1210_remove_charger_detect(tusb);
+       return ret;
 }
 
 static void tusb1210_remove(struct ulpi *ulpi)
index a3fa03bcd9a305775a270603faf5310326a0629d..80838dc54b3abb1eccbb15a3e749f7f94a526a12 100644 (file)
@@ -1236,18 +1236,17 @@ FUNC_GROUP_DECL(SALT8, AA12);
 FUNC_GROUP_DECL(WDTRST4, AA12);
 
 #define AE12 196
-SIG_EXPR_LIST_DECL_SEMG(AE12, FWSPIDQ2, FWQSPID, FWSPID,
-                       SIG_DESC_SET(SCU438, 4));
+SIG_EXPR_LIST_DECL_SESG(AE12, FWSPIQ2, FWQSPI, SIG_DESC_SET(SCU438, 4));
 SIG_EXPR_LIST_DECL_SESG(AE12, GPIOY4, GPIOY4);
-PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, FWSPIDQ2),
+PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, FWSPIQ2),
          SIG_EXPR_LIST_PTR(AE12, GPIOY4));
 
 #define AF12 197
-SIG_EXPR_LIST_DECL_SEMG(AF12, FWSPIDQ3, FWQSPID, FWSPID,
-                       SIG_DESC_SET(SCU438, 5));
+SIG_EXPR_LIST_DECL_SESG(AF12, FWSPIQ3, FWQSPI, SIG_DESC_SET(SCU438, 5));
 SIG_EXPR_LIST_DECL_SESG(AF12, GPIOY5, GPIOY5);
-PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, FWSPIDQ3),
+PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, FWSPIQ3),
          SIG_EXPR_LIST_PTR(AF12, GPIOY5));
+FUNC_GROUP_DECL(FWQSPI, AE12, AF12);
 
 #define AC12 198
 SSSF_PIN_DECL(AC12, GPIOY6, FWSPIABR, SIG_DESC_SET(SCU438, 6));
@@ -1520,9 +1519,8 @@ SIG_EXPR_LIST_DECL_SEMG(Y4, EMMCDAT7, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 3));
 PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, EMMCDAT7);
 
 GROUP_DECL(FWSPID, Y1, Y2, Y3, Y4);
-GROUP_DECL(FWQSPID, Y1, Y2, Y3, Y4, AE12, AF12);
 GROUP_DECL(EMMCG8, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5, Y1, Y2, Y3, Y4);
-FUNC_DECL_2(FWSPID, FWSPID, FWQSPID);
+FUNC_DECL_1(FWSPID, FWSPID);
 FUNC_GROUP_DECL(VB, Y1, Y2, Y3, Y4);
 FUNC_DECL_3(EMMC, EMMCG1, EMMCG4, EMMCG8);
 /*
@@ -1918,7 +1916,7 @@ static const struct aspeed_pin_group aspeed_g6_groups[] = {
        ASPEED_PINCTRL_GROUP(FSI2),
        ASPEED_PINCTRL_GROUP(FWSPIABR),
        ASPEED_PINCTRL_GROUP(FWSPID),
-       ASPEED_PINCTRL_GROUP(FWQSPID),
+       ASPEED_PINCTRL_GROUP(FWQSPI),
        ASPEED_PINCTRL_GROUP(FWSPIWP),
        ASPEED_PINCTRL_GROUP(GPIT0),
        ASPEED_PINCTRL_GROUP(GPIT1),
@@ -2160,6 +2158,7 @@ static const struct aspeed_pin_function aspeed_g6_functions[] = {
        ASPEED_PINCTRL_FUNC(FSI2),
        ASPEED_PINCTRL_FUNC(FWSPIABR),
        ASPEED_PINCTRL_FUNC(FWSPID),
+       ASPEED_PINCTRL_FUNC(FWQSPI),
        ASPEED_PINCTRL_FUNC(FWSPIWP),
        ASPEED_PINCTRL_FUNC(GPIT0),
        ASPEED_PINCTRL_FUNC(GPIT1),
index 32ba50efbcebcf020a2b660fbab0c52eb2431d9f..62dbd1e67513dcbe0e28125a420f537750e1c098 100644 (file)
 
 #include "pinctrl-intel.h"
 
-#define ADL_PAD_OWN    0x0a0
-#define ADL_PADCFGLOCK 0x110
-#define ADL_HOSTSW_OWN 0x150
-#define ADL_GPI_IS     0x200
-#define ADL_GPI_IE     0x220
+#define ADL_N_PAD_OWN          0x020
+#define ADL_N_PADCFGLOCK       0x080
+#define ADL_N_HOSTSW_OWN       0x0b0
+#define ADL_N_GPI_IS           0x100
+#define ADL_N_GPI_IE           0x120
+
+#define ADL_S_PAD_OWN          0x0a0
+#define ADL_S_PADCFGLOCK       0x110
+#define ADL_S_HOSTSW_OWN       0x150
+#define ADL_S_GPI_IS           0x200
+#define ADL_S_GPI_IE           0x220
 
 #define ADL_GPP(r, s, e, g)                            \
        {                                               \
                .gpio_base = (g),                       \
        }
 
-#define ADL_COMMUNITY(b, s, e, g)                      \
+#define ADL_N_COMMUNITY(b, s, e, g)                    \
+       {                                               \
+               .barno = (b),                           \
+               .padown_offset = ADL_N_PAD_OWN,         \
+               .padcfglock_offset = ADL_N_PADCFGLOCK,  \
+               .hostown_offset = ADL_N_HOSTSW_OWN,     \
+               .is_offset = ADL_N_GPI_IS,              \
+               .ie_offset = ADL_N_GPI_IE,              \
+               .pin_base = (s),                        \
+               .npins = ((e) - (s) + 1),               \
+               .gpps = (g),                            \
+               .ngpps = ARRAY_SIZE(g),                 \
+       }
+
+#define ADL_S_COMMUNITY(b, s, e, g)                    \
        {                                               \
                .barno = (b),                           \
-               .padown_offset = ADL_PAD_OWN,           \
-               .padcfglock_offset = ADL_PADCFGLOCK,    \
-               .hostown_offset = ADL_HOSTSW_OWN,       \
-               .is_offset = ADL_GPI_IS,                \
-               .ie_offset = ADL_GPI_IE,                \
+               .padown_offset = ADL_S_PAD_OWN,         \
+               .padcfglock_offset = ADL_S_PADCFGLOCK,  \
+               .hostown_offset = ADL_S_HOSTSW_OWN,     \
+               .is_offset = ADL_S_GPI_IS,              \
+               .ie_offset = ADL_S_GPI_IE,              \
                .pin_base = (s),                        \
                .npins = ((e) - (s) + 1),               \
                .gpps = (g),                            \
@@ -342,10 +362,10 @@ static const struct intel_padgroup adln_community5_gpps[] = {
 };
 
 static const struct intel_community adln_communities[] = {
-       ADL_COMMUNITY(0, 0, 66, adln_community0_gpps),
-       ADL_COMMUNITY(1, 67, 168, adln_community1_gpps),
-       ADL_COMMUNITY(2, 169, 248, adln_community4_gpps),
-       ADL_COMMUNITY(3, 249, 256, adln_community5_gpps),
+       ADL_N_COMMUNITY(0, 0, 66, adln_community0_gpps),
+       ADL_N_COMMUNITY(1, 67, 168, adln_community1_gpps),
+       ADL_N_COMMUNITY(2, 169, 248, adln_community4_gpps),
+       ADL_N_COMMUNITY(3, 249, 256, adln_community5_gpps),
 };
 
 static const struct intel_pinctrl_soc_data adln_soc_data = {
@@ -713,11 +733,11 @@ static const struct intel_padgroup adls_community5_gpps[] = {
 };
 
 static const struct intel_community adls_communities[] = {
-       ADL_COMMUNITY(0, 0, 94, adls_community0_gpps),
-       ADL_COMMUNITY(1, 95, 150, adls_community1_gpps),
-       ADL_COMMUNITY(2, 151, 199, adls_community3_gpps),
-       ADL_COMMUNITY(3, 200, 269, adls_community4_gpps),
-       ADL_COMMUNITY(4, 270, 303, adls_community5_gpps),
+       ADL_S_COMMUNITY(0, 0, 94, adls_community0_gpps),
+       ADL_S_COMMUNITY(1, 95, 150, adls_community1_gpps),
+       ADL_S_COMMUNITY(2, 151, 199, adls_community3_gpps),
+       ADL_S_COMMUNITY(3, 200, 269, adls_community4_gpps),
+       ADL_S_COMMUNITY(4, 270, 303, adls_community5_gpps),
 };
 
 static const struct intel_pinctrl_soc_data adls_soc_data = {
index 8dca1ef04965faa59562ccc6d97b0a2f6dff6ee5..40accd110c3d8fe74fad1b01ab0cada0ed94fe8a 100644 (file)
@@ -30,6 +30,7 @@ config PINCTRL_MTK_MOORE
        select GENERIC_PINMUX_FUNCTIONS
        select GPIOLIB
        select OF_GPIO
+       select EINT_MTK
        select PINCTRL_MTK_V2
 
 config PINCTRL_MTK_PARIS
index 727c65221aef9dc912a4adf828902cf0229eacd1..57f37a294063c5999ff73efacd3e27daa7da6463 100644 (file)
@@ -259,7 +259,7 @@ static const struct mtk_pin_ies_smt_set mt8365_ies_set[] = {
        MTK_PIN_IES_SMT_SPEC(104, 104, 0x420, 13),
        MTK_PIN_IES_SMT_SPEC(105, 109, 0x420, 14),
        MTK_PIN_IES_SMT_SPEC(110, 113, 0x420, 15),
-       MTK_PIN_IES_SMT_SPEC(114, 112, 0x420, 16),
+       MTK_PIN_IES_SMT_SPEC(114, 116, 0x420, 16),
        MTK_PIN_IES_SMT_SPEC(117, 119, 0x420, 17),
        MTK_PIN_IES_SMT_SPEC(120, 122, 0x420, 18),
        MTK_PIN_IES_SMT_SPEC(123, 125, 0x420, 19),
index 1a7d686494ffbd26f2eefde41d69630bd496c27b..0645c2c24f508b48ccb9909843ea206a1d2c1406 100644 (file)
@@ -387,6 +387,8 @@ static void amd_gpio_irq_enable(struct irq_data *d)
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
        struct amd_gpio *gpio_dev = gpiochip_get_data(gc);
 
+       gpiochip_enable_irq(gc, d->hwirq);
+
        raw_spin_lock_irqsave(&gpio_dev->lock, flags);
        pin_reg = readl(gpio_dev->base + (d->hwirq)*4);
        pin_reg |= BIT(INTERRUPT_ENABLE_OFF);
@@ -408,6 +410,8 @@ static void amd_gpio_irq_disable(struct irq_data *d)
        pin_reg &= ~BIT(INTERRUPT_MASK_OFF);
        writel(pin_reg, gpio_dev->base + (d->hwirq)*4);
        raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
+
+       gpiochip_disable_irq(gc, d->hwirq);
 }
 
 static void amd_gpio_irq_mask(struct irq_data *d)
@@ -577,7 +581,7 @@ static void amd_irq_ack(struct irq_data *d)
        */
 }
 
-static struct irq_chip amd_gpio_irqchip = {
+static const struct irq_chip amd_gpio_irqchip = {
        .name         = "amd_gpio",
        .irq_ack      = amd_irq_ack,
        .irq_enable   = amd_gpio_irq_enable,
@@ -593,7 +597,8 @@ static struct irq_chip amd_gpio_irqchip = {
         * the wake event. Otherwise the wake event will never clear and
         * prevent the system from suspending.
         */
-       .flags        = IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND,
+       .flags        = IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND | IRQCHIP_IMMUTABLE,
+       GPIOCHIP_IRQ_RESOURCE_HELPERS,
 };
 
 #define PIN_IRQ_PENDING        (BIT(INTERRUPT_STS_OFF) | BIT(WAKE_STS_OFF))
@@ -1026,7 +1031,7 @@ static int amd_gpio_probe(struct platform_device *pdev)
        amd_gpio_irq_init(gpio_dev);
 
        girq = &gpio_dev->gc.irq;
-       girq->chip = &amd_gpio_irqchip;
+       gpio_irq_chip_set_chip(girq, &amd_gpio_irqchip);
        /* This will let us handle the parent IRQ in the driver */
        girq->parent_handler = NULL;
        girq->num_parents = 0;
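
The amd pinctrl hunks above (and the apple and msm conversions that follow) are instances of the immutable-irqchip conversion: the irq_chip becomes const with IRQCHIP_IMMUTABLE and GPIOCHIP_IRQ_RESOURCE_HELPERS, the enable/disable hooks call gpiochip_enable_irq()/gpiochip_disable_irq() themselves, and registration goes through gpio_irq_chip_set_chip() instead of assigning girq->chip. A condensed sketch with placeholder names:

#include <linux/gpio/driver.h>
#include <linux/irq.h>

static void example_irq_enable(struct irq_data *d)
{
	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);

	gpiochip_enable_irq(gc, irqd_to_hwirq(d));
	/* ... then unmask the interrupt in hardware ... */
}

static void example_irq_disable(struct irq_data *d)
{
	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);

	/* ... mask the interrupt in hardware first ... */
	gpiochip_disable_irq(gc, irqd_to_hwirq(d));
}

static const struct irq_chip example_irqchip = {
	.name		= "example",
	.irq_enable	= example_irq_enable,
	.irq_disable	= example_irq_disable,
	.flags		= IRQCHIP_IMMUTABLE,
	GPIOCHIP_IRQ_RESOURCE_HELPERS,
};

/* at probe time, instead of girq->chip = &chip: */
/*	gpio_irq_chip_set_chip(&gc->irq, &example_irqchip);	*/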
index 72f4dd2466e119466735294cb3cb16708d7c6bd8..5e610849dfc3ebf5295adcf0b8d33c29d57dc8ed 100644 (file)
@@ -36,7 +36,6 @@ struct apple_gpio_pinctrl {
 
        struct pinctrl_desc pinctrl_desc;
        struct gpio_chip gpio_chip;
-       struct irq_chip irq_chip;
        u8 irqgrps[];
 };
 
@@ -275,17 +274,21 @@ static unsigned int apple_gpio_irq_type(unsigned int type)
 
 static void apple_gpio_irq_mask(struct irq_data *data)
 {
-       struct apple_gpio_pinctrl *pctl = gpiochip_get_data(irq_data_get_irq_chip_data(data));
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+       struct apple_gpio_pinctrl *pctl = gpiochip_get_data(gc);
 
        apple_gpio_set_reg(pctl, data->hwirq, REG_GPIOx_MODE,
                           FIELD_PREP(REG_GPIOx_MODE, REG_GPIOx_IN_IRQ_OFF));
+       gpiochip_disable_irq(gc, data->hwirq);
 }
 
 static void apple_gpio_irq_unmask(struct irq_data *data)
 {
-       struct apple_gpio_pinctrl *pctl = gpiochip_get_data(irq_data_get_irq_chip_data(data));
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+       struct apple_gpio_pinctrl *pctl = gpiochip_get_data(gc);
        unsigned int irqtype = apple_gpio_irq_type(irqd_get_trigger_type(data));
 
+       gpiochip_enable_irq(gc, data->hwirq);
        apple_gpio_set_reg(pctl, data->hwirq, REG_GPIOx_MODE,
                           FIELD_PREP(REG_GPIOx_MODE, irqtype));
 }
@@ -343,13 +346,15 @@ static void apple_gpio_irq_handler(struct irq_desc *desc)
        chained_irq_exit(chip, desc);
 }
 
-static struct irq_chip apple_gpio_irqchip = {
-       .name           = "Apple-GPIO",
-       .irq_startup    = apple_gpio_irq_startup,
-       .irq_ack        = apple_gpio_irq_ack,
-       .irq_mask       = apple_gpio_irq_mask,
-       .irq_unmask     = apple_gpio_irq_unmask,
-       .irq_set_type   = apple_gpio_irq_set_type,
+static const struct irq_chip apple_gpio_irqchip = {
+       .name                   = "Apple-GPIO",
+       .irq_startup            = apple_gpio_irq_startup,
+       .irq_ack                = apple_gpio_irq_ack,
+       .irq_mask               = apple_gpio_irq_mask,
+       .irq_unmask             = apple_gpio_irq_unmask,
+       .irq_set_type           = apple_gpio_irq_set_type,
+       .flags                  = IRQCHIP_IMMUTABLE,
+       GPIOCHIP_IRQ_RESOURCE_HELPERS,
 };
 
 /* Probe & register */
@@ -360,8 +365,6 @@ static int apple_gpio_register(struct apple_gpio_pinctrl *pctl)
        void **irq_data = NULL;
        int ret;
 
-       pctl->irq_chip = apple_gpio_irqchip;
-
        pctl->gpio_chip.label = dev_name(pctl->dev);
        pctl->gpio_chip.request = gpiochip_generic_request;
        pctl->gpio_chip.free = gpiochip_generic_free;
@@ -377,7 +380,7 @@ static int apple_gpio_register(struct apple_gpio_pinctrl *pctl)
        if (girq->num_parents) {
                int i;
 
-               girq->chip = &pctl->irq_chip;
+               gpio_irq_chip_set_chip(girq, &apple_gpio_irqchip);
                girq->parent_handler = apple_gpio_irq_handler;
 
                girq->parents = kmalloc_array(girq->num_parents,
index 003fb0e341537bd3efce6fe993bf0359f2b13174..6a956ee94494f50696fca8fb0b3d981b1b1dfb98 100644 (file)
@@ -129,6 +129,7 @@ enum {
        FUNC_PTP1,
        FUNC_PTP2,
        FUNC_PTP3,
+       FUNC_PTPSYNC_0,
        FUNC_PTPSYNC_1,
        FUNC_PTPSYNC_2,
        FUNC_PTPSYNC_3,
@@ -252,6 +253,7 @@ static const char *const ocelot_function_names[] = {
        [FUNC_PTP1]             = "ptp1",
        [FUNC_PTP2]             = "ptp2",
        [FUNC_PTP3]             = "ptp3",
+       [FUNC_PTPSYNC_0]        = "ptpsync_0",
        [FUNC_PTPSYNC_1]        = "ptpsync_1",
        [FUNC_PTPSYNC_2]        = "ptpsync_2",
        [FUNC_PTPSYNC_3]        = "ptpsync_3",
@@ -983,7 +985,7 @@ LAN966X_P(31,   GPIO,   FC3_c,     CAN1,      NONE,   OB_TRG,   RECO_b,      NON
 LAN966X_P(32,   GPIO,   FC3_c,     NONE,   SGPIO_a,     NONE,  MIIM_Sa,      NONE,        R);
 LAN966X_P(33,   GPIO,   FC1_b,     NONE,   SGPIO_a,     NONE,  MIIM_Sa,    MIIM_b,        R);
 LAN966X_P(34,   GPIO,   FC1_b,     NONE,   SGPIO_a,     NONE,  MIIM_Sa,    MIIM_b,        R);
-LAN966X_P(35,   GPIO,   FC1_b,     NONE,   SGPIO_a,   CAN0_b,     NONE,      NONE,        R);
+LAN966X_P(35,   GPIO,   FC1_b,  PTPSYNC_0, SGPIO_a,   CAN0_b,     NONE,      NONE,        R);
 LAN966X_P(36,   GPIO,    NONE,  PTPSYNC_1,    NONE,   CAN0_b,     NONE,      NONE,        R);
 LAN966X_P(37,   GPIO, FC_SHRD0, PTPSYNC_2, TWI_SLC_GATE_AD, NONE, NONE,      NONE,        R);
 LAN966X_P(38,   GPIO,    NONE,  PTPSYNC_3,    NONE,     NONE,     NONE,      NONE,        R);
index 8d271c6b0ca4103bc325440f6a86b95ae9bb654b..5de691c630b4fc77e044eac5e874aa06ba3df402 100644 (file)
@@ -1374,10 +1374,10 @@ static int pistachio_gpio_register(struct pistachio_pinctrl *pctl)
                }
 
                irq = irq_of_parse_and_map(child, 0);
-               if (irq < 0) {
-                       dev_err(pctl->dev, "No IRQ for bank %u: %d\n", i, irq);
+               if (!irq) {
+                       dev_err(pctl->dev, "No IRQ for bank %u\n", i);
                        of_node_put(child);
-                       ret = irq;
+                       ret = -EINVAL;
                        goto err;
                }
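
The pistachio fix corrects a common misuse: irq_of_parse_and_map() returns an unsigned virq number and 0 on failure, never a negative errno, so the old `if (irq < 0)` check could never fire. The corrected check, sketched:

#include <linux/of_irq.h>

	unsigned int irq = irq_of_parse_and_map(child, 0);

	if (!irq) {			/* 0 means "no mapping", not -Exxx */
		of_node_put(child);
		return -EINVAL;		/* pick an errno explicitly */
	}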
 
index a1b598b86aa9f400133fe0649db24a7b5cfa7f82..2cb79e649fcf31e96be5d6d99fd20ec43e7fc512 100644 (file)
@@ -457,95 +457,110 @@ static  struct rockchip_mux_recalced_data rk3128_mux_recalced_data[] = {
 
 static struct rockchip_mux_recalced_data rk3308_mux_recalced_data[] = {
        {
+               /* gpio1b6_sel */
                .num = 1,
                .pin = 14,
                .reg = 0x28,
                .bit = 12,
                .mask = 0xf
        }, {
+               /* gpio1b7_sel */
                .num = 1,
                .pin = 15,
                .reg = 0x2c,
                .bit = 0,
                .mask = 0x3
        }, {
+               /* gpio1c2_sel */
                .num = 1,
                .pin = 18,
                .reg = 0x30,
                .bit = 4,
                .mask = 0xf
        }, {
+               /* gpio1c3_sel */
                .num = 1,
                .pin = 19,
                .reg = 0x30,
                .bit = 8,
                .mask = 0xf
        }, {
+               /* gpio1c4_sel */
                .num = 1,
                .pin = 20,
                .reg = 0x30,
                .bit = 12,
                .mask = 0xf
        }, {
+               /* gpio1c5_sel */
                .num = 1,
                .pin = 21,
                .reg = 0x34,
                .bit = 0,
                .mask = 0xf
        }, {
+               /* gpio1c6_sel */
                .num = 1,
                .pin = 22,
                .reg = 0x34,
                .bit = 4,
                .mask = 0xf
        }, {
+               /* gpio1c7_sel */
                .num = 1,
                .pin = 23,
                .reg = 0x34,
                .bit = 8,
                .mask = 0xf
        }, {
-               .num = 3,
-               .pin = 12,
-               .reg = 0x68,
-               .bit = 8,
-               .mask = 0xf
-       }, {
-               .num = 3,
-               .pin = 13,
-               .reg = 0x68,
-               .bit = 12,
-               .mask = 0xf
-       }, {
+               /* gpio2a2_sel */
                .num = 2,
                .pin = 2,
-               .reg = 0x608,
-               .bit = 0,
-               .mask = 0x7
+               .reg = 0x40,
+               .bit = 4,
+               .mask = 0x3
        }, {
+               /* gpio2a3_sel */
                .num = 2,
                .pin = 3,
-               .reg = 0x608,
-               .bit = 4,
-               .mask = 0x7
+               .reg = 0x40,
+               .bit = 6,
+               .mask = 0x3
        }, {
+               /* gpio2c0_sel */
                .num = 2,
                .pin = 16,
-               .reg = 0x610,
-               .bit = 8,
-               .mask = 0x7
+               .reg = 0x50,
+               .bit = 0,
+               .mask = 0x3
        }, {
+               /* gpio3b2_sel */
                .num = 3,
                .pin = 10,
-               .reg = 0x610,
-               .bit = 0,
-               .mask = 0x7
+               .reg = 0x68,
+               .bit = 4,
+               .mask = 0x3
        }, {
+               /* gpio3b3_sel */
                .num = 3,
                .pin = 11,
-               .reg = 0x610,
-               .bit = 4,
-               .mask = 0x7
+               .reg = 0x68,
+               .bit = 6,
+               .mask = 0x3
+       }, {
+               /* gpio3b4_sel */
+               .num = 3,
+               .pin = 12,
+               .reg = 0x68,
+               .bit = 8,
+               .mask = 0xf
+       }, {
+               /* gpio3b5_sel */
+               .num = 3,
+               .pin = 13,
+               .reg = 0x68,
+               .bit = 12,
+               .mask = 0xf
        },
 };
 
index 966ea6622ff3ce860dff48c8c6f4fa18dcafa620..a2abfe987ab123c148c9b792ecadefa149989cf6 100644 (file)
@@ -42,7 +42,6 @@
  * @chip:           gpiochip handle.
  * @desc:           pin controller descriptor
  * @restart_nb:     restart notifier block.
- * @irq_chip:       irq chip information
  * @irq:            parent irq for the TLMM irq_chip.
  * @intr_target_use_scm: route irq to application cpu using scm calls
  * @lock:           Spinlock to protect register resources as well
@@ -63,7 +62,6 @@ struct msm_pinctrl {
        struct pinctrl_desc desc;
        struct notifier_block restart_nb;
 
-       struct irq_chip irq_chip;
        int irq;
 
        bool intr_target_use_scm;
@@ -868,6 +866,8 @@ static void msm_gpio_irq_enable(struct irq_data *d)
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
        struct msm_pinctrl *pctrl = gpiochip_get_data(gc);
 
+       gpiochip_enable_irq(gc, d->hwirq);
+
        if (d->parent_data)
                irq_chip_enable_parent(d);
 
@@ -885,6 +885,8 @@ static void msm_gpio_irq_disable(struct irq_data *d)
 
        if (!test_bit(d->hwirq, pctrl->skip_wake_irqs))
                msm_gpio_irq_mask(d);
+
+       gpiochip_disable_irq(gc, d->hwirq);
 }
 
 /**
@@ -958,6 +960,14 @@ static void msm_gpio_irq_ack(struct irq_data *d)
        raw_spin_unlock_irqrestore(&pctrl->lock, flags);
 }
 
+static void msm_gpio_irq_eoi(struct irq_data *d)
+{
+       d = d->parent_data;
+
+       if (d)
+               d->chip->irq_eoi(d);
+}
+
 static bool msm_gpio_needs_dual_edge_parent_workaround(struct irq_data *d,
                                                       unsigned int type)
 {
@@ -1255,6 +1265,26 @@ static bool msm_gpio_needs_valid_mask(struct msm_pinctrl *pctrl)
        return device_property_count_u16(pctrl->dev, "gpios") > 0;
 }
 
+static const struct irq_chip msm_gpio_irq_chip = {
+       .name                   = "msmgpio",
+       .irq_enable             = msm_gpio_irq_enable,
+       .irq_disable            = msm_gpio_irq_disable,
+       .irq_mask               = msm_gpio_irq_mask,
+       .irq_unmask             = msm_gpio_irq_unmask,
+       .irq_ack                = msm_gpio_irq_ack,
+       .irq_eoi                = msm_gpio_irq_eoi,
+       .irq_set_type           = msm_gpio_irq_set_type,
+       .irq_set_wake           = msm_gpio_irq_set_wake,
+       .irq_request_resources  = msm_gpio_irq_reqres,
+       .irq_release_resources  = msm_gpio_irq_relres,
+       .irq_set_affinity       = msm_gpio_irq_set_affinity,
+       .irq_set_vcpu_affinity  = msm_gpio_irq_set_vcpu_affinity,
+       .flags                  = (IRQCHIP_MASK_ON_SUSPEND |
+                                  IRQCHIP_SET_TYPE_MASKED |
+                                  IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND |
+                                  IRQCHIP_IMMUTABLE),
+};
+
 static int msm_gpio_init(struct msm_pinctrl *pctrl)
 {
        struct gpio_chip *chip;
@@ -1276,22 +1306,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
        if (msm_gpio_needs_valid_mask(pctrl))
                chip->init_valid_mask = msm_gpio_init_valid_mask;
 
-       pctrl->irq_chip.name = "msmgpio";
-       pctrl->irq_chip.irq_enable = msm_gpio_irq_enable;
-       pctrl->irq_chip.irq_disable = msm_gpio_irq_disable;
-       pctrl->irq_chip.irq_mask = msm_gpio_irq_mask;
-       pctrl->irq_chip.irq_unmask = msm_gpio_irq_unmask;
-       pctrl->irq_chip.irq_ack = msm_gpio_irq_ack;
-       pctrl->irq_chip.irq_set_type = msm_gpio_irq_set_type;
-       pctrl->irq_chip.irq_set_wake = msm_gpio_irq_set_wake;
-       pctrl->irq_chip.irq_request_resources = msm_gpio_irq_reqres;
-       pctrl->irq_chip.irq_release_resources = msm_gpio_irq_relres;
-       pctrl->irq_chip.irq_set_affinity = msm_gpio_irq_set_affinity;
-       pctrl->irq_chip.irq_set_vcpu_affinity = msm_gpio_irq_set_vcpu_affinity;
-       pctrl->irq_chip.flags = IRQCHIP_MASK_ON_SUSPEND |
-                               IRQCHIP_SET_TYPE_MASKED |
-                               IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND;
-
        np = of_parse_phandle(pctrl->dev->of_node, "wakeup-parent", 0);
        if (np) {
                chip->irq.parent_domain = irq_find_matching_host(np,
@@ -1300,7 +1314,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
                if (!chip->irq.parent_domain)
                        return -EPROBE_DEFER;
                chip->irq.child_to_parent_hwirq = msm_gpio_wakeirq;
-               pctrl->irq_chip.irq_eoi = irq_chip_eoi_parent;
                /*
                 * Let's skip handling the GPIOs, if the parent irqchip
                 * is handling the direct connect IRQ of the GPIO.
@@ -1313,7 +1326,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
        }
 
        girq = &chip->irq;
-       girq->chip = &pctrl->irq_chip;
+       gpio_irq_chip_set_chip(girq, &msm_gpio_irq_chip);
        girq->parent_handler = msm_gpio_irq_handler;
        girq->fwnode = pctrl->dev->fwnode;
        girq->num_parents = 1;
index 4d37b817b2328827124cfa66c66b78780228ef8b..a91a86628f2f87fec7d2cecca8e60b0967f27448 100644 (file)
@@ -264,14 +264,14 @@ static const struct pinctrl_pin_desc sm6350_pins[] = {
        PINCTRL_PIN(153, "GPIO_153"),
        PINCTRL_PIN(154, "GPIO_154"),
        PINCTRL_PIN(155, "GPIO_155"),
-       PINCTRL_PIN(156, "SDC1_RCLK"),
-       PINCTRL_PIN(157, "SDC1_CLK"),
-       PINCTRL_PIN(158, "SDC1_CMD"),
-       PINCTRL_PIN(159, "SDC1_DATA"),
-       PINCTRL_PIN(160, "SDC2_CLK"),
-       PINCTRL_PIN(161, "SDC2_CMD"),
-       PINCTRL_PIN(162, "SDC2_DATA"),
-       PINCTRL_PIN(163, "UFS_RESET"),
+       PINCTRL_PIN(156, "UFS_RESET"),
+       PINCTRL_PIN(157, "SDC1_RCLK"),
+       PINCTRL_PIN(158, "SDC1_CLK"),
+       PINCTRL_PIN(159, "SDC1_CMD"),
+       PINCTRL_PIN(160, "SDC1_DATA"),
+       PINCTRL_PIN(161, "SDC2_CLK"),
+       PINCTRL_PIN(162, "SDC2_CMD"),
+       PINCTRL_PIN(163, "SDC2_DATA"),
 };
 
 #define DECLARE_MSM_GPIO_PINS(pin) \
index dfd805e7686244c1b7faf5d4293db607503a6fbe..7b0576f71376e6ceb376030b68072a5d9c066538 100644 (file)
@@ -4,14 +4,13 @@
 #
 config PINCTRL_SAMSUNG
        bool
-       depends on OF_GPIO
+       select GPIOLIB
        select PINMUX
        select PINCONF
 
 config PINCTRL_EXYNOS
        bool "Pinctrl common driver part for Samsung Exynos SoCs"
-       depends on OF_GPIO
-       depends on ARCH_EXYNOS || ARCH_S5PV210 || COMPILE_TEST
+       depends on ARCH_EXYNOS || ARCH_S5PV210 || (COMPILE_TEST && OF)
        select PINCTRL_SAMSUNG
        select PINCTRL_EXYNOS_ARM if ARM && (ARCH_EXYNOS || ARCH_S5PV210)
        select PINCTRL_EXYNOS_ARM64 if ARM64 && ARCH_EXYNOS
@@ -26,12 +25,10 @@ config PINCTRL_EXYNOS_ARM64
 
 config PINCTRL_S3C24XX
        bool "Samsung S3C24XX SoC pinctrl driver"
-       depends on OF_GPIO
-       depends on ARCH_S3C24XX || COMPILE_TEST
+       depends on ARCH_S3C24XX || (COMPILE_TEST && OF)
        select PINCTRL_SAMSUNG
 
 config PINCTRL_S3C64XX
        bool "Samsung S3C64XX SoC pinctrl driver"
-       depends on OF_GPIO
-       depends on ARCH_S3C64XX || COMPILE_TEST
+       depends on ARCH_S3C64XX || (COMPILE_TEST && OF)
        select PINCTRL_SAMSUNG
index d291819c2f77c0a07114586d68e00654462a99d5..cb965cf9370575027126fc8a5120ed0e9c65e22b 100644 (file)
@@ -770,7 +770,7 @@ static const struct samsung_pin_bank_data fsd_pin_banks2[] __initconst = {
        EXYNOS850_PIN_BANK_EINTN(3, 0x00, "gpq0"),
 };
 
-const struct samsung_pin_ctrl fsd_pin_ctrl[] __initconst = {
+static const struct samsung_pin_ctrl fsd_pin_ctrl[] __initconst = {
        {
                /* pin-controller instance 0 FSYS0 data */
                .pin_banks      = fsd_pin_banks0,
index 9ed76473157076d2b3531b657e3bc4fa1043a151..f7c9459f66283b1dfdf919c5bf8a8bf8db86e31a 100644 (file)
@@ -225,6 +225,13 @@ static void stm32_gpio_free(struct gpio_chip *chip, unsigned offset)
        pinctrl_gpio_free(chip->base + offset);
 }
 
+static int stm32_gpio_get_noclk(struct gpio_chip *chip, unsigned int offset)
+{
+       struct stm32_gpio_bank *bank = gpiochip_get_data(chip);
+
+       return !!(readl_relaxed(bank->base + STM32_GPIO_IDR) & BIT(offset));
+}
+
 static int stm32_gpio_get(struct gpio_chip *chip, unsigned offset)
 {
        struct stm32_gpio_bank *bank = gpiochip_get_data(chip);
@@ -232,7 +239,7 @@ static int stm32_gpio_get(struct gpio_chip *chip, unsigned offset)
 
        clk_enable(bank->clk);
 
-       ret = !!(readl_relaxed(bank->base + STM32_GPIO_IDR) & BIT(offset));
+       ret = stm32_gpio_get_noclk(chip, offset);
 
        clk_disable(bank->clk);
 
@@ -311,8 +318,12 @@ static void stm32_gpio_irq_trigger(struct irq_data *d)
        struct stm32_gpio_bank *bank = d->domain->host_data;
        int level;
 
+       /* Do not access the GPIO if this is not a level-triggered IRQ. */
+       if (!(bank->irq_type[d->hwirq] & IRQ_TYPE_LEVEL_MASK))
+               return;
+
        /* If level interrupt type then retrig */
-       level = stm32_gpio_get(&bank->gpio_chip, d->hwirq);
+       level = stm32_gpio_get_noclk(&bank->gpio_chip, d->hwirq);
        if ((level == 0 && bank->irq_type[d->hwirq] == IRQ_TYPE_LEVEL_LOW) ||
            (level == 1 && bank->irq_type[d->hwirq] == IRQ_TYPE_LEVEL_HIGH))
                irq_chip_retrigger_hierarchy(d);
@@ -354,6 +365,7 @@ static int stm32_gpio_irq_request_resources(struct irq_data *irq_data)
 {
        struct stm32_gpio_bank *bank = irq_data->domain->host_data;
        struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent);
+       unsigned long flags;
        int ret;
 
        ret = stm32_gpio_direction_input(&bank->gpio_chip, irq_data->hwirq);
@@ -367,6 +379,10 @@ static int stm32_gpio_irq_request_resources(struct irq_data *irq_data)
                return ret;
        }
 
+       flags = irqd_get_trigger_type(irq_data);
+       if (flags & IRQ_TYPE_LEVEL_MASK)
+               clk_enable(bank->clk);
+
        return 0;
 }
 
@@ -374,6 +390,9 @@ static void stm32_gpio_irq_release_resources(struct irq_data *irq_data)
 {
        struct stm32_gpio_bank *bank = irq_data->domain->host_data;
 
+       if (bank->irq_type[irq_data->hwirq] & IRQ_TYPE_LEVEL_MASK)
+               clk_disable(bank->clk);
+
        gpiochip_unlock_as_irq(&bank->gpio_chip, irq_data->hwirq);
 }
 
index 9748345b9298fcf2d150de0a93349917be11439f..cd657760a644721d52c86ca70928b8a7eb0669c8 100644 (file)
@@ -419,7 +419,15 @@ static const struct sppctl_grp sp7021grps_prbp[] = {
        EGRP("PROBE_PORT2", 2, pins_prp2),
 };
 
+/*
+ * For compatibility reasons, the first valid item must start at the third
+ * position of the array, so keep the first two entries of the table
+ * unused (dummy).
+ */
 const struct sppctl_func sppctl_list_funcs[] = {
+       FNCN("", pinmux_type_fpmx, 0x00, 0, 0),
+       FNCN("", pinmux_type_fpmx, 0x00, 0, 0),
+
        FNCN("L2SW_CLK_OUT",        pinmux_type_fpmx, 0x00, 0, 7),
        FNCN("L2SW_MAC_SMI_MDC",    pinmux_type_fpmx, 0x00, 8, 7),
        FNCN("L2SW_LED_FLASH0",     pinmux_type_fpmx, 0x01, 0, 7),
index 2801ca706273254768393fe1136916e0c38aa17a..b8fc88a23cf4b6800c5eb7d5ad87668d60ccb6c4 100644 (file)
@@ -51,7 +51,7 @@ static const struct sunxi_desc_pin suniv_f1c100s_pins[] = {
                  SUNXI_FUNCTION(0x3, "pwm0"),          /* PWM0 */
                  SUNXI_FUNCTION(0x4, "i2s"),           /* IN */
                  SUNXI_FUNCTION(0x5, "uart1"),         /* RX */
-                 SUNXI_FUNCTION(0x6, "spi1")),         /* MOSI */
+                 SUNXI_FUNCTION(0x6, "spi1")),         /* CLK */
        SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 3),
                  SUNXI_FUNCTION(0x0, "gpio_in"),
                  SUNXI_FUNCTION(0x1, "gpio_out"),
@@ -204,7 +204,7 @@ static const struct sunxi_desc_pin suniv_f1c100s_pins[] = {
                  SUNXI_FUNCTION(0x0, "gpio_in"),
                  SUNXI_FUNCTION(0x1, "gpio_out"),
                  SUNXI_FUNCTION(0x2, "lcd"),           /* D20 */
-                 SUNXI_FUNCTION(0x3, "lvds1"),         /* RX */
+                 SUNXI_FUNCTION(0x3, "uart2"),         /* RX */
                  SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 14)),
        SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 15),
                  SUNXI_FUNCTION(0x0, "gpio_in"),
index d384d36098c270c93dae5a93330ae286492b7e5e..a62c5dfe42d64395a97bafba04e88d632615034d 100644 (file)
@@ -817,7 +817,7 @@ err_cpkg:
 err_bus:
        return status;
 }
-module_init(ssam_core_init);
+subsys_initcall(ssam_core_init);
 
 static void __exit ssam_core_exit(void)
 {
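
Moving ssam_core_init() from module_init() to subsys_initcall() starts the Surface Aggregator bus earlier in the built-in initcall sequence, so device_initcall()-level client drivers can find it at probe time. A minimal sketch of the level difference, with an illustrative bus:

#include <linux/device.h>
#include <linux/init.h>

static struct bus_type example_bus_type = {
	.name = "example-bus",
};

static int __init example_bus_init(void)
{
	/* subsys_initcall runs before module_init() == device_initcall() */
	return bus_register(&example_bus_type);
}
subsys_initcall(example_bus_init);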
index c1775db29efb67f0c192490ac3542e7492b313fa..ec66fde28e75a873cd728664b6955ca2508952b9 100644 (file)
@@ -99,6 +99,14 @@ static const struct dmi_system_id dmi_lid_device_table[] = {
                },
                .driver_data = (void *)lid_device_props_l4D,
        },
+       {
+               .ident = "Surface Pro 8",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 8"),
+               },
+               .driver_data = (void *)lid_device_props_l4B,
+       },
        {
                .ident = "Surface Book 1",
                .matches = {
index 6b8b3ab8db486c483ce146bfdfeb8bc5f216fc46..3463629f87640a2efcaf477a731e5a9058a4fe14 100644 (file)
@@ -584,21 +584,6 @@ static struct platform_driver acerhdf_driver = {
        .remove = acerhdf_remove,
 };
 
-/* checks if str begins with start */
-static int str_starts_with(const char *str, const char *start)
-{
-       unsigned long str_len = 0, start_len = 0;
-
-       str_len = strlen(str);
-       start_len = strlen(start);
-
-       if (str_len >= start_len &&
-                       !strncmp(str, start, start_len))
-               return 1;
-
-       return 0;
-}
-
 /* check hardware */
 static int __init acerhdf_check_hardware(void)
 {
@@ -651,9 +636,9 @@ static int __init acerhdf_check_hardware(void)
                 * check if actual hardware BIOS vendor, product and version
                 * IDs start with the strings of BIOS table entry
                 */
-               if (str_starts_with(vendor, bt->vendor) &&
-                               str_starts_with(product, bt->product) &&
-                               str_starts_with(version, bt->version)) {
+               if (strstarts(vendor, bt->vendor) &&
+                   strstarts(product, bt->product) &&
+                   strstarts(version, bt->version)) {
                        found = 1;
                        break;
                }
index e9d0dbbb28870d2f103c79a8a125709a6e332819..fa4123dbdf7ff6a407f488c86c130fba1bcf771b 100644 (file)
@@ -160,8 +160,10 @@ MODULE_PARM_DESC(enable_stb, "Enable the STB debug mechanism");
 
 static struct amd_pmc_dev pmc;
 static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, u32 arg, u32 *data, u8 msg, bool ret);
-static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data);
 static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf);
+#ifdef CONFIG_SUSPEND
+static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data);
+#endif
 
 static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset)
 {
@@ -325,6 +327,7 @@ static int get_metrics_table(struct amd_pmc_dev *pdev, struct smu_metrics *table
        return 0;
 }
 
+#ifdef CONFIG_SUSPEND
 static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev)
 {
        struct smu_metrics table;
@@ -338,6 +341,7 @@ static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev)
                dev_dbg(pdev->dev, "Last suspend in deepest state for %lluus\n",
                         table.timein_s0i3_lastcapture);
 }
+#endif
 
 #ifdef CONFIG_DEBUG_FS
 static int smu_fw_info_show(struct seq_file *s, void *unused)
@@ -569,6 +573,7 @@ out_unlock:
        return rc;
 }
 
+#ifdef CONFIG_SUSPEND
 static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev)
 {
        switch (dev->cpu_id) {
@@ -694,6 +699,7 @@ static struct acpi_s2idle_dev_ops amd_pmc_s2idle_dev_ops = {
        .prepare = amd_pmc_s2idle_prepare,
        .restore = amd_pmc_s2idle_restore,
 };
+#endif
 
 static const struct pci_device_id pmc_pci_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) },
@@ -733,6 +739,7 @@ static int amd_pmc_s2d_init(struct amd_pmc_dev *dev)
        return 0;
 }
 
+#ifdef CONFIG_SUSPEND
 static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data)
 {
        int err;
@@ -753,6 +760,7 @@ static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data)
 
        return 0;
 }
+#endif
 
 static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf)
 {
@@ -859,9 +867,11 @@ static int amd_pmc_probe(struct platform_device *pdev)
 
        amd_pmc_get_smu_version(dev);
        platform_set_drvdata(pdev, dev);
+#ifdef CONFIG_SUSPEND
        err = acpi_register_lps0_dev(&amd_pmc_s2idle_dev_ops);
        if (err)
                dev_warn(dev->dev, "failed to register LPS0 sleep handler, expect increased power consumption\n");
+#endif
 
        amd_pmc_dbgfs_register(dev);
        return 0;
@@ -875,7 +885,9 @@ static int amd_pmc_remove(struct platform_device *pdev)
 {
        struct amd_pmc_dev *dev = platform_get_drvdata(pdev);
 
+#ifdef CONFIG_SUSPEND
        acpi_unregister_lps0_dev(&amd_pmc_s2idle_dev_ops);
+#endif
        amd_pmc_dbgfs_unregister(dev);
        pci_dev_put(dev->rdev);
        mutex_destroy(&dev->lock);
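
The amd-pmc hunks wrap every suspend-only symbol in #ifdef CONFIG_SUSPEND so that a CONFIG_SUSPEND=n build neither references the LPS0 callbacks nor warns about defined-but-unused helpers. The shape of the guard, reduced to a hypothetical driver:

#include <linux/acpi.h>
#include <linux/platform_device.h>

#ifdef CONFIG_SUSPEND
static void example_s2idle_restore(void)
{
	/* suspend-only work; compiled out with CONFIG_SUSPEND=n */
}

static struct acpi_s2idle_dev_ops example_s2idle_dev_ops = {
	.restore = example_s2idle_restore,
};
#endif

static int example_probe(struct platform_device *pdev)
{
#ifdef CONFIG_SUSPEND
	if (acpi_register_lps0_dev(&example_s2idle_dev_ops))
		dev_warn(&pdev->dev, "failed to register LPS0 sleep handler\n");
#endif
	return 0;
}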
index 2104a2621e5070e1c963576b77e29c7afff06e4d..0e7fbed8a50d6ff50ab4b69e2b326182450728f8 100644 (file)
@@ -371,10 +371,14 @@ static int asus_wmi_evaluate_method_buf(u32 method_id,
 
        switch (obj->type) {
        case ACPI_TYPE_BUFFER:
-               if (obj->buffer.length > size)
+               if (obj->buffer.length > size) {
                        err = -ENOSPC;
-               if (obj->buffer.length == 0)
+                       break;
+               }
+               if (obj->buffer.length == 0) {
                        err = -ENODATA;
+                       break;
+               }
 
                memcpy(ret_buffer, obj->buffer.pointer, obj->buffer.length);
                break;
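
Before this asus-wmi fix, both error assignments fell through to the memcpy(), so an oversized ACPI buffer would still be copied past the end of ret_buffer. The corrected control flow, reduced to the case body shown in the hunk:

	case ACPI_TYPE_BUFFER:
		if (obj->buffer.length > size) {
			err = -ENOSPC;
			break;		/* never copy past the caller's buffer */
		}
		if (obj->buffer.length == 0) {
			err = -ENODATA;
			break;		/* nothing to copy */
		}

		memcpy(ret_buffer, obj->buffer.pointer, obj->buffer.length);
		break;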
@@ -2223,9 +2227,10 @@ static int fan_curve_check_present(struct asus_wmi *asus, bool *available,
 
        err = fan_curve_get_factory_default(asus, fan_dev);
        if (err) {
-               if (err == -ENODEV || err == -ENODATA)
-                       return 0;
-               return err;
+               pr_debug("fan_curve_get_factory_default(0x%08x) failed: %d\n",
+                        fan_dev, err);
+               /* Don't cause probe to fail on devices without fan-curves */
+               return 0;
        }
 
        *available = true;
index f5c72e33f9ae340b5e2aa75d2cd3694e5bfa19e4..05534287bc26b65db179ab0e29f83508ae5e4b87 100644 (file)
@@ -10,7 +10,6 @@
 
 #define pr_fmt(fmt)    KBUILD_MODNAME ": " fmt
 
-#include <linux/io.h>
 #include <linux/delay.h>
 #include <linux/dmi.h>
 #include <linux/err.h>
index 8230e7a68a5ebdf1b81f3974c44330503725e87d..1321687d923ed5e7d81738e4bf7e7baf1264f7c1 100644 (file)
@@ -80,6 +80,10 @@ static struct quirk_entry quirk_dell_inspiron_1012 = {
        .kbd_led_not_present = true,
 };
 
+static struct quirk_entry quirk_dell_latitude_7520 = {
+       .kbd_missing_ac_tag = true,
+};
+
 static struct platform_driver platform_driver = {
        .driver = {
                .name = "dell-laptop",
@@ -336,6 +340,15 @@ static const struct dmi_system_id dell_quirks[] __initconst = {
                },
                .driver_data = &quirk_dell_inspiron_1012,
        },
+       {
+               .callback = dmi_matched,
+               .ident = "Dell Latitude 7520",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Latitude 7520"),
+               },
+               .driver_data = &quirk_dell_latitude_7520,
+       },
        { }
 };
 
index 658bab4b79648b7cdbcb9455c6053e2c322b0c8f..e87a931eab1e72c1c864fd0edf038eb03077e8bd 100644 (file)
@@ -148,6 +148,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550I AORUS PRO AX"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
+       DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660 GAMING X DDR4"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"),
index a46d3b53bf61af76325301b69c5233309b78a7d9..7a059e02c26565ec16ab3690958dc68a3cc508b4 100644 (file)
@@ -236,7 +236,7 @@ enum ppfear_regs {
 #define ADL_LPM_STATUS_LATCH_EN_OFFSET         0x1704
 #define ADL_LPM_LIVE_STATUS_OFFSET             0x1764
 
-const char *pmc_lpm_modes[] = {
+static const char *pmc_lpm_modes[] = {
        "S0i2.0",
        "S0i2.1",
        "S0i2.2",
index 6b6f3e2a617afc3d8a26604a25ed5f34ba0c75b9..f73ecfd4a30922f10c8cf2c611cc87f9b568c635 100644 (file)
@@ -103,7 +103,7 @@ static int pmt_telem_probe(struct auxiliary_device *auxdev, const struct auxilia
        auxiliary_set_drvdata(auxdev, priv);
 
        for (i = 0; i < intel_vsec_dev->num_resources; i++) {
-               struct intel_pmt_entry *entry = &priv->entry[i];
+               struct intel_pmt_entry *entry = &priv->entry[priv->num_entries];
 
                ret = intel_pmt_dev_create(entry, &pmt_telem_ns, intel_vsec_dev, i);
                if (ret < 0)
index 11d14cc0ff0aeac9105eea7c7b6dc82748c4e672..c830e98dfa386852de5bb973722d3b98e132efbc 100644 (file)
@@ -51,6 +51,8 @@
 #define MBOX_TIMEOUT_US                        2000
 #define MBOX_TIMEOUT_ACQUIRE_US                1000
 #define MBOX_POLLING_PERIOD_US         100
+#define MBOX_ACQUIRE_NUM_RETRIES       5
+#define MBOX_ACQUIRE_RETRY_DELAY_MS    500
 #define MBOX_MAX_PACKETS               4
 
 #define MBOX_OWNER_NONE                        0x00
@@ -81,7 +83,7 @@ enum sdsi_command {
 
 struct sdsi_mbox_info {
        u64     *payload;
-       u64     *buffer;
+       void    *buffer;
        int     size;
 };
 
@@ -163,9 +165,7 @@ static int sdsi_mbox_cmd_read(struct sdsi_priv *priv, struct sdsi_mbox_info *inf
        total = 0;
        loop = 0;
        do {
-               int offset = SDSI_SIZE_MAILBOX * loop;
-               void __iomem *addr = priv->mbox_addr + offset;
-               u64 *buf = info->buffer + offset / SDSI_SIZE_CMD;
+               void *buf = info->buffer + (SDSI_SIZE_MAILBOX * loop);
                u32 packet_size;
 
                /* Poll on ready bit */
@@ -196,7 +196,7 @@ static int sdsi_mbox_cmd_read(struct sdsi_priv *priv, struct sdsi_mbox_info *inf
                        break;
                }
 
-               sdsi_memcpy64_fromio(buf, addr, round_up(packet_size, SDSI_SIZE_CMD));
+               sdsi_memcpy64_fromio(buf, priv->mbox_addr, round_up(packet_size, SDSI_SIZE_CMD));
 
                total += packet_size;
 
@@ -243,8 +243,8 @@ static int sdsi_mbox_cmd_write(struct sdsi_priv *priv, struct sdsi_mbox_info *in
                  FIELD_PREP(CTRL_PACKET_SIZE, info->size);
        writeq(control, priv->control_addr);
 
-       /* Poll on run_busy bit */
-       ret = readq_poll_timeout(priv->control_addr, control, !(control & CTRL_RUN_BUSY),
+       /* Poll on ready bit */
+       ret = readq_poll_timeout(priv->control_addr, control, control & CTRL_READY,
                                 MBOX_POLLING_PERIOD_US, MBOX_TIMEOUT_US);
 
        if (ret)
@@ -263,7 +263,7 @@ static int sdsi_mbox_acquire(struct sdsi_priv *priv, struct sdsi_mbox_info *info
 {
        u64 control;
        u32 owner;
-       int ret;
+       int ret, retries = 0;
 
        lockdep_assert_held(&priv->mb_lock);
 
@@ -273,13 +273,29 @@ static int sdsi_mbox_acquire(struct sdsi_priv *priv, struct sdsi_mbox_info *info
        if (owner != MBOX_OWNER_NONE)
                return -EBUSY;
 
-       /* Write first qword of payload */
-       writeq(info->payload[0], priv->mbox_addr);
+       /*
+        * If there has been no recent transaction and no one owns the mailbox,
+        * we should acquire it in under 1ms. However, if we've accessed it
+        * recently it may take up to 2.1 seconds to acquire it again.
+        */
+       do {
+               /* Write first qword of payload */
+               writeq(info->payload[0], priv->mbox_addr);
+
+               /* Check for ownership */
+               ret = readq_poll_timeout(priv->control_addr, control,
+                       FIELD_GET(CTRL_OWNER, control) == MBOX_OWNER_INBAND,
+                       MBOX_POLLING_PERIOD_US, MBOX_TIMEOUT_ACQUIRE_US);
+
+               if (FIELD_GET(CTRL_OWNER, control) == MBOX_OWNER_NONE &&
+                   retries++ < MBOX_ACQUIRE_NUM_RETRIES) {
+                       msleep(MBOX_ACQUIRE_RETRY_DELAY_MS);
+                       continue;
+               }
 
-       /* Check for ownership */
-       ret = readq_poll_timeout(priv->control_addr, control,
-                                FIELD_GET(CTRL_OWNER, control) & MBOX_OWNER_INBAND,
-                                MBOX_POLLING_PERIOD_US, MBOX_TIMEOUT_ACQUIRE_US);
+               /* Either we got it or someone else did. */
+               break;
+       } while (true);
 
        return ret;
 }
index c61f804dd44e820ac65563517b5f1a1a2d02a685..8f9c571d725789ecd4f3e47388b7f86dcdfef403 100644 (file)
@@ -212,6 +212,9 @@ static int __init intel_uncore_init(void)
        const struct x86_cpu_id *id;
        int ret;
 
+       if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+               return -ENODEV;
+
        id = x86_match_cpu(intel_uncore_cpu_ids);
        if (!id)
                return -ENODEV;
index c1d9ed9b7b672d45b30a4671113a105001a2eb1b..19f6b456234f8bd5443b38ceba4189c5c1ba7d03 100644 (file)
@@ -1121,8 +1121,6 @@ static void kbd_led_set(struct led_classdev *led_cdev,
 
        if (value > samsung->kbd_led.max_brightness)
                value = samsung->kbd_led.max_brightness;
-       else if (value < 0)
-               value = 0;
 
        samsung->kbd_led_wk = value;
        queue_work(samsung->led_workqueue, &samsung->kbd_led_work);
index bce17ca9794742ee002b9d63d8d06a03cc56bfec..a01a92769c1a367ac207b7d7a55b52060130468d 100644 (file)
@@ -740,16 +740,8 @@ static ssize_t certificate_store(struct kobject *kobj,
        if (!tlmi_priv.certificate_support)
                return -EOPNOTSUPP;
 
-       new_cert = kstrdup(buf, GFP_KERNEL);
-       if (!new_cert)
-               return -ENOMEM;
-       /* Strip out CR if one is present */
-       strip_cr(new_cert);
-
        /* If empty then clear installed certificate */
-       if (new_cert[0] == '\0') { /* Clear installed certificate */
-               kfree(new_cert);
-
+       if ((buf[0] == '\0') || (buf[0] == '\n')) { /* Clear installed certificate */
                /* Check that signature is set */
                if (!setting->signature || !setting->signature[0])
                        return -EACCES;
@@ -763,14 +755,16 @@ static ssize_t certificate_store(struct kobject *kobj,
 
                ret = tlmi_simple_call(LENOVO_CLEAR_BIOS_CERT_GUID, auth_str);
                kfree(auth_str);
-               if (ret)
-                       return ret;
 
-               kfree(setting->certificate);
-               setting->certificate = NULL;
-               return count;
+               return ret ?: count;
        }
 
+       new_cert = kstrdup(buf, GFP_KERNEL);
+       if (!new_cert)
+               return -ENOMEM;
+       /* Strip out CR if one is present */
+       strip_cr(new_cert);
+
        if (setting->cert_installed) {
                /* Certificate is installed so this is an update */
                if (!setting->signature || !setting->signature[0]) {
@@ -792,21 +786,14 @@ static ssize_t certificate_store(struct kobject *kobj,
                auth_str = kasprintf(GFP_KERNEL, "%s,%s",
                                new_cert, setting->password);
        }
-       if (!auth_str) {
-               kfree(new_cert);
+       kfree(new_cert);
+       if (!auth_str)
                return -ENOMEM;
-       }
 
        ret = tlmi_simple_call(guid, auth_str);
        kfree(auth_str);
-       if (ret) {
-               kfree(new_cert);
-               return ret;
-       }
 
-       kfree(setting->certificate);
-       setting->certificate = new_cert;
-       return count;
+       return ret ?: count;
 }
 
 static struct kobj_attribute auth_certificate = __ATTR_WO(certificate);
@@ -1194,6 +1181,10 @@ static void tlmi_release_attr(void)
 
        kset_unregister(tlmi_priv.attribute_kset);
 
+       /* Free up any saved signatures */
+       kfree(tlmi_priv.pwd_admin->signature);
+       kfree(tlmi_priv.pwd_admin->save_signature);
+
        /* Authentication structures */
        sysfs_remove_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group);
        kobject_put(&tlmi_priv.pwd_admin->kobj);
@@ -1210,11 +1201,6 @@ static void tlmi_release_attr(void)
        }
 
        kset_unregister(tlmi_priv.authentication_kset);
-
-       /* Free up any saved certificates/signatures */
-       kfree(tlmi_priv.pwd_admin->certificate);
-       kfree(tlmi_priv.pwd_admin->signature);
-       kfree(tlmi_priv.pwd_admin->save_signature);
 }
 
 static int tlmi_sysfs_init(void)
index 4f69df6eed07d0fad2adad2868a6141a926a19ca..4daba6151cd670bea54342769942c649ddd36c08 100644 (file)
@@ -63,7 +63,6 @@ struct tlmi_pwd_setting {
        int index; /*Used for HDD and NVME auth */
        enum level_option level;
        bool cert_installed;
-       char *certificate;
        char *signature;
        char *save_signature;
 };
index c568fae56db29ab979c8dfbdb30f9dd852627559..e6cb4a14cdd4705cb479e4e21b52dbf3d472dda1 100644 (file)
@@ -309,6 +309,20 @@ struct ibm_init_struct {
        struct ibm_struct *data;
 };
 
+/* DMI Quirks */
+struct quirk_entry {
+       bool btusb_bug;
+       u32 s2idle_bug_mmio;
+};
+
+static struct quirk_entry quirk_btusb_bug = {
+       .btusb_bug = true,
+};
+
+static struct quirk_entry quirk_s2idle_bug = {
+       .s2idle_bug_mmio = 0xfed80380,
+};
+
 static struct {
        u32 bluetooth:1;
        u32 hotkey:1;
@@ -338,6 +352,7 @@ static struct {
        u32 hotkey_poll_active:1;
        u32 has_adaptive_kbd:1;
        u32 kbd_lang:1;
+       struct quirk_entry *quirks;
 } tp_features;
 
 static struct {
@@ -4359,9 +4374,10 @@ static void bluetooth_exit(void)
        bluetooth_shutdown();
 }
 
-static const struct dmi_system_id bt_fwbug_list[] __initconst = {
+static const struct dmi_system_id fwbug_list[] __initconst = {
        {
                .ident = "ThinkPad E485",
+               .driver_data = &quirk_btusb_bug,
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "20KU"),
@@ -4369,6 +4385,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
        },
        {
                .ident = "ThinkPad E585",
+               .driver_data = &quirk_btusb_bug,
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "20KV"),
@@ -4376,6 +4393,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
        },
        {
                .ident = "ThinkPad A285 - 20MW",
+               .driver_data = &quirk_btusb_bug,
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "20MW"),
@@ -4383,6 +4401,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
        },
        {
                .ident = "ThinkPad A285 - 20MX",
+               .driver_data = &quirk_btusb_bug,
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "20MX"),
@@ -4390,6 +4409,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
        },
        {
                .ident = "ThinkPad A485 - 20MU",
+               .driver_data = &quirk_btusb_bug,
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "20MU"),
@@ -4397,14 +4417,125 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
        },
        {
                .ident = "ThinkPad A485 - 20MV",
+               .driver_data = &quirk_btusb_bug,
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "20MV"),
                },
        },
+       {
+               .ident = "L14 Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20X5"),
+               }
+       },
+       {
+               .ident = "T14s Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20XF"),
+               }
+       },
+       {
+               .ident = "X13 Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20XH"),
+               }
+       },
+       {
+               .ident = "T14 Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20XK"),
+               }
+       },
+       {
+               .ident = "T14 Gen1 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20UD"),
+               }
+       },
+       {
+               .ident = "T14 Gen1 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20UE"),
+               }
+       },
+       {
+               .ident = "T14s Gen1 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20UH"),
+               }
+       },
+       {
+               .ident = "P14s Gen1 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20Y1"),
+               }
+       },
+       {
+               .ident = "P14s Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "21A0"),
+               }
+       },
        {}
 };
 
+#ifdef CONFIG_SUSPEND
+/*
+ * Lenovo laptops from a variety of generations run an SMI handler during the D3->D0
+ * transition that occurs specifically when exiting suspend to idle, which can cause
+ * large delays during resume when the IOMMU translation layer is enabled (the default
+ * behavior) for NVMe devices.
+ *
+ * To avoid this firmware problem, skip the SMI handler on these machines before the
+ * D0 transition occurs.
+ */
+static void thinkpad_acpi_amd_s2idle_restore(void)
+{
+       struct resource *res;
+       void __iomem *addr;
+       u8 val;
+
+       res = request_mem_region_muxed(tp_features.quirks->s2idle_bug_mmio, 1,
+                                       "thinkpad_acpi_pm80");
+       if (!res)
+               return;
+
+       addr = ioremap(tp_features.quirks->s2idle_bug_mmio, 1);
+       if (!addr)
+               goto cleanup_resource;
+
+       val = ioread8(addr);
+       iowrite8(val & ~BIT(0), addr);
+
+       iounmap(addr);
+cleanup_resource:
+       release_resource(res);
+}
+
+static struct acpi_s2idle_dev_ops thinkpad_acpi_s2idle_dev_ops = {
+       .restore = thinkpad_acpi_amd_s2idle_restore,
+};
+#endif
+
 static const struct pci_device_id fwbug_cards_ids[] __initconst = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24F3) },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24FD) },
@@ -4419,7 +4550,8 @@ static int __init have_bt_fwbug(void)
         * Some AMD based ThinkPads have a firmware bug that calling
         * "GBDC" will cause bluetooth on Intel wireless cards blocked
         */
-       if (dmi_check_system(bt_fwbug_list) && pci_dev_present(fwbug_cards_ids)) {
+       if (tp_features.quirks && tp_features.quirks->btusb_bug &&
+           pci_dev_present(fwbug_cards_ids)) {
                vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_RFKILL,
                        FW_BUG "disable bluetooth subdriver for Intel cards\n");
                return 1;
@@ -8748,24 +8880,27 @@ static int __init fan_init(struct ibm_init_struct *iibm)
                        fan_status_access_mode = TPACPI_FAN_RD_TPEC;
                        if (quirks & TPACPI_FAN_Q1)
                                fan_quirk1_setup();
-                       if (quirks & TPACPI_FAN_2FAN) {
-                               tp_features.second_fan = 1;
-                               pr_info("secondary fan support enabled\n");
-                       }
-                       if (quirks & TPACPI_FAN_2CTL) {
-                               tp_features.second_fan = 1;
-                               tp_features.second_fan_ctl = 1;
-                               pr_info("secondary fan control enabled\n");
-                       }
                        /* Try to probe the 2nd fan */
+                       tp_features.second_fan = 1; /* needed for get_speed to work */
                        res = fan2_get_speed(&speed);
                        if (res >= 0) {
                                /* It responded - so let's assume it's there */
                                tp_features.second_fan = 1;
                                tp_features.second_fan_ctl = 1;
                                pr_info("secondary fan control detected & enabled\n");
+                       } else {
+                               /* Fan not auto-detected */
+                               tp_features.second_fan = 0;
+                               if (quirks & TPACPI_FAN_2FAN) {
+                                       tp_features.second_fan = 1;
+                                       pr_info("secondary fan support enabled\n");
+                               }
+                               if (quirks & TPACPI_FAN_2CTL) {
+                                       tp_features.second_fan = 1;
+                                       tp_features.second_fan_ctl = 1;
+                                       pr_info("secondary fan control enabled\n");
+                               }
                        }
-
                } else {
                        pr_err("ThinkPad ACPI EC access misbehaving, fan status and control unavailable\n");
                        return -ENODEV;
@@ -11455,6 +11590,10 @@ static void thinkpad_acpi_module_exit(void)
 
        tpacpi_lifecycle = TPACPI_LIFE_EXITING;
 
+#ifdef CONFIG_SUSPEND
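+       /* undo the LPS0 callback registration done at module init */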
+       if (tp_features.quirks && tp_features.quirks->s2idle_bug_mmio)
+               acpi_unregister_lps0_dev(&thinkpad_acpi_s2idle_dev_ops);
+#endif
        if (tpacpi_hwmon)
                hwmon_device_unregister(tpacpi_hwmon);
        if (tp_features.sensors_pdrv_registered)
@@ -11496,6 +11635,7 @@ static void thinkpad_acpi_module_exit(void)
 
 static int __init thinkpad_acpi_module_init(void)
 {
+       const struct dmi_system_id *dmi_id;
        int ret, i;
 
        tpacpi_lifecycle = TPACPI_LIFE_INIT;
@@ -11535,6 +11675,10 @@ static int __init thinkpad_acpi_module_init(void)
                return -ENODEV;
        }
 
+       dmi_id = dmi_first_match(fwbug_list);
+       if (dmi_id)
+               tp_features.quirks = dmi_id->driver_data;
+
        /* Device initialization */
        tpacpi_pdev = platform_device_register_simple(TPACPI_DRVR_NAME, -1,
                                                        NULL, 0);
@@ -11623,6 +11767,13 @@ static int __init thinkpad_acpi_module_init(void)
                tp_features.input_device_registered = 1;
        }
 
+#ifdef CONFIG_SUSPEND
+       if (tp_features.quirks && tp_features.quirks->s2idle_bug_mmio) {
+               if (!acpi_register_lps0_dev(&thinkpad_acpi_s2idle_dev_ops))
+                       pr_info("Using s2idle quirk to avoid %s platform firmware bug\n",
+                               (dmi_id && dmi_id->ident) ? dmi_id->ident : "");
+       }
+#endif
        return 0;
 }
 
index ea02c8dcd7484a1652238a1dc739ebb02056a56b..d925cb137e1268f38cc91b5b7988dd6273beac18 100644 (file)
@@ -604,6 +604,12 @@ int power_supply_get_battery_info(struct power_supply *psy,
        err = samsung_sdi_battery_get_info(&psy->dev, value, &info);
        if (!err)
                goto out_ret_pointer;
+       else if (err == -ENODEV)
+               /*
+                * Device does not have a static battery.
+                * Proceed to look for a simple battery.
+                */
+               err = 0;
 
        if (strcmp("simple-battery", value)) {
                err = -ENODEV;
index 9d59f277f51984188027eaac00cfe0360f044166..b33daab798b98a93260f94c20a1608ad0cabf54f 100644 (file)
@@ -824,6 +824,7 @@ static struct samsung_sdi_battery samsung_sdi_batteries[] = {
                        .constant_charge_current_max_ua = 900000,
                        .constant_charge_voltage_max_uv = 4200000,
                        .charge_term_current_ua = 200000,
+                       .charge_restart_voltage_uv = 4170000,
                        .maintenance_charge = samsung_maint_charge_table,
                        .maintenance_charge_size = ARRAY_SIZE(samsung_maint_charge_table),
                        .alert_low_temp_charge_current_ua = 300000,
@@ -867,6 +868,7 @@ static struct samsung_sdi_battery samsung_sdi_batteries[] = {
                        .constant_charge_current_max_ua = 1500000,
                        .constant_charge_voltage_max_uv = 4350000,
                        .charge_term_current_ua = 120000,
+                       .charge_restart_voltage_uv = 4300000,
                        .maintenance_charge = samsung_maint_charge_table,
                        .maintenance_charge_size = ARRAY_SIZE(samsung_maint_charge_table),
                        .alert_low_temp_charge_current_ua = 300000,
index 0feaa4b453175b22abd933eeb38f6fba6e95bb62..860672d6a03c04ca590aa0c809653555a3255ef7 100644 (file)
@@ -300,7 +300,7 @@ struct ptp_ocp {
        struct platform_device  *spi_flash;
        struct clk_hw           *i2c_clk;
        struct timer_list       watchdog;
-       const struct ocp_attr_group *attr_tbl;
+       const struct attribute_group **attr_group;
        const struct ptp_ocp_eeprom_map *eeprom_map;
        struct dentry           *debug_root;
        time64_t                gnss_lost;
@@ -841,7 +841,7 @@ __ptp_ocp_adjtime_locked(struct ptp_ocp *bp, u32 adj_val)
 }
 
 static void
-ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, u64 delta_ns)
+ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, s64 delta_ns)
 {
        struct timespec64 ts;
        unsigned long flags;
@@ -850,7 +850,8 @@ ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, u64 delta_ns)
        spin_lock_irqsave(&bp->lock, flags);
        err = __ptp_ocp_gettime_locked(bp, &ts, NULL);
        if (likely(!err)) {
-               timespec64_add_ns(&ts, delta_ns);
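+               /* set_normalized_timespec64() also handles negative deltas */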
+               set_normalized_timespec64(&ts, ts.tv_sec,
+                                         ts.tv_nsec + delta_ns);
                __ptp_ocp_settime_locked(bp, &ts);
        }
        spin_unlock_irqrestore(&bp->lock, flags);
@@ -1557,7 +1558,7 @@ ptp_ocp_signal_set(struct ptp_ocp *bp, int gen, struct ptp_ocp_signal *s)
        start_ns = ktime_set(ts.tv_sec, ts.tv_nsec) + NSEC_PER_MSEC;
        if (!s->start) {
                /* roundup() does not work on 32-bit systems */
-               s->start = DIV_ROUND_UP_ULL(start_ns, s->period);
+               s->start = DIV64_U64_ROUND_UP(start_ns, s->period);
                s->start = ktime_add(s->start, s->phase);
        }
 
@@ -1835,6 +1836,42 @@ ptp_ocp_signal_init(struct ptp_ocp *bp)
                                             bp->signal_out[i]->mem);
 }
 
+static void
+ptp_ocp_attr_group_del(struct ptp_ocp *bp)
+{
+       sysfs_remove_groups(&bp->dev.kobj, bp->attr_group);
+       kfree(bp->attr_group);
+}
+
+static int
+ptp_ocp_attr_group_add(struct ptp_ocp *bp,
+                      const struct ocp_attr_group *attr_tbl)
+{
+       int count, i;
+       int err;
+
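+       /* count the attribute groups enabled by the firmware capabilities */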
+       count = 0;
+       for (i = 0; attr_tbl[i].cap; i++)
+               if (attr_tbl[i].cap & bp->fw_cap)
+                       count++;
+
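+       /* allocate a NULL-terminated array, as sysfs_create_groups() expects */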
+       bp->attr_group = kcalloc(count + 1, sizeof(struct attribute_group *),
+                                GFP_KERNEL);
+       if (!bp->attr_group)
+               return -ENOMEM;
+
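+       /* fill the array with the selected groups */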
+       count = 0;
+       for (i = 0; attr_tbl[i].cap; i++)
+               if (attr_tbl[i].cap & bp->fw_cap)
+                       bp->attr_group[count++] = attr_tbl[i].group;
+
+       err = sysfs_create_groups(&bp->dev.kobj, bp->attr_group);
+       if (err)
+               bp->attr_group[0] = NULL;
+
+       return err;
+}
+
 static void
 ptp_ocp_sma_init(struct ptp_ocp *bp)
 {
@@ -1904,7 +1941,6 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
        bp->flash_start = 1024 * 4096;
        bp->eeprom_map = fb_eeprom_map;
        bp->fw_version = ioread32(&bp->image->version);
-       bp->attr_tbl = fb_timecard_groups;
        bp->fw_cap = OCP_CAP_BASIC;
 
        ver = bp->fw_version & 0xffff;
@@ -1918,6 +1954,10 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
        ptp_ocp_sma_init(bp);
        ptp_ocp_signal_init(bp);
 
+       err = ptp_ocp_attr_group_add(bp, fb_timecard_groups);
+       if (err)
+               return err;
+
        err = ptp_ocp_fb_set_pins(bp);
        if (err)
                return err;
@@ -3388,7 +3428,6 @@ ptp_ocp_complete(struct ptp_ocp *bp)
 {
        struct pps_device *pps;
        char buf[32];
-       int i, err;
 
        if (bp->gnss_port != -1) {
                sprintf(buf, "ttyS%d", bp->gnss_port);
@@ -3413,14 +3452,6 @@ ptp_ocp_complete(struct ptp_ocp *bp)
        if (pps)
                ptp_ocp_symlink(bp, pps->dev, "pps");
 
-       for (i = 0; bp->attr_tbl[i].cap; i++) {
-               if (!(bp->attr_tbl[i].cap & bp->fw_cap))
-                       continue;
-               err = sysfs_create_group(&bp->dev.kobj, bp->attr_tbl[i].group);
-               if (err)
-                       return err;
-       }
-
        ptp_ocp_debugfs_add_device(bp);
 
        return 0;
@@ -3492,15 +3523,11 @@ static void
 ptp_ocp_detach_sysfs(struct ptp_ocp *bp)
 {
        struct device *dev = &bp->dev;
-       int i;
 
        sysfs_remove_link(&dev->kobj, "ttyGNSS");
        sysfs_remove_link(&dev->kobj, "ttyMAC");
        sysfs_remove_link(&dev->kobj, "ptp");
        sysfs_remove_link(&dev->kobj, "pps");
-       if (bp->attr_tbl)
-               for (i = 0; bp->attr_tbl[i].cap; i++)
-                       sysfs_remove_group(&dev->kobj, bp->attr_tbl[i].group);
 }
 
 static void
@@ -3510,6 +3537,7 @@ ptp_ocp_detach(struct ptp_ocp *bp)
 
        ptp_ocp_debugfs_remove_device(bp);
        ptp_ocp_detach_sysfs(bp);
+       ptp_ocp_attr_group_del(bp);
        if (timer_pending(&bp->watchdog))
                del_timer_sync(&bp->watchdog);
        if (bp->ts0)
index 05147d2c384289e0bb90fb513c252c01d4907569..485e58b264c044fffa3cd569dc2f89c6f54e5aa8 100644 (file)
@@ -292,6 +292,7 @@ enum atc2603c_reg_ids {
        .bypass_mask = BIT(5), \
        .active_discharge_reg = ATC2603C_PMU_SWITCH_CTL, \
        .active_discharge_mask = BIT(1), \
+       .active_discharge_on = BIT(1), \
        .owner = THIS_MODULE, \
 }
 
index f21e3f8b21f23b54393eb5c6dc9ead9e61ced07e..8e13dea354a21e7ca69da47cca2a083d417ada72 100644 (file)
@@ -285,6 +285,7 @@ static const unsigned int rtq2134_buck_ramp_delay_table[] = {
                .enable_mask = RTQ2134_VOUTEN_MASK, \
                .active_discharge_reg = RTQ2134_REG_BUCK##_id##_CFG0, \
                .active_discharge_mask = RTQ2134_ACTDISCHG_MASK, \
+               .active_discharge_on = RTQ2134_ACTDISCHG_MASK, \
                .ramp_reg = RTQ2134_REG_BUCK##_id##_RSPCFG, \
                .ramp_mask = RTQ2134_RSPUP_MASK, \
                .ramp_delay_table = rtq2134_buck_ramp_delay_table, \
index cadea0344486fa6e63eabb79cee8159ae0f067e5..40befdd9dfa922bf5878f2d46bc9bf6cec413223 100644 (file)
@@ -71,6 +71,35 @@ static const struct regulator_ops wm8994_ldo2_ops = {
 };
 
 static const struct regulator_desc wm8994_ldo_desc[] = {
+       {
+               .name = "LDO1",
+               .id = 1,
+               .type = REGULATOR_VOLTAGE,
+               .n_voltages = WM8994_LDO1_MAX_SELECTOR + 1,
+               .vsel_reg = WM8994_LDO_1,
+               .vsel_mask = WM8994_LDO1_VSEL_MASK,
+               .ops = &wm8994_ldo1_ops,
+               .min_uV = 2400000,
+               .uV_step = 100000,
+               .enable_time = 3000,
+               .off_on_delay = 36000,
+               .owner = THIS_MODULE,
+       },
+       {
+               .name = "LDO2",
+               .id = 2,
+               .type = REGULATOR_VOLTAGE,
+               .n_voltages = WM8994_LDO2_MAX_SELECTOR + 1,
+               .vsel_reg = WM8994_LDO_2,
+               .vsel_mask = WM8994_LDO2_VSEL_MASK,
+               .ops = &wm8994_ldo2_ops,
+               .enable_time = 3000,
+               .off_on_delay = 36000,
+               .owner = THIS_MODULE,
+       },
+};
+
+static const struct regulator_desc wm8958_ldo_desc[] = {
        {
                .name = "LDO1",
                .id = 1,
@@ -172,9 +201,16 @@ static int wm8994_ldo_probe(struct platform_device *pdev)
         * regulator core and we need not worry about it on the
         * error path.
         */
-       ldo->regulator = devm_regulator_register(&pdev->dev,
-                                                &wm8994_ldo_desc[id],
-                                                &config);
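+       /* pick the descriptor table that matches the CODEC variant */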
+       if (ldo->wm8994->type == WM8994) {
+               ldo->regulator = devm_regulator_register(&pdev->dev,
+                                                        &wm8994_ldo_desc[id],
+                                                        &config);
+       } else {
+               ldo->regulator = devm_regulator_register(&pdev->dev,
+                                                        &wm8958_ldo_desc[id],
+                                                        &config);
+       }
+
        if (IS_ERR(ldo->regulator)) {
                ret = PTR_ERR(ldo->regulator);
                dev_err(wm8994->dev, "Failed to register LDO%d: %d\n",
index 1e831503885066b132faafd811d43837e3b72154..a8dde46063602dcc799db1e7bac5a3eef9fdf9e7 100644 (file)
@@ -121,7 +121,9 @@ static int rzg2l_usbphy_ctrl_probe(struct platform_device *pdev)
                return dev_err_probe(dev, PTR_ERR(priv->rstc),
                                     "failed to get reset\n");
 
-       reset_control_deassert(priv->rstc);
+       error = reset_control_deassert(priv->rstc);
+       if (error)
+               return error;
 
        priv->rcdev.ops = &rzg2l_usbphy_ctrl_reset_ops;
        priv->rcdev.of_reset_n_cells = 1;
index 24d3395964cc4ba2d3934a32299fef3f667cd45f..4c5bba52b10593890c9a95ccea929148db9cdc5f 100644 (file)
@@ -20,6 +20,7 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc,
        struct tegra_bpmp *bpmp = to_tegra_bpmp(rstc);
        struct mrq_reset_request request;
        struct tegra_bpmp_message msg;
+       int err;
 
        memset(&request, 0, sizeof(request));
        request.cmd = command;
@@ -30,7 +31,13 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc,
        msg.tx.data = &request;
        msg.tx.size = sizeof(request);
 
-       return tegra_bpmp_transfer(bpmp, &msg);
+       err = tegra_bpmp_transfer(bpmp, &msg);
+       if (err)
+               return err;
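+       /* the transfer succeeded but the BPMP firmware reported an error */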
+       if (msg.rx.ret)
+               return -EINVAL;
+
+       return 0;
 }
 
 static int tegra_bpmp_reset_module(struct reset_controller_dev *rstc,
index 5b3e4da6340612f78eec1e691966b60cde6661ec..5252ce4cbda4ecd71a6134f63c4cc672c190ed4f 100644 (file)
@@ -370,6 +370,23 @@ CLK_OF_DECLARE_DRIVER(sun8i_h3_rtc_clk, "allwinner,sun8i-h3-rtc",
 CLK_OF_DECLARE_DRIVER(sun50i_h5_rtc_clk, "allwinner,sun50i-h5-rtc",
                      sun8i_h3_rtc_clk_init);
 
+static const struct sun6i_rtc_clk_data sun50i_h6_rtc_data = {
+       .rc_osc_rate = 16000000,
+       .fixed_prescaler = 32,
+       .has_prescaler = 1,
+       .has_out_clk = 1,
+       .export_iosc = 1,
+       .has_losc_en = 1,
+       .has_auto_swt = 1,
+};
+
+static void __init sun50i_h6_rtc_clk_init(struct device_node *node)
+{
+       sun6i_rtc_clk_init(node, &sun50i_h6_rtc_data);
+}
+CLK_OF_DECLARE_DRIVER(sun50i_h6_rtc_clk, "allwinner,sun50i-h6-rtc",
+                     sun50i_h6_rtc_clk_init);
+
 /*
  * The R40 user manual is self-conflicting on whether the prescaler is
  * fixed or configurable. The clock diagram shows it as fixed, but there
index 8e87a31e329d047c0172f55d31bb9e6989d71755..ba6d787896606d093183c1b0ad1eec5c9fdba2d7 100644 (file)
@@ -1422,6 +1422,13 @@ int dasd_start_IO(struct dasd_ccw_req *cqr)
                if (!cqr->lpm)
                        cqr->lpm = dasd_path_get_opm(device);
        }
+       /*
+        * remember the number of formatted tracks to prevent double format on
+        * ESE devices
+        */
+       if (cqr->block)
+               cqr->trkcount = atomic_read(&cqr->block->trkcount);
+
        if (cqr->cpmode == 1) {
                rc = ccw_device_tm_start(device->cdev, cqr->cpaddr,
                                         (long) cqr, cqr->lpm);
@@ -1639,6 +1646,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
        unsigned long now;
        int nrf_suppressed = 0;
        int fp_suppressed = 0;
+       struct request *req;
        u8 *sense = NULL;
        int expires;
 
@@ -1739,7 +1747,12 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
        }
 
        if (dasd_ese_needs_format(cqr->block, irb)) {
-               if (rq_data_dir((struct request *)cqr->callback_data) == READ) {
+               req = dasd_get_callback_data(cqr);
+               if (!req) {
+                       cqr->status = DASD_CQR_ERROR;
+                       return;
+               }
+               if (rq_data_dir(req) == READ) {
                        device->discipline->ese_read(cqr, irb);
                        cqr->status = DASD_CQR_SUCCESS;
                        cqr->stopclk = now;
@@ -2765,8 +2778,7 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
                 * complete a request partially.
                 */
                if (proc_bytes) {
-                       blk_update_request(req, BLK_STS_OK,
-                                          blk_rq_bytes(req) - proc_bytes);
+                       blk_update_request(req, BLK_STS_OK, proc_bytes);
                        blk_mq_requeue_request(req, true);
                } else if (likely(!blk_should_fake_timeout(req->q))) {
                        blk_mq_complete_request(req);
index 8410a25a65c139347a6306221593bfb12ff13312..836838f7d6867ae6328ac4f9342eccee2e0a54c9 100644 (file)
@@ -1480,7 +1480,7 @@ static int dasd_eckd_pe_handler(struct dasd_device *device,
 {
        struct pe_handler_work_data *data;
 
-       data = kmalloc(sizeof(*data), GFP_ATOMIC | GFP_DMA);
+       data = kzalloc(sizeof(*data), GFP_ATOMIC | GFP_DMA);
        if (!data) {
                if (mutex_trylock(&dasd_pe_handler_mutex)) {
                        data = pe_handler_worker;
@@ -1488,9 +1488,6 @@ static int dasd_eckd_pe_handler(struct dasd_device *device,
                } else {
                        return -ENOMEM;
                }
-       } else {
-               memset(data, 0, sizeof(*data));
-               data->isglobal = 0;
        }
        INIT_WORK(&data->worker, do_pe_handler_work);
        dasd_get_device(device);
@@ -3083,13 +3080,24 @@ static int dasd_eckd_format_device(struct dasd_device *base,
 }
 
 static bool test_and_set_format_track(struct dasd_format_entry *to_format,
-                                     struct dasd_block *block)
+                                     struct dasd_ccw_req *cqr)
 {
+       struct dasd_block *block = cqr->block;
        struct dasd_format_entry *format;
        unsigned long flags;
        bool rc = false;
 
        spin_lock_irqsave(&block->format_lock, flags);
+       if (cqr->trkcount != atomic_read(&block->trkcount)) {
+               /*
+                * The number of formatted tracks has changed after request
+                * start and we cannot tell if the current track was involved.
+                * To avoid data corruption, treat it as if the current track is
+                * involved.
+                */
+               rc = true;
+               goto out;
+       }
        list_for_each_entry(format, &block->format_list, list) {
                if (format->track == to_format->track) {
                        rc = true;
@@ -3109,6 +3117,7 @@ static void clear_format_track(struct dasd_format_entry *format,
        unsigned long flags;
 
        spin_lock_irqsave(&block->format_lock, flags);
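+       /* bump trkcount so in-flight requests can detect the completed format */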
+       atomic_inc(&block->trkcount);
        list_del_init(&format->list);
        spin_unlock_irqrestore(&block->format_lock, flags);
 }
@@ -3145,7 +3154,7 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr,
        sector_t curr_trk;
        int rc;
 
-       req = cqr->callback_data;
+       req = dasd_get_callback_data(cqr);
        block = cqr->block;
        base = block->base;
        private = base->private;
@@ -3170,8 +3179,11 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr,
        }
        format->track = curr_trk;
        /* test if track is already in formatting by another thread */
-       if (test_and_set_format_track(format, block))
+       if (test_and_set_format_track(format, cqr)) {
+               /* this is not a real error, so do not count down retries */
+               cqr->retries++;
                return ERR_PTR(-EEXIST);
+       }
 
        fdata.start_unit = curr_trk;
        fdata.stop_unit = curr_trk;
@@ -3270,12 +3282,11 @@ static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb)
                                cqr->proc_bytes = blk_count * blksize;
                                return 0;
                        }
-                       if (dst && !skip_block) {
-                               dst += off;
+                       if (dst && !skip_block)
                                memset(dst, 0, blksize);
-                       } else {
+                       else
                                skip_block--;
-                       }
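+                       /* advance dst for zeroed and skipped blocks alike */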
+                       dst += blksize;
                        blk_count++;
                }
        }
index e084f4deddddd26429c01a38bde53962f258fb8b..60be7f7bf2d167d02e1526cc7367adfd8e0e1642 100644 (file)
@@ -782,7 +782,6 @@ static void dasd_fba_setup_blk_queue(struct dasd_block *block)
        blk_queue_segment_boundary(q, PAGE_SIZE - 1);
 
        q->limits.discard_granularity = logical_block_size;
-       q->limits.discard_alignment = PAGE_SIZE;
 
        /* Calculate max_discard_sectors and make it PAGE aligned */
        max_bytes = USHRT_MAX * logical_block_size;
@@ -791,7 +790,6 @@ static void dasd_fba_setup_blk_queue(struct dasd_block *block)
 
        blk_queue_max_discard_sectors(q, max_discard_sectors);
        blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
-       blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
 }
 
 static int dasd_fba_pe_handler(struct dasd_device *device,
index 3b7af00a7825fecc8aeade5102ea0f7c56bdc98e..83b918b84b4aeeb9a083eca13f54ea01e42684c0 100644 (file)
@@ -187,6 +187,7 @@ struct dasd_ccw_req {
        void (*callback)(struct dasd_ccw_req *, void *data);
        void *callback_data;
        unsigned int proc_bytes;        /* bytes for partial completion */
+       unsigned int trkcount;          /* count formatted tracks */
 };
 
 /*
@@ -610,6 +611,7 @@ struct dasd_block {
 
        struct list_head format_list;
        spinlock_t format_lock;
+       atomic_t trkcount;
 };
 
 struct dasd_attention_data {
@@ -756,6 +758,18 @@ dasd_check_blocksize(int bsize)
        return 0;
 }
 
+/*
+ * return the callback data of the original request in case there are
+ * ERP requests built on top of it
+ */
+static inline void *dasd_get_callback_data(struct dasd_ccw_req *cqr)
+{
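+       /* walk the chain of ERP requests down to the original request */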
+       while (cqr->refers)
+               cqr = cqr->refers;
+
+       return cqr->callback_data;
+}
+
 /* externals in dasd.c */
 #define DASD_PROFILE_OFF        0
 #define DASD_PROFILE_ON         1
index 88abfb5e8045c6135c3e58572f697880c16586e3..8ac213a551418da387e96825acea509001382c28 100644 (file)
@@ -626,8 +626,6 @@ static void mpc_rcvd_sweep_resp(struct mpcg_info *mpcginfo)
                ctcm_clear_busy_do(dev);
        }
 
-       kfree(mpcginfo);
-
        return;
 
 }
@@ -1192,10 +1190,10 @@ static void ctcmpc_unpack_skb(struct channel *ch, struct sk_buff *pskb)
                                                CTCM_FUNTAIL, dev->name);
                        priv->stats.rx_dropped++;
                        /* mpcginfo only used for non-data transfers */
-                       kfree(mpcginfo);
                        if (do_debug_data)
                                ctcmpc_dump_skb(pskb, -8);
                }
+               kfree(mpcginfo);
        }
 done:
 
@@ -1977,7 +1975,6 @@ static void mpc_action_rcvd_xid0(fsm_instance *fsm, int event, void *arg)
                }
                break;
        }
-       kfree(mpcginfo);
 
        CTCM_PR_DEBUG("ctcmpc:%s() %s xid2:%i xid7:%i xidt_p2:%i \n",
                __func__, ch->id, grp->outstanding_xid2,
@@ -2038,7 +2035,6 @@ static void mpc_action_rcvd_xid7(fsm_instance *fsm, int event, void *arg)
                mpc_validate_xid(mpcginfo);
                break;
        }
-       kfree(mpcginfo);
        return;
 }
 
index ded1930a00b2d8f04f7dc8c1f4a2cc20b29dbf47..e3813a7aa5e68ff1d3573e50bc6f624f8ef4cc6f 100644 (file)
@@ -39,11 +39,12 @@ static ssize_t ctcm_buffer_write(struct device *dev,
        struct ctcm_priv *priv = dev_get_drvdata(dev);
        int rc;
 
-       ndev = priv->channel[CTCM_READ]->netdev;
-       if (!(priv && priv->channel[CTCM_READ] && ndev)) {
+       if (!(priv && priv->channel[CTCM_READ] &&
+             priv->channel[CTCM_READ]->netdev)) {
                CTCM_DBF_TEXT(SETUP, CTC_DBF_ERROR, "bfnondev");
                return -ENODEV;
        }
+       ndev = priv->channel[CTCM_READ]->netdev;
 
        rc = kstrtouint(buf, 0, &bs1);
        if (rc)
index bab9b34926c6881d1388e6c328aa704b58868613..84c8981317b4602a2695f8eb29f67288eaeceecd 100644 (file)
@@ -1736,10 +1736,11 @@ lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd)
                        lcs_schedule_recovery(card);
                        break;
                case LCS_CMD_STOPLAN:
-                       pr_warn("Stoplan for %s initiated by LGW\n",
-                               card->dev->name);
-                       if (card->dev)
+                       if (card->dev) {
+                               pr_warn("Stoplan for %s initiated by LGW\n",
+                                       card->dev->name);
                                netif_carrier_off(card->dev);
+                       }
                        break;
                default:
                        LCS_DBF_TEXT(5, trace, "noLGWcmd");
index 5f554a3a0f626f54e9b20d6dd0af7deeb7b97973..caeebfb67149981a132c468bd56a9f8d35b12bce 100644 (file)
@@ -317,14 +317,18 @@ enum {
 };
 
 struct aha152x_cmd_priv {
-       struct scsi_pointer scsi_pointer;
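+       /* driver-private fields replacing the former struct scsi_pointer usage */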
+       char *ptr;
+       int this_residual;
+       struct scatterlist *buffer;
+       int status;
+       int message;
+       int sent_command;
+       int phase;
 };
 
-static struct scsi_pointer *aha152x_scsi_pointer(struct scsi_cmnd *cmd)
+static struct aha152x_cmd_priv *aha152x_priv(struct scsi_cmnd *cmd)
 {
-       struct aha152x_cmd_priv *acmd = scsi_cmd_priv(cmd);
-
-       return &acmd->scsi_pointer;
+       return scsi_cmd_priv(cmd);
 }
 
 MODULE_AUTHOR("Jürgen Fischer");
@@ -890,17 +894,16 @@ void aha152x_release(struct Scsi_Host *shpnt)
 static int setup_expected_interrupts(struct Scsi_Host *shpnt)
 {
        if(CURRENT_SC) {
-               struct scsi_pointer *scsi_pointer =
-                       aha152x_scsi_pointer(CURRENT_SC);
+               struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC);
 
-               scsi_pointer->phase |= 1 << 16;
+               acp->phase |= 1 << 16;
 
-               if (scsi_pointer->phase & selecting) {
+               if (acp->phase & selecting) {
                        SETPORT(SSTAT1, SELTO);
                        SETPORT(SIMODE0, ENSELDO | (DISCONNECTED_SC ? ENSELDI : 0));
                        SETPORT(SIMODE1, ENSELTIMO);
                } else {
-                       SETPORT(SIMODE0, (scsi_pointer->phase & spiordy) ? ENSPIORDY : 0);
+                       SETPORT(SIMODE0, (acp->phase & spiordy) ? ENSPIORDY : 0);
                        SETPORT(SIMODE1, ENPHASEMIS | ENSCSIRST | ENSCSIPERR | ENBUSFREE);
                }
        } else if(STATE==seldi) {
@@ -924,17 +927,16 @@ static int setup_expected_interrupts(struct Scsi_Host *shpnt)
 static int aha152x_internal_queue(struct scsi_cmnd *SCpnt,
                                  struct completion *complete, int phase)
 {
-       struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(SCpnt);
+       struct aha152x_cmd_priv *acp = aha152x_priv(SCpnt);
        struct Scsi_Host *shpnt = SCpnt->device->host;
        unsigned long flags;
 
-       scsi_pointer->phase        = not_issued | phase;
-       scsi_pointer->Status       = 0x1; /* Ilegal status by SCSI standard */
-       scsi_pointer->Message      = 0;
-       scsi_pointer->have_data_in = 0;
-       scsi_pointer->sent_command = 0;
+       acp->phase        = not_issued | phase;
+       acp->status       = 0x1; /* Illegal status by SCSI standard */
+       acp->message      = 0;
+       acp->sent_command = 0;
 
-       if (scsi_pointer->phase & (resetting | check_condition)) {
+       if (acp->phase & (resetting | check_condition)) {
                if (!SCpnt->host_scribble || SCSEM(SCpnt) || SCNEXT(SCpnt)) {
                        scmd_printk(KERN_ERR, SCpnt, "cannot reuse command\n");
                        return FAILED;
@@ -957,15 +959,15 @@ static int aha152x_internal_queue(struct scsi_cmnd *SCpnt,
           SCp.phase            : current state of the command */
 
        if ((phase & resetting) || !scsi_sglist(SCpnt)) {
-               scsi_pointer->ptr           = NULL;
-               scsi_pointer->this_residual = 0;
+               acp->ptr           = NULL;
+               acp->this_residual = 0;
                scsi_set_resid(SCpnt, 0);
-               scsi_pointer->buffer        = NULL;
+               acp->buffer        = NULL;
        } else {
                scsi_set_resid(SCpnt, scsi_bufflen(SCpnt));
-               scsi_pointer->buffer        = scsi_sglist(SCpnt);
-               scsi_pointer->ptr           = SG_ADDRESS(scsi_pointer->buffer);
-               scsi_pointer->this_residual = scsi_pointer->buffer->length;
+               acp->buffer        = scsi_sglist(SCpnt);
+               acp->ptr           = SG_ADDRESS(acp->buffer);
+               acp->this_residual = acp->buffer->length;
        }
 
        DO_LOCK(flags);
@@ -1015,7 +1017,7 @@ static void reset_done(struct scsi_cmnd *SCpnt)
 
 static void aha152x_scsi_done(struct scsi_cmnd *SCpnt)
 {
-       if (aha152x_scsi_pointer(SCpnt)->phase & resetting)
+       if (aha152x_priv(SCpnt)->phase & resetting)
                reset_done(SCpnt);
        else
                scsi_done(SCpnt);
@@ -1101,7 +1103,7 @@ static int aha152x_device_reset(struct scsi_cmnd * SCpnt)
 
        DO_LOCK(flags);
 
-       if (aha152x_scsi_pointer(SCpnt)->phase & resetted) {
+       if (aha152x_priv(SCpnt)->phase & resetted) {
                HOSTDATA(shpnt)->commands--;
                if (!HOSTDATA(shpnt)->commands)
                        SETPORT(PORTA, 0);
@@ -1395,31 +1397,30 @@ static void busfree_run(struct Scsi_Host *shpnt)
        SETPORT(SSTAT1, CLRBUSFREE);
 
        if(CURRENT_SC) {
-               struct scsi_pointer *scsi_pointer =
-                       aha152x_scsi_pointer(CURRENT_SC);
+               struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC);
 
 #if defined(AHA152X_STAT)
                action++;
 #endif
-               scsi_pointer->phase &= ~syncneg;
+               acp->phase &= ~syncneg;
 
-               if (scsi_pointer->phase & completed) {
+               if (acp->phase & completed) {
                        /* target sent COMMAND COMPLETE */
-                       done(shpnt, scsi_pointer->Status, DID_OK);
+                       done(shpnt, acp->status, DID_OK);
 
-               } else if (scsi_pointer->phase & aborted) {
-                       done(shpnt, scsi_pointer->Status, DID_ABORT);
+               } else if (acp->phase & aborted) {
+                       done(shpnt, acp->status, DID_ABORT);
 
-               } else if (scsi_pointer->phase & resetted) {
-                       done(shpnt, scsi_pointer->Status, DID_RESET);
+               } else if (acp->phase & resetted) {
+                       done(shpnt, acp->status, DID_RESET);
 
-               } else if (scsi_pointer->phase & disconnected) {
+               } else if (acp->phase & disconnected) {
                        /* target sent DISCONNECT */
 #if defined(AHA152X_STAT)
                        HOSTDATA(shpnt)->disconnections++;
 #endif
                        append_SC(&DISCONNECTED_SC, CURRENT_SC);
-                       scsi_pointer->phase |= 1 << 16;
+                       acp->phase |= 1 << 16;
                        CURRENT_SC = NULL;
 
                } else {
@@ -1438,24 +1439,23 @@ static void busfree_run(struct Scsi_Host *shpnt)
                action++;
 #endif
 
-               if (aha152x_scsi_pointer(DONE_SC)->phase & check_condition) {
+               if (aha152x_priv(DONE_SC)->phase & check_condition) {
                        struct scsi_cmnd *cmd = HOSTDATA(shpnt)->done_SC;
                        struct aha152x_scdata *sc = SCDATA(cmd);
 
                        scsi_eh_restore_cmnd(cmd, &sc->ses);
 
-                       aha152x_scsi_pointer(cmd)->Status = SAM_STAT_CHECK_CONDITION;
+                       aha152x_priv(cmd)->status = SAM_STAT_CHECK_CONDITION;
 
                        HOSTDATA(shpnt)->commands--;
                        if (!HOSTDATA(shpnt)->commands)
                                SETPORT(PORTA, 0);      /* turn led off */
-               } else if (aha152x_scsi_pointer(DONE_SC)->Status ==
-                          SAM_STAT_CHECK_CONDITION) {
+               } else if (aha152x_priv(DONE_SC)->status == SAM_STAT_CHECK_CONDITION) {
 #if defined(AHA152X_STAT)
                        HOSTDATA(shpnt)->busfree_with_check_condition++;
 #endif
 
-                       if(!(aha152x_scsi_pointer(DONE_SC)->phase & not_issued)) {
+                       if (!(aha152x_priv(DONE_SC)->phase & not_issued)) {
                                struct aha152x_scdata *sc;
                                struct scsi_cmnd *ptr = DONE_SC;
                                DONE_SC=NULL;
@@ -1480,7 +1480,7 @@ static void busfree_run(struct Scsi_Host *shpnt)
                        if (!HOSTDATA(shpnt)->commands)
                                SETPORT(PORTA, 0);      /* turn led off */
 
-                       if (!(aha152x_scsi_pointer(ptr)->phase & resetting)) {
+                       if (!(aha152x_priv(ptr)->phase & resetting)) {
                                kfree(ptr->host_scribble);
                                ptr->host_scribble=NULL;
                        }
@@ -1503,13 +1503,12 @@ static void busfree_run(struct Scsi_Host *shpnt)
        DO_UNLOCK(flags);
 
        if(CURRENT_SC) {
-               struct scsi_pointer *scsi_pointer =
-                       aha152x_scsi_pointer(CURRENT_SC);
+               struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC);
 
 #if defined(AHA152X_STAT)
                action++;
 #endif
-               scsi_pointer->phase |= selecting;
+               acp->phase |= selecting;
 
                /* clear selection timeout */
                SETPORT(SSTAT1, SELTO);
@@ -1537,13 +1536,13 @@ static void busfree_run(struct Scsi_Host *shpnt)
  */
 static void seldo_run(struct Scsi_Host *shpnt)
 {
-       struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC);
+       struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC);
 
        SETPORT(SCSISIG, 0);
        SETPORT(SSTAT1, CLRBUSFREE);
        SETPORT(SSTAT1, CLRPHASECHG);
 
-       scsi_pointer->phase &= ~(selecting | not_issued);
+       acp->phase &= ~(selecting | not_issued);
 
        SETPORT(SCSISEQ, 0);
 
@@ -1558,12 +1557,12 @@ static void seldo_run(struct Scsi_Host *shpnt)
 
        ADDMSGO(IDENTIFY(RECONNECT, CURRENT_SC->device->lun));
 
-       if (scsi_pointer->phase & aborting) {
+       if (acp->phase & aborting) {
                ADDMSGO(ABORT);
-       } else if (scsi_pointer->phase & resetting) {
+       } else if (acp->phase & resetting) {
                ADDMSGO(BUS_DEVICE_RESET);
        } else if (SYNCNEG==0 && SYNCHRONOUS) {
-               scsi_pointer->phase |= syncneg;
+               acp->phase |= syncneg;
                MSGOLEN += spi_populate_sync_msg(&MSGO(MSGOLEN), 50, 8);
                SYNCNEG=1;              /* negotiation in progress */
        }
@@ -1578,7 +1577,7 @@ static void seldo_run(struct Scsi_Host *shpnt)
  */
 static void selto_run(struct Scsi_Host *shpnt)
 {
-       struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC);
+       struct aha152x_cmd_priv *acp;
 
        SETPORT(SCSISEQ, 0);
        SETPORT(SSTAT1, CLRSELTIMO);
@@ -1586,9 +1585,10 @@ static void selto_run(struct Scsi_Host *shpnt)
        if (!CURRENT_SC)
                return;
 
-       scsi_pointer->phase &= ~selecting;
+       acp = aha152x_priv(CURRENT_SC);
+       acp->phase &= ~selecting;
 
-       if (scsi_pointer->phase & aborted)
+       if (acp->phase & aborted)
                done(shpnt, SAM_STAT_GOOD, DID_ABORT);
        else if (TESTLO(SSTAT0, SELINGO))
                done(shpnt, SAM_STAT_GOOD, DID_BUS_BUSY);
@@ -1616,10 +1616,9 @@ static void seldi_run(struct Scsi_Host *shpnt)
        SETPORT(SSTAT1, CLRPHASECHG);
 
        if(CURRENT_SC) {
-               struct scsi_pointer *scsi_pointer =
-                       aha152x_scsi_pointer(CURRENT_SC);
+               struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC);
 
-               if (!(scsi_pointer->phase & not_issued))
+               if (!(acp->phase & not_issued))
                        scmd_printk(KERN_ERR, CURRENT_SC,
                                    "command should not have been issued yet\n");
 
@@ -1676,7 +1675,7 @@ static void seldi_run(struct Scsi_Host *shpnt)
 static void msgi_run(struct Scsi_Host *shpnt)
 {
        for(;;) {
-               struct scsi_pointer *scsi_pointer;
+               struct aha152x_cmd_priv *acp;
                int sstat1 = GETPORT(SSTAT1);
 
                if(sstat1 & (PHASECHG|PHASEMIS|BUSFREE) || !(sstat1 & REQINIT))
@@ -1714,9 +1713,9 @@ static void msgi_run(struct Scsi_Host *shpnt)
                                continue;
                        }
 
-                       scsi_pointer = aha152x_scsi_pointer(CURRENT_SC);
-                       scsi_pointer->Message = MSGI(0);
-                       scsi_pointer->phase &= ~disconnected;
+                       acp = aha152x_priv(CURRENT_SC);
+                       acp->message = MSGI(0);
+                       acp->phase &= ~disconnected;
 
                        MSGILEN=0;
 
@@ -1724,8 +1723,8 @@ static void msgi_run(struct Scsi_Host *shpnt)
                        continue;
                }
 
-               scsi_pointer = aha152x_scsi_pointer(CURRENT_SC);
-               scsi_pointer->Message = MSGI(0);
+               acp = aha152x_priv(CURRENT_SC);
+               acp->message = MSGI(0);
 
                switch (MSGI(0)) {
                case DISCONNECT:
@@ -1733,11 +1732,11 @@ static void msgi_run(struct Scsi_Host *shpnt)
                                scmd_printk(KERN_WARNING, CURRENT_SC,
                                            "target was not allowed to disconnect\n");
 
-                       scsi_pointer->phase |= disconnected;
+                       acp->phase |= disconnected;
                        break;
 
                case COMMAND_COMPLETE:
-                       scsi_pointer->phase |= completed;
+                       acp->phase |= completed;
                        break;
 
                case MESSAGE_REJECT:
@@ -1867,11 +1866,9 @@ static void msgi_end(struct Scsi_Host *shpnt)
  */
 static void msgo_init(struct Scsi_Host *shpnt)
 {
-       struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC);
-
        if(MSGOLEN==0) {
-               if ((scsi_pointer->phase & syncneg) && SYNCNEG==2 &&
-                   SYNCRATE==0) {
+               if ((aha152x_priv(CURRENT_SC)->phase & syncneg) &&
+                   SYNCNEG == 2 && SYNCRATE == 0) {
                        ADDMSGO(IDENTIFY(RECONNECT, CURRENT_SC->device->lun));
                } else {
                        scmd_printk(KERN_INFO, CURRENT_SC,
@@ -1888,7 +1885,7 @@ static void msgo_init(struct Scsi_Host *shpnt)
  */
 static void msgo_run(struct Scsi_Host *shpnt)
 {
-       struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC);
+       struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC);
 
        while(MSGO_I<MSGOLEN) {
                if (TESTLO(SSTAT0, SPIORDY))
@@ -1901,13 +1898,13 @@ static void msgo_run(struct Scsi_Host *shpnt)
 
 
                if (MSGO(MSGO_I) & IDENTIFY_BASE)
-                       scsi_pointer->phase |= identified;
+                       acp->phase |= identified;
 
                if (MSGO(MSGO_I)==ABORT)
-                       scsi_pointer->phase |= aborted;
+                       acp->phase |= aborted;
 
                if (MSGO(MSGO_I)==BUS_DEVICE_RESET)
-                       scsi_pointer->phase |= resetted;
+                       acp->phase |= resetted;
 
                SETPORT(SCSIDAT, MSGO(MSGO_I++));
        }
@@ -1936,7 +1933,7 @@ static void msgo_end(struct Scsi_Host *shpnt)
  */
 static void cmd_init(struct Scsi_Host *shpnt)
 {
-       if (aha152x_scsi_pointer(CURRENT_SC)->sent_command) {
+       if (aha152x_priv(CURRENT_SC)->sent_command) {
                scmd_printk(KERN_ERR, CURRENT_SC,
                            "command already sent\n");
                done(shpnt, SAM_STAT_GOOD, DID_ERROR);
@@ -1967,7 +1964,7 @@ static void cmd_end(struct Scsi_Host *shpnt)
                            "command sent incompletely (%d/%d)\n",
                            CMD_I, CURRENT_SC->cmd_len);
        else
-               aha152x_scsi_pointer(CURRENT_SC)->sent_command++;
+               aha152x_priv(CURRENT_SC)->sent_command++;
 }
 
 /*
@@ -1979,7 +1976,7 @@ static void status_run(struct Scsi_Host *shpnt)
        if (TESTLO(SSTAT0, SPIORDY))
                return;
 
-       aha152x_scsi_pointer(CURRENT_SC)->Status = GETPORT(SCSIDAT);
+       aha152x_priv(CURRENT_SC)->status = GETPORT(SCSIDAT);
 
 }
 
@@ -2003,7 +2000,7 @@ static void datai_init(struct Scsi_Host *shpnt)
 
 static void datai_run(struct Scsi_Host *shpnt)
 {
-       struct scsi_pointer *scsi_pointer;
+       struct aha152x_cmd_priv *acp;
        unsigned long the_time;
        int fifodata, data_count;
 
@@ -2041,36 +2038,35 @@ static void datai_run(struct Scsi_Host *shpnt)
                        fifodata = GETPORT(FIFOSTAT);
                }
 
-               scsi_pointer = aha152x_scsi_pointer(CURRENT_SC);
-               if (scsi_pointer->this_residual > 0) {
-                       while (fifodata > 0 && scsi_pointer->this_residual > 0) {
-                               data_count = fifodata > scsi_pointer->this_residual ?
-                                               scsi_pointer->this_residual :
-                                               fifodata;
+               acp = aha152x_priv(CURRENT_SC);
+               if (acp->this_residual > 0) {
+                       while (fifodata > 0 && acp->this_residual > 0) {
+                               data_count = fifodata > acp->this_residual ?
+                                               acp->this_residual : fifodata;
                                fifodata -= data_count;
 
                                if (data_count & 1) {
                                        SETPORT(DMACNTRL0, ENDMA|_8BIT);
-                                       *scsi_pointer->ptr++ = GETPORT(DATAPORT);
-                                       scsi_pointer->this_residual--;
+                                       *acp->ptr++ = GETPORT(DATAPORT);
+                                       acp->this_residual--;
                                        DATA_LEN++;
                                        SETPORT(DMACNTRL0, ENDMA);
                                }
 
                                if (data_count > 1) {
                                        data_count >>= 1;
-                                       insw(DATAPORT, scsi_pointer->ptr, data_count);
-                                       scsi_pointer->ptr += 2 * data_count;
-                                       scsi_pointer->this_residual -= 2 * data_count;
+                                       insw(DATAPORT, acp->ptr, data_count);
+                                       acp->ptr += 2 * data_count;
+                                       acp->this_residual -= 2 * data_count;
                                        DATA_LEN += 2 * data_count;
                                }
 
-                               if (scsi_pointer->this_residual == 0 &&
-                                   !sg_is_last(scsi_pointer->buffer)) {
+                               if (acp->this_residual == 0 &&
+                                   !sg_is_last(acp->buffer)) {
                                        /* advance to next buffer */
-                                       scsi_pointer->buffer = sg_next(scsi_pointer->buffer);
-                                       scsi_pointer->ptr           = SG_ADDRESS(scsi_pointer->buffer);
-                                       scsi_pointer->this_residual = scsi_pointer->buffer->length;
+                                       acp->buffer = sg_next(acp->buffer);
+                                       acp->ptr = SG_ADDRESS(acp->buffer);
+                                       acp->this_residual = acp->buffer->length;
                                }
                        }
                } else if (fifodata > 0) {
@@ -2138,15 +2134,15 @@ static void datao_init(struct Scsi_Host *shpnt)
 
 static void datao_run(struct Scsi_Host *shpnt)
 {
-       struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC);
+       struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC);
        unsigned long the_time;
        int data_count;
 
        /* until phase changes or all data sent */
-       while (TESTLO(DMASTAT, INTSTAT) && scsi_pointer->this_residual > 0) {
+       while (TESTLO(DMASTAT, INTSTAT) && acp->this_residual > 0) {
                data_count = 128;
-               if (data_count > scsi_pointer->this_residual)
-                       data_count = scsi_pointer->this_residual;
+               if (data_count > acp->this_residual)
+                       data_count = acp->this_residual;
 
                if(TESTLO(DMASTAT, DFIFOEMP)) {
                        scmd_printk(KERN_ERR, CURRENT_SC,
@@ -2157,26 +2153,25 @@ static void datao_run(struct Scsi_Host *shpnt)
 
                if(data_count & 1) {
                        SETPORT(DMACNTRL0,WRITE_READ|ENDMA|_8BIT);
-                       SETPORT(DATAPORT, *scsi_pointer->ptr++);
-                       scsi_pointer->this_residual--;
+                       SETPORT(DATAPORT, *acp->ptr++);
+                       acp->this_residual--;
                        CMD_INC_RESID(CURRENT_SC, -1);
                        SETPORT(DMACNTRL0,WRITE_READ|ENDMA);
                }
 
                if(data_count > 1) {
                        data_count >>= 1;
-                       outsw(DATAPORT, scsi_pointer->ptr, data_count);
-                       scsi_pointer->ptr           += 2 * data_count;
-                       scsi_pointer->this_residual -= 2 * data_count;
+                       outsw(DATAPORT, acp->ptr, data_count);
+                       acp->ptr += 2 * data_count;
+                       acp->this_residual -= 2 * data_count;
                        CMD_INC_RESID(CURRENT_SC, -2 * data_count);
                }
 
-               if (scsi_pointer->this_residual == 0 &&
-                   !sg_is_last(scsi_pointer->buffer)) {
+               if (acp->this_residual == 0 && !sg_is_last(acp->buffer)) {
                        /* advance to next buffer */
-                       scsi_pointer->buffer = sg_next(scsi_pointer->buffer);
-                       scsi_pointer->ptr           = SG_ADDRESS(scsi_pointer->buffer);
-                       scsi_pointer->this_residual = scsi_pointer->buffer->length;
+                       acp->buffer = sg_next(acp->buffer);
+                       acp->ptr = SG_ADDRESS(acp->buffer);
+                       acp->this_residual = acp->buffer->length;
                }
 
                the_time=jiffies + 100*HZ;
@@ -2192,7 +2187,7 @@ static void datao_run(struct Scsi_Host *shpnt)
 
 static void datao_end(struct Scsi_Host *shpnt)
 {
-       struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC);
+       struct aha152x_cmd_priv *acp = aha152x_priv(CURRENT_SC);
 
        if(TESTLO(DMASTAT, DFIFOEMP)) {
                u32 datao_cnt = GETSTCNT();
@@ -2211,10 +2206,9 @@ static void datao_end(struct Scsi_Host *shpnt)
                        sg = sg_next(sg);
                }
 
-               scsi_pointer->buffer = sg;
-               scsi_pointer->ptr = SG_ADDRESS(scsi_pointer->buffer) + done;
-               scsi_pointer->this_residual = scsi_pointer->buffer->length -
-                       done;
+               acp->buffer = sg;
+               acp->ptr = SG_ADDRESS(acp->buffer) + done;
+               acp->this_residual = acp->buffer->length - done;
        }
 
        SETPORT(SXFRCTL0, CH1|CLRCH1|CLRSTCNT);
@@ -2229,7 +2223,6 @@ static void datao_end(struct Scsi_Host *shpnt)
  */
 static int update_state(struct Scsi_Host *shpnt)
 {
-       struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(CURRENT_SC);
        int dataphase=0;
        unsigned int stat0 = GETPORT(SSTAT0);
        unsigned int stat1 = GETPORT(SSTAT1);
@@ -2244,7 +2237,7 @@ static int update_state(struct Scsi_Host *shpnt)
        } else if (stat0 & SELDI && PREVSTATE == busfree) {
                STATE=seldi;
        } else if (stat0 & SELDO && CURRENT_SC &&
-                  (scsi_pointer->phase & selecting)) {
+                  (aha152x_priv(CURRENT_SC)->phase & selecting)) {
                STATE=seldo;
        } else if(stat1 & SELTO) {
                STATE=selto;
@@ -2376,8 +2369,7 @@ static void is_complete(struct Scsi_Host *shpnt)
                        SETPORT(SXFRCTL0, CH1);
                        SETPORT(DMACNTRL0, 0);
                        if(CURRENT_SC)
-                               aha152x_scsi_pointer(CURRENT_SC)->phase &=
-                                       ~spiordy;
+                               aha152x_priv(CURRENT_SC)->phase &= ~spiordy;
                }
 
                /*
@@ -2399,8 +2391,7 @@ static void is_complete(struct Scsi_Host *shpnt)
                        SETPORT(DMACNTRL0, 0);
                        SETPORT(SXFRCTL0, CH1|SPIOEN);
                        if(CURRENT_SC)
-                               aha152x_scsi_pointer(CURRENT_SC)->phase |=
-                                       spiordy;
+                               aha152x_priv(CURRENT_SC)->phase |= spiordy;
                }
 
                /*
@@ -2490,7 +2481,7 @@ static void disp_enintr(struct Scsi_Host *shpnt)
  */
 static void show_command(struct scsi_cmnd *ptr)
 {
-       const int phase = aha152x_scsi_pointer(ptr)->phase;
+       const int phase = aha152x_priv(ptr)->phase;
 
        scsi_print_command(ptr);
        scmd_printk(KERN_DEBUG, ptr,
@@ -2538,8 +2529,8 @@ static void show_queues(struct Scsi_Host *shpnt)
 
 static void get_command(struct seq_file *m, struct scsi_cmnd * ptr)
 {
-       struct scsi_pointer *scsi_pointer = aha152x_scsi_pointer(ptr);
-       const int phase = scsi_pointer->phase;
+       struct aha152x_cmd_priv *acp = aha152x_priv(ptr);
+       const int phase = acp->phase;
        int i;
 
        seq_printf(m, "%p: target=%d; lun=%d; cmnd=( ",
@@ -2549,8 +2540,8 @@ static void get_command(struct seq_file *m, struct scsi_cmnd * ptr)
                seq_printf(m, "0x%02x ", ptr->cmnd[i]);
 
        seq_printf(m, "); resid=%d; residual=%d; buffers=%d; phase |",
-               scsi_get_resid(ptr), scsi_pointer->this_residual,
-               sg_nents(scsi_pointer->buffer) - 1);
+               scsi_get_resid(ptr), acp->this_residual,
+               sg_nents(acp->buffer) - 1);
 
        if (phase & not_issued)
                seq_puts(m, "not issued|");
index 679a4fd138746e406d95877043c6fdb0c668c035..793fe19993a90e4b71a121fe4ab55d5fa8596e52 100644 (file)
@@ -420,8 +420,6 @@ ahd_unlock(struct ahd_softc *ahd, unsigned long *flags)
 
 /* config registers for header type 0 devices */
 #define PCIR_MAPS      0x10
-#define PCIR_SUBVEND_0 0x2c
-#define PCIR_SUBDEV_0  0x2e
 
 /****************************** PCI-X definitions *****************************/
 #define PCIXR_COMMAND  0x96
index 2f0bdb9225a40183a0045c3a2d598e3230f7c851..5fad41b1ab58d40bd28a0ecff1e75fdd5f515687 100644 (file)
@@ -260,8 +260,8 @@ ahd_find_pci_device(ahd_dev_softc_t pci)
 
        vendor = ahd_pci_read_config(pci, PCIR_DEVVENDOR, /*bytes*/2);
        device = ahd_pci_read_config(pci, PCIR_DEVICE, /*bytes*/2);
-       subvendor = ahd_pci_read_config(pci, PCIR_SUBVEND_0, /*bytes*/2);
-       subdevice = ahd_pci_read_config(pci, PCIR_SUBDEV_0, /*bytes*/2);
+       subvendor = ahd_pci_read_config(pci, PCI_SUBSYSTEM_VENDOR_ID, /*bytes*/2);
+       subdevice = ahd_pci_read_config(pci, PCI_SUBSYSTEM_ID, /*bytes*/2);
        full_id = ahd_compose_id(device,
                                 vendor,
                                 subdevice,
@@ -298,7 +298,7 @@ ahd_pci_config(struct ahd_softc *ahd, const struct ahd_pci_identity *entry)
         * Record if this is an HP board.
         */
        subvendor = ahd_pci_read_config(ahd->dev_softc,
-                                       PCIR_SUBVEND_0, /*bytes*/2);
+                                       PCI_SUBSYSTEM_VENDOR_ID, /*bytes*/2);
        if (subvendor == SUBID_HP)
                ahd->flags |= AHD_HP_BOARD;
 
index 4782a304e93cc420a469355a6e66a6f2dfd2f9fe..51d9f4de07346a83476414f358e6f57279f44734 100644 (file)
@@ -433,8 +433,6 @@ ahc_unlock(struct ahc_softc *ahc, unsigned long *flags)
 
 /* config registers for header type 0 devices */
 #define PCIR_MAPS      0x10
-#define PCIR_SUBVEND_0 0x2c
-#define PCIR_SUBDEV_0  0x2e
 
 typedef enum
 {
index dab3a6d12c4d22395822fb45e9fd4aba4de9493e..2d4c85426dc3eb28a93ef5036c158fb53b8feeae 100644 (file)
@@ -673,8 +673,8 @@ ahc_find_pci_device(ahc_dev_softc_t pci)
 
        vendor = ahc_pci_read_config(pci, PCIR_DEVVENDOR, /*bytes*/2);
        device = ahc_pci_read_config(pci, PCIR_DEVICE, /*bytes*/2);
-       subvendor = ahc_pci_read_config(pci, PCIR_SUBVEND_0, /*bytes*/2);
-       subdevice = ahc_pci_read_config(pci, PCIR_SUBDEV_0, /*bytes*/2);
+       subvendor = ahc_pci_read_config(pci, PCI_SUBSYSTEM_VENDOR_ID, /*bytes*/2);
+       subdevice = ahc_pci_read_config(pci, PCI_SUBSYSTEM_ID, /*bytes*/2);
        full_id = ahc_compose_id(device, vendor, subdevice, subvendor);
 
        /*
index 0103f811cc252f9749b389bf95a0071ccb4dd1fd..77654438559864829fe0a0629472f93fb780ac5b 100644 (file)
@@ -1169,7 +1169,7 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba,
                ofld_kcqe->fcoe_conn_context_id);
        interface = tgt->port->priv;
        if (hba != interface->hba) {
-               printk(KERN_ERR PFX "ERROR:ofld_cmpl: HBA mis-match\n");
+               printk(KERN_ERR PFX "ERROR:ofld_cmpl: HBA mismatch\n");
                goto ofld_cmpl_err;
        }
        /*
@@ -1226,12 +1226,12 @@ static void bnx2fc_process_enable_conn_cmpl(struct bnx2fc_hba *hba,
         * and enable
         */
        if (tgt->context_id != context_id) {
-               printk(KERN_ERR PFX "context id mis-match\n");
+               printk(KERN_ERR PFX "context id mismatch\n");
                return;
        }
        interface = tgt->port->priv;
        if (hba != interface->hba) {
-               printk(KERN_ERR PFX "bnx2fc-enbl_cmpl: HBA mis-match\n");
+               printk(KERN_ERR PFX "bnx2fc-enbl_cmpl: HBA mismatch\n");
                goto enbl_cmpl_err;
        }
        if (!ofld_kcqe->completion_status)
index 5521469ce678b51773bdf3e3e6b46ea2140519ac..6c864b093ac94aad8d8182807101f4740ae272a9 100644 (file)
@@ -1977,7 +1977,7 @@ static int bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
                if (nopin->cq_req_sn != qp->cqe_exp_seq_sn)
                        break;
 
-               if (unlikely(test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx))) {
+               if (unlikely(test_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags))) {
                        if (nopin->op_code == ISCSI_OP_NOOP_IN &&
                            nopin->itt == (u16) RESERVED_ITT) {
                                printk(KERN_ALERT "bnx2i: Unsolicited "
@@ -2398,7 +2398,7 @@ static void bnx2i_process_conn_destroy_cmpl(struct bnx2i_hba *hba,
        }
 
        if (hba != ep->hba) {
-               printk(KERN_ALERT "conn destroy- error hba mis-match\n");
+               printk(KERN_ALERT "conn destroy- error hba mismatch\n");
                return;
        }
 
@@ -2432,7 +2432,7 @@ static void bnx2i_process_ofld_cmpl(struct bnx2i_hba *hba,
        }
 
        if (hba != ep->hba) {
-               printk(KERN_ALERT "ofld_cmpl: error hba mis-match\n");
+               printk(KERN_ALERT "ofld_cmpl: error hba mismatch\n");
                return;
        }
 
index fe86fd61a995c19c28063a7238efc6ea2af726e4..15fbd09baa943a6caf1651ceddd336b5a11b5649 100644 (file)
@@ -1721,7 +1721,7 @@ static int bnx2i_tear_down_conn(struct bnx2i_hba *hba,
                        struct iscsi_conn *conn = ep->conn->cls_conn->dd_data;
 
                        /* Must suspend all rx queue activity for this ep */
-                       set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
+                       set_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags);
                }
                /* CONN_DISCONNECT timeout may or may not be an issue depending
                 * on what transpired in the TCP layer; different targets behave
index 8c7d4dda4cf29944c5f73a487852b818112103e1..4365d52c6430e79db085e175e724c5d6eb34822f 100644 (file)
@@ -1634,11 +1634,11 @@ void cxgbi_conn_pdu_ready(struct cxgbi_sock *csk)
        log_debug(1 << CXGBI_DBG_PDU_RX,
                "csk 0x%p, conn 0x%p.\n", csk, conn);
 
-       if (unlikely(!conn || conn->suspend_rx)) {
+       if (unlikely(!conn || test_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags))) {
                log_debug(1 << CXGBI_DBG_PDU_RX,
-                       "csk 0x%p, conn 0x%p, id %d, suspend_rx %lu!\n",
+                       "csk 0x%p, conn 0x%p, id %d, conn flags 0x%lx!\n",
                        csk, conn, conn ? conn->id : 0xFF,
-                       conn ? conn->suspend_rx : 0xFF);
+                       conn ? conn->flags : 0xFF);
                return;
        }
 
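Annotation: the bnx2i and cxgbi hunks above, and the libiscsi hunks further below, replace the separate conn->suspend_rx/suspend_tx words (each toggled via ISCSI_SUSPEND_BIT) with named bits in a single conn->flags word. A minimal userspace model of the pattern, with simplified stand-ins for the kernel's set_bit()/clear_bit()/test_bit():

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Bit numbers, mirroring the new enum in the patch. */
enum {
	ISCSI_CONN_FLAG_SUSPEND_TX,
	ISCSI_CONN_FLAG_SUSPEND_RX,
	ISCSI_CONN_FLAG_BOUND,
};

struct iscsi_conn {
	atomic_ulong flags;	/* replaces suspend_tx + suspend_rx words */
};

static void set_bit_ul(int nr, atomic_ulong *p)
{
	atomic_fetch_or(p, 1UL << nr);
}

static void clear_bit_ul(int nr, atomic_ulong *p)
{
	atomic_fetch_and(p, ~(1UL << nr));
}

static bool test_bit_ul(int nr, atomic_ulong *p)
{
	return atomic_load(p) & (1UL << nr);
}

int main(void)
{
	struct iscsi_conn conn = { 0 };

	set_bit_ul(ISCSI_CONN_FLAG_SUSPEND_RX, &conn.flags);
	if (test_bit_ul(ISCSI_CONN_FLAG_SUSPEND_RX, &conn.flags))
		printf("rx suspended, flags 0x%lx\n",
		       atomic_load(&conn.flags));
	clear_bit_ul(ISCSI_CONN_FLAG_SUSPEND_RX, &conn.flags);
	return 0;
}
```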
index 37d06f993b761ec4feb2591435f1bd5805997156..1d9be771f3ee0508098d4e3445b345e527827b6e 100644 (file)
@@ -1172,9 +1172,8 @@ static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
        case SCSI_ACCESS_STATE_OPTIMAL:
        case SCSI_ACCESS_STATE_ACTIVE:
        case SCSI_ACCESS_STATE_LBA:
-               return BLK_STS_OK;
        case SCSI_ACCESS_STATE_TRANSITIONING:
-               return BLK_STS_AGAIN;
+               return BLK_STS_OK;
        default:
                req->rq_flags |= RQF_QUIET;
                return BLK_STS_IOERR;
index 461ef8a76c4ce700d432ea1e5054bcbe673227ef..4bda2f6cb3526f0bf16e41f08b69c34e6a4050ee 100644 (file)
@@ -442,7 +442,6 @@ void hisi_sas_task_deliver(struct hisi_hba *hisi_hba,
        case SAS_PROTOCOL_INTERNAL_ABORT:
                hisi_sas_task_prep_abort(hisi_hba, slot);
                break;
-       fallthrough;
        default:
                return;
        }
index 80238e6a3c9839294b438e7f74e1fb4602c36ca5..eee1a24f7e15e8c8ac6700d5972b8b439823fe9f 100644 (file)
@@ -36,7 +36,7 @@
 
 #define IBMVSCSIS_VERSION      "v0.2"
 
-#define        INITIAL_SRP_LIMIT       800
+#define        INITIAL_SRP_LIMIT       1024
 #define        DEFAULT_MAX_SECTORS     256
 #define MAX_TXU                        1024 * 1024
 
index d690d9cf7eb15a325920a57672a200a2530ad4d1..35589b6af90d6e40896b2b5979bbb8cad6a749db 100644 (file)
@@ -413,7 +413,7 @@ static void sci_controller_event_completion(struct isci_host *ihost, u32 ent)
                                dev_warn(&ihost->pdev->dev,
                                         "%s: SCIC Controller 0x%p received "
                                         "event 0x%x for io request object "
-                                        "that doesnt exist.\n",
+                                        "that doesn't exist.\n",
                                         __func__,
                                         ihost,
                                         ent);
@@ -428,7 +428,7 @@ static void sci_controller_event_completion(struct isci_host *ihost, u32 ent)
                                dev_warn(&ihost->pdev->dev,
                                         "%s: SCIC Controller 0x%p received "
                                         "event 0x%x for remote device object "
-                                        "that doesnt exist.\n",
+                                        "that doesn't exist.\n",
                                         __func__,
                                         ihost,
                                         ent);
@@ -462,7 +462,7 @@ static void sci_controller_event_completion(struct isci_host *ihost, u32 ent)
                } else
                        dev_err(&ihost->pdev->dev,
                                "%s: SCIC Controller 0x%p received event 0x%x "
-                               "for remote device object 0x%0x that doesnt "
+                               "for remote device object 0x%0x that doesn't "
                                "exist.\n",
                                __func__,
                                ihost,
index d09926e6c8a86ef225c21f8e7ee2929f9134840c..797abf4f53995a124c0c48b586f93f9feb505d25 100644 (file)
@@ -678,7 +678,8 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
        struct iscsi_task *task;
        itt_t itt;
 
-       if (session->state == ISCSI_STATE_TERMINATE)
+       if (session->state == ISCSI_STATE_TERMINATE ||
+           !test_bit(ISCSI_CONN_FLAG_BOUND, &conn->flags))
                return NULL;
 
        if (opcode == ISCSI_OP_LOGIN || opcode == ISCSI_OP_TEXT) {
@@ -1392,8 +1393,8 @@ static bool iscsi_set_conn_failed(struct iscsi_conn *conn)
        if (conn->stop_stage == 0)
                session->state = ISCSI_STATE_FAILED;
 
-       set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
-       set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
+       set_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags);
+       set_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags);
        return true;
 }
 
@@ -1454,7 +1455,7 @@ static int iscsi_xmit_task(struct iscsi_conn *conn, struct iscsi_task *task,
         * Do this after dropping the extra ref because if this was a requeue
         * it's removed from that list and cleanup_queued_task would miss it.
         */
-       if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
+       if (test_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags)) {
                /*
                 * Save the task and ref in case we weren't cleaning up this
                 * task and get woken up again.
@@ -1532,7 +1533,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
        int rc = 0;
 
        spin_lock_bh(&conn->session->frwd_lock);
-       if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
+       if (test_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags)) {
                ISCSI_DBG_SESSION(conn->session, "Tx suspended!\n");
                spin_unlock_bh(&conn->session->frwd_lock);
                return -ENODATA;
@@ -1746,7 +1747,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
                goto fault;
        }
 
-       if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
+       if (test_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags)) {
                reason = FAILURE_SESSION_IN_RECOVERY;
                sc->result = DID_REQUEUE << 16;
                goto fault;
@@ -1935,7 +1936,7 @@ static void fail_scsi_tasks(struct iscsi_conn *conn, u64 lun, int error)
 void iscsi_suspend_queue(struct iscsi_conn *conn)
 {
        spin_lock_bh(&conn->session->frwd_lock);
-       set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+       set_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags);
        spin_unlock_bh(&conn->session->frwd_lock);
 }
 EXPORT_SYMBOL_GPL(iscsi_suspend_queue);
@@ -1953,7 +1954,7 @@ void iscsi_suspend_tx(struct iscsi_conn *conn)
        struct Scsi_Host *shost = conn->session->host;
        struct iscsi_host *ihost = shost_priv(shost);
 
-       set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+       set_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags);
        if (ihost->workq)
                flush_workqueue(ihost->workq);
 }
@@ -1961,7 +1962,7 @@ EXPORT_SYMBOL_GPL(iscsi_suspend_tx);
 
 static void iscsi_start_tx(struct iscsi_conn *conn)
 {
-       clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+       clear_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags);
        iscsi_conn_queue_work(conn);
 }
 
@@ -2214,6 +2215,8 @@ void iscsi_conn_unbind(struct iscsi_cls_conn *cls_conn, bool is_active)
        iscsi_suspend_tx(conn);
 
        spin_lock_bh(&session->frwd_lock);
+       clear_bit(ISCSI_CONN_FLAG_BOUND, &conn->flags);
+
        if (!is_active) {
                /*
                 * if logout timed out before userspace could even send a PDU
@@ -3045,7 +3048,6 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
        if (!cls_conn)
                return NULL;
        conn = cls_conn->dd_data;
-       memset(conn, 0, sizeof(*conn) + dd_size);
 
        conn->dd_data = cls_conn->dd_data + sizeof(*conn);
        conn->session = session;
@@ -3318,6 +3320,8 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session,
        spin_lock_bh(&session->frwd_lock);
        if (is_leading)
                session->leadconn = conn;
+
+       set_bit(ISCSI_CONN_FLAG_BOUND, &conn->flags);
        spin_unlock_bh(&session->frwd_lock);
 
        /*
@@ -3330,8 +3334,8 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session,
        /*
         * Unblock xmitworker(), Login Phase will pass through.
         */
-       clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
-       clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+       clear_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags);
+       clear_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags);
        return 0;
 }
 EXPORT_SYMBOL_GPL(iscsi_conn_bind);
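Annotation: besides the suspend-bit rename, this libiscsi hunk introduces ISCSI_CONN_FLAG_BOUND, set in iscsi_conn_bind() and cleared in iscsi_conn_unbind(), so __iscsi_conn_send_pdu() can refuse PDUs on an unbound connection. A rough userspace model, with a mutex standing in for session->frwd_lock:

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct conn {
	pthread_mutex_t lock;	/* stand-in for session->frwd_lock */
	bool bound;		/* stand-in for ISCSI_CONN_FLAG_BOUND */
};

static bool send_pdu(struct conn *c)
{
	bool ok;

	pthread_mutex_lock(&c->lock);
	ok = c->bound;
	pthread_mutex_unlock(&c->lock);
	if (!ok)
		fprintf(stderr, "conn not bound, dropping PDU\n");
	return ok;
}

int main(void)
{
	struct conn c = { PTHREAD_MUTEX_INITIALIZER, false };

	send_pdu(&c);			/* refused: bind has not run yet */

	pthread_mutex_lock(&c.lock);
	c.bound = true;			/* iscsi_conn_bind() sets the flag */
	pthread_mutex_unlock(&c.lock);

	if (send_pdu(&c))
		printf("PDU accepted\n");
	return 0;
}
```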
index 2e9ffe3d1a55e7ccff3203947493a2183a5bd92f..883005757ddb82cdab6c74b46afd6a9586fd2e0e 100644 (file)
@@ -927,7 +927,7 @@ int iscsi_tcp_recv_skb(struct iscsi_conn *conn, struct sk_buff *skb,
         */
        conn->last_recv = jiffies;
 
-       if (unlikely(conn->suspend_rx)) {
+       if (unlikely(test_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags))) {
                ISCSI_DBG_TCP(conn, "Rx suspended!\n");
                *status = ISCSI_TCP_SUSPENDED;
                return 0;
index f0cf8ffdc5f3ea7f6c0ba38ca04146c2ed6e256a..0025760230e5182ffdeddb4e5a6aca470024394d 100644 (file)
@@ -897,6 +897,11 @@ enum lpfc_irq_chann_mode {
        NHT_MODE,
 };
 
+enum lpfc_hba_bit_flags {
+       FABRIC_COMANDS_BLOCKED,
+       HBA_PCI_ERR,
+};
+
 struct lpfc_hba {
        /* SCSI interface function jump table entries */
        struct lpfc_io_buf * (*lpfc_get_scsi_buf)
@@ -1043,7 +1048,6 @@ struct lpfc_hba {
                                         * Firmware supports Forced Link Speed
                                         * capability
                                         */
-#define HBA_PCI_ERR            0x80000 /* The PCI slot is offline */
 #define HBA_FLOGI_ISSUED       0x100000 /* FLOGI was issued */
 #define HBA_SHORT_CMF          0x200000 /* shorter CMF timer routine */
 #define HBA_CGN_DAY_WRAP       0x400000 /* HBA Congestion info day wraps */
@@ -1350,7 +1354,6 @@ struct lpfc_hba {
        atomic_t fabric_iocb_count;
        struct timer_list fabric_block_timer;
        unsigned long bit_flags;
-#define        FABRIC_COMANDS_BLOCKED  0
        atomic_t num_rsrc_err;
        atomic_t num_cmd_success;
        unsigned long last_rsrc_error_time;
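Annotation: HBA_PCI_ERR moves from a value mask inside hba_flag (guarded by hbalock) to the new lpfc_hba_bit_flags enum, whose members are bit numbers consumed by the atomic set_bit()/test_bit() family on bit_flags. The two styles are easy to confuse; a small demonstrative sketch:

```c
#include <stdio.h>

/* Old style: a value mask OR'd into hba_flag under hbalock. */
#define HBA_PCI_ERR_MASK	0x80000

/* New style: bit *numbers* for set_bit()/test_bit() on bit_flags. */
enum lpfc_hba_bit_flags {
	FABRIC_COMANDS_BLOCKED,	/* 0 */
	HBA_PCI_ERR,		/* 1 */
};

int main(void)
{
	unsigned long hba_flag = 0, bit_flags = 0;

	hba_flag |= HBA_PCI_ERR_MASK;		/* the macro is the value */
	bit_flags |= 1UL << HBA_PCI_ERR;	/* the enum is a shift count */

	printf("mask style 0x%lx, bitop style 0x%lx\n", hba_flag, bit_flags);
	return 0;
}
```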
index 96408cd6c4c81668a68d31e5c7f6793711fe7b41..9897a1aa387b63a00a4e34fcd6975fb351fb3a39 100644 (file)
@@ -670,3 +670,6 @@ struct lpfc_vmid *lpfc_get_vmid_from_hashtable(struct lpfc_vport *vport,
                                              uint32_t hash, uint8_t *buf);
 void lpfc_vmid_vport_cleanup(struct lpfc_vport *vport);
 int lpfc_issue_els_qfpa(struct lpfc_vport *vport);
+
+void lpfc_sli_rpi_release(struct lpfc_vport *vport,
+                         struct lpfc_nodelist *ndlp);
index ef6e8cd8c26ae5adba9ab8b73457103799faed4a..872a26376ccbb84faf6cc6c5804ed5aeea6102cf 100644 (file)
@@ -1330,7 +1330,7 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) ==
                    LPFC_SLI_INTF_IF_TYPE_0) {
                        /* FLOGI needs to be 3 for WQE FCFI */
-                       ct = ((SLI4_CT_FCFI >> 1) & 1) | (SLI4_CT_FCFI & 1);
+                       ct = SLI4_CT_FCFI;
                        bf_set(wqe_ct, &wqe->els_req.wqe_com, ct);
 
                        /* Set the fcfi to the fcfi we registered with */
index 0144da30e3dbd0c5448d55e0fb71ad9126312143..2b877dff5ed4fb515602c227b69a53c7fe53fdd1 100644 (file)
@@ -109,8 +109,8 @@ lpfc_rport_invalid(struct fc_rport *rport)
 
        ndlp = rdata->pnode;
        if (!rdata->pnode) {
-               pr_err("**** %s: NULL ndlp on rport x%px SID x%x\n",
-                      __func__, rport, rport->scsi_target_id);
+               pr_info("**** %s: NULL ndlp on rport x%px SID x%x\n",
+                       __func__, rport, rport->scsi_target_id);
                return -EINVAL;
        }
 
@@ -169,9 +169,10 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
 
        lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
                         "3181 dev_loss_callbk x%06x, rport x%px flg x%x "
-                        "load_flag x%x refcnt %d\n",
+                        "load_flag x%x refcnt %d state %d xpt x%x\n",
                         ndlp->nlp_DID, ndlp->rport, ndlp->nlp_flag,
-                        vport->load_flag, kref_read(&ndlp->kref));
+                        vport->load_flag, kref_read(&ndlp->kref),
+                        ndlp->nlp_state, ndlp->fc4_xpt_flags);
 
        /* Don't schedule a worker thread event if the vport is going down.
         * The teardown process cleans up the node via lpfc_drop_node.
@@ -181,6 +182,11 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
                ndlp->rport = NULL;
 
                ndlp->fc4_xpt_flags &= ~SCSI_XPT_REGD;
+               /* clear the NLP_XPT_REGD if the node is not registered
+                * with nvme-fc
+                */
+               if (ndlp->fc4_xpt_flags == NLP_XPT_REGD)
+                       ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD;
 
                /* Remove the node reference from remote_port_add now.
                 * The driver will not call remote_port_delete.
@@ -225,18 +231,36 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
        ndlp->rport = NULL;
        spin_unlock_irqrestore(&ndlp->lock, iflags);
 
-       /* We need to hold the node by incrementing the reference
-        * count until this queued work is done
-        */
-       evtp->evt_arg1 = lpfc_nlp_get(ndlp);
+       if (phba->worker_thread) {
+               /* We need to hold the node by incrementing the reference
+                * count until this queued work is done
+                */
+               evtp->evt_arg1 = lpfc_nlp_get(ndlp);
+
+               spin_lock_irqsave(&phba->hbalock, iflags);
+               if (evtp->evt_arg1) {
+                       evtp->evt = LPFC_EVT_DEV_LOSS;
+                       list_add_tail(&evtp->evt_listp, &phba->work_list);
+                       lpfc_worker_wake_up(phba);
+               }
+               spin_unlock_irqrestore(&phba->hbalock, iflags);
+       } else {
+               lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
+                                "3188 worker thread is stopped %s x%06x, "
+                                "rport x%px flg x%x load_flag x%x refcnt "
+                                "%d\n", __func__, ndlp->nlp_DID,
+                                ndlp->rport, ndlp->nlp_flag,
+                                vport->load_flag, kref_read(&ndlp->kref));
+               if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD)) {
+                       spin_lock_irqsave(&ndlp->lock, iflags);
+                       /* Node is in dev loss.  No further transaction. */
+                       ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS;
+                       spin_unlock_irqrestore(&ndlp->lock, iflags);
+                       lpfc_disc_state_machine(vport, ndlp, NULL,
+                                               NLP_EVT_DEVICE_RM);
+               }
 
-       spin_lock_irqsave(&phba->hbalock, iflags);
-       if (evtp->evt_arg1) {
-               evtp->evt = LPFC_EVT_DEV_LOSS;
-               list_add_tail(&evtp->evt_listp, &phba->work_list);
-               lpfc_worker_wake_up(phba);
        }
-       spin_unlock_irqrestore(&phba->hbalock, iflags);
 
        return;
 }
@@ -503,11 +527,12 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
                lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
                                 "0203 Devloss timeout on "
                                 "WWPN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x "
-                                "NPort x%06x Data: x%x x%x x%x\n",
+                                "NPort x%06x Data: x%x x%x x%x refcnt %d\n",
                                 *name, *(name+1), *(name+2), *(name+3),
                                 *(name+4), *(name+5), *(name+6), *(name+7),
                                 ndlp->nlp_DID, ndlp->nlp_flag,
-                                ndlp->nlp_state, ndlp->nlp_rpi);
+                                ndlp->nlp_state, ndlp->nlp_rpi,
+                                kref_read(&ndlp->kref));
        } else {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_TRACE_EVENT,
                                 "0204 Devloss timeout on "
@@ -755,18 +780,22 @@ lpfc_work_list_done(struct lpfc_hba *phba)
        int free_evt;
        int fcf_inuse;
        uint32_t nlp_did;
+       bool hba_pci_err;
 
        spin_lock_irq(&phba->hbalock);
        while (!list_empty(&phba->work_list)) {
                list_remove_head((&phba->work_list), evtp, typeof(*evtp),
                                 evt_listp);
                spin_unlock_irq(&phba->hbalock);
+               hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags);
                free_evt = 1;
                switch (evtp->evt) {
                case LPFC_EVT_ELS_RETRY:
                        ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1);
-                       lpfc_els_retry_delay_handler(ndlp);
-                       free_evt = 0; /* evt is part of ndlp */
+                       if (!hba_pci_err) {
+                               lpfc_els_retry_delay_handler(ndlp);
+                               free_evt = 0; /* evt is part of ndlp */
+                       }
                        /* decrement the node reference count held
                         * for this queued work
                         */
@@ -788,8 +817,10 @@ lpfc_work_list_done(struct lpfc_hba *phba)
                        break;
                case LPFC_EVT_RECOVER_PORT:
                        ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
-                       lpfc_sli_abts_recover_port(ndlp->vport, ndlp);
-                       free_evt = 0;
+                       if (!hba_pci_err) {
+                               lpfc_sli_abts_recover_port(ndlp->vport, ndlp);
+                               free_evt = 0;
+                       }
                        /* decrement the node reference count held for
                         * this queued work
                         */
@@ -859,14 +890,18 @@ lpfc_work_done(struct lpfc_hba *phba)
        struct lpfc_vport **vports;
        struct lpfc_vport *vport;
        int i;
+       bool hba_pci_err;
 
+       hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags);
        spin_lock_irq(&phba->hbalock);
        ha_copy = phba->work_ha;
        phba->work_ha = 0;
        spin_unlock_irq(&phba->hbalock);
+       if (hba_pci_err)
+               ha_copy = 0;
 
        /* First, try to post the next mailbox command to SLI4 device */
-       if (phba->pci_dev_grp == LPFC_PCI_DEV_OC)
+       if (phba->pci_dev_grp == LPFC_PCI_DEV_OC && !hba_pci_err)
                lpfc_sli4_post_async_mbox(phba);
 
        if (ha_copy & HA_ERATT) {
@@ -886,7 +921,7 @@ lpfc_work_done(struct lpfc_hba *phba)
                lpfc_handle_latt(phba);
 
        /* Handle VMID Events */
-       if (lpfc_is_vmid_enabled(phba)) {
+       if (lpfc_is_vmid_enabled(phba) && !hba_pci_err) {
                if (phba->pport->work_port_events &
                    WORKER_CHECK_VMID_ISSUE_QFPA) {
                        lpfc_check_vmid_qfpa_issue(phba);
@@ -936,6 +971,8 @@ lpfc_work_done(struct lpfc_hba *phba)
                        work_port_events = vport->work_port_events;
                        vport->work_port_events &= ~work_port_events;
                        spin_unlock_irq(&vport->work_port_lock);
+                       if (hba_pci_err)
+                               continue;
                        if (work_port_events & WORKER_DISC_TMO)
                                lpfc_disc_timeout_handler(vport);
                        if (work_port_events & WORKER_ELS_TMO)
@@ -1173,12 +1210,14 @@ lpfc_linkdown(struct lpfc_hba *phba)
        struct lpfc_vport **vports;
        LPFC_MBOXQ_t          *mb;
        int i;
+       int offline;
 
        if (phba->link_state == LPFC_LINK_DOWN)
                return 0;
 
        /* Block all SCSI stack I/Os */
        lpfc_scsi_dev_block(phba);
+       offline = pci_channel_offline(phba->pcidev);
 
        phba->defer_flogi_acc_flag = false;
 
@@ -1219,7 +1258,7 @@ lpfc_linkdown(struct lpfc_hba *phba)
        lpfc_destroy_vport_work_array(phba, vports);
 
        /* Clean up any SLI3 firmware default rpi's */
-       if (phba->sli_rev > LPFC_SLI_REV3)
+       if (phba->sli_rev > LPFC_SLI_REV3 || offline)
                goto skip_unreg_did;
 
        mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -4712,6 +4751,11 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
        spin_lock_irqsave(&ndlp->lock, iflags);
        if (!(ndlp->fc4_xpt_flags & NLP_XPT_REGD)) {
                spin_unlock_irqrestore(&ndlp->lock, iflags);
+               lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
+                                "0999 %s Not regd: ndlp x%px rport x%px DID "
+                                "x%x FLG x%x XPT x%x\n",
+                                 __func__, ndlp, ndlp->rport, ndlp->nlp_DID,
+                                 ndlp->nlp_flag, ndlp->fc4_xpt_flags);
                return;
        }
 
@@ -4722,6 +4766,13 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
            ndlp->fc4_xpt_flags & SCSI_XPT_REGD) {
                vport->phba->nport_event_cnt++;
                lpfc_unregister_remote_port(ndlp);
+       } else if (!ndlp->rport) {
+               lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
+                                "1999 %s NDLP in devloss x%px DID x%x FLG x%x"
+                                " XPT x%x refcnt %d\n",
+                                __func__, ndlp, ndlp->nlp_DID, ndlp->nlp_flag,
+                                ndlp->fc4_xpt_flags,
+                                kref_read(&ndlp->kref));
        }
 
        if (ndlp->fc4_xpt_flags & NVME_XPT_REGD) {
@@ -5371,6 +5422,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                                ndlp->nlp_flag &= ~NLP_UNREG_INP;
                                mempool_free(mbox, phba->mbox_mem_pool);
                                acc_plogi = 1;
+                               lpfc_nlp_put(ndlp);
                        }
                } else {
                        lpfc_printf_vlog(vport, KERN_INFO,
@@ -6097,12 +6149,34 @@ lpfc_disc_flush_list(struct lpfc_vport *vport)
        }
 }
 
+/**
+ * lpfc_notify_xport_npr - notifies xport of node disappearance
+ * @vport: Pointer to Virtual Port object.
+ *
+ * Transitions all ndlps to NPR state.  When lpfc_nlp_set_state
+ * calls lpfc_nlp_state_cleanup, the ndlp->rport is unregistered
+ * and the transport is notified that the node is gone.
+ * Return Code:
+ *     none
+ */
+static void
+lpfc_notify_xport_npr(struct lpfc_vport *vport)
+{
+       struct lpfc_nodelist *ndlp, *next_ndlp;
+
+       list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes,
+                                nlp_listp) {
+               lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+       }
+}
+
 void
 lpfc_cleanup_discovery_resources(struct lpfc_vport *vport)
 {
        lpfc_els_flush_rscn(vport);
        lpfc_els_flush_cmd(vport);
        lpfc_disc_flush_list(vport);
+       if (pci_channel_offline(vport->phba->pcidev))
+               lpfc_notify_xport_npr(vport);
 }
 
 /*****************************************************************************/
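Annotation: lpfc_dev_loss_tmo_callbk() now queues the dev-loss event (holding a node reference) only when the worker thread is still running; otherwise it clears NLP_IN_DEV_LOSS and drives NLP_EVT_DEVICE_RM inline. A simplified model of that fallback; all names below are stand-ins:

```c
#include <stdbool.h>
#include <stdio.h>

struct node {
	int refcnt;
	bool in_dev_loss;
};

/* Queue the dev-loss event; the reference pins the node until the
 * worker runs (mirrors lpfc_nlp_get() before list_add_tail()). */
static void queue_dev_loss_evt(struct node *n)
{
	n->refcnt++;
	printf("queued dev-loss work, refcnt %d\n", n->refcnt);
}

/* Worker is gone: clear the in-dev-loss mark and remove inline. */
static void remove_node_inline(struct node *n)
{
	n->in_dev_loss = false;
	printf("worker thread stopped, removing node inline\n");
}

int main(void)
{
	struct node n = { .refcnt = 1, .in_dev_loss = true };
	bool worker_alive = false;	/* e.g. teardown after a PCI error */

	if (worker_alive)
		queue_dev_loss_evt(&n);
	else
		remove_node_inline(&n);
	return 0;
}
```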
index eed6464bd880669807eb339e25e71532c75010dc..461d333b1b3a828e2b03de2aebafe9cebe86f0e4 100644 (file)
@@ -95,6 +95,7 @@ static void lpfc_sli4_oas_verify(struct lpfc_hba *phba);
 static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int);
 static void lpfc_setup_bg(struct lpfc_hba *, struct Scsi_Host *);
 static int lpfc_sli4_cgn_parm_chg_evt(struct lpfc_hba *);
+static void lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba);
 
 static struct scsi_transport_template *lpfc_transport_template = NULL;
 static struct scsi_transport_template *lpfc_vport_transport_template = NULL;
@@ -1642,7 +1643,7 @@ lpfc_sli4_offline_eratt(struct lpfc_hba *phba)
 {
        spin_lock_irq(&phba->hbalock);
        if (phba->link_state == LPFC_HBA_ERROR &&
-           phba->hba_flag & HBA_PCI_ERR) {
+               test_bit(HBA_PCI_ERR, &phba->bit_flags)) {
                spin_unlock_irq(&phba->hbalock);
                return;
        }
@@ -1985,6 +1986,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
        if (pci_channel_offline(phba->pcidev)) {
                lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
                                "3166 pci channel is offline\n");
+               lpfc_sli_flush_io_rings(phba);
                return;
        }
 
@@ -2973,6 +2975,22 @@ lpfc_cleanup(struct lpfc_vport *vport)
                                        NLP_EVT_DEVICE_RM);
        }
 
+       /* This is a special case flush to return all
+        * IOs before entering this loop. There are
+        * two points in the code where a flush is
+        * avoided if the FC_UNLOADING flag is set:
+        * one is in the multipool destroy
+        * (this prevents a crash), and the other is
+        * in the nvme abort handler (which also
+        * prevents a crash). Both of these exceptions
+        * are cases where the slot is still accessible.
+        * The flush here is done only when the pci
+        * slot is offline.
+        */
+       if (vport->load_flag & FC_UNLOADING &&
+           pci_channel_offline(phba->pcidev))
+               lpfc_sli_flush_io_rings(vport->phba);
+
        /* At this point, ALL ndlp's should be gone
         * because of the previous NLP_EVT_DEVICE_RM.
         * Lets wait for this to happen, if needed.
@@ -2985,7 +3003,7 @@ lpfc_cleanup(struct lpfc_vport *vport)
                        list_for_each_entry_safe(ndlp, next_ndlp,
                                                &vport->fc_nodes, nlp_listp) {
                                lpfc_printf_vlog(ndlp->vport, KERN_ERR,
-                                                LOG_TRACE_EVENT,
+                                                LOG_DISCOVERY,
                                                 "0282 did:x%x ndlp:x%px "
                                                 "refcnt:%d xflags x%x nflag x%x\n",
                                                 ndlp->nlp_DID, (void *)ndlp,
@@ -3682,7 +3700,8 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
        struct lpfc_vport **vports;
        struct Scsi_Host *shost;
        int i;
-       int offline = 0;
+       int offline;
+       bool hba_pci_err;
 
        if (vport->fc_flag & FC_OFFLINE_MODE)
                return;
@@ -3692,6 +3711,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
        lpfc_linkdown(phba);
 
        offline =  pci_channel_offline(phba->pcidev);
+       hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags);
 
        /* Issue an unreg_login to all nodes on all vports */
        vports = lpfc_create_vport_work_array(phba);
@@ -3715,11 +3735,14 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
                                ndlp->nlp_flag &= ~NLP_NPR_ADISC;
                                spin_unlock_irq(&ndlp->lock);
 
-                               if (offline) {
+                               if (offline || hba_pci_err) {
                                        spin_lock_irq(&ndlp->lock);
                                        ndlp->nlp_flag &= ~(NLP_UNREG_INP |
                                                            NLP_RPI_REGISTERED);
                                        spin_unlock_irq(&ndlp->lock);
+                                       if (phba->sli_rev == LPFC_SLI_REV4)
+                                               lpfc_sli_rpi_release(vports[i],
+                                                                    ndlp);
                                } else {
                                        lpfc_unreg_rpi(vports[i], ndlp);
                                }
@@ -13354,8 +13377,9 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba)
        /* Abort all iocbs associated with the hba */
        lpfc_sli_hba_iocb_abort(phba);
 
-       /* Wait for completion of device XRI exchange busy */
-       lpfc_sli4_xri_exchange_busy_wait(phba);
+       if (!pci_channel_offline(phba->pcidev))
+               /* Wait for completion of device XRI exchange busy */
+               lpfc_sli4_xri_exchange_busy_wait(phba);
 
        /* per-phba callback de-registration for hotplug event */
        if (phba->pport)
@@ -13374,15 +13398,12 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba)
        /* Disable FW logging to host memory */
        lpfc_ras_stop_fwlog(phba);
 
-       /* Unset the queues shared with the hardware then release all
-        * allocated resources.
-        */
-       lpfc_sli4_queue_unset(phba);
-       lpfc_sli4_queue_destroy(phba);
-
        /* Reset SLI4 HBA FCoE function */
        lpfc_pci_function_reset(phba);
 
+       /* release all queue allocated resources. */
+       lpfc_sli4_queue_destroy(phba);
+
        /* Free RAS DMA memory */
        if (phba->ras_fwlog.ras_enabled)
                lpfc_sli4_ras_dma_free(phba);
@@ -14262,6 +14283,7 @@ lpfc_sli_prep_dev_for_perm_failure(struct lpfc_hba *phba)
                        "2711 PCI channel permanent disable for failure\n");
        /* Block all SCSI devices' I/Os on the host */
        lpfc_scsi_dev_block(phba);
+       lpfc_sli4_prep_dev_for_reset(phba);
 
        /* stop all timers */
        lpfc_stop_hba_timers(phba);
@@ -15057,24 +15079,28 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba)
 static void
 lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
 {
-       lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
-                       "2826 PCI channel disable preparing for reset\n");
+       int offline =  pci_channel_offline(phba->pcidev);
+
+       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                       "2826 PCI channel disable preparing for reset offline"
+                       " %d\n", offline);
 
        /* Block any management I/Os to the device */
        lpfc_block_mgmt_io(phba, LPFC_MBX_NO_WAIT);
 
-       /* Block all SCSI devices' I/Os on the host */
-       lpfc_scsi_dev_block(phba);
 
+       /* HBA_PCI_ERR was set in io_error_detect */
+       lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT);
        /* Flush all driver's outstanding I/Os as we are to reset */
        lpfc_sli_flush_io_rings(phba);
+       lpfc_offline(phba);
 
        /* stop all timers */
        lpfc_stop_hba_timers(phba);
 
+       lpfc_sli4_queue_destroy(phba);
        /* Disable interrupt and pci device */
        lpfc_sli4_disable_intr(phba);
-       lpfc_sli4_queue_destroy(phba);
        pci_disable_device(phba->pcidev);
 }
 
@@ -15123,6 +15149,7 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state)
 {
        struct Scsi_Host *shost = pci_get_drvdata(pdev);
        struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+       bool hba_pci_err;
 
        switch (state) {
        case pci_channel_io_normal:
@@ -15130,17 +15157,24 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state)
                lpfc_sli4_prep_dev_for_recover(phba);
                return PCI_ERS_RESULT_CAN_RECOVER;
        case pci_channel_io_frozen:
-               phba->hba_flag |= HBA_PCI_ERR;
+               hba_pci_err = test_and_set_bit(HBA_PCI_ERR, &phba->bit_flags);
                /* Fatal error, prepare for slot reset */
-               lpfc_sli4_prep_dev_for_reset(phba);
+               if (!hba_pci_err)
+                       lpfc_sli4_prep_dev_for_reset(phba);
+               else
+                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                       "2832 Already handling PCI error "
+                                       "state: x%x\n", state);
                return PCI_ERS_RESULT_NEED_RESET;
        case pci_channel_io_perm_failure:
-               phba->hba_flag |= HBA_PCI_ERR;
+               set_bit(HBA_PCI_ERR, &phba->bit_flags);
                /* Permanent failure, prepare for device down */
                lpfc_sli4_prep_dev_for_perm_failure(phba);
                return PCI_ERS_RESULT_DISCONNECT;
        default:
-               phba->hba_flag |= HBA_PCI_ERR;
+               hba_pci_err = test_and_set_bit(HBA_PCI_ERR, &phba->bit_flags);
+               if (!hba_pci_err)
+                       lpfc_sli4_prep_dev_for_reset(phba);
                /* Unknown state, prepare and request slot reset */
                lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
                                "2825 Unknown PCI error state: x%x\n", state);
@@ -15174,17 +15208,21 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev)
        struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
        struct lpfc_sli *psli = &phba->sli;
        uint32_t intr_mode;
+       bool hba_pci_err;
 
        dev_printk(KERN_INFO, &pdev->dev, "recovering from a slot reset.\n");
        if (pci_enable_device_mem(pdev)) {
                printk(KERN_ERR "lpfc: Cannot re-enable "
-                       "PCI device after reset.\n");
+                      "PCI device after reset.\n");
                return PCI_ERS_RESULT_DISCONNECT;
        }
 
        pci_restore_state(pdev);
 
-       phba->hba_flag &= ~HBA_PCI_ERR;
+       hba_pci_err = test_and_clear_bit(HBA_PCI_ERR, &phba->bit_flags);
+       if (!hba_pci_err)
+               dev_info(&pdev->dev,
+                        "hba_pci_err was not set, recovering slot reset.\n");
        /*
         * As the new kernel behavior of pci_restore_state() API call clears
         * device saved_state flag, need to save the restored state again.
@@ -15198,6 +15236,8 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev)
        psli->sli_flag &= ~LPFC_SLI_ACTIVE;
        spin_unlock_irq(&phba->hbalock);
 
+       /* Init cpu_map array */
+       lpfc_cpu_map_array_init(phba);
        /* Configure and enable interrupt */
        intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode);
        if (intr_mode == LPFC_INTR_ERROR) {
@@ -15239,8 +15279,6 @@ lpfc_io_resume_s4(struct pci_dev *pdev)
         */
        if (!(phba->sli.sli_flag & LPFC_SLI_ACTIVE)) {
                /* Perform device reset */
-               lpfc_offline_prep(phba, LPFC_MBX_WAIT);
-               lpfc_offline(phba);
                lpfc_sli_brdrestart(phba);
                /* Bring the device back online */
                lpfc_online(phba);
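Annotation: the error-detect path uses test_and_set_bit() so only the first pci_channel_io_frozen report runs lpfc_sli4_prep_dev_for_reset(); later reports just log. A userspace model of the idempotency guard:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define HBA_PCI_ERR 1

static atomic_ulong bit_flags;

/* Returns the previous bit value, like the kernel primitive. */
static bool test_and_set_bit_ul(int nr, atomic_ulong *p)
{
	return atomic_fetch_or(p, 1UL << nr) & (1UL << nr);
}

static void on_pci_frozen(void)
{
	if (!test_and_set_bit_ul(HBA_PCI_ERR, &bit_flags))
		printf("preparing device for reset\n");
	else
		printf("already handling PCI error state\n");
}

int main(void)
{
	on_pci_frozen();	/* first report does the preparation */
	on_pci_frozen();	/* duplicate report only logs */
	return 0;
}
```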
index 1213a299f9aae96efd3404cd0c4554b3dd236d6f..8d26f207ebd22724838e9c90aed8a55b0e5d04ce 100644 (file)
@@ -93,6 +93,11 @@ lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport,
 
        lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
        vport = lport->vport;
+
+       if (!vport || vport->load_flag & FC_UNLOADING ||
+           vport->phba->hba_flag & HBA_IOQ_FLUSH)
+               return -ENODEV;
+
        qhandle = kzalloc(sizeof(struct lpfc_nvme_qhandle), GFP_KERNEL);
        if (qhandle == NULL)
                return -ENOMEM;
@@ -267,7 +272,8 @@ lpfc_nvme_handle_lsreq(struct lpfc_hba *phba,
                return -EINVAL;
 
        remoteport = lpfc_rport->remoteport;
-       if (!vport->localport)
+       if (!vport->localport ||
+           vport->phba->hba_flag & HBA_IOQ_FLUSH)
                return -EINVAL;
 
        lport = vport->localport->private;
@@ -559,6 +565,8 @@ __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                                 ndlp->nlp_DID, ntype, nstate);
                return -ENODEV;
        }
+       if (vport->phba->hba_flag & HBA_IOQ_FLUSH)
+               return -ENODEV;
 
        if (!vport->phba->sli4_hba.nvmels_wq)
                return -ENOMEM;
@@ -662,7 +670,8 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
                return -EINVAL;
 
        vport = lport->vport;
-       if (vport->load_flag & FC_UNLOADING)
+       if (vport->load_flag & FC_UNLOADING ||
+           vport->phba->hba_flag & HBA_IOQ_FLUSH)
                return -ENODEV;
 
        atomic_inc(&lport->fc4NvmeLsRequests);
@@ -1516,7 +1525,8 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 
        phba = vport->phba;
 
-       if (unlikely(vport->load_flag & FC_UNLOADING)) {
+       if ((unlikely(vport->load_flag & FC_UNLOADING)) ||
+           phba->hba_flag & HBA_IOQ_FLUSH) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
                                 "6124 Fail IO, Driver unload\n");
                atomic_inc(&lport->xmt_fcp_err);
@@ -2169,8 +2179,7 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
                        abts_nvme = 0;
                        for (i = 0; i < phba->cfg_hdw_queue; i++) {
                                qp = &phba->sli4_hba.hdwq[i];
-                               if (!vport || !vport->localport ||
-                                   !qp || !qp->io_wq)
+                               if (!vport->localport || !qp || !qp->io_wq)
                                        return;
 
                                pring = qp->io_wq->pring;
@@ -2180,8 +2189,9 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
                                abts_scsi += qp->abts_scsi_io_bufs;
                                abts_nvme += qp->abts_nvme_io_bufs;
                        }
-                       if (!vport || !vport->localport ||
-                           vport->phba->hba_flag & HBA_PCI_ERR)
+                       if (!vport->localport ||
+                           test_bit(HBA_PCI_ERR, &vport->phba->bit_flags) ||
+                           vport->load_flag & FC_UNLOADING)
                                return;
 
                        lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
@@ -2541,8 +2551,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                 * return values is ignored.  The upcall is a courtesy to the
                 * transport.
                 */
-               if (vport->load_flag & FC_UNLOADING ||
-                   unlikely(vport->phba->hba_flag & HBA_PCI_ERR))
+               if (vport->load_flag & FC_UNLOADING)
                        (void)nvme_fc_set_remoteport_devloss(remoteport, 0);
 
                ret = nvme_fc_unregister_remoteport(remoteport);
index 3c132604fd9136e3269717e4c43a572fd873b9df..f6b83853f7eea122b8ac87085b3c486b3981dcab 100644 (file)
@@ -5528,7 +5528,9 @@ static char *lpfc_is_command_vm_io(struct scsi_cmnd *cmd)
 {
        struct bio *bio = scsi_cmd_to_rq(cmd)->bio;
 
-       return bio ? blkcg_get_fc_appid(bio) : NULL;
+       if (!IS_ENABLED(CONFIG_BLK_CGROUP_FC_APPID) || !bio)
+               return NULL;
+       return blkcg_get_fc_appid(bio);
 }
 
 /**
@@ -5929,13 +5931,15 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
        }
 
        lpfc_cmd->waitq = &waitq;
-       if (phba->sli_rev == LPFC_SLI_REV4)
+       if (phba->sli_rev == LPFC_SLI_REV4) {
                spin_unlock(&pring_s4->ring_lock);
-       else
+               ret_val = lpfc_sli4_issue_abort_iotag(phba, iocb,
+                                                     lpfc_sli_abort_fcp_cmpl);
+       } else {
                pring = &phba->sli.sli3_ring[LPFC_FCP_RING];
-
-       ret_val = lpfc_sli_issue_abort_iotag(phba, pring, iocb,
-                                            lpfc_sli_abort_fcp_cmpl);
+               ret_val = lpfc_sli_issue_abort_iotag(phba, pring, iocb,
+                                                    lpfc_sli_abort_fcp_cmpl);
+       }
 
        /* Make sure HBA is alive */
        lpfc_issue_hb_tmo(phba);
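Annotation: lpfc_is_command_vm_io() now short-circuits to NULL unless CONFIG_BLK_CGROUP_FC_APPID is compiled in, instead of calling blkcg_get_fc_appid() unconditionally. A sketch of the IS_ENABLED()-style compile-time guard; the macro below is a crude approximation (the kernel's also handles =m) and the stub is hypothetical:

```c
#include <stdio.h>

#define CONFIG_BLK_CGROUP_FC_APPID 1	/* set to 0 to compile the guard out */
#define IS_ENABLED(option) (option)

/* Hypothetical stub standing in for blkcg_get_fc_appid(bio). */
static const char *blkcg_get_fc_appid_stub(void)
{
	return "vm-app-id";
}

static const char *lpfc_is_command_vm_io_model(int have_bio)
{
	if (!IS_ENABLED(CONFIG_BLK_CGROUP_FC_APPID) || !have_bio)
		return NULL;
	return blkcg_get_fc_appid_stub();
}

int main(void)
{
	const char *appid = lpfc_is_command_vm_io_model(1);

	printf("appid: %s\n", appid ? appid : "(none)");
	return 0;
}
```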
index 20d40957a3853d9868e1c25038bcea4997b9ffc5..6adaf79e67cc034c9d4e833a7816125beaad2bdb 100644 (file)
@@ -2828,6 +2828,12 @@ __lpfc_sli_rpi_release(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
        ndlp->nlp_flag &= ~NLP_UNREG_INP;
 }
 
+void
+lpfc_sli_rpi_release(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+{
+       __lpfc_sli_rpi_release(vport, ndlp);
+}
+
 /**
  * lpfc_sli_def_mbox_cmpl - Default mailbox completion handler
  * @phba: Pointer to HBA context object.
@@ -3715,7 +3721,15 @@ lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
        unsigned long iflag;
        u32 ulp_command, ulp_status, ulp_word4, ulp_context, iotag;
 
+       if (phba->sli_rev == LPFC_SLI_REV4)
+               spin_lock_irqsave(&pring->ring_lock, iflag);
+       else
+               spin_lock_irqsave(&phba->hbalock, iflag);
        cmdiocbp = lpfc_sli_iocbq_lookup(phba, pring, saveq);
+       if (phba->sli_rev == LPFC_SLI_REV4)
+               spin_unlock_irqrestore(&pring->ring_lock, iflag);
+       else
+               spin_unlock_irqrestore(&phba->hbalock, iflag);
 
        ulp_command = get_job_cmnd(phba, saveq);
        ulp_status = get_job_ulpstatus(phba, saveq);
@@ -4052,10 +4066,8 @@ lpfc_sli_handle_fast_ring_event(struct lpfc_hba *phba,
                                break;
                        }
 
-                       spin_unlock_irqrestore(&phba->hbalock, iflag);
                        cmdiocbq = lpfc_sli_iocbq_lookup(phba, pring,
                                                         &rspiocbq);
-                       spin_lock_irqsave(&phba->hbalock, iflag);
                        if (unlikely(!cmdiocbq))
                                break;
                        if (cmdiocbq->cmd_flag & LPFC_DRIVER_ABORTED)
@@ -4536,42 +4548,62 @@ lpfc_sli_handle_slow_ring_event_s4(struct lpfc_hba *phba,
 void
 lpfc_sli_abort_iocb_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
 {
-       LIST_HEAD(completions);
+       LIST_HEAD(tx_completions);
+       LIST_HEAD(txcmplq_completions);
        struct lpfc_iocbq *iocb, *next_iocb;
+       int offline;
 
        if (pring->ringno == LPFC_ELS_RING) {
                lpfc_fabric_abort_hba(phba);
        }
+       offline = pci_channel_offline(phba->pcidev);
 
        /* Error everything on txq and txcmplq
         * First do the txq.
         */
        if (phba->sli_rev >= LPFC_SLI_REV4) {
                spin_lock_irq(&pring->ring_lock);
-               list_splice_init(&pring->txq, &completions);
+               list_splice_init(&pring->txq, &tx_completions);
                pring->txq_cnt = 0;
-               spin_unlock_irq(&pring->ring_lock);
 
-               spin_lock_irq(&phba->hbalock);
-               /* Next issue ABTS for everything on the txcmplq */
-               list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list)
-                       lpfc_sli_issue_abort_iotag(phba, pring, iocb, NULL);
-               spin_unlock_irq(&phba->hbalock);
+               if (offline) {
+                       list_splice_init(&pring->txcmplq,
+                                        &txcmplq_completions);
+               } else {
+                       /* Next issue ABTS for everything on the txcmplq */
+                       list_for_each_entry_safe(iocb, next_iocb,
+                                                &pring->txcmplq, list)
+                               lpfc_sli_issue_abort_iotag(phba, pring,
+                                                          iocb, NULL);
+               }
+               spin_unlock_irq(&pring->ring_lock);
        } else {
                spin_lock_irq(&phba->hbalock);
-               list_splice_init(&pring->txq, &completions);
+               list_splice_init(&pring->txq, &tx_completions);
                pring->txq_cnt = 0;
 
-               /* Next issue ABTS for everything on the txcmplq */
-               list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list)
-                       lpfc_sli_issue_abort_iotag(phba, pring, iocb, NULL);
+               if (offline) {
+                       list_splice_init(&pring->txcmplq, &txcmplq_completions);
+               } else {
+                       /* Next issue ABTS for everything on the txcmplq */
+                       list_for_each_entry_safe(iocb, next_iocb,
+                                                &pring->txcmplq, list)
+                               lpfc_sli_issue_abort_iotag(phba, pring,
+                                                          iocb, NULL);
+               }
                spin_unlock_irq(&phba->hbalock);
        }
-       /* Make sure HBA is alive */
-       lpfc_issue_hb_tmo(phba);
 
+       if (offline) {
+               /* Cancel all the IOCBs from the completions list */
+               lpfc_sli_cancel_iocbs(phba, &txcmplq_completions,
+                                     IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED);
+       } else {
+               /* Make sure HBA is alive */
+               lpfc_issue_hb_tmo(phba);
+       }
        /* Cancel all the IOCBs from the completions list */
-       lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT,
+       lpfc_sli_cancel_iocbs(phba, &tx_completions, IOSTAT_LOCAL_REJECT,
                              IOERR_SLI_ABORTED);
 }
 
@@ -4624,11 +4656,6 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba)
        struct lpfc_iocbq *piocb, *next_iocb;
 
        spin_lock_irq(&phba->hbalock);
-       if (phba->hba_flag & HBA_IOQ_FLUSH ||
-           !phba->sli4_hba.hdwq) {
-               spin_unlock_irq(&phba->hbalock);
-               return;
-       }
        /* Indicate the I/O queues are flushed */
        phba->hba_flag |= HBA_IOQ_FLUSH;
        spin_unlock_irq(&phba->hbalock);
@@ -10693,10 +10720,10 @@ __lpfc_sli_prep_gen_req_s4(struct lpfc_iocbq *cmdiocbq, struct lpfc_dmabuf *bmp,
 
        /* Words 0 - 2 */
        bde = (struct ulp_bde64_le *)&cmdwqe->generic.bde;
-       bde->addr_low = cpu_to_le32(putPaddrLow(bmp->phys));
-       bde->addr_high = cpu_to_le32(putPaddrHigh(bmp->phys));
+       bde->addr_low = bpl->addr_low;
+       bde->addr_high = bpl->addr_high;
        bde->type_size = cpu_to_le32(xmit_len);
-       bde->type_size |= cpu_to_le32(ULP_BDE64_TYPE_BLP_64);
+       bde->type_size |= cpu_to_le32(ULP_BDE64_TYPE_BDE_64);
 
        /* Word 3 */
        cmdwqe->gen_req.request_payload_len = xmit_len;
@@ -10997,6 +11024,10 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
        unsigned long iflags;
        int rc;
 
+       /* If the PCI channel is in offline state, do not post iocbs. */
+       if (unlikely(pci_channel_offline(phba->pcidev)))
+               return IOCB_ERROR;
+
        if (phba->sli_rev == LPFC_SLI_REV4) {
                lpfc_sli_prep_wqe(phba, piocb);
 
index e52f37e5d8965ef19abfa3a15d0dc57c02e8c40e..a4d3259b8c52ad55de71ff3b1e93932a573c6820 100644 (file)
@@ -20,7 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "14.2.0.0"
+#define LPFC_DRIVER_VERSION "14.2.0.1"
 #define LPFC_DRIVER_NAME               "lpfc"
 
 /* Used for SLI 2/3 */
index 611871ef15b5d0bcc0a7c010d1f299a9c739b0fe..4919ea54b8277b9c847d40107b501a759fa02037 100644 (file)
@@ -2560,6 +2560,9 @@ struct megasas_instance_template {
 #define MEGASAS_IS_LOGICAL(sdev)                                       \
        ((sdev->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1)
 
+#define MEGASAS_IS_LUN_VALID(sdev)                                     \
+       (((sdev)->lun == 0) ? 1 : 0)
+
 #define MEGASAS_DEV_INDEX(scp)                                         \
        (((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) +   \
        scp->device->id)
index 8bf72dbc33b73ce651ec092b23e59308ce55e233..db6793608447a77ffd878b26a75934aac6ca8469 100644 (file)
@@ -2126,6 +2126,9 @@ static int megasas_slave_alloc(struct scsi_device *sdev)
                        goto scan_target;
                }
                return -ENXIO;
+       } else if (!MEGASAS_IS_LUN_VALID(sdev)) {
+               sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__);
+               return -ENXIO;
        }
 
 scan_target:
@@ -2156,6 +2159,10 @@ static void megasas_slave_destroy(struct scsi_device *sdev)
        instance = megasas_lookup_instance(sdev->host->host_no);
 
        if (MEGASAS_IS_LOGICAL(sdev)) {
+               if (!MEGASAS_IS_LUN_VALID(sdev)) {
+                       sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__);
+                       return;
+               }
                ld_tgt_id = MEGASAS_TARGET_ID(sdev);
                instance->ld_tgtid_status[ld_tgt_id] = LD_TARGET_ID_DELETED;
                if (megasas_dbg_lvl & LD_PD_DEBUG)
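Annotation: megaraid_sas logical devices only expose LUN 0, so the new MEGASAS_IS_LUN_VALID() check rejects other LUNs in slave_alloc()/slave_destroy(). A trivial standalone check of the macro as defined in the header hunk above:

```c
#include <stdio.h>

struct scsi_device {
	unsigned int lun;
};

/* Same shape as the macro added in the header above. */
#define MEGASAS_IS_LUN_VALID(sdev) (((sdev)->lun == 0) ? 1 : 0)

int main(void)
{
	struct scsi_device lun0 = { .lun = 0 }, lun2 = { .lun = 2 };

	printf("lun 0 valid: %d, lun 2 valid: %d\n",
	       MEGASAS_IS_LUN_VALID(&lun0), MEGASAS_IS_LUN_VALID(&lun2));
	return 0;
}
```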
index b57f1803371eafd451bd7cdd597dee8f6158bb44..538d2c0cd971303865eeace130c3eb5d95af4168 100644 (file)
@@ -5716,13 +5716,12 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc)
 /**
  * mpt3sas_check_same_4gb_region - checks whether all reply queues in a set are
  *     having same upper 32bits in their base memory address.
- * @reply_pool_start_address: Base address of a reply queue set
+ * @start_address: Base address of a reply queue set
  * @pool_sz: Size of single Reply Descriptor Post Queues pool size
  *
  * Return: 1 if reply queues in a set have a same upper 32bits in their base
  * memory address, else 0.
  */
-
 static int
 mpt3sas_check_same_4gb_region(dma_addr_t start_address, u32 pool_sz)
 {
index 0563078227de6c1265c72289e45b84d99424025c..a8dd14c91efdb2d55be89a647f553589fd84fcc4 100644 (file)
@@ -394,10 +394,13 @@ _config_request(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigRequest_t
                retry_count++;
                if (ioc->config_cmds.smid == smid)
                        mpt3sas_base_free_smid(ioc, smid);
-               if ((ioc->shost_recovery) || (ioc->config_cmds.status &
-                   MPT3_CMD_RESET) || ioc->pci_error_recovery)
+               if (ioc->config_cmds.status & MPT3_CMD_RESET)
                        goto retry_config;
-               issue_host_reset = 1;
+               if (ioc->shost_recovery || ioc->pci_error_recovery) {
+                       issue_host_reset = 0;
+                       r = -EFAULT;
+               } else
+                       issue_host_reset = 1;
                goto free_mem;
        }
 
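Annotation: _config_request() previously retried on reset *or* recovery and then always escalated to a host reset; the reordered logic retries only on MPT3_CMD_RESET and fails fast with -EFAULT during shost/PCI recovery. A compact model of the new decision, with simplified types:

```c
#include <stdio.h>

#define MPT3_CMD_RESET	0x0008
#define EFAULT		14

struct ioc_state {
	int shost_recovery;
	int pci_error_recovery;
	int config_cmds_status;
};

/* Returns 1 to retry the request; otherwise 0 with *r set and
 * *issue_host_reset telling the caller whether to escalate. */
static int on_config_timeout(const struct ioc_state *ioc, int *r,
			     int *issue_host_reset)
{
	if (ioc->config_cmds_status & MPT3_CMD_RESET)
		return 1;			/* retry_config */
	if (ioc->shost_recovery || ioc->pci_error_recovery) {
		*issue_host_reset = 0;
		*r = -EFAULT;			/* fail fast, no reset storm */
	} else {
		*issue_host_reset = 1;
	}
	return 0;
}

int main(void)
{
	struct ioc_state ioc = { .pci_error_recovery = 1 };
	int r = 0, issue_host_reset = 0;

	if (!on_config_timeout(&ioc, &r, &issue_host_reset))
		printf("r=%d issue_host_reset=%d\n", r, issue_host_reset);
	return 0;
}
```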
index 00792767c620d707be9f8a64329b32877327778f..7e476f50935b8bd86a3fdecd167aae099239dd27 100644 (file)
@@ -11035,6 +11035,7 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc,
 {
        struct _sas_port *mpt3sas_port, *next;
        unsigned long flags;
+       int port_id;
 
        /* remove sibling ports attached to this expander */
        list_for_each_entry_safe(mpt3sas_port, next,
@@ -11055,6 +11056,8 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc,
                            mpt3sas_port->hba_port);
        }
 
+       port_id = sas_expander->port->port_id;
+
        mpt3sas_transport_port_remove(ioc, sas_expander->sas_address,
            sas_expander->sas_address_parent, sas_expander->port);
 
@@ -11062,7 +11065,7 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc,
            "expander_remove: handle(0x%04x), sas_addr(0x%016llx), port:%d\n",
            sas_expander->handle, (unsigned long long)
            sas_expander->sas_address,
-           sas_expander->port->port_id);
+           port_id);
 
        spin_lock_irqsave(&ioc->sas_node_lock, flags);
        list_del(&sas_expander->list);
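Annotation: mpt3sas_transport_port_remove() can free sas_expander->port, so dereferencing it afterwards in the log call was a use-after-free; the fix copies port_id to a local first. The shape of the fix as a standalone sketch:

```c
#include <stdio.h>
#include <stdlib.h>

struct hba_port {
	int port_id;
};

/* Stand-in for mpt3sas_transport_port_remove(): may free the port. */
static void transport_port_remove(struct hba_port **pp)
{
	free(*pp);
	*pp = NULL;
}

int main(void)
{
	struct hba_port *port = malloc(sizeof(*port));
	int port_id;

	if (!port)
		return 1;
	port->port_id = 3;

	port_id = port->port_id;	/* cache before the remove call */
	transport_port_remove(&port);

	/* Log from the cached copy, not the freed structure. */
	printf("expander_remove: port:%d\n", port_id);
	return 0;
}
```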
index 7ac63eb5ccd385292bbda62f7f6ade9edaa44597..2fde496fff5f7409c9fbab5e0cad45a8f1af9903 100644 (file)
@@ -647,6 +647,7 @@ static struct pci_device_id mvs_pci_table[] = {
        { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1300), chip_1300 },
        { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1320), chip_1320 },
        { PCI_VDEVICE(ADAPTEC2, 0x0450), chip_6440 },
+       { PCI_VDEVICE(TTI, 0x2640), chip_6440 },
        { PCI_VDEVICE(TTI, 0x2710), chip_9480 },
        { PCI_VDEVICE(TTI, 0x2720), chip_9480 },
        { PCI_VDEVICE(TTI, 0x2721), chip_9480 },
index c4a838635893a6413b944a5423ba900742e8b56f..5d7dfefd6f6c9ee86613bec6b49b00f5d8962603 100644 (file)
@@ -192,10 +192,11 @@ struct sym53c500_data {
        int fast_pio;
 };
 
-static struct scsi_pointer *sym53c500_scsi_pointer(struct scsi_cmnd *cmd)
-{
-       return scsi_cmd_priv(cmd);
-}
+struct sym53c500_cmd_priv {
+       int status;
+       int message;
+       int phase;
+};
 
 enum Phase {
     idle,
@@ -356,7 +357,7 @@ SYM53C500_intr(int irq, void *dev_id)
        struct sym53c500_data *data =
            (struct sym53c500_data *)dev->hostdata;
        struct scsi_cmnd *curSC = data->current_SC;
-       struct scsi_pointer *scsi_pointer = sym53c500_scsi_pointer(curSC);
+       struct sym53c500_cmd_priv *scp = scsi_cmd_priv(curSC);
        int fast_pio = data->fast_pio;
 
        spin_lock_irqsave(dev->host_lock, flags);
@@ -403,12 +404,11 @@ SYM53C500_intr(int irq, void *dev_id)
 
        if (int_reg & 0x20) {           /* Disconnect */
                DEB(printk("SYM53C500: disconnect intr received\n"));
-               if (scsi_pointer->phase != message_in) {        /* Unexpected disconnect */
+               if (scp->phase != message_in) { /* Unexpected disconnect */
                        curSC->result = DID_NO_CONNECT << 16;
                } else {        /* Command complete, return status and message */
-                       curSC->result = (scsi_pointer->Status & 0xff) |
-                               ((scsi_pointer->Message & 0xff) << 8) |
-                               (DID_OK << 16);
+                       curSC->result = (scp->status & 0xff) |
+                               ((scp->message & 0xff) << 8) | (DID_OK << 16);
                }
                goto idle_out;
        }
@@ -419,7 +419,7 @@ SYM53C500_intr(int irq, void *dev_id)
                        struct scatterlist *sg;
                        int i;
 
-                       scsi_pointer->phase = data_out;
+                       scp->phase = data_out;
                        VDEB(printk("SYM53C500: Data-Out phase\n"));
                        outb(FLUSH_FIFO, port_base + CMD_REG);
                        LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC)); /* Max transfer size */
@@ -438,7 +438,7 @@ SYM53C500_intr(int irq, void *dev_id)
                        struct scatterlist *sg;
                        int i;
 
-                       scsi_pointer->phase = data_in;
+                       scp->phase = data_in;
                        VDEB(printk("SYM53C500: Data-In phase\n"));
                        outb(FLUSH_FIFO, port_base + CMD_REG);
                        LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC)); /* Max transfer size */
@@ -453,12 +453,12 @@ SYM53C500_intr(int irq, void *dev_id)
                break;
 
        case 0x02:              /* COMMAND */
-               scsi_pointer->phase = command_ph;
+               scp->phase = command_ph;
                printk("SYM53C500: Warning: Unknown interrupt occurred in command phase!\n");
                break;
 
        case 0x03:              /* STATUS */
-               scsi_pointer->phase = status_ph;
+               scp->phase = status_ph;
                VDEB(printk("SYM53C500: Status phase\n"));
                outb(FLUSH_FIFO, port_base + CMD_REG);
                outb(INIT_CMD_COMPLETE, port_base + CMD_REG);
@@ -471,24 +471,22 @@ SYM53C500_intr(int irq, void *dev_id)
 
        case 0x06:              /* MESSAGE-OUT */
                DEB(printk("SYM53C500: Message-Out phase\n"));
-               scsi_pointer->phase = message_out;
+               scp->phase = message_out;
                outb(SET_ATN, port_base + CMD_REG);     /* Reject the message */
                outb(MSG_ACCEPT, port_base + CMD_REG);
                break;
 
        case 0x07:              /* MESSAGE-IN */
                VDEB(printk("SYM53C500: Message-In phase\n"));
-               scsi_pointer->phase = message_in;
+               scp->phase = message_in;
 
-               scsi_pointer->Status = inb(port_base + SCSI_FIFO);
-               scsi_pointer->Message = inb(port_base + SCSI_FIFO);
+               scp->status = inb(port_base + SCSI_FIFO);
+               scp->message = inb(port_base + SCSI_FIFO);
 
                VDEB(printk("SCSI FIFO size=%d\n", inb(port_base + FIFO_FLAGS) & 0x1f));
-               DEB(printk("Status = %02x  Message = %02x\n",
-                          scsi_pointer->Status, scsi_pointer->Message));
+               DEB(printk("Status = %02x  Message = %02x\n", scp->status, scp->message));
 
-               if (scsi_pointer->Message == SAVE_POINTERS ||
-                   scsi_pointer->Message == DISCONNECT) {
+               if (scp->message == SAVE_POINTERS || scp->message == DISCONNECT) {
                        outb(SET_ATN, port_base + CMD_REG);     /* Reject message */
                        DEB(printk("Discarding SAVE_POINTERS message\n"));
                }
@@ -500,7 +498,7 @@ out:
        return IRQ_HANDLED;
 
 idle_out:
-       scsi_pointer->phase = idle;
+       scp->phase = idle;
        scsi_done(curSC);
        goto out;
 }
@@ -548,7 +546,7 @@ SYM53C500_info(struct Scsi_Host *SChost)
 
 static int SYM53C500_queue_lck(struct scsi_cmnd *SCpnt)
 {
-       struct scsi_pointer *scsi_pointer = sym53c500_scsi_pointer(SCpnt);
+       struct sym53c500_cmd_priv *scp = scsi_cmd_priv(SCpnt);
        int i;
        int port_base = SCpnt->device->host->io_port;
        struct sym53c500_data *data =
@@ -565,9 +563,9 @@ static int SYM53C500_queue_lck(struct scsi_cmnd *SCpnt)
        VDEB(printk("\n"));
 
        data->current_SC = SCpnt;
-       scsi_pointer->phase = command_ph;
-       scsi_pointer->Status = 0;
-       scsi_pointer->Message = 0;
+       scp->phase = command_ph;
+       scp->status = 0;
+       scp->message = 0;
 
        /* We are locked here already by the mid layer */
        REG0(port_base);
@@ -682,7 +680,7 @@ static struct scsi_host_template sym53c500_driver_template = {
      .this_id                  = 7,
      .sg_tablesize             = 32,
      .shost_groups             = SYM53C500_shost_groups,
-     .cmd_size                 = sizeof(struct scsi_pointer),
+     .cmd_size                 = sizeof(struct sym53c500_cmd_priv),
 };
 
 static int SYM53C500_config_check(struct pcmcia_device *p_dev, void *priv_data)
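
The sym53c500 hunks above are part of the tree-wide removal of the legacy struct scsi_pointer from struct scsi_cmnd: the driver stops borrowing the mid-layer's Status/Message/phase fields and instead declares its own per-command structure, advertises its size through the host template's .cmd_size, and retrieves it with scsi_cmd_priv(), which returns the private area the mid-layer allocates alongside every scsi_cmnd. A minimal sketch of the same pattern for a hypothetical driver (the mydrv_* names are made up; .cmd_size and scsi_cmd_priv() are the real API):

	#include <scsi/scsi_cmnd.h>
	#include <scsi/scsi_host.h>

	/* Driver-private state carried with each command. */
	struct mydrv_cmd_priv {
		int phase;
	};

	static int mydrv_queuecommand(struct Scsi_Host *host,
				      struct scsi_cmnd *cmd)
	{
		struct mydrv_cmd_priv *priv = scsi_cmd_priv(cmd);

		priv->phase = 0;	/* reset per-command driver state */
		/* ... hand the command to the hardware ... */
		return 0;
	}

	static struct scsi_host_template mydrv_template = {
		.queuecommand = mydrv_queuecommand,
		.cmd_size     = sizeof(struct mydrv_cmd_priv),
	};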
index f90b707c190bfdd49000ef392e92b0cff49dbd30..01c5e8ff4cc5fbe41525fe64bcafa47a060e94ed 100644 (file)
@@ -766,6 +766,10 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha)
        pm8001_ha->main_cfg_tbl.pm80xx_tbl.pcs_event_log_severity       = 0x01;
        pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt          = 0x01;
 
+       /* Bit 16 enables the higher IQs and OQs (32 to 63) */
+       if (pm8001_ha->max_q_num > 32)
+               pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt |=
+                                                       1 << 16;
        /* Disable end to end CRC checking */
        pm8001_ha->main_cfg_tbl.pm80xx_tbl.crc_core_dump = (0x1 << 16);
 
@@ -1027,6 +1031,13 @@ static int mpi_init_check(struct pm8001_hba_info *pm8001_ha)
        if (0x0000 != gst_len_mpistate)
                return -EBUSY;
 
+       /*
+        *  As per controller datasheet, after successful MPI
+        *  initialization minimum 500ms delay is required before
+        *  issuing commands.
+        */
+       msleep(500);
+
        return 0;
 }
 
@@ -1727,10 +1738,11 @@ static void
 pm80xx_chip_interrupt_enable(struct pm8001_hba_info *pm8001_ha, u8 vec)
 {
 #ifdef PM8001_USE_MSIX
-       u32 mask;
-       mask = (u32)(1 << vec);
-
-       pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, (u32)(mask & 0xFFFFFFFF));
+       if (vec < 32)
+               pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, 1U << vec);
+       else
+               pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR_U,
+                           1U << (vec - 32));
        return;
 #endif
        pm80xx_chip_intx_interrupt_enable(pm8001_ha);
@@ -1746,12 +1758,15 @@ static void
 pm80xx_chip_interrupt_disable(struct pm8001_hba_info *pm8001_ha, u8 vec)
 {
 #ifdef PM8001_USE_MSIX
-       u32 mask;
-       if (vec == 0xFF)
-               mask = 0xFFFFFFFF;
+       if (vec == 0xFF) {
+               /* disable all vectors (0-31 and 32-63) */
+               pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 0xFFFFFFFF);
+               pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U, 0xFFFFFFFF);
+       } else if (vec < 32)
+               pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 1U << vec);
        else
-               mask = (u32)(1 << vec);
-       pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, (u32)(mask & 0xFFFFFFFF));
+               pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U,
+                           1U << (vec - 32));
        return;
 #endif
        pm80xx_chip_intx_interrupt_disable(pm8001_ha);
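
Together with the bit-16 enable in init_default_table_values() above, the two interrupt hunks extend pm80xx MSI-X handling from 32 to 64 vectors: vectors 0-31 keep using MSGU_ODMR / MSGU_ODMR_CLR, while vectors 32-63 go through the upper-half registers MSGU_ODMR_U / MSGU_ODMR_CLR_U, one mask bit per vector in each 32-bit register. The vector-to-(register, bit) mapping the patch implements, as a small self-contained sketch (write_reg() and the two variables stand in for pm8001_cw32() and the hardware registers):

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t odmr_lo, odmr_hi;	/* stand-ins for ODMR / ODMR_U */

	static void write_reg(uint32_t *reg, uint32_t bits)
	{
		*reg |= bits;			/* set bit = mask the vector */
	}

	/* Mirrors the patch: register chosen by vec / 32, bit by vec % 32. */
	static void mask_vector(uint8_t vec)
	{
		if (vec < 32)
			write_reg(&odmr_lo, 1U << vec);
		else
			write_reg(&odmr_hi, 1U << (vec - 32));
	}

	int main(void)
	{
		mask_vector(5);
		mask_vector(40);
		printf("lo=%08x hi=%08x\n", odmr_lo, odmr_hi);
		/* prints lo=00000020 hi=00000100 */
		return 0;
	}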
index 928532180d323a81643948c4d95a9e1f2eda02e0..fd674ed1febed1c549859301545da4be19c91717 100644 (file)
@@ -3181,124 +3181,6 @@ static int pmcraid_build_ioadl(
        return 0;
 }
 
-/**
- * pmcraid_free_sglist - Frees an allocated SG buffer list
- * @sglist: scatter/gather list pointer
- *
- * Free DMA'able memory previously allocated with pmcraid_alloc_sglist
- *
- * Return value:
- *     none
- */
-static void pmcraid_free_sglist(struct pmcraid_sglist *sglist)
-{
-       sgl_free_order(sglist->scatterlist, sglist->order);
-       kfree(sglist);
-}
-
-/**
- * pmcraid_alloc_sglist - Allocates memory for a SG list
- * @buflen: buffer length
- *
- * Allocates a DMA'able buffer in chunks and assembles a scatter/gather
- * list.
- *
- * Return value
- *     pointer to sglist / NULL on failure
- */
-static struct pmcraid_sglist *pmcraid_alloc_sglist(int buflen)
-{
-       struct pmcraid_sglist *sglist;
-       int sg_size;
-       int order;
-
-       sg_size = buflen / (PMCRAID_MAX_IOADLS - 1);
-       order = (sg_size > 0) ? get_order(sg_size) : 0;
-
-       /* Allocate a scatter/gather list for the DMA */
-       sglist = kzalloc(sizeof(struct pmcraid_sglist), GFP_KERNEL);
-       if (sglist == NULL)
-               return NULL;
-
-       sglist->order = order;
-       sgl_alloc_order(buflen, order, false, GFP_KERNEL | __GFP_ZERO,
-                       &sglist->num_sg);
-
-       return sglist;
-}
-
-/**
- * pmcraid_copy_sglist - Copy user buffer to kernel buffer's SG list
- * @sglist: scatter/gather list pointer
- * @buffer: buffer pointer
- * @len: buffer length
- * @direction: data transfer direction
- *
- * Copy a user buffer into a buffer allocated by pmcraid_alloc_sglist
- *
- * Return value:
- * 0 on success / other on failure
- */
-static int pmcraid_copy_sglist(
-       struct pmcraid_sglist *sglist,
-       void __user *buffer,
-       u32 len,
-       int direction
-)
-{
-       struct scatterlist *sg;
-       void *kaddr;
-       int bsize_elem;
-       int i;
-       int rc = 0;
-
-       /* Determine the actual number of bytes per element */
-       bsize_elem = PAGE_SIZE * (1 << sglist->order);
-
-       sg = sglist->scatterlist;
-
-       for (i = 0; i < (len / bsize_elem); i++, sg = sg_next(sg), buffer += bsize_elem) {
-               struct page *page = sg_page(sg);
-
-               kaddr = kmap(page);
-               if (direction == DMA_TO_DEVICE)
-                       rc = copy_from_user(kaddr, buffer, bsize_elem);
-               else
-                       rc = copy_to_user(buffer, kaddr, bsize_elem);
-
-               kunmap(page);
-
-               if (rc) {
-                       pmcraid_err("failed to copy user data into sg list\n");
-                       return -EFAULT;
-               }
-
-               sg->length = bsize_elem;
-       }
-
-       if (len % bsize_elem) {
-               struct page *page = sg_page(sg);
-
-               kaddr = kmap(page);
-
-               if (direction == DMA_TO_DEVICE)
-                       rc = copy_from_user(kaddr, buffer, len % bsize_elem);
-               else
-                       rc = copy_to_user(buffer, kaddr, len % bsize_elem);
-
-               kunmap(page);
-
-               sg->length = len % bsize_elem;
-       }
-
-       if (rc) {
-               pmcraid_err("failed to copy user data into sg list\n");
-               rc = -EFAULT;
-       }
-
-       return rc;
-}
-
 /**
  * pmcraid_queuecommand_lck - Queue a mid-layer request
  * @scsi_cmd: scsi command struct
@@ -3454,365 +3336,6 @@ static int pmcraid_chr_fasync(int fd, struct file *filep, int mode)
        return rc;
 }
 
-
-/**
- * pmcraid_build_passthrough_ioadls - builds SG elements for passthrough
- * commands sent over IOCTL interface
- *
- * @cmd       : pointer to struct pmcraid_cmd
- * @buflen    : length of the request buffer
- * @direction : data transfer direction
- *
- * Return value
- *  0 on success, non-zero error code on failure
- */
-static int pmcraid_build_passthrough_ioadls(
-       struct pmcraid_cmd *cmd,
-       int buflen,
-       int direction
-)
-{
-       struct pmcraid_sglist *sglist = NULL;
-       struct scatterlist *sg = NULL;
-       struct pmcraid_ioarcb *ioarcb = &cmd->ioa_cb->ioarcb;
-       struct pmcraid_ioadl_desc *ioadl;
-       int i;
-
-       sglist = pmcraid_alloc_sglist(buflen);
-
-       if (!sglist) {
-               pmcraid_err("can't allocate memory for passthrough SGls\n");
-               return -ENOMEM;
-       }
-
-       sglist->num_dma_sg = dma_map_sg(&cmd->drv_inst->pdev->dev,
-                                       sglist->scatterlist,
-                                       sglist->num_sg, direction);
-
-       if (!sglist->num_dma_sg || sglist->num_dma_sg > PMCRAID_MAX_IOADLS) {
-               dev_err(&cmd->drv_inst->pdev->dev,
-                       "Failed to map passthrough buffer!\n");
-               pmcraid_free_sglist(sglist);
-               return -EIO;
-       }
-
-       cmd->sglist = sglist;
-       ioarcb->request_flags0 |= NO_LINK_DESCS;
-
-       ioadl = pmcraid_init_ioadls(cmd, sglist->num_dma_sg);
-
-       /* Initialize IOADL descriptor addresses */
-       for_each_sg(sglist->scatterlist, sg, sglist->num_dma_sg, i) {
-               ioadl[i].data_len = cpu_to_le32(sg_dma_len(sg));
-               ioadl[i].address = cpu_to_le64(sg_dma_address(sg));
-               ioadl[i].flags = 0;
-       }
-
-       /* setup the last descriptor */
-       ioadl[i - 1].flags = IOADL_FLAGS_LAST_DESC;
-
-       return 0;
-}
-
-
-/**
- * pmcraid_release_passthrough_ioadls - release passthrough ioadls
- *
- * @cmd: pointer to struct pmcraid_cmd for which ioadls were allocated
- * @buflen: size of the request buffer
- * @direction: data transfer direction
- *
- * Return value
- *  0 on success, non-zero error code on failure
- */
-static void pmcraid_release_passthrough_ioadls(
-       struct pmcraid_cmd *cmd,
-       int buflen,
-       int direction
-)
-{
-       struct pmcraid_sglist *sglist = cmd->sglist;
-
-       if (buflen > 0) {
-               dma_unmap_sg(&cmd->drv_inst->pdev->dev,
-                            sglist->scatterlist,
-                            sglist->num_sg,
-                            direction);
-               pmcraid_free_sglist(sglist);
-               cmd->sglist = NULL;
-       }
-}
-
-/**
- * pmcraid_ioctl_passthrough - handling passthrough IOCTL commands
- *
- * @pinstance: pointer to adapter instance structure
- * @ioctl_cmd: ioctl code
- * @buflen: unused
- * @arg: pointer to pmcraid_passthrough_buffer user buffer
- *
- * Return value
- *  0 on success, non-zero error code on failure
- */
-static long pmcraid_ioctl_passthrough(
-       struct pmcraid_instance *pinstance,
-       unsigned int ioctl_cmd,
-       unsigned int buflen,
-       void __user *arg
-)
-{
-       struct pmcraid_passthrough_ioctl_buffer *buffer;
-       struct pmcraid_ioarcb *ioarcb;
-       struct pmcraid_cmd *cmd;
-       struct pmcraid_cmd *cancel_cmd;
-       void __user *request_buffer;
-       unsigned long request_offset;
-       unsigned long lock_flags;
-       void __user *ioasa;
-       u32 ioasc;
-       int request_size;
-       int buffer_size;
-       u8 direction;
-       int rc = 0;
-
-       /* If IOA reset is in progress, wait 10 secs for reset to complete */
-       if (pinstance->ioa_reset_in_progress) {
-               rc = wait_event_interruptible_timeout(
-                               pinstance->reset_wait_q,
-                               !pinstance->ioa_reset_in_progress,
-                               msecs_to_jiffies(10000));
-
-               if (!rc)
-                       return -ETIMEDOUT;
-               else if (rc < 0)
-                       return -ERESTARTSYS;
-       }
-
-       /* If adapter is not in operational state, return error */
-       if (pinstance->ioa_state != IOA_STATE_OPERATIONAL) {
-               pmcraid_err("IOA is not operational\n");
-               return -ENOTTY;
-       }
-
-       buffer_size = sizeof(struct pmcraid_passthrough_ioctl_buffer);
-       buffer = kmalloc(buffer_size, GFP_KERNEL);
-
-       if (!buffer) {
-               pmcraid_err("no memory for passthrough buffer\n");
-               return -ENOMEM;
-       }
-
-       request_offset =
-           offsetof(struct pmcraid_passthrough_ioctl_buffer, request_buffer);
-
-       request_buffer = arg + request_offset;
-
-       rc = copy_from_user(buffer, arg,
-                            sizeof(struct pmcraid_passthrough_ioctl_buffer));
-
-       ioasa = arg + offsetof(struct pmcraid_passthrough_ioctl_buffer, ioasa);
-
-       if (rc) {
-               pmcraid_err("ioctl: can't copy passthrough buffer\n");
-               rc = -EFAULT;
-               goto out_free_buffer;
-       }
-
-       request_size = le32_to_cpu(buffer->ioarcb.data_transfer_length);
-
-       if (buffer->ioarcb.request_flags0 & TRANSFER_DIR_WRITE) {
-               direction = DMA_TO_DEVICE;
-       } else {
-               direction = DMA_FROM_DEVICE;
-       }
-
-       if (request_size < 0) {
-               rc = -EINVAL;
-               goto out_free_buffer;
-       }
-
-       /* check if we have any additional command parameters */
-       if (le16_to_cpu(buffer->ioarcb.add_cmd_param_length)
-            > PMCRAID_ADD_CMD_PARAM_LEN) {
-               rc = -EINVAL;
-               goto out_free_buffer;
-       }
-
-       cmd = pmcraid_get_free_cmd(pinstance);
-
-       if (!cmd) {
-               pmcraid_err("free command block is not available\n");
-               rc = -ENOMEM;
-               goto out_free_buffer;
-       }
-
-       cmd->scsi_cmd = NULL;
-       ioarcb = &(cmd->ioa_cb->ioarcb);
-
-       /* Copy the user-provided IOARCB stuff field by field */
-       ioarcb->resource_handle = buffer->ioarcb.resource_handle;
-       ioarcb->data_transfer_length = buffer->ioarcb.data_transfer_length;
-       ioarcb->cmd_timeout = buffer->ioarcb.cmd_timeout;
-       ioarcb->request_type = buffer->ioarcb.request_type;
-       ioarcb->request_flags0 = buffer->ioarcb.request_flags0;
-       ioarcb->request_flags1 = buffer->ioarcb.request_flags1;
-       memcpy(ioarcb->cdb, buffer->ioarcb.cdb, PMCRAID_MAX_CDB_LEN);
-
-       if (buffer->ioarcb.add_cmd_param_length) {
-               ioarcb->add_cmd_param_length =
-                       buffer->ioarcb.add_cmd_param_length;
-               ioarcb->add_cmd_param_offset =
-                       buffer->ioarcb.add_cmd_param_offset;
-               memcpy(ioarcb->add_data.u.add_cmd_params,
-                       buffer->ioarcb.add_data.u.add_cmd_params,
-                       le16_to_cpu(buffer->ioarcb.add_cmd_param_length));
-       }
-
-       /* Set the hrrq number the IOA should respond to. Note that all
-        * internally generated cmds use hrrq_id 0; the exception is the
-        * re-used cmd block of a scsi_cmd (e.g. cancel/abort), which keeps
-        * the hrrq_id assigned in queuecommand.
-        */
-       ioarcb->hrrq_id = atomic_add_return(1, &(pinstance->last_message_id)) %
-                         pinstance->num_hrrq;
-
-       if (request_size) {
-               rc = pmcraid_build_passthrough_ioadls(cmd,
-                                                     request_size,
-                                                     direction);
-               if (rc) {
-                       pmcraid_err("couldn't build passthrough ioadls\n");
-                       goto out_free_cmd;
-               }
-       }
-
-       /* If data is being written into the device, copy the data from user
-        * buffers
-        */
-       if (direction == DMA_TO_DEVICE && request_size > 0) {
-               rc = pmcraid_copy_sglist(cmd->sglist,
-                                        request_buffer,
-                                        request_size,
-                                        direction);
-               if (rc) {
-                       pmcraid_err("failed to copy user buffer\n");
-                       goto out_free_sglist;
-               }
-       }
-
-       /* passthrough ioctl is a blocking command, so put the caller to sleep
-        * until timeout. Note that a timeout value of 0 means wait
-        * indefinitely (no timeout).
-        */
-       cmd->cmd_done = pmcraid_internal_done;
-       init_completion(&cmd->wait_for_completion);
-       cmd->completion_req = 1;
-
-       pmcraid_info("command(%d) (CDB[0] = %x) for %x\n",
-                    le32_to_cpu(cmd->ioa_cb->ioarcb.response_handle) >> 2,
-                    cmd->ioa_cb->ioarcb.cdb[0],
-                    le32_to_cpu(cmd->ioa_cb->ioarcb.resource_handle));
-
-       spin_lock_irqsave(pinstance->host->host_lock, lock_flags);
-       _pmcraid_fire_command(cmd);
-       spin_unlock_irqrestore(pinstance->host->host_lock, lock_flags);
-
-       /* NOTE! Remove the line below once abort_task is implemented in
-        * firmware. It disables the ioctl command timeout handling logic
-        * (similar to I/O command timeout handling), making ioctl commands
-        * wait for completion regardless of the timeout value specified in
-        * the ioarcb.
-        */
-       buffer->ioarcb.cmd_timeout = 0;
-
-       /* If command timeout is specified put caller to wait till that time,
-        * otherwise it would be blocking wait. If command gets timed out, it
-        * will be aborted.
-        */
-       if (buffer->ioarcb.cmd_timeout == 0) {
-               wait_for_completion(&cmd->wait_for_completion);
-       } else if (!wait_for_completion_timeout(
-                       &cmd->wait_for_completion,
-                       msecs_to_jiffies(le16_to_cpu(buffer->ioarcb.cmd_timeout) * 1000))) {
-
-               pmcraid_info("aborting cmd %d (CDB[0] = %x) due to timeout\n",
-                       le32_to_cpu(cmd->ioa_cb->ioarcb.response_handle) >> 2,
-                       cmd->ioa_cb->ioarcb.cdb[0]);
-
-               spin_lock_irqsave(pinstance->host->host_lock, lock_flags);
-               cancel_cmd = pmcraid_abort_cmd(cmd);
-               spin_unlock_irqrestore(pinstance->host->host_lock, lock_flags);
-
-               if (cancel_cmd) {
-                       wait_for_completion(&cancel_cmd->wait_for_completion);
-                       ioasc = le32_to_cpu(cancel_cmd->ioa_cb->ioasa.ioasc);
-                       pmcraid_return_cmd(cancel_cmd);
-
-                       /* If abort task couldn't find the command, i.e. it
-                        * completed before it could be aborted, return good
-                        * completion. If the command was aborted successfully,
-                        * or the IOA was reset because the abort task itself
-                        * timed out, return -ETIMEDOUT.
-                        */
-                       if (ioasc == PMCRAID_IOASC_IOA_WAS_RESET ||
-                           PMCRAID_IOASC_SENSE_KEY(ioasc) == 0x00) {
-                               if (ioasc != PMCRAID_IOASC_GC_IOARCB_NOTFOUND)
-                                       rc = -ETIMEDOUT;
-                               goto out_handle_response;
-                       }
-               }
-
-               /* If there was no command block for the abort task, or the
-                * abort task failed to abort the IOARCB, wait 150 more
-                * seconds and initiate a reset sequence on timeout.
-                */
-               if (!wait_for_completion_timeout(
-                       &cmd->wait_for_completion,
-                       msecs_to_jiffies(150 * 1000))) {
-                       pmcraid_reset_bringup(cmd->drv_inst);
-                       rc = -ETIMEDOUT;
-               }
-       }
-
-out_handle_response:
-       /* copy entire IOASA buffer and return IOCTL success.
-        * If copying IOASA to user-buffer fails, return
-        * EFAULT
-        */
-       if (copy_to_user(ioasa, &cmd->ioa_cb->ioasa,
-               sizeof(struct pmcraid_ioasa))) {
-               pmcraid_err("failed to copy ioasa buffer to user\n");
-               rc = -EFAULT;
-       }
-
-       /* If the data transfer was from device, copy the data onto user
-        * buffers
-        */
-       else if (direction == DMA_FROM_DEVICE && request_size > 0) {
-               rc = pmcraid_copy_sglist(cmd->sglist,
-                                        request_buffer,
-                                        request_size,
-                                        direction);
-               if (rc) {
-                       pmcraid_err("failed to copy user buffer\n");
-                       rc = -EFAULT;
-               }
-       }
-
-out_free_sglist:
-       pmcraid_release_passthrough_ioadls(cmd, request_size, direction);
-
-out_free_cmd:
-       pmcraid_return_cmd(cmd);
-
-out_free_buffer:
-       kfree(buffer);
-
-       return rc;
-}
-
-
-
-
 /**
  * pmcraid_ioctl_driver - ioctl handler for commands handled by driver itself
  *
@@ -3922,20 +3445,6 @@ static long pmcraid_chr_ioctl(
 
        switch (_IOC_TYPE(cmd)) {
 
-       case PMCRAID_PASSTHROUGH_IOCTL:
-               /* If ioctl code is to download microcode, we need to block
-                * mid-layer requests.
-                */
-               if (cmd == PMCRAID_IOCTL_DOWNLOAD_MICROCODE)
-                       scsi_block_requests(pinstance->host);
-
-               retval = pmcraid_ioctl_passthrough(pinstance, cmd,
-                                                  hdr->buffer_length, argp);
-
-               if (cmd == PMCRAID_IOCTL_DOWNLOAD_MICROCODE)
-                       scsi_unblock_requests(pinstance->host);
-               break;
-
        case PMCRAID_DRIVER_IOCTL:
                arg += sizeof(struct pmcraid_ioctl_header);
                retval = pmcraid_ioctl_driver(pinstance, cmd,
index bbb75318f1e7fb6067d3b19ab4dbe08cd425d6a8..9f59930e8b4fdd1052f80c3d8fc5f7426fb1435c 100644 (file)
@@ -1022,41 +1022,16 @@ struct pmcraid_ioctl_header {
 
 #define PMCRAID_IOCTL_SIGNATURE      "PMCRAID"
 
-/*
- * pmcraid_passthrough_ioctl_buffer - structure given as argument to
- * passthrough (or firmware-handled) IOCTL commands. Note that the ioarcb
- * requires 32-byte alignment, so it is necessary to pack this structure to
- * avoid any holes between ioctl_header and the passthrough buffer
- *
- * .ioctl_header  : ioctl header
- * .ioarcb        : filled-up ioarcb buffer, driver always reads this buffer
- * .ioasa         : buffer for ioasa, driver fills this with IOASA from firmware
- * .request_buffer: The I/O buffer (flat), driver reads/writes to this based on
- *                  the transfer directions passed in ioarcb.flags0. Contents
- *                  of this buffer are valid only when ioarcb.data_transfer_len
- *                  is not zero.
- */
-struct pmcraid_passthrough_ioctl_buffer {
-       struct pmcraid_ioctl_header ioctl_header;
-       struct pmcraid_ioarcb ioarcb;
-       struct pmcraid_ioasa  ioasa;
-       u8  request_buffer[];
-} __attribute__ ((packed, aligned(PMCRAID_IOARCB_ALIGNMENT)));
-
 /*
  * keys to differentiate between driver handled IOCTLs and passthrough
  * IOCTLs passed to IOA. driver determines the ioctl type using macro
  * _IOC_TYPE
  */
 #define PMCRAID_DRIVER_IOCTL         'D'
-#define PMCRAID_PASSTHROUGH_IOCTL    'F'
 
 #define DRV_IOCTL(n, size) \
        _IOC(_IOC_READ|_IOC_WRITE, PMCRAID_DRIVER_IOCTL, (n), (size))
 
-#define FMW_IOCTL(n, size) \
-       _IOC(_IOC_READ|_IOC_WRITE, PMCRAID_PASSTHROUGH_IOCTL,  (n), (size))
-
 /*
  * _ARGSIZE: macro that gives size of the argument type passed to an IOCTL cmd.
  * This is to facilitate applications avoiding unnecessary memory allocations.
@@ -1069,12 +1044,4 @@ struct pmcraid_passthrough_ioctl_buffer {
 #define PMCRAID_IOCTL_RESET_ADAPTER          \
        DRV_IOCTL(5, sizeof(struct pmcraid_ioctl_header))
 
-/* passthrough/firmware handled commands */
-#define PMCRAID_IOCTL_PASSTHROUGH_COMMAND         \
-       FMW_IOCTL(1, sizeof(struct pmcraid_passthrough_ioctl_buffer))
-
-#define PMCRAID_IOCTL_DOWNLOAD_MICROCODE     \
-       FMW_IOCTL(2, sizeof(struct pmcraid_passthrough_ioctl_buffer))
-
-
 #endif /* _PMCRAID_H */
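
The pmcraid hunks above remove the firmware passthrough ioctl path wholesale: the SG-list build/copy/free helpers, pmcraid_ioctl_passthrough() itself, the packed pmcraid_passthrough_ioctl_buffer ABI, and the 'F' ioctl type. pmcraid_chr_ioctl()'s switch on _IOC_TYPE(cmd) now only recognizes PMCRAID_DRIVER_IOCTL ('D'), so any leftover passthrough ioctl numbers fall through to the switch's default handling (not shown in these hunks). _IOC_TYPE() is the standard macro that recovers the type byte an ioctl number was encoded with; a minimal userspace illustration (DRV_IOCTL copied from the header above; the size argument is arbitrary):

	#include <stdio.h>
	#include <linux/ioctl.h>

	#define PMCRAID_DRIVER_IOCTL	'D'
	#define DRV_IOCTL(n, size) \
		_IOC(_IOC_READ | _IOC_WRITE, PMCRAID_DRIVER_IOCTL, (n), (size))

	int main(void)
	{
		unsigned int cmd = DRV_IOCTL(5, 64);

		/* _IOC_TYPE() recovers the 'D' the number was built with;
		 * drivers dispatch on exactly this byte. */
		printf("type=%c nr=%u size=%u\n",
		       _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
		return 0;
	}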
index 8196f89f404e724a53bb294f14914a71b3594b70..31ec429104e2bfa427c3b16abfe37765dcdd5046 100644 (file)
@@ -860,6 +860,37 @@ static int qedi_task_xmit(struct iscsi_task *task)
        return qedi_iscsi_send_ioreq(task);
 }
 
+static void qedi_offload_work(struct work_struct *work)
+{
+       struct qedi_endpoint *qedi_ep =
+               container_of(work, struct qedi_endpoint, offload_work);
+       struct qedi_ctx *qedi;
+       int wait_delay = 5 * HZ;
+       int ret;
+
+       qedi = qedi_ep->qedi;
+
+       ret = qedi_iscsi_offload_conn(qedi_ep);
+       if (ret) {
+               QEDI_ERR(&qedi->dbg_ctx,
+                        "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n",
+                        qedi_ep->iscsi_cid, qedi_ep, ret);
+               qedi_ep->state = EP_STATE_OFLDCONN_FAILED;
+               return;
+       }
+
+       ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait,
+                                              (qedi_ep->state ==
+                                              EP_STATE_OFLDCONN_COMPL),
+                                              wait_delay);
+       if (ret <= 0 || qedi_ep->state != EP_STATE_OFLDCONN_COMPL) {
+               qedi_ep->state = EP_STATE_OFLDCONN_FAILED;
+               QEDI_ERR(&qedi->dbg_ctx,
+                        "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n",
+                        qedi_ep->iscsi_cid, qedi_ep);
+       }
+}
+
 static struct iscsi_endpoint *
 qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
                int non_blocking)
@@ -908,6 +939,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
        }
        qedi_ep = ep->dd_data;
        memset(qedi_ep, 0, sizeof(struct qedi_endpoint));
+       INIT_WORK(&qedi_ep->offload_work, qedi_offload_work);
        qedi_ep->state = EP_STATE_IDLE;
        qedi_ep->iscsi_cid = (u32)-1;
        qedi_ep->qedi = qedi;
@@ -1056,12 +1088,11 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
        qedi_ep = ep->dd_data;
        qedi = qedi_ep->qedi;
 
+       flush_work(&qedi_ep->offload_work);
+
        if (qedi_ep->state == EP_STATE_OFLDCONN_START)
                goto ep_exit_recover;
 
-       if (qedi_ep->state != EP_STATE_OFLDCONN_NONE)
-               flush_work(&qedi_ep->offload_work);
-
        if (qedi_ep->conn) {
                qedi_conn = qedi_ep->conn;
                abrt_conn = qedi_conn->abrt_conn;
@@ -1235,37 +1266,6 @@ static int qedi_data_avail(struct qedi_ctx *qedi, u16 vlanid)
        return rc;
 }
 
-static void qedi_offload_work(struct work_struct *work)
-{
-       struct qedi_endpoint *qedi_ep =
-               container_of(work, struct qedi_endpoint, offload_work);
-       struct qedi_ctx *qedi;
-       int wait_delay = 5 * HZ;
-       int ret;
-
-       qedi = qedi_ep->qedi;
-
-       ret = qedi_iscsi_offload_conn(qedi_ep);
-       if (ret) {
-               QEDI_ERR(&qedi->dbg_ctx,
-                        "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n",
-                        qedi_ep->iscsi_cid, qedi_ep, ret);
-               qedi_ep->state = EP_STATE_OFLDCONN_FAILED;
-               return;
-       }
-
-       ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait,
-                                              (qedi_ep->state ==
-                                              EP_STATE_OFLDCONN_COMPL),
-                                              wait_delay);
-       if ((ret <= 0) || (qedi_ep->state != EP_STATE_OFLDCONN_COMPL)) {
-               qedi_ep->state = EP_STATE_OFLDCONN_FAILED;
-               QEDI_ERR(&qedi->dbg_ctx,
-                        "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n",
-                        qedi_ep->iscsi_cid, qedi_ep);
-       }
-}
-
 static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data)
 {
        struct qedi_ctx *qedi;
@@ -1381,7 +1381,6 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data)
                          qedi_ep->dst_addr, qedi_ep->dst_port);
        }
 
-       INIT_WORK(&qedi_ep->offload_work, qedi_offload_work);
        queue_work(qedi->offload_thread, &qedi_ep->offload_work);
 
        ret = 0;
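
The qedi hunks above are an ordering fix, not new behavior: qedi_offload_work was previously INIT_WORK()ed only in qedi_set_path(), while qedi_ep_disconnect() flushed it behind a state check, so a disconnect racing an early connect could touch an uninitialized work item. Moving INIT_WORK() into qedi_ep_connect(), where the endpoint is created, lets the disconnect path call flush_work() unconditionally: flushing a work item that was initialized but never queued is a harmless no-op. The general shape of that pattern, sketched with hypothetical names:

	#include <linux/slab.h>
	#include <linux/workqueue.h>

	/* Hypothetical endpoint following the pattern the patch adopts. */
	struct my_ep {
		struct work_struct offload_work;
	};

	static void my_offload_fn(struct work_struct *work)
	{
		/* ... offload the connection ... */
	}

	static struct my_ep *my_ep_create(void)
	{
		struct my_ep *ep = kzalloc(sizeof(*ep), GFP_KERNEL);

		if (!ep)
			return NULL;
		/* Initialize at creation, not first use, so teardown can
		 * always flush safely. */
		INIT_WORK(&ep->offload_work, my_offload_fn);
		return ep;
	}

	static void my_ep_destroy(struct my_ep *ep)
	{
		flush_work(&ep->offload_work);	/* no-op if never queued */
		kfree(ep);
	}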
index 85dbf81f3204aeeb5ec01e0597ff4ad4be0576f8..6dfcfd8e73371cfe1538cdd5bef223f1ab96c815 100644 (file)
@@ -3826,6 +3826,9 @@ int qlt_abort_cmd(struct qla_tgt_cmd *cmd)
 
        spin_lock_irqsave(&cmd->cmd_lock, flags);
        if (cmd->aborted) {
+               if (cmd->sg_mapped)
+                       qlt_unmap_sg(vha, cmd);
+
                spin_unlock_irqrestore(&cmd->cmd_lock, flags);
                /*
                 * It's normal to see 2 calls in this path:
index c607755cce00d84897f17a8b3027b3309ae1810a..592a290e6cfaad1d7e0f033c8e941040254aa861 100644 (file)
@@ -32,7 +32,6 @@
 #include <linux/blkdev.h>
 #include <linux/crc-t10dif.h>
 #include <linux/spinlock.h>
-#include <linux/mutex.h>
 #include <linux/interrupt.h>
 #include <linux/atomic.h>
 #include <linux/hrtimer.h>
@@ -732,9 +731,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = {
            {0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
 };
 
-static atomic_t sdebug_num_hosts;
-static DEFINE_MUTEX(add_host_mutex);
-
+static int sdebug_num_hosts;
 static int sdebug_add_host = DEF_NUM_HOST;  /* in sysfs this is relative */
 static int sdebug_ato = DEF_ATO;
 static int sdebug_cdb_len = DEF_CDB_LEN;
@@ -781,7 +778,6 @@ static int sdebug_uuid_ctl = DEF_UUID_CTL;
 static bool sdebug_random = DEF_RANDOM;
 static bool sdebug_per_host_store = DEF_PER_HOST_STORE;
 static bool sdebug_removable = DEF_REMOVABLE;
-static bool sdebug_deflect_incoming;
 static bool sdebug_clustering;
 static bool sdebug_host_lock = DEF_HOST_LOCK;
 static bool sdebug_strict = DEF_STRICT;
@@ -5122,10 +5118,6 @@ static int scsi_debug_slave_configure(struct scsi_device *sdp)
                       sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
        if (sdp->host->max_cmd_len != SDEBUG_MAX_CMD_LEN)
                sdp->host->max_cmd_len = SDEBUG_MAX_CMD_LEN;
-       if (smp_load_acquire(&sdebug_deflect_incoming)) {
-               pr_info("Exit early due to deflect_incoming\n");
-               return 1;
-       }
        if (devip == NULL) {
                devip = find_build_dev_info(sdp);
                if (devip == NULL)
@@ -5211,7 +5203,7 @@ static bool stop_queued_cmnd(struct scsi_cmnd *cmnd)
 }
 
 /* Deletes (stops) timers or work queues of all queued commands */
-static void stop_all_queued(bool done_with_no_conn)
+static void stop_all_queued(void)
 {
        unsigned long iflags;
        int j, k;
@@ -5220,15 +5212,13 @@ static void stop_all_queued(bool done_with_no_conn)
        struct sdebug_queued_cmd *sqcp;
        struct sdebug_dev_info *devip;
        struct sdebug_defer *sd_dp;
-       struct scsi_cmnd *scp;
 
        for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) {
                spin_lock_irqsave(&sqp->qc_lock, iflags);
                for (k = 0; k < SDEBUG_CANQUEUE; ++k) {
                        if (test_bit(k, sqp->in_use_bm)) {
                                sqcp = &sqp->qc_arr[k];
-                               scp = sqcp->a_cmnd;
-                               if (!scp)
+                               if (sqcp->a_cmnd == NULL)
                                        continue;
                                devip = (struct sdebug_dev_info *)
                                        sqcp->a_cmnd->device->hostdata;
@@ -5243,10 +5233,6 @@ static void stop_all_queued(bool done_with_no_conn)
                                        l_defer_t = SDEB_DEFER_NONE;
                                spin_unlock_irqrestore(&sqp->qc_lock, iflags);
                                stop_qc_helper(sd_dp, l_defer_t);
-                               if (done_with_no_conn && l_defer_t != SDEB_DEFER_NONE) {
-                                       scp->result = DID_NO_CONNECT << 16;
-                                       scsi_done(scp);
-                               }
                                clear_bit(k, sqp->in_use_bm);
                                spin_lock_irqsave(&sqp->qc_lock, iflags);
                        }
@@ -5389,7 +5375,7 @@ static int scsi_debug_host_reset(struct scsi_cmnd *SCpnt)
                }
        }
        spin_unlock(&sdebug_host_list_lock);
-       stop_all_queued(false);
+       stop_all_queued();
        if (SDEBUG_OPT_RESET_NOISE & sdebug_opts)
                sdev_printk(KERN_INFO, SCpnt->device,
                            "%s: %d device(s) found\n", __func__, k);
@@ -5449,50 +5435,13 @@ static void sdebug_build_parts(unsigned char *ramp, unsigned long store_size)
        }
 }
 
-static void sdeb_block_all_queues(void)
-{
-       int j;
-       struct sdebug_queue *sqp;
-
-       for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp)
-               atomic_set(&sqp->blocked, (int)true);
-}
-
-static void sdeb_unblock_all_queues(void)
+static void block_unblock_all_queues(bool block)
 {
        int j;
        struct sdebug_queue *sqp;
 
        for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp)
-               atomic_set(&sqp->blocked, (int)false);
-}
-
-static void
-sdeb_add_n_hosts(int num_hosts)
-{
-       if (num_hosts < 1)
-               return;
-       do {
-               bool found;
-               unsigned long idx;
-               struct sdeb_store_info *sip;
-               bool want_phs = (sdebug_fake_rw == 0) && sdebug_per_host_store;
-
-               found = false;
-               if (want_phs) {
-                       xa_for_each_marked(per_store_ap, idx, sip, SDEB_XA_NOT_IN_USE) {
-                               sdeb_most_recent_idx = (int)idx;
-                               found = true;
-                               break;
-                       }
-                       if (found)      /* re-use case */
-                               sdebug_add_host_helper((int)idx);
-                       else
-                               sdebug_do_add_host(true /* make new store */);
-               } else {
-                       sdebug_do_add_host(false);
-               }
-       } while (--num_hosts);
+               atomic_set(&sqp->blocked, (int)block);
 }
 
 /* Adjust (by rounding down) the sdebug_cmnd_count so abs(every_nth)-1
@@ -5505,10 +5454,10 @@ static void tweak_cmnd_count(void)
        modulo = abs(sdebug_every_nth);
        if (modulo < 2)
                return;
-       sdeb_block_all_queues();
+       block_unblock_all_queues(true);
        count = atomic_read(&sdebug_cmnd_count);
        atomic_set(&sdebug_cmnd_count, (count / modulo) * modulo);
-       sdeb_unblock_all_queues();
+       block_unblock_all_queues(false);
 }
 
 static void clear_queue_stats(void)
@@ -5526,15 +5475,6 @@ static bool inject_on_this_cmd(void)
        return (atomic_read(&sdebug_cmnd_count) % abs(sdebug_every_nth)) == 0;
 }
 
-static int process_deflect_incoming(struct scsi_cmnd *scp)
-{
-       u8 opcode = scp->cmnd[0];
-
-       if (opcode == SYNCHRONIZE_CACHE || opcode == SYNCHRONIZE_CACHE_16)
-               return 0;
-       return DID_NO_CONNECT << 16;
-}
-
 #define INCLUSIVE_TIMING_MAX_NS 1000000                /* 1 millisecond */
 
 /* Complete the processing of the thread that queued a SCSI command to this
@@ -5544,7 +5484,8 @@ static int process_deflect_incoming(struct scsi_cmnd *scp)
  */
 static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
                         int scsi_result,
-                        int (*pfp)(struct scsi_cmnd *, struct sdebug_dev_info *),
+                        int (*pfp)(struct scsi_cmnd *,
+                                   struct sdebug_dev_info *),
                         int delta_jiff, int ndelay)
 {
        bool new_sd_dp;
@@ -5565,27 +5506,13 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
        }
        sdp = cmnd->device;
 
-       if (delta_jiff == 0) {
-               sqp = get_queue(cmnd);
-               if (atomic_read(&sqp->blocked)) {
-                       if (smp_load_acquire(&sdebug_deflect_incoming))
-                               return process_deflect_incoming(cmnd);
-                       else
-                               return SCSI_MLQUEUE_HOST_BUSY;
-               }
+       if (delta_jiff == 0)
                goto respond_in_thread;
-       }
 
        sqp = get_queue(cmnd);
        spin_lock_irqsave(&sqp->qc_lock, iflags);
        if (unlikely(atomic_read(&sqp->blocked))) {
                spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-               if (smp_load_acquire(&sdebug_deflect_incoming)) {
-                       scsi_result = process_deflect_incoming(cmnd);
-                       goto respond_in_thread;
-               }
-               if (sdebug_verbose)
-                       pr_info("blocked --> SCSI_MLQUEUE_HOST_BUSY\n");
                return SCSI_MLQUEUE_HOST_BUSY;
        }
        num_in_q = atomic_read(&devip->num_in_q);
@@ -5774,12 +5701,8 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 respond_in_thread:     /* call back to mid-layer using invocation thread */
        cmnd->result = pfp != NULL ? pfp(cmnd, devip) : 0;
        cmnd->result &= ~SDEG_RES_IMMED_MASK;
-       if (cmnd->result == 0 && scsi_result != 0) {
+       if (cmnd->result == 0 && scsi_result != 0)
                cmnd->result = scsi_result;
-               if (sdebug_verbose)
-                       pr_info("respond_in_thread: tag=0x%x, scp->result=0x%x\n",
-                               blk_mq_unique_tag(scsi_cmd_to_rq(cmnd)), scsi_result);
-       }
        scsi_done(cmnd);
        return 0;
 }
@@ -6064,7 +5987,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf,
                        int j, k;
                        struct sdebug_queue *sqp;
 
-                       sdeb_block_all_queues();
+                       block_unblock_all_queues(true);
                        for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
                             ++j, ++sqp) {
                                k = find_first_bit(sqp->in_use_bm,
@@ -6078,7 +6001,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf,
                                sdebug_jdelay = jdelay;
                                sdebug_ndelay = 0;
                        }
-                       sdeb_unblock_all_queues();
+                       block_unblock_all_queues(false);
                }
                return res;
        }
@@ -6104,7 +6027,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf,
                        int j, k;
                        struct sdebug_queue *sqp;
 
-                       sdeb_block_all_queues();
+                       block_unblock_all_queues(true);
                        for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
                             ++j, ++sqp) {
                                k = find_first_bit(sqp->in_use_bm,
@@ -6119,7 +6042,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf,
                                sdebug_jdelay = ndelay  ? JDELAY_OVERRIDDEN
                                                        : DEF_JDELAY;
                        }
-                       sdeb_unblock_all_queues();
+                       block_unblock_all_queues(false);
                }
                return res;
        }
@@ -6433,7 +6356,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf,
        if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n > 0) &&
            (n <= SDEBUG_CANQUEUE) &&
            (sdebug_host_max_queue == 0)) {
-               sdeb_block_all_queues();
+               block_unblock_all_queues(true);
                k = 0;
                for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
                     ++j, ++sqp) {
@@ -6448,7 +6371,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf,
                        atomic_set(&retired_max_queue, k + 1);
                else
                        atomic_set(&retired_max_queue, 0);
-               sdeb_unblock_all_queues();
+               block_unblock_all_queues(false);
                return count;
        }
        return -EINVAL;
@@ -6537,48 +6460,43 @@ static DRIVER_ATTR_RW(virtual_gb);
 static ssize_t add_host_show(struct device_driver *ddp, char *buf)
 {
        /* absolute number of hosts currently active is what is shown */
-       return scnprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&sdebug_num_hosts));
+       return scnprintf(buf, PAGE_SIZE, "%d\n", sdebug_num_hosts);
 }
 
-/*
- * Accept positive and negative values. Hex values (only positive) may be prefixed by '0x'.
- * To remove all hosts use a large negative number (e.g. -9999). The value 0 does nothing.
- * Returns -EBUSY if another add_host sysfs invocation is active.
- */
 static ssize_t add_host_store(struct device_driver *ddp, const char *buf,
                              size_t count)
 {
+       bool found;
+       unsigned long idx;
+       struct sdeb_store_info *sip;
+       bool want_phs = (sdebug_fake_rw == 0) && sdebug_per_host_store;
        int delta_hosts;
 
-       if (count == 0 || kstrtoint(buf, 0, &delta_hosts))
+       if (sscanf(buf, "%d", &delta_hosts) != 1)
                return -EINVAL;
-       if (sdebug_verbose)
-               pr_info("prior num_hosts=%d, num_to_add=%d\n",
-                       atomic_read(&sdebug_num_hosts), delta_hosts);
-       if (delta_hosts == 0)
-               return count;
-       if (mutex_trylock(&add_host_mutex) == 0)
-               return -EBUSY;
        if (delta_hosts > 0) {
-               sdeb_add_n_hosts(delta_hosts);
-       } else if (delta_hosts < 0) {
-               smp_store_release(&sdebug_deflect_incoming, true);
-               sdeb_block_all_queues();
-               if (delta_hosts >= atomic_read(&sdebug_num_hosts))
-                       stop_all_queued(true);
                do {
-                       if (atomic_read(&sdebug_num_hosts) < 1) {
-                               free_all_queued();
-                               break;
+                       found = false;
+                       if (want_phs) {
+                               xa_for_each_marked(per_store_ap, idx, sip,
+                                                  SDEB_XA_NOT_IN_USE) {
+                                       sdeb_most_recent_idx = (int)idx;
+                                       found = true;
+                                       break;
+                               }
+                               if (found)      /* re-use case */
+                                       sdebug_add_host_helper((int)idx);
+                               else
+                                       sdebug_do_add_host(true);
+                       } else {
+                               sdebug_do_add_host(false);
                        }
+               } while (--delta_hosts);
+       } else if (delta_hosts < 0) {
+               do {
                        sdebug_do_remove_host(false);
                } while (++delta_hosts);
-               sdeb_unblock_all_queues();
-               smp_store_release(&sdebug_deflect_incoming, false);
        }
-       mutex_unlock(&add_host_mutex);
-       if (sdebug_verbose)
-               pr_info("post num_hosts=%d\n", atomic_read(&sdebug_num_hosts));
        return count;
 }
 static DRIVER_ATTR_RW(add_host);
@@ -7089,10 +7007,6 @@ static int __init scsi_debug_init(void)
        sdebug_add_host = 0;
 
        for (k = 0; k < hosts_to_add; k++) {
-               if (smp_load_acquire(&sdebug_deflect_incoming)) {
-                       pr_info("exit early as sdebug_deflect_incoming is set\n");
-                       return 0;
-               }
                if (want_store && k == 0) {
                        ret = sdebug_add_host_helper(idx);
                        if (ret < 0) {
@@ -7110,12 +7024,8 @@ static int __init scsi_debug_init(void)
                }
        }
        if (sdebug_verbose)
-               pr_info("built %d host(s)\n", atomic_read(&sdebug_num_hosts));
+               pr_info("built %d host(s)\n", sdebug_num_hosts);
 
-       /*
-        * Even though all the hosts have been established, due to async device (LU) scanning
-        * by the scsi mid-level, there may still be devices (LUs) being set up.
-        */
        return 0;
 
 bus_unreg:
@@ -7131,17 +7041,12 @@ free_q_arr:
 
 static void __exit scsi_debug_exit(void)
 {
-       int k;
+       int k = sdebug_num_hosts;
 
-       /* Possible race with LUs still being set up; stop them asap */
-       sdeb_block_all_queues();
-       smp_store_release(&sdebug_deflect_incoming, true);
-       stop_all_queued(false);
-       for (k = 0; atomic_read(&sdebug_num_hosts) > 0; k++)
+       stop_all_queued();
+       for (; k; k--)
                sdebug_do_remove_host(true);
        free_all_queued();
-       if (sdebug_verbose)
-               pr_info("removed %d hosts\n", k);
        driver_unregister(&sdebug_driverfs_driver);
        bus_unregister(&pseudo_lld_bus);
        root_device_unregister(pseudo_primary);
@@ -7311,13 +7216,13 @@ static int sdebug_add_host_helper(int per_host_idx)
        sdbg_host->dev.bus = &pseudo_lld_bus;
        sdbg_host->dev.parent = pseudo_primary;
        sdbg_host->dev.release = &sdebug_release_adapter;
-       dev_set_name(&sdbg_host->dev, "adapter%d", atomic_read(&sdebug_num_hosts));
+       dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts);
 
        error = device_register(&sdbg_host->dev);
        if (error)
                goto clean;
 
-       atomic_inc(&sdebug_num_hosts);
+       ++sdebug_num_hosts;
        return 0;
 
 clean:
@@ -7381,7 +7286,7 @@ static void sdebug_do_remove_host(bool the_end)
                return;
 
        device_unregister(&sdbg_host->dev);
-       atomic_dec(&sdebug_num_hosts);
+       --sdebug_num_hosts;
 }
 
 static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
@@ -7389,10 +7294,10 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
        int num_in_q = 0;
        struct sdebug_dev_info *devip;
 
-       sdeb_block_all_queues();
+       block_unblock_all_queues(true);
        devip = (struct sdebug_dev_info *)sdev->hostdata;
        if (NULL == devip) {
-               sdeb_unblock_all_queues();
+               block_unblock_all_queues(false);
                return  -ENODEV;
        }
        num_in_q = atomic_read(&devip->num_in_q);
@@ -7411,7 +7316,7 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
                sdev_printk(KERN_INFO, sdev, "%s: qdepth=%d, num_in_q=%d\n",
                            __func__, qdepth, num_in_q);
        }
-       sdeb_unblock_all_queues();
+       block_unblock_all_queues(false);
        return sdev->queue_depth;
 }
 
@@ -7519,12 +7424,13 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
        struct sdebug_defer *sd_dp;
 
        sqp = sdebug_q_arr + queue_num;
-       qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
-       if (qc_idx >= sdebug_max_queue)
-               return 0;
 
        spin_lock_irqsave(&sqp->qc_lock, iflags);
 
+       qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
+       if (qc_idx >= sdebug_max_queue)
+               goto unlock;
+
        for (first = true; first || qc_idx + 1 < sdebug_max_queue; )   {
                if (first) {
                        first = false;
@@ -7589,6 +7495,7 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
                        break;
        }
 
+unlock:
        spin_unlock_irqrestore(&sqp->qc_lock, iflags);
 
        if (num_entries > 0)
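
Two separate things happen in the scsi_debug diff above. Most of it walks back the recent add_host/deflect_incoming rework: sdebug_num_hosts drops from an atomic_t to a plain int (the revert treats host add/remove as sufficiently serialized by its callers), and the split sdeb_block/unblock helpers fold back into block_unblock_all_queues(bool). Separately, the last hunk fixes sdebug_blk_mq_poll() to take qc_lock before scanning in_use_bm: testing the bitmap outside the lock can race with another context claiming or clearing a slot. The check-under-lock shape, as a small userspace sketch (a pthread mutex stands in for the spinlock):

	#include <pthread.h>
	#include <stdbool.h>

	static pthread_mutex_t qc_lock = PTHREAD_MUTEX_INITIALIZER;
	static unsigned long in_use_bm;	/* one bit per in-flight command */

	/* Racy shape (before the fix): the bitmap can change between the
	 * unlocked test and taking the lock. */
	static bool poll_racy(void)
	{
		if (!in_use_bm)
			return false;	/* may be a stale answer */
		pthread_mutex_lock(&qc_lock);
		/* ... process entries ... */
		pthread_mutex_unlock(&qc_lock);
		return true;
	}

	/* Fixed shape (as in the patch): test only while holding the lock. */
	static bool poll_fixed(void)
	{
		bool busy;

		pthread_mutex_lock(&qc_lock);
		busy = in_use_bm != 0;
		/* ... process entries while still locked ... */
		pthread_mutex_unlock(&qc_lock);
		return busy;
	}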
index ff89de86545d1f4d8f9d3d050cca5ae419787aae..b02af340c2d3d15e4aa05ef84faafc596386f307 100644 (file)
@@ -30,7 +30,7 @@ static inline const char *scmd_name(const struct scsi_cmnd *scmd)
 {
        struct request *rq = scsi_cmd_to_rq((struct scsi_cmnd *)scmd);
 
-       if (!rq->q->disk)
+       if (!rq->q || !rq->q->disk)
                return NULL;
        return rq->q->disk->disk_name;
 }
index f4e6c68ac99eddad151716e36e075347ddb54d1d..2ef78083f1eff616ab727352365b9875506dca2a 100644 (file)
@@ -223,6 +223,8 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
        int ret;
        struct sbitmap sb_backup;
 
+       depth = min_t(unsigned int, depth, scsi_device_max_queue_depth(sdev));
+
        /*
         * realloc if new shift is calculated, which is caused by setting
         * up one new default queue depth after calling ->slave_configure
@@ -245,6 +247,9 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
                                scsi_device_max_queue_depth(sdev),
                                new_shift, GFP_KERNEL,
                                sdev->request_queue->node, false, true);
+       if (!ret)
+               sbitmap_resize(&sdev->budget_map, depth);
+
        if (need_free) {
                if (ret)
                        sdev->budget_map = sb_backup;
index 226a50944c005c55a1b2b2ce76b40cc4089247cd..dc6872e352bd4ea09b5cf7c9c945c2631c1af355 100644 (file)
@@ -1384,10 +1384,6 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
        if (IS_ENABLED(CONFIG_BLK_DEV_BSG)) {
                sdev->bsg_dev = scsi_bsg_register_queue(sdev);
                if (IS_ERR(sdev->bsg_dev)) {
-                       /*
-                        * We're treating error on bsg register as non-fatal, so
-                        * pretend nothing went wrong.
-                        */
                        error = PTR_ERR(sdev->bsg_dev);
                        sdev_printk(KERN_INFO, sdev,
                                    "Failed to register bsg queue, errno=%d\n",
index 27951ea05dd419ae17138625bb5a8d480b93c6f4..2c0dd64159b09d4956b5b105fa3761839ec74f9b 100644 (file)
@@ -86,6 +86,9 @@ struct iscsi_internal {
        struct transport_container session_cont;
 };
 
+static DEFINE_IDR(iscsi_ep_idr);
+static DEFINE_MUTEX(iscsi_ep_idr_mutex);
+
 static atomic_t iscsi_session_nr; /* sysfs session id for next new session */
 
 static struct workqueue_struct *iscsi_conn_cleanup_workq;
@@ -168,6 +171,11 @@ struct device_attribute dev_attr_##_prefix##_##_name =     \
 static void iscsi_endpoint_release(struct device *dev)
 {
        struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev);
+
+       mutex_lock(&iscsi_ep_idr_mutex);
+       idr_remove(&iscsi_ep_idr, ep->id);
+       mutex_unlock(&iscsi_ep_idr_mutex);
+
        kfree(ep);
 }
 
@@ -180,7 +188,7 @@ static ssize_t
 show_ep_handle(struct device *dev, struct device_attribute *attr, char *buf)
 {
        struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev);
-       return sysfs_emit(buf, "%llu\n", (unsigned long long) ep->id);
+       return sysfs_emit(buf, "%d\n", ep->id);
 }
 static ISCSI_ATTR(ep, handle, S_IRUGO, show_ep_handle, NULL);
 
@@ -193,48 +201,32 @@ static struct attribute_group iscsi_endpoint_group = {
        .attrs = iscsi_endpoint_attrs,
 };
 
-#define ISCSI_MAX_EPID -1
-
-static int iscsi_match_epid(struct device *dev, const void *data)
-{
-       struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev);
-       const uint64_t *epid = data;
-
-       return *epid == ep->id;
-}
-
 struct iscsi_endpoint *
 iscsi_create_endpoint(int dd_size)
 {
-       struct device *dev;
        struct iscsi_endpoint *ep;
-       uint64_t id;
-       int err;
-
-       for (id = 1; id < ISCSI_MAX_EPID; id++) {
-               dev = class_find_device(&iscsi_endpoint_class, NULL, &id,
-                                       iscsi_match_epid);
-               if (!dev)
-                       break;
-               else
-                       put_device(dev);
-       }
-       if (id == ISCSI_MAX_EPID) {
-               printk(KERN_ERR "Too many connections. Max supported %u\n",
-                      ISCSI_MAX_EPID - 1);
-               return NULL;
-       }
+       int err, id;
 
        ep = kzalloc(sizeof(*ep) + dd_size, GFP_KERNEL);
        if (!ep)
                return NULL;
 
+       mutex_lock(&iscsi_ep_idr_mutex);
+       id = idr_alloc(&iscsi_ep_idr, ep, 0, -1, GFP_NOIO);
+       if (id < 0) {
+               mutex_unlock(&iscsi_ep_idr_mutex);
+               printk(KERN_ERR "Could not allocate endpoint ID. Error %d.\n",
+                      id);
+               goto free_ep;
+       }
+       mutex_unlock(&iscsi_ep_idr_mutex);
+
        ep->id = id;
        ep->dev.class = &iscsi_endpoint_class;
-       dev_set_name(&ep->dev, "ep-%llu", (unsigned long long) id);
+       dev_set_name(&ep->dev, "ep-%d", id);
        err = device_register(&ep->dev);
         if (err)
-                goto free_ep;
+               goto free_id;
 
        err = sysfs_create_group(&ep->dev.kobj, &iscsi_endpoint_group);
        if (err)
@@ -248,6 +240,10 @@ unregister_dev:
        device_unregister(&ep->dev);
        return NULL;
 
+free_id:
+       mutex_lock(&iscsi_ep_idr_mutex);
+       idr_remove(&iscsi_ep_idr, id);
+       mutex_unlock(&iscsi_ep_idr_mutex);
 free_ep:
        kfree(ep);
        return NULL;
@@ -275,14 +271,17 @@ EXPORT_SYMBOL_GPL(iscsi_put_endpoint);
  */
 struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle)
 {
-       struct device *dev;
+       struct iscsi_endpoint *ep;
 
-       dev = class_find_device(&iscsi_endpoint_class, NULL, &handle,
-                               iscsi_match_epid);
-       if (!dev)
-               return NULL;
+       mutex_lock(&iscsi_ep_idr_mutex);
+       ep = idr_find(&iscsi_ep_idr, handle);
+       if (!ep)
+               goto unlock;
 
-       return iscsi_dev_to_endpoint(dev);
+       get_device(&ep->dev);
+unlock:
+       mutex_unlock(&iscsi_ep_idr_mutex);
+       return ep;
 }
 EXPORT_SYMBOL_GPL(iscsi_lookup_endpoint);
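
The scsi_transport_iscsi hunks above replace the O(n) endpoint-ID scheme — walking every endpoint device via class_find_device() to find a free ID — with an IDR, which both hands out IDs and maps them back to objects: idr_alloc() under iscsi_ep_idr_mutex assigns the ID at creation, iscsi_lookup_endpoint() becomes idr_find() plus a get_device() reference, and iscsi_endpoint_release() removes the ID again. The IDR calls used here, in a minimal self-contained sketch (struct obj and the obj_* helpers are illustrative):

	#include <linux/idr.h>
	#include <linux/mutex.h>
	#include <linux/slab.h>

	static DEFINE_IDR(obj_idr);
	static DEFINE_MUTEX(obj_idr_mutex);

	struct obj {
		int id;
	};

	static struct obj *obj_create(void)
	{
		struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);
		int id;

		if (!o)
			return NULL;

		mutex_lock(&obj_idr_mutex);
		/* end == 0 means no upper limit; returns the new ID or a
		 * negative errno. */
		id = idr_alloc(&obj_idr, o, 0, 0, GFP_KERNEL);
		mutex_unlock(&obj_idr_mutex);

		if (id < 0) {
			kfree(o);
			return NULL;
		}
		o->id = id;
		return o;
	}

	/* Direct lookup by ID instead of scanning every registered device. */
	static struct obj *obj_lookup(int id)
	{
		struct obj *o;

		mutex_lock(&obj_idr_mutex);
		o = idr_find(&obj_idr, id);
		mutex_unlock(&obj_idr_mutex);
		return o;
	}

	static void obj_destroy(struct obj *o)
	{
		mutex_lock(&obj_idr_mutex);
		idr_remove(&obj_idr, o->id);
		mutex_unlock(&obj_idr_mutex);
		kfree(o);
	}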
 
@@ -2202,10 +2201,10 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag)
 
        switch (flag) {
        case STOP_CONN_RECOVER:
-               conn->state = ISCSI_CONN_FAILED;
+               WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
                break;
        case STOP_CONN_TERM:
-               conn->state = ISCSI_CONN_DOWN;
+               WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
                break;
        default:
                iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n",
@@ -2217,6 +2216,49 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag)
        ISCSI_DBG_TRANS_CONN(conn, "Stopping conn done.\n");
 }
 
+static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active)
+{
+       struct iscsi_cls_session *session = iscsi_conn_to_session(conn);
+       struct iscsi_endpoint *ep;
+
+       ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n");
+       WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
+
+       if (!conn->ep || !session->transport->ep_disconnect)
+               return;
+
+       ep = conn->ep;
+       conn->ep = NULL;
+
+       session->transport->unbind_conn(conn, is_active);
+       session->transport->ep_disconnect(ep);
+       ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n");
+}
+
+static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn,
+                                        struct iscsi_endpoint *ep,
+                                        bool is_active)
+{
+       /* Check if this was a conn error and the kernel took ownership */
+       spin_lock_irq(&conn->lock);
+       if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) {
+               spin_unlock_irq(&conn->lock);
+               iscsi_ep_disconnect(conn, is_active);
+       } else {
+               spin_unlock_irq(&conn->lock);
+               ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n");
+               mutex_unlock(&conn->ep_mutex);
+
+               flush_work(&conn->cleanup_work);
+               /*
+                * Userspace is now done with the EP so we can release the ref
+                * iscsi_cleanup_conn_work_fn took.
+                */
+               iscsi_put_endpoint(ep);
+               mutex_lock(&conn->ep_mutex);
+       }
+}
+
 static int iscsi_if_stop_conn(struct iscsi_transport *transport,
                              struct iscsi_uevent *ev)
 {
@@ -2237,12 +2279,25 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport,
                cancel_work_sync(&conn->cleanup_work);
                iscsi_stop_conn(conn, flag);
        } else {
+               /*
+                * For offload, when iscsid is restarted it won't know about
+                * existing endpoints so it can't do an ep_disconnect. We clean
+                * it up here for userspace.
+                */
+               mutex_lock(&conn->ep_mutex);
+               if (conn->ep)
+                       iscsi_if_disconnect_bound_ep(conn, conn->ep, true);
+               mutex_unlock(&conn->ep_mutex);
+
                /*
                 * Figure out if it was the kernel or userspace initiating this.
                 */
+               spin_lock_irq(&conn->lock);
                if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) {
+                       spin_unlock_irq(&conn->lock);
                        iscsi_stop_conn(conn, flag);
                } else {
+                       spin_unlock_irq(&conn->lock);
                        ISCSI_DBG_TRANS_CONN(conn,
                                             "flush kernel conn cleanup.\n");
                        flush_work(&conn->cleanup_work);
@@ -2251,31 +2306,14 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport,
                 * Only clear for recovery to avoid extra cleanup runs during
                 * termination.
                 */
+               spin_lock_irq(&conn->lock);
                clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags);
+               spin_unlock_irq(&conn->lock);
        }
        ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop done.\n");
        return 0;
 }
 
-static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active)
-{
-       struct iscsi_cls_session *session = iscsi_conn_to_session(conn);
-       struct iscsi_endpoint *ep;
-
-       ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n");
-       conn->state = ISCSI_CONN_FAILED;
-
-       if (!conn->ep || !session->transport->ep_disconnect)
-               return;
-
-       ep = conn->ep;
-       conn->ep = NULL;
-
-       session->transport->unbind_conn(conn, is_active);
-       session->transport->ep_disconnect(ep);
-       ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n");
-}
-
 static void iscsi_cleanup_conn_work_fn(struct work_struct *work)
 {
        struct iscsi_cls_conn *conn = container_of(work, struct iscsi_cls_conn,
@@ -2284,18 +2322,11 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work)
 
        mutex_lock(&conn->ep_mutex);
        /*
-        * If we are not at least bound there is nothing for us to do. Userspace
-        * will do a ep_disconnect call if offload is used, but will not be
-        * doing a stop since there is nothing to clean up, so we have to clear
-        * the cleanup bit here.
+        * Get a ref to the ep, so we don't release its ID until after
+        * userspace is done referencing it in iscsi_if_disconnect_bound_ep.
         */
-       if (conn->state != ISCSI_CONN_BOUND && conn->state != ISCSI_CONN_UP) {
-               ISCSI_DBG_TRANS_CONN(conn, "Got error while conn is already failed. Ignoring.\n");
-               clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags);
-               mutex_unlock(&conn->ep_mutex);
-               return;
-       }
-
+       if (conn->ep)
+               get_device(&conn->ep->dev);
        iscsi_ep_disconnect(conn, false);
 
        if (system_state != SYSTEM_RUNNING) {
@@ -2340,11 +2371,12 @@ iscsi_alloc_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid)
                conn->dd_data = &conn[1];
 
        mutex_init(&conn->ep_mutex);
+       spin_lock_init(&conn->lock);
        INIT_LIST_HEAD(&conn->conn_list);
        INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn);
        conn->transport = transport;
        conn->cid = cid;
-       conn->state = ISCSI_CONN_DOWN;
+       WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
 
        /* this is released in the dev's release function */
        if (!get_device(&session->dev))
@@ -2542,9 +2574,32 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
        struct iscsi_uevent *ev;
        struct iscsi_internal *priv;
        int len = nlmsg_total_size(sizeof(*ev));
+       unsigned long flags;
+       int state;
 
-       if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags))
-               queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work);
+       spin_lock_irqsave(&conn->lock, flags);
+       /*
+        * Userspace will only do a stop call if we are at least bound, and we
+        * only need to do the in-kernel cleanup if in the UP state so cmds can
+        * be released to upper layers. In other states, just wait for
+        * userspace to avoid races that can leave the cleanup_work queued.
+        */
+       state = READ_ONCE(conn->state);
+       switch (state) {
+       case ISCSI_CONN_BOUND:
+       case ISCSI_CONN_UP:
+               if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP,
+                                     &conn->flags)) {
+                       queue_work(iscsi_conn_cleanup_workq,
+                                  &conn->cleanup_work);
+               }
+               break;
+       default:
+               ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n",
+                                    state);
+               break;
+       }
+       spin_unlock_irqrestore(&conn->lock, flags);
 
        priv = iscsi_if_transport_lookup(conn->transport);
        if (!priv)
@@ -2894,7 +2949,7 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
        char *data = (char*)ev + sizeof(*ev);
        struct iscsi_cls_conn *conn;
        struct iscsi_cls_session *session;
-       int err = 0, value = 0;
+       int err = 0, value = 0, state;
 
        if (ev->u.set_param.len > PAGE_SIZE)
                return -EINVAL;
@@ -2911,8 +2966,8 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
                        session->recovery_tmo = value;
                break;
        default:
-               if ((conn->state == ISCSI_CONN_BOUND) ||
-                       (conn->state == ISCSI_CONN_UP)) {
+               state = READ_ONCE(conn->state);
+               if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) {
                        err = transport->set_param(conn, ev->u.set_param.param,
                                        data, ev->u.set_param.len);
                } else {
@@ -2984,16 +3039,7 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport,
        }
 
        mutex_lock(&conn->ep_mutex);
-       /* Check if this was a conn error and the kernel took ownership */
-       if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) {
-               ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n");
-               mutex_unlock(&conn->ep_mutex);
-
-               flush_work(&conn->cleanup_work);
-               goto put_ep;
-       }
-
-       iscsi_ep_disconnect(conn, false);
+       iscsi_if_disconnect_bound_ep(conn, ep, false);
        mutex_unlock(&conn->ep_mutex);
 put_ep:
        iscsi_put_endpoint(ep);
@@ -3696,24 +3742,17 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport,
                return -EINVAL;
 
        mutex_lock(&conn->ep_mutex);
+       spin_lock_irq(&conn->lock);
        if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) {
+               spin_unlock_irq(&conn->lock);
                mutex_unlock(&conn->ep_mutex);
                ev->r.retcode = -ENOTCONN;
                return 0;
        }
+       spin_unlock_irq(&conn->lock);
 
        switch (nlh->nlmsg_type) {
        case ISCSI_UEVENT_BIND_CONN:
-               if (conn->ep) {
-                       /*
-                        * For offload boot support where iscsid is restarted
-                        * during the pivot root stage, the ep will be intact
-                        * here when the new iscsid instance starts up and
-                        * reconnects.
-                        */
-                       iscsi_ep_disconnect(conn, true);
-               }
-
                session = iscsi_session_lookup(ev->u.b_conn.sid);
                if (!session) {
                        err = -EINVAL;
@@ -3724,7 +3763,7 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport,
                                                ev->u.b_conn.transport_eph,
                                                ev->u.b_conn.is_leading);
                if (!ev->r.retcode)
-                       conn->state = ISCSI_CONN_BOUND;
+                       WRITE_ONCE(conn->state, ISCSI_CONN_BOUND);
 
                if (ev->r.retcode || !transport->ep_connect)
                        break;
@@ -3743,7 +3782,8 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport,
        case ISCSI_UEVENT_START_CONN:
                ev->r.retcode = transport->start_conn(conn);
                if (!ev->r.retcode)
-                       conn->state = ISCSI_CONN_UP;
+                       WRITE_ONCE(conn->state, ISCSI_CONN_UP);
+
                break;
        case ISCSI_UEVENT_SEND_PDU:
                pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev);
@@ -4050,10 +4090,11 @@ static ssize_t show_conn_state(struct device *dev,
 {
        struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent);
        const char *state = "unknown";
+       int conn_state = READ_ONCE(conn->state);
 
-       if (conn->state >= 0 &&
-           conn->state < ARRAY_SIZE(connection_state_names))
-               state = connection_state_names[conn->state];
+       if (conn_state >= 0 &&
+           conn_state < ARRAY_SIZE(connection_state_names))
+               state = connection_state_names[conn_state];
 
        return sysfs_emit(buf, "%s\n", state);
 }
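
A recurring theme in the hunks above: every store to conn->state is now a WRITE_ONCE() and every load taken outside the state machine's locks is a READ_ONCE(), telling the compiler not to tear, fuse or re-read the access while the sysfs and netlink paths inspect the state locklessly. A minimal sketch of the pairing, with do_something() as a hypothetical consumer:

	/* writer side, serialized by the connection locks */
	WRITE_ONCE(conn->state, ISCSI_CONN_BOUND);

	/* lockless reader, e.g. a sysfs show routine */
	int state = READ_ONCE(conn->state);

	if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP)
		do_something(conn);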
index a390679cf45848925889f43af87a8adc328b9c32..9694e2cfaf9a60f527f65af4b1c78d030389e806 100644 (file)
@@ -797,7 +797,6 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
        case SD_LBP_FULL:
        case SD_LBP_DISABLE:
                blk_queue_max_discard_sectors(q, 0);
-               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
                return;
 
        case SD_LBP_UNMAP:
@@ -830,7 +829,6 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
        }
 
        blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
-       blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
 }
 
 static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
@@ -3216,6 +3214,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
                        sd_read_block_limits(sdkp);
                        sd_read_block_characteristics(sdkp);
                        sd_zbc_read_zones(sdkp, buffer);
+                       sd_read_cpr(sdkp);
                }
 
                sd_print_capacity(sdkp, old_capacity);
@@ -3225,7 +3224,6 @@ static int sd_revalidate_disk(struct gendisk *disk)
                sd_read_app_tag_own(sdkp, buffer);
                sd_read_write_same(sdkp, buffer);
                sd_read_security(sdkp, buffer);
-               sd_read_cpr(sdkp);
        }
 
        /*
@@ -3475,6 +3473,7 @@ static int sd_probe(struct device *dev)
        error = device_add_disk(dev, gd, NULL);
        if (error) {
                put_device(&sdkp->disk_dev);
+               blk_cleanup_disk(gd);
                goto out;
        }
 
index 5ba9df334968d6dc2ef284a99e4c1eb760003199..cbd92891a762c854fe35181fe1bb355dd94c776a 100644 (file)
@@ -535,7 +535,7 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 
        scsi_autopm_get_device(sdev);
 
-       if (ret != CDROMCLOSETRAY && ret != CDROMEJECT) {
+       if (cmd != CDROMCLOSETRAY && cmd != CDROMEJECT) {
                ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg);
                if (ret != -ENOSYS)
                        goto put;
index ddd00efc488252644e732a481d29d3ee5ecc8e3c..fbdb5124d7f7d1d8a541adf463b37f920da469f9 100644 (file)
@@ -41,7 +41,7 @@ static int sr_read_tochdr(struct cdrom_device_info *cdi,
        int result;
        unsigned char *buffer;
 
-       buffer = kmalloc(32, GFP_KERNEL);
+       buffer = kzalloc(32, GFP_KERNEL);
        if (!buffer)
                return -ENOMEM;
 
@@ -55,10 +55,13 @@ static int sr_read_tochdr(struct cdrom_device_info *cdi,
        cgc.data_direction = DMA_FROM_DEVICE;
 
        result = sr_do_ioctl(cd, &cgc);
+       if (result)
+               goto err;
 
        tochdr->cdth_trk0 = buffer[2];
        tochdr->cdth_trk1 = buffer[3];
 
+err:
        kfree(buffer);
        return result;
 }
@@ -71,7 +74,7 @@ static int sr_read_tocentry(struct cdrom_device_info *cdi,
        int result;
        unsigned char *buffer;
 
-       buffer = kmalloc(32, GFP_KERNEL);
+       buffer = kzalloc(32, GFP_KERNEL);
        if (!buffer)
                return -ENOMEM;
 
@@ -86,6 +89,8 @@ static int sr_read_tocentry(struct cdrom_device_info *cdi,
        cgc.data_direction = DMA_FROM_DEVICE;
 
        result = sr_do_ioctl(cd, &cgc);
+       if (result)
+               goto err;
 
        tocentry->cdte_ctrl = buffer[5] & 0xf;
        tocentry->cdte_adr = buffer[5] >> 4;
@@ -98,6 +103,7 @@ static int sr_read_tocentry(struct cdrom_device_info *cdi,
                tocentry->cdte_addr.lba = (((((buffer[8] << 8) + buffer[9]) << 8)
                        + buffer[10]) << 8) + buffer[11];
 
+err:
        kfree(buffer);
        return result;
 }
@@ -384,7 +390,7 @@ int sr_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn)
 {
        Scsi_CD *cd = cdi->handle;
        struct packet_command cgc;
-       char *buffer = kmalloc(32, GFP_KERNEL);
+       char *buffer = kzalloc(32, GFP_KERNEL);
        int result;
 
        if (!buffer)
@@ -400,10 +406,13 @@ int sr_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn)
        cgc.data_direction = DMA_FROM_DEVICE;
        cgc.timeout = IOCTL_TIMEOUT;
        result = sr_do_ioctl(cd, &cgc);
+       if (result)
+               goto err;
 
        memcpy(mcn->medium_catalog_number, buffer + 9, 13);
        mcn->medium_catalog_number[13] = 0;
 
+err:
        kfree(buffer);
        return result;
 }
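
All three hunks in this file apply the same hardening: allocate the transfer buffer zeroed and bail out before parsing when the command fails, so a failed or short transfer can never hand stale heap bytes back to the caller. A minimal sketch of the shape, with issue_cmd() and parse_reply() as hypothetical stand-ins for sr_do_ioctl() and the field extraction:

	unsigned char *buffer;
	int result;

	buffer = kzalloc(32, GFP_KERNEL);	/* zeroed, unlike kmalloc() */
	if (!buffer)
		return -ENOMEM;

	result = issue_cmd(buffer);
	if (result)
		goto out;			/* never parse a failed transfer */

	parse_reply(buffer);
out:
	kfree(buffer);
	return result;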
index 0d2e950d0865eb7f629e4d5c901fa252d38d44bb..586c0e567ff9abd9c51123cee8f0f15d36bed1c7 100644 (file)
@@ -957,18 +957,6 @@ static const struct reset_control_ops ufs_qcom_reset_ops = {
        .deassert = ufs_qcom_reset_deassert,
 };
 
-#define        ANDROID_BOOT_DEV_MAX    30
-static char android_boot_dev[ANDROID_BOOT_DEV_MAX];
-
-#ifndef MODULE
-static int __init get_android_boot_dev(char *str)
-{
-       strlcpy(android_boot_dev, str, ANDROID_BOOT_DEV_MAX);
-       return 1;
-}
-__setup("androidboot.bootdevice=", get_android_boot_dev);
-#endif
-
 /**
  * ufs_qcom_init - bind phy with controller
  * @hba: host controller instance
@@ -988,9 +976,6 @@ static int ufs_qcom_init(struct ufs_hba *hba)
        struct resource *res;
        struct ufs_clk_info *clki;
 
-       if (strlen(android_boot_dev) && strcmp(android_boot_dev, dev_name(dev)))
-               return -ENODEV;
-
        host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL);
        if (!host) {
                err = -ENOMEM;
index f76692053ca17813a1d237fb35aed0e327a832ea..e892b9feffb11e6e7f3b5b3737dd828feaa85371 100644 (file)
@@ -428,6 +428,12 @@ static int ufs_intel_adl_init(struct ufs_hba *hba)
        return ufs_intel_common_init(hba);
 }
 
+static int ufs_intel_mtl_init(struct ufs_hba *hba)
+{
+       hba->caps |= UFSHCD_CAP_CRYPTO | UFSHCD_CAP_WB_EN;
+       return ufs_intel_common_init(hba);
+}
+
 static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = {
        .name                   = "intel-pci",
        .init                   = ufs_intel_common_init,
@@ -465,6 +471,16 @@ static struct ufs_hba_variant_ops ufs_intel_adl_hba_vops = {
        .device_reset           = ufs_intel_device_reset,
 };
 
+static struct ufs_hba_variant_ops ufs_intel_mtl_hba_vops = {
+       .name                   = "intel-pci",
+       .init                   = ufs_intel_mtl_init,
+       .exit                   = ufs_intel_common_exit,
+       .hce_enable_notify      = ufs_intel_hce_enable_notify,
+       .link_startup_notify    = ufs_intel_link_startup_notify,
+       .resume                 = ufs_intel_resume,
+       .device_reset           = ufs_intel_device_reset,
+};
+
 #ifdef CONFIG_PM_SLEEP
 static int ufshcd_pci_restore(struct device *dev)
 {
@@ -579,6 +595,7 @@ static const struct pci_device_id ufshcd_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, 0x98FA), (kernel_ulong_t)&ufs_intel_lkf_hba_vops },
        { PCI_VDEVICE(INTEL, 0x51FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops },
        { PCI_VDEVICE(INTEL, 0x54FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops },
+       { PCI_VDEVICE(INTEL, 0x7E47), (kernel_ulong_t)&ufs_intel_mtl_hba_vops },
        { }     /* terminate list */
 };
 
index 88c20f3608c24b80635d689f2430d4c27d758a09..94f545be183aa13aa572b3ba12a90ee8bbf6b980 100644 (file)
@@ -820,8 +820,6 @@ struct ufs_hba {
        enum ufs_pm_level rpm_lvl;
        /* Desired UFS power management level during system PM */
        enum ufs_pm_level spm_lvl;
-       struct device_attribute rpm_lvl_attr;
-       struct device_attribute spm_lvl_attr;
        int pm_op_in_progress;
 
        /* Auto-Hibernate Idle Timer register value */
index b2bec19022cdd31868fc5047aedb5f45d7c34e6c..588c0329b80ca5370c0f16910c0b0a766ff19759 100644 (file)
@@ -867,12 +867,6 @@ static struct ufshpb_region *ufshpb_victim_lru_info(struct ufshpb_lu *hpb)
        struct ufshpb_region *rgn, *victim_rgn = NULL;
 
        list_for_each_entry(rgn, &lru_info->lh_lru_rgn, list_lru_rgn) {
-               if (!rgn) {
-                       dev_err(&hpb->sdev_ufs_lu->sdev_dev,
-                               "%s: no region allocated\n",
-                               __func__);
-                       return NULL;
-               }
                if (ufshpb_check_srgns_issue_state(hpb, rgn))
                        continue;
 
@@ -888,6 +882,11 @@ static struct ufshpb_region *ufshpb_victim_lru_info(struct ufshpb_lu *hpb)
                break;
        }
 
+       if (!victim_rgn)
+               dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+                       "%s: no region allocated\n",
+                       __func__);
+
        return victim_rgn;
 }
 
@@ -1255,6 +1254,13 @@ void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
        struct utp_hpb_rsp *rsp_field = &lrbp->ucd_rsp_ptr->hr;
        int data_seg_len;
 
+       data_seg_len = be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_2)
+               & MASK_RSP_UPIU_DATA_SEG_LEN;
+
+       /* If data segment length is zero, rsp_field is not valid */
+       if (!data_seg_len)
+               return;
+
        if (unlikely(lrbp->lun != rsp_field->lun)) {
                struct scsi_device *sdev;
                bool found = false;
@@ -1289,18 +1295,6 @@ void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
                return;
        }
 
-       data_seg_len = be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_2)
-               & MASK_RSP_UPIU_DATA_SEG_LEN;
-
-       /* To flush remained rsp_list, we queue the map_work task */
-       if (!data_seg_len) {
-               if (!ufshpb_is_general_lun(hpb->lun))
-                       return;
-
-               ufshpb_kick_map_work(hpb);
-               return;
-       }
-
        BUILD_BUG_ON(sizeof(struct utp_hpb_rsp) != UTP_HPB_RSP_SIZE);
 
        if (!ufshpb_is_hpb_rsp_valid(hba, lrbp, rsp_field))
index 0e6110da69e7646f3b4b01cee646bad26222d328..578c4b6d0f7d97b1caddb46d8e0baef6f0b21822 100644 (file)
@@ -988,7 +988,7 @@ static struct virtio_driver virtio_scsi_driver = {
        .remove = virtscsi_remove,
 };
 
-static int __init init(void)
+static int __init virtio_scsi_init(void)
 {
        int ret = -ENOMEM;
 
@@ -1020,14 +1020,14 @@ error:
        return ret;
 }
 
-static void __exit fini(void)
+static void __exit virtio_scsi_fini(void)
 {
        unregister_virtio_driver(&virtio_scsi_driver);
        mempool_destroy(virtscsi_cmd_pool);
        kmem_cache_destroy(virtscsi_cmd_cache);
 }
-module_init(init);
-module_exit(fini);
+module_init(virtio_scsi_init);
+module_exit(virtio_scsi_fini);
 
 MODULE_DEVICE_TABLE(virtio, id_table);
 MODULE_DESCRIPTION("Virtio SCSI HBA driver");
index 27b9e2baab1a61c2ca62b7caf0f9ced69ee0025c..7acf9193a9e800519f6381b8ef27b201bf34650d 100644 (file)
@@ -159,6 +159,8 @@ static void zorro7xx_remove_one(struct zorro_dev *z)
        scsi_remove_host(host);
 
        NCR_700_release(host);
+       if (host->base > 0x01000000)
+               iounmap(hostdata->base);
        kfree(hostdata);
        free_irq(host->irq, host);
        zorro_release_device(z);
index f04b961b96cd4de5513a46fc2b24d27f9ef22e35..ec58091fc948a26608baf717a44d67c679d180e0 100644 (file)
@@ -510,9 +510,9 @@ static int qcom_slim_probe(struct platform_device *pdev)
        }
 
        ctrl->irq = platform_get_irq(pdev, 0);
-       if (!ctrl->irq) {
+       if (ctrl->irq < 0) {
                dev_err(&pdev->dev, "no slimbus IRQ\n");
-               return -ENODEV;
+               return ctrl->irq;
        }
 
        sctrl = &ctrl->ctrl;
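
The fix above matches the platform_get_irq() contract: it returns a non-zero IRQ number on success and a negative errno on failure (logging the error itself), so the old !ctrl->irq test could never catch a failure. The canonical probe-time shape:

	irq = platform_get_irq(pdev, 0);
	if (irq < 0)
		return irq;	/* propagate the errno */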
index 122f9c884b38bfd01239324eb4d2c7e87366dce9..ccd0577a771e4284be143499ea071e3931d29166 100644 (file)
@@ -50,7 +50,7 @@ struct imx8m_blk_ctrl_domain_data {
        u32 mipi_phy_rst_mask;
 };
 
-#define DOMAIN_MAX_CLKS 3
+#define DOMAIN_MAX_CLKS 4
 
 struct imx8m_blk_ctrl_domain {
        struct generic_pm_domain genpd;
index 92d9610df1fd8fb50857839a5a1041bd9c80be08..938017a60c8ed2ae54ed01ecb5edad7a2e1b83b7 100644 (file)
@@ -277,6 +277,9 @@ static int atmel_qspi_find_mode(const struct spi_mem_op *op)
 static bool atmel_qspi_supports_op(struct spi_mem *mem,
                                   const struct spi_mem_op *op)
 {
+       if (!spi_mem_default_supports_op(mem, op))
+               return false;
+
        if (atmel_qspi_find_mode(op) < 0)
                return false;
 
index 86c76211b3d3dd4f735cd788d97159514a8a1250..cad2d55dcd3d2b7c05642461a2fe8f23fbae28f9 100644 (file)
@@ -1205,7 +1205,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem,
        addr = op->addr.val;
        len = op->data.nbytes;
 
-       if (bcm_qspi_bspi_ver_three(qspi) == true) {
+       if (has_bspi(qspi) && bcm_qspi_bspi_ver_three(qspi) == true) {
                /*
                 * The address coming into this function is a raw flash offset.
                 * But for BSPI <= V3, we need to convert it to a remapped BSPI
@@ -1224,7 +1224,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem,
            len < 4)
                mspi_read = true;
 
-       if (mspi_read)
+       if (!has_bspi(qspi) || mspi_read)
                return bcm_qspi_mspi_exec_mem_op(spi, op);
 
        ret = bcm_qspi_bspi_set_mode(qspi, op, 0);
index b0c9f62ccefbb9eaa83aa9d9897fa8d9c0a6148a..19686fb47bb352727fc9f2038403dd300909469a 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/iopoll.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
+#include <linux/log2.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/of.h>
@@ -102,12 +103,6 @@ struct cqspi_driver_platdata {
 #define CQSPI_TIMEOUT_MS                       500
 #define CQSPI_READ_TIMEOUT_MS                  10
 
-/* Instruction type */
-#define CQSPI_INST_TYPE_SINGLE                 0
-#define CQSPI_INST_TYPE_DUAL                   1
-#define CQSPI_INST_TYPE_QUAD                   2
-#define CQSPI_INST_TYPE_OCTAL                  3
-
 #define CQSPI_DUMMY_CLKS_PER_BYTE              8
 #define CQSPI_DUMMY_BYTES_MAX                  4
 #define CQSPI_DUMMY_CLKS_MAX                   31
@@ -376,10 +371,6 @@ static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op, bool dtr)
 static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata,
                              const struct spi_mem_op *op)
 {
-       f_pdata->inst_width = CQSPI_INST_TYPE_SINGLE;
-       f_pdata->addr_width = CQSPI_INST_TYPE_SINGLE;
-       f_pdata->data_width = CQSPI_INST_TYPE_SINGLE;
-
        /*
         * For an op to be DTR, cmd phase along with every other non-empty
         * phase should have dtr field set to 1. If an op phase has zero
@@ -389,32 +380,23 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata,
                       (!op->addr.nbytes || op->addr.dtr) &&
                       (!op->data.nbytes || op->data.dtr);
 
-       switch (op->data.buswidth) {
-       case 0:
-               break;
-       case 1:
-               f_pdata->data_width = CQSPI_INST_TYPE_SINGLE;
-               break;
-       case 2:
-               f_pdata->data_width = CQSPI_INST_TYPE_DUAL;
-               break;
-       case 4:
-               f_pdata->data_width = CQSPI_INST_TYPE_QUAD;
-               break;
-       case 8:
-               f_pdata->data_width = CQSPI_INST_TYPE_OCTAL;
-               break;
-       default:
-               return -EINVAL;
-       }
+       f_pdata->inst_width = 0;
+       if (op->cmd.buswidth)
+               f_pdata->inst_width = ilog2(op->cmd.buswidth);
+
+       f_pdata->addr_width = 0;
+       if (op->addr.buswidth)
+               f_pdata->addr_width = ilog2(op->addr.buswidth);
+
+       f_pdata->data_width = 0;
+       if (op->data.buswidth)
+               f_pdata->data_width = ilog2(op->data.buswidth);
 
        /* Right now we only support 8-8-8 DTR mode. */
        if (f_pdata->dtr) {
                switch (op->cmd.buswidth) {
                case 0:
-                       break;
                case 8:
-                       f_pdata->inst_width = CQSPI_INST_TYPE_OCTAL;
                        break;
                default:
                        return -EINVAL;
@@ -422,9 +404,7 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata,
 
                switch (op->addr.buswidth) {
                case 0:
-                       break;
                case 8:
-                       f_pdata->addr_width = CQSPI_INST_TYPE_OCTAL;
                        break;
                default:
                        return -EINVAL;
@@ -432,9 +412,7 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata,
 
                switch (op->data.buswidth) {
                case 0:
-                       break;
                case 8:
-                       f_pdata->data_width = CQSPI_INST_TYPE_OCTAL;
                        break;
                default:
                        return -EINVAL;
@@ -1437,9 +1415,24 @@ static bool cqspi_supports_mem_op(struct spi_mem *mem,
        all_false = !op->cmd.dtr && !op->addr.dtr && !op->dummy.dtr &&
                    !op->data.dtr;
 
-       /* Mixed DTR modes not supported. */
-       if (!(all_true || all_false))
+       if (all_true) {
+               /* Right now we only support 8-8-8 DTR mode. */
+               if (op->cmd.nbytes && op->cmd.buswidth != 8)
+                       return false;
+               if (op->addr.nbytes && op->addr.buswidth != 8)
+                       return false;
+               if (op->data.nbytes && op->data.buswidth != 8)
+                       return false;
+       } else if (all_false) {
+               /* Only 1-1-X ops are supported without DTR */
+               if (op->cmd.nbytes && op->cmd.buswidth > 1)
+                       return false;
+               if (op->addr.nbytes && op->addr.buswidth > 1)
+                       return false;
+       } else {
+               /* Mixed DTR modes are not supported. */
                return false;
+       }
 
        return spi_mem_default_supports_op(mem, op);
 }
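
The cadence-quadspi rework leans on the register encoding being exactly log2 of the bus width: ilog2(1) = 0 (single), ilog2(2) = 1 (dual), ilog2(4) = 2 (quad) and ilog2(8) = 3 (octal) reproduce the removed CQSPI_INST_TYPE_* constants, so each per-width switch collapses to one expression, applied identically to the cmd, addr and data phases:

	#include <linux/log2.h>

	/* buswidth is 0 for an unused phase, else a power of two: 1, 2, 4 or 8 */
	f_pdata->data_width = 0;
	if (op->data.buswidth)
		f_pdata->data_width = ilog2(op->data.buswidth);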
index a5ef7a526a7fc9e22d1bc9be094fb144a7d6411c..f6eec7a869b6a7a3a0c05aa3bb134102e50b394d 100644 (file)
@@ -72,6 +72,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = {
        { PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info },
        { PCI_VDEVICE(INTEL, 0x51a4), (unsigned long)&cnl_info },
        { PCI_VDEVICE(INTEL, 0x54a4), (unsigned long)&cnl_info },
+       { PCI_VDEVICE(INTEL, 0x7a24), (unsigned long)&cnl_info },
        { PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info },
        { PCI_VDEVICE(INTEL, 0xa0a4), (unsigned long)&bxt_info },
        { PCI_VDEVICE(INTEL, 0xa1a4), (unsigned long)&bxt_info },
index 94fb09696677f14a53f1b24375c4d9429a518252..d167699a1a96bd7015a278f5345dd8e650e6e8ed 100644 (file)
@@ -960,7 +960,17 @@ static int __maybe_unused mtk_nor_suspend(struct device *dev)
 
 static int __maybe_unused mtk_nor_resume(struct device *dev)
 {
-       return pm_runtime_force_resume(dev);
+       struct spi_controller *ctlr = dev_get_drvdata(dev);
+       struct mtk_nor *sp = spi_controller_get_devdata(ctlr);
+       int ret;
+
+       ret = pm_runtime_force_resume(dev);
+       if (ret)
+               return ret;
+
+       mtk_nor_init(sp);
+
+       return 0;
 }
 
 static const struct dev_pm_ops mtk_nor_pm_ops = {
index 55c092069301761336b520f01c861985d7d2bdcf..65be8e085ab8392c25029b2aebd521b7610193ae 100644 (file)
@@ -813,6 +813,7 @@ static int mxic_spi_probe(struct platform_device *pdev)
        if (ret) {
                dev_err(&pdev->dev, "spi_register_master failed\n");
                pm_runtime_disable(&pdev->dev);
+               mxic_spi_mem_ecc_remove(mxic);
        }
 
        return ret;
index fe82f3575df4f38694df791cefb7e1068c16d0ae..24ec1c83f379ceec3c224f5c008572b708982908 100644 (file)
@@ -158,14 +158,18 @@ static int rpcif_spi_probe(struct platform_device *pdev)
 
        error = rpcif_hw_init(rpc, false);
        if (error)
-               return error;
+               goto out_disable_rpm;
 
        error = spi_register_controller(ctlr);
        if (error) {
                dev_err(&pdev->dev, "spi_register_controller failed\n");
-               rpcif_disable_rpm(rpc);
+               goto out_disable_rpm;
        }
 
+       return 0;
+
+out_disable_rpm:
+       rpcif_disable_rpm(rpc);
        return error;
 }
 
index c4dd1200fe99166f81285cdd9a0d09bb4a211f01..2e6d6bbeb7842e5def216a589e670c813e5caf42 100644 (file)
@@ -1130,11 +1130,15 @@ static int __spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg)
 
        if (ctlr->dma_tx)
                tx_dev = ctlr->dma_tx->device->dev;
+       else if (ctlr->dma_map_dev)
+               tx_dev = ctlr->dma_map_dev;
        else
                tx_dev = ctlr->dev.parent;
 
        if (ctlr->dma_rx)
                rx_dev = ctlr->dma_rx->device->dev;
+       else if (ctlr->dma_map_dev)
+               rx_dev = ctlr->dma_map_dev;
        else
                rx_dev = ctlr->dev.parent;
 
@@ -2406,7 +2410,8 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
                        } else {
                                struct acpi_device *adev;
 
-                               if (acpi_bus_get_device(parent_handle, &adev))
+                               adev = acpi_fetch_acpi_dev(parent_handle);
+                               if (!adev)
                                        return -ENODEV;
 
                                ctlr = acpi_spi_find_controller_by_adev(adev);
index d68611ef22f80f07d9f0a6dc18b4efcb927b6ac5..f056204c0fdb1ae7ae0b41784b0e8f7f3f2e64ec 100644 (file)
@@ -70,7 +70,7 @@ static int __nat25_add_pppoe_tag(struct sk_buff *skb, struct pppoe_tag *tag)
        struct pppoe_hdr *ph = (struct pppoe_hdr *)(skb->data + ETH_HLEN);
        int data_len;
 
-       data_len = tag->tag_len + TAG_HDR_LEN;
+       data_len = be16_to_cpu(tag->tag_len) + TAG_HDR_LEN;
        if (skb_tailroom(skb) < data_len)
                return -1;
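
This one-liner fixes an endianness bug: tag->tag_len is an on-wire big-endian field, and using it raw on a little-endian host byte-swaps the length. For example, a wire value of 8 (bytes 0x00 0x08) read without conversion becomes 0x0800 = 2048, so the tailroom check and the subsequent copy operate on the wrong size; be16_to_cpu() restores the intended value:

	/* __be16 tag_len carries bytes 0x00 0x08 on the wire */
	data_len = be16_to_cpu(tag->tag_len) + TAG_HDR_LEN;	/* 8 + header, not 2048 */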
 
index 6fe6a6bab3f465703a82c578ae8365f8d5931be0..ddf6c2a7212bc5993551b9a5bbd07964843e857c 100644 (file)
@@ -3596,10 +3596,7 @@ static int iscsit_send_reject(
 void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
 {
        int ord, cpu;
-       cpumask_t conn_allowed_cpumask;
-
-       cpumask_and(&conn_allowed_cpumask, iscsit_global->allowed_cpumask,
-                   cpu_online_mask);
+       cpumask_var_t conn_allowed_cpumask;
 
        /*
         * bitmap_id is assigned from iscsit_global->ts_bitmap from
@@ -3609,13 +3606,28 @@ void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
         * iSCSI connection's RX/TX threads will be scheduled to
         * execute upon.
         */
-       cpumask_clear(conn->conn_cpumask);
-       ord = conn->bitmap_id % cpumask_weight(&conn_allowed_cpumask);
-       for_each_cpu(cpu, &conn_allowed_cpumask) {
-               if (ord-- == 0) {
-                       cpumask_set_cpu(cpu, conn->conn_cpumask);
-                       return;
+       if (!zalloc_cpumask_var(&conn_allowed_cpumask, GFP_KERNEL)) {
+               ord = conn->bitmap_id % cpumask_weight(cpu_online_mask);
+               for_each_online_cpu(cpu) {
+                       if (ord-- == 0) {
+                               cpumask_set_cpu(cpu, conn->conn_cpumask);
+                               return;
+                       }
+               }
+       } else {
+               cpumask_and(conn_allowed_cpumask, iscsit_global->allowed_cpumask,
+                       cpu_online_mask);
+
+               cpumask_clear(conn->conn_cpumask);
+               ord = conn->bitmap_id % cpumask_weight(conn_allowed_cpumask);
+               for_each_cpu(cpu, conn_allowed_cpumask) {
+                       if (ord-- == 0) {
+                               cpumask_set_cpu(cpu, conn->conn_cpumask);
+                               free_cpumask_var(conn_allowed_cpumask);
+                               return;
+                       }
                }
+               free_cpumask_var(conn_allowed_cpumask);
        }
        /*
         * This should never be reached..
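
Both target patches in this area move cpumask values off the stack: with CONFIG_CPUMASK_OFFSTACK set (large NR_CPUS), a cpumask_t can run to hundreds of bytes, so cpumask_var_t allocates it dynamically in that configuration and degenerates to a plain array otherwise. The usual shape, with some_mask as a hypothetical source mask:

	#include <linux/cpumask.h>

	cpumask_var_t mask;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;	/* or degrade gracefully, as iscsit does above */

	cpumask_and(mask, some_mask, cpu_online_mask);
	/* ... use mask ... */
	free_cpumask_var(mask);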
index 0cedcfe207b56b8b5bae41cdac9a876b2227dc78..57b4fd56d92aba1a7a2becb04a83aca60500fb68 100644 (file)
@@ -1137,23 +1137,27 @@ static ssize_t lio_target_wwn_cpus_allowed_list_show(
 static ssize_t lio_target_wwn_cpus_allowed_list_store(
                struct config_item *item, const char *page, size_t count)
 {
-       int ret;
+       int ret = -ENOMEM;
        char *orig;
-       cpumask_t new_allowed_cpumask;
+       cpumask_var_t new_allowed_cpumask;
+
+       if (!zalloc_cpumask_var(&new_allowed_cpumask, GFP_KERNEL))
+               goto out;
 
        orig = kstrdup(page, GFP_KERNEL);
        if (!orig)
-               return -ENOMEM;
+               goto out_free_cpumask;
 
-       cpumask_clear(&new_allowed_cpumask);
-       ret = cpulist_parse(orig, &new_allowed_cpumask);
+       ret = cpulist_parse(orig, new_allowed_cpumask);
+       if (!ret)
+               cpumask_copy(iscsit_global->allowed_cpumask,
+                            new_allowed_cpumask);
 
        kfree(orig);
-       if (ret != 0)
-               return ret;
-
-       cpumask_copy(iscsit_global->allowed_cpumask, &new_allowed_cpumask);
-       return count;
+out_free_cpumask:
+       free_cpumask_var(new_allowed_cpumask);
+out:
+       return ret ? ret : count;
 }
 
 CONFIGFS_ATTR(lio_target_wwn_, cpus_allowed_list);
index 44bb380e7390c7319cd8eaa3ab00c7aa57158685..25f33eb25337c685c6b19461c98c83702b8b6ee9 100644 (file)
@@ -829,28 +829,26 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
 }
 
 /*
- * Check if the underlying struct block_device request_queue supports
- * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
- * in ATA and we need to set TPE=1
+ * Check if the underlying struct block_device supports discard and if yes
+ * configure the UNMAP parameters.
  */
 bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
-                                      struct request_queue *q)
+                                      struct block_device *bdev)
 {
-       int block_size = queue_logical_block_size(q);
+       int block_size = bdev_logical_block_size(bdev);
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(bdev))
                return false;
 
        attrib->max_unmap_lba_count =
-               q->limits.max_discard_sectors >> (ilog2(block_size) - 9);
+               bdev_max_discard_sectors(bdev) >> (ilog2(block_size) - 9);
        /*
         * Currently hardcoded to 1 in Linux/SCSI code..
         */
        attrib->max_unmap_block_desc_count = 1;
-       attrib->unmap_granularity = q->limits.discard_granularity / block_size;
-       attrib->unmap_granularity_alignment = q->limits.discard_alignment /
-                                                               block_size;
-       attrib->unmap_zeroes_data = !!(q->limits.max_write_zeroes_sectors);
+       attrib->unmap_granularity = bdev_discard_granularity(bdev) / block_size;
+       attrib->unmap_granularity_alignment =
+               bdev_discard_alignment(bdev) / block_size;
        return true;
 }
 EXPORT_SYMBOL(target_configure_unmap_from_queue);
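
The switch from request_queue flags to block_device helpers keeps the unit conversion intact: bdev_max_discard_sectors() reports 512-byte sectors, so shifting right by ilog2(block_size) - 9 converts sectors into logical blocks (for a 4096-byte block, ilog2(4096) - 9 = 3, i.e. divide by 8 sectors per block). Condensed, the discard discovery now shared by the fd and iblock backends:

	if (!bdev_max_discard_sectors(bdev))
		return false;				/* no discard support */

	attrib->max_unmap_lba_count =
		bdev_max_discard_sectors(bdev) >> (ilog2(block_size) - 9);
	attrib->unmap_granularity = bdev_discard_granularity(bdev) / block_size;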
index 8190b840065f3e9137d6d872dbbdb8c051358e49..e68f1cc8ef98bd56098bdad6680576a4e6167b5b 100644 (file)
@@ -134,10 +134,10 @@ static int fd_configure_device(struct se_device *dev)
         */
        inode = file->f_mapping->host;
        if (S_ISBLK(inode->i_mode)) {
-               struct request_queue *q = bdev_get_queue(I_BDEV(inode));
+               struct block_device *bdev = I_BDEV(inode);
                unsigned long long dev_size;
 
-               fd_dev->fd_block_size = bdev_logical_block_size(I_BDEV(inode));
+               fd_dev->fd_block_size = bdev_logical_block_size(bdev);
                /*
                 * Determine the number of bytes from i_size_read() minus
                 * one (1) logical sector from underlying struct block_device
@@ -150,7 +150,7 @@ static int fd_configure_device(struct se_device *dev)
                        dev_size, div_u64(dev_size, fd_dev->fd_block_size),
                        fd_dev->fd_block_size);
 
-               if (target_configure_unmap_from_queue(&dev->dev_attrib, q))
+               if (target_configure_unmap_from_queue(&dev->dev_attrib, bdev))
                        pr_debug("IFILE: BLOCK Discard support available,"
                                 " disabled by default\n");
                /*
@@ -159,7 +159,7 @@ static int fd_configure_device(struct se_device *dev)
                 */
                dev->dev_attrib.max_write_same_len = 0xFFFF;
 
-               if (blk_queue_nonrot(q))
+               if (bdev_nonrot(bdev))
                        dev->dev_attrib.is_nonrot = 1;
        } else {
                if (!(fd_dev->fbd_flags & FBDF_HAS_SIZE)) {
@@ -558,7 +558,7 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
                ret = blkdev_issue_discard(bdev,
                                           target_to_linux_sector(dev, lba),
                                           target_to_linux_sector(dev,  nolb),
-                                          GFP_KERNEL, 0);
+                                          GFP_KERNEL);
                if (ret < 0) {
                        pr_warn("FILEIO: blkdev_issue_discard() failed: %d\n",
                                ret);
index 87ede165ddba435ef8eeb3912e9368599e87b0c4..378c80313a0f27a4aef7ece5f6215bcb30c237e1 100644 (file)
@@ -119,7 +119,7 @@ static int iblock_configure_device(struct se_device *dev)
        dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q);
        dev->dev_attrib.hw_queue_depth = q->nr_requests;
 
-       if (target_configure_unmap_from_queue(&dev->dev_attrib, q))
+       if (target_configure_unmap_from_queue(&dev->dev_attrib, bd))
                pr_debug("IBLOCK: BLOCK Discard support available,"
                         " disabled by default\n");
 
@@ -133,7 +133,7 @@ static int iblock_configure_device(struct se_device *dev)
        else
                dev->dev_attrib.max_write_same_len = 0xFFFF;
 
-       if (blk_queue_nonrot(q))
+       if (bdev_nonrot(bd))
                dev->dev_attrib.is_nonrot = 1;
 
        bi = bdev_get_integrity(bd);
@@ -434,7 +434,7 @@ iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
        ret = blkdev_issue_discard(bdev,
                                   target_to_linux_sector(dev, lba),
                                   target_to_linux_sector(dev,  nolb),
-                                  GFP_KERNEL, 0);
+                                  GFP_KERNEL);
        if (ret < 0) {
                pr_err("blkdev_issue_discard() failed: %d\n", ret);
                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -727,17 +727,16 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 
        if (data_direction == DMA_TO_DEVICE) {
                struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
-               struct request_queue *q = bdev_get_queue(ib_dev->ibd_bd);
                /*
                 * Force writethrough using REQ_FUA if a volatile write cache
                 * is not enabled, or if initiator set the Force Unit Access bit.
                 */
                opf = REQ_OP_WRITE;
                miter_dir = SG_MITER_TO_SG;
-               if (test_bit(QUEUE_FLAG_FUA, &q->queue_flags)) {
+               if (bdev_fua(ib_dev->ibd_bd)) {
                        if (cmd->se_cmd_flags & SCF_FUA)
                                opf |= REQ_FUA;
-                       else if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+                       else if (!bdev_write_cache(ib_dev->ibd_bd))
                                opf |= REQ_FUA;
                }
        } else {
@@ -886,11 +885,7 @@ iblock_parse_cdb(struct se_cmd *cmd)
 
 static bool iblock_get_write_cache(struct se_device *dev)
 {
-       struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
-       struct block_device *bd = ib_dev->ibd_bd;
-       struct request_queue *q = bdev_get_queue(bd);
-
-       return test_bit(QUEUE_FLAG_WC, &q->queue_flags);
+       return bdev_write_cache(IBLOCK_DEV(dev)->ibd_bd);
 }
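
The write path above distills the FUA policy into two bdev helpers; folded into one condition it reads: issue REQ_FUA when the device honours it and either the initiator set Force Unit Access or there is no volatile write cache to absorb the write. A condensed sketch:

	opf = REQ_OP_WRITE;
	if (bdev_fua(bdev) &&
	    ((cmd->se_cmd_flags & SCF_FUA) || !bdev_write_cache(bdev)))
		opf |= REQ_FUA;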
 
 static const struct target_backend_ops iblock_ops = {
index ff292b75e23f907f859b951f9c07944adb33b268..bb3fb18b2316d5a6236e7f8ae8035d41f26306fc 100644 (file)
@@ -588,7 +588,7 @@ static void pscsi_destroy_device(struct se_device *dev)
 }
 
 static void pscsi_complete_cmd(struct se_cmd *cmd, u8 scsi_status,
-                              unsigned char *req_sense)
+                              unsigned char *req_sense, int valid_data)
 {
        struct pscsi_dev_virt *pdv = PSCSI_DEV(cmd->se_dev);
        struct scsi_device *sd = pdv->pdv_sd;
@@ -681,7 +681,7 @@ after_mode_select:
                 * back despite framework assumption that a
                 * check condition means there is no data
                 */
-               if (sd->type == TYPE_TAPE &&
+               if (sd->type == TYPE_TAPE && valid_data &&
                    cmd->data_direction == DMA_FROM_DEVICE) {
                        /*
                         * is sense data valid, fixed format,
@@ -818,24 +818,8 @@ static ssize_t pscsi_show_configfs_dev_params(struct se_device *dev, char *b)
 
 static void pscsi_bi_endio(struct bio *bio)
 {
-       bio_put(bio);
-}
-
-static inline struct bio *pscsi_get_bio(int nr_vecs)
-{
-       struct bio *bio;
-       /*
-        * Use bio_malloc() following the comment in for bio -> struct request
-        * in block/blk-core.c:blk_make_request()
-        */
-       bio = bio_kmalloc(GFP_KERNEL, nr_vecs);
-       if (!bio) {
-               pr_err("PSCSI: bio_kmalloc() failed\n");
-               return NULL;
-       }
-       bio->bi_end_io = pscsi_bi_endio;
-
-       return bio;
+       bio_uninit(bio);
+       kfree(bio);
 }
 
 static sense_reason_t
@@ -878,15 +862,12 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                        if (!bio) {
 new_bio:
                                nr_vecs = bio_max_segs(nr_pages);
-                               /*
-                                * Calls bio_kmalloc() and sets bio->bi_end_io()
-                                */
-                               bio = pscsi_get_bio(nr_vecs);
+                               bio = bio_kmalloc(nr_vecs, GFP_KERNEL);
                                if (!bio)
                                        goto fail;
-
-                               if (rw)
-                                       bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+                               bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs,
+                                        rw ? REQ_OP_WRITE : REQ_OP_READ);
+                               bio->bi_end_io = pscsi_bi_endio;
 
                                pr_debug("PSCSI: Allocated bio: %p,"
                                        " dir: %s nr_vecs: %d\n", bio,
@@ -912,11 +893,6 @@ new_bio:
                                        goto fail;
                                }
 
-                               /*
-                                * Clear the pointer so that another bio will
-                                * be allocated with pscsi_get_bio() above.
-                                */
-                               bio = NULL;
                                goto new_bio;
                        }
 
@@ -1032,6 +1008,7 @@ static void pscsi_req_done(struct request *req, blk_status_t status)
        struct se_cmd *cmd = req->end_io_data;
        struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
        enum sam_status scsi_status = scmd->result & 0xff;
+       int valid_data = cmd->data_length - scmd->resid_len;
        u8 *cdb = cmd->priv;
 
        if (scsi_status != SAM_STAT_GOOD) {
@@ -1039,12 +1016,11 @@ static void pscsi_req_done(struct request *req, blk_status_t status)
                        " 0x%02x Result: 0x%08x\n", cmd, cdb[0], scmd->result);
        }
 
-       pscsi_complete_cmd(cmd, scsi_status, scmd->sense_buffer);
+       pscsi_complete_cmd(cmd, scsi_status, scmd->sense_buffer, valid_data);
 
        switch (host_byte(scmd->result)) {
        case DID_OK:
-               target_complete_cmd_with_length(cmd, scsi_status,
-                       cmd->data_length - scmd->resid_len);
+               target_complete_cmd_with_length(cmd, scsi_status, valid_data);
                break;
        default:
                pr_debug("PSCSI Host Byte exception at cmd: %p CDB:"
index 95d4ca50a605b7a4bd27f7846ac513b47f306dcf..fd7267baa707838a3eed720728390f1534cb4db2 100644 (file)
@@ -1821,6 +1821,7 @@ static struct page *tcmu_try_get_data_page(struct tcmu_dev *udev, uint32_t dpi)
        mutex_lock(&udev->cmdr_lock);
        page = xa_load(&udev->data_pages, dpi);
        if (likely(page)) {
+               get_page(page);
                mutex_unlock(&udev->cmdr_lock);
                return page;
        }
@@ -1877,6 +1878,7 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
                /* For the vmalloc()ed cmd area pages */
                addr = (void *)(unsigned long)info->mem[mi].addr + offset;
                page = vmalloc_to_page(addr);
+               get_page(page);
        } else {
                uint32_t dpi;
 
@@ -1887,7 +1889,6 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
                        return VM_FAULT_SIGBUS;
        }
 
-       get_page(page);
        vmf->page = page;
        return 0;
 }
index a5eb4ef46971b0d990f961968ab67b8dc23700d0..c9b3b2cfb2b2a8a533699676ad1ad80e58eb4524 100644 (file)
@@ -865,6 +865,7 @@ err_rhashtable_free:
        rhashtable_free_and_destroy(&optee->ffa.global_ids, rh_free_fn, NULL);
        optee_supp_uninit(&optee->supp);
        mutex_destroy(&optee->call_queue.mutex);
+       mutex_destroy(&optee->ffa.mutex);
 err_unreg_supp_teedev:
        tee_device_unregister(optee->supp_teedev);
 err_unreg_teedev:
index e37691e0bf206605f8fa63d7ceca24a8b5f2cb0f..0e5cc948373c4bcd14d8f540a33f8fdc93a499b9 100644 (file)
@@ -113,8 +113,10 @@ config THERMAL_DEFAULT_GOV_USER_SPACE
        bool "user_space"
        select THERMAL_GOV_USER_SPACE
        help
-         Select this if you want to let the user space manage the
-         platform thermals.
+         The Userspace governor lets user space receive trip point
+         crossed notifications from the kernel via uevents. It is
+         recommended to use the netlink interface instead, which gives
+         richer information about thermal framework events.
 
 config THERMAL_DEFAULT_GOV_POWER_ALLOCATOR
        bool "power_allocator"
index 64a18e354a20403c41a987bac14d569ba2bd2099..a62a4e90bd3f5b8e2ce57073d4b1aa2df9892aba 100644 (file)
@@ -17,8 +17,7 @@
 
 static int user_space_bind(struct thermal_zone_device *tz)
 {
-       pr_warn_once("Userspace governor deprecated: use thermal netlink " \
-                    "notification instead\n");
+       pr_info_once("Consider using thermal netlink events interface\n");
 
        return 0;
 }
index 4954800b9850256da3eb6c1544594f2aa2d919d5..79931ddc582a5369a9ade4485985034a5bbc02e9 100644 (file)
@@ -68,7 +68,7 @@ static int evaluate_odvp(struct int3400_thermal_priv *priv);
 struct odvp_attr {
        int odvp;
        struct int3400_thermal_priv *priv;
-       struct kobj_attribute attr;
+       struct device_attribute attr;
 };
 
 static ssize_t data_vault_read(struct file *file, struct kobject *kobj,
@@ -194,12 +194,31 @@ static int int3400_thermal_run_osc(acpi_handle handle, char *uuid_str, int *enab
        return result;
 }
 
+static int set_os_uuid_mask(struct int3400_thermal_priv *priv, u32 mask)
+{
+       int cap = 0;
+
+       /*
+        * Capability bits:
+        * Bit 0: set to 1 to indicate DPTF is active
+        * Bit 1: set to 1 if active cooling is supported by the user space daemon
+        * Bit 2: set to 1 if passive cooling is supported by the user space daemon
+        * Bit 3: set to 1 if critical trips are handled by the user space daemon
+        */
+       if (mask)
+               cap = (priv->os_uuid_mask << 1) | 0x01;
+
+       return int3400_thermal_run_osc(priv->adev->handle,
+                                      "b23ba85d-c8b7-3542-88de-8de2ffcfd698",
+                                      &cap);
+}
+
 static ssize_t current_uuid_store(struct device *dev,
                                  struct device_attribute *attr,
                                  const char *buf, size_t count)
 {
        struct int3400_thermal_priv *priv = dev_get_drvdata(dev);
-       int i;
+       int ret, i;
 
        for (i = 0; i < INT3400_THERMAL_MAXIMUM_UUID; ++i) {
                if (!strncmp(buf, int3400_thermal_uuids[i],
@@ -231,19 +250,7 @@ static ssize_t current_uuid_store(struct device *dev,
        }
 
        if (priv->os_uuid_mask) {
-               int cap, ret;
-
-               /*
-                * Capability bits:
-                * Bit 0: set to 1 to indicate DPTF is active
-                * Bi1 1: set to 1 to active cooling is supported by user space daemon
-                * Bit 2: set to 1 to passive cooling is supported by user space daemon
-                * Bit 3: set to 1 to critical trip is handled by user space daemon
-                */
-               cap = ((priv->os_uuid_mask << 1) | 0x01);
-               ret = int3400_thermal_run_osc(priv->adev->handle,
-                                             "b23ba85d-c8b7-3542-88de-8de2ffcfd698",
-                                             &cap);
+               ret = set_os_uuid_mask(priv, priv->os_uuid_mask);
                if (ret)
                        return ret;
        }
@@ -311,7 +318,7 @@ end:
        return result;
 }
 
-static ssize_t odvp_show(struct kobject *kobj, struct kobj_attribute *attr,
+static ssize_t odvp_show(struct device *dev, struct device_attribute *attr,
                         char *buf)
 {
        struct odvp_attr *odvp_attr;
@@ -469,17 +476,26 @@ static int int3400_thermal_change_mode(struct thermal_zone_device *thermal,
        if (mode != thermal->mode) {
                int enabled;
 
+               enabled = mode == THERMAL_DEVICE_ENABLED;
+
+               if (priv->os_uuid_mask) {
+                       if (!enabled) {
+                               priv->os_uuid_mask = 0;
+                               result = set_os_uuid_mask(priv, priv->os_uuid_mask);
+                       }
+                       goto eval_odvp;
+               }
+
                if (priv->current_uuid_index < 0 ||
                    priv->current_uuid_index >= INT3400_THERMAL_MAXIMUM_UUID)
                        return -EINVAL;
 
-               enabled = (mode == THERMAL_DEVICE_ENABLED);
                result = int3400_thermal_run_osc(priv->adev->handle,
                                                 int3400_thermal_uuids[priv->current_uuid_index],
                                                 &enabled);
        }
 
-
+eval_odvp:
        evaluate_odvp(priv);
 
        return result;
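
The _OSC capability word built by set_os_uuid_mask() shifts the UUID mask up one bit to make room for the "DPTF is active" flag in bit 0. Worked example: a mask of 0x3, meaning active and passive cooling are claimed by the user space daemon, encodes as:

	cap = (0x3 << 1) | 0x01;	/* = 0x07: bits 1-2 from the mask, bit 0 = active */

and disabling the zone clears the mask first, so _OSC sees cap = 0.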
index f154bada2906445d145c0a0c139cb95d83a0cdac..1c4aac8464a709d3fb01f667daab973a7cace8b2 100644 (file)
@@ -610,9 +610,6 @@ cur_state_store(struct device *dev, struct device_attribute *attr,
        unsigned long state;
        int result;
 
-       dev_warn_once(&cdev->device,
-                     "Setting cooling device state is deprecated\n");
-       
        if (sscanf(buf, "%ld\n", &state) != 1)
                return -EINVAL;
 
index fa92f727fdf895fafd556f41ca1460039181ce81..fd8b86dde525507ee09d0961492f786e5c291a4a 100644 (file)
@@ -73,6 +73,8 @@ module_param(debug, int, 0600);
  */
 #define MAX_MRU 1500
 #define MAX_MTU 1500
+/* SOF, ADDR, CTRL, LEN1, LEN2, ..., FCS, EOF */
+#define PROT_OVERHEAD 7
 #define        GSM_NET_TX_TIMEOUT (HZ*10)
 
 /*
@@ -135,6 +137,7 @@ struct gsm_dlci {
        int retries;
        /* Uplink tty if active */
        struct tty_port port;   /* The tty bound to this DLCI if there is one */
+#define TX_SIZE                4096    /* Must be power of 2. */
        struct kfifo fifo;      /* Queue fifo for the DLCI */
        int adaption;           /* Adaption layer in use */
        int prev_adaption;
@@ -219,7 +222,6 @@ struct gsm_mux {
        int encoding;
        u8 control;
        u8 fcs;
-       u8 received_fcs;
        u8 *txframe;                    /* TX framing buffer */
 
        /* Method for the receiver side */
@@ -231,6 +233,7 @@ struct gsm_mux {
        int initiator;                  /* Did we initiate connection */
        bool dead;                      /* Has the mux been shut down */
        struct gsm_dlci *dlci[NUM_DLCI];
+       int old_c_iflag;                /* termios c_iflag value before attach */
        bool constipated;               /* Asked by remote to shut up */
 
        spinlock_t tx_lock;
@@ -271,10 +274,6 @@ static DEFINE_SPINLOCK(gsm_mux_lock);
 
 static struct tty_driver *gsm_tty_driver;
 
-/* Save dlci open address */
-static int addr_open[256] = { 0 };
-/* Save dlci open count */
-static int addr_cnt;
 /*
  *     This section of the driver logic implements the GSM encodings
  *     both the basic and the 'advanced'. Reliable transport is not
@@ -369,6 +368,7 @@ static const u8 gsm_fcs8[256] = {
 #define GOOD_FCS       0xCF
 
 static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len);
+static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk);
 
 /**
  *     gsm_fcs_add     -       update FCS
@@ -832,7 +832,7 @@ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci)
                        break;
                case 2: /* Unstructured with modem bits.
                Always one byte as we never send inline break data */
-                       *dp++ = gsm_encode_modem(dlci);
+                       *dp++ = (gsm_encode_modem(dlci) << 1) | EA;
                        break;
                }
                WARN_ON(kfifo_out_locked(&dlci->fifo, dp , len, &dlci->lock) != len);
@@ -916,6 +916,66 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm,
        return size;
 }
 
+/**
+ *     gsm_dlci_modem_output   -       try and push modem status out of a DLCI
+ *     @gsm: mux
+ *     @dlci: the DLCI to pull modem status from
+ *     @brk: break signal
+ *
+ *     Push an empty frame into the transmit queue to update the modem status
+ *     bits and to transmit an optional break.
+ *
+ *     Caller must hold the tx_lock of the mux.
+ */
+
+static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci,
+                                u8 brk)
+{
+       u8 *dp = NULL;
+       struct gsm_msg *msg;
+       int size = 0;
+
+       /* for modem bits without break data */
+       switch (dlci->adaption) {
+       case 1: /* Unstructured */
+               break;
+       case 2: /* Unstructured with modem bits. */
+               size++;
+               if (brk > 0)
+                       size++;
+               break;
+       default:
+               pr_err("%s: unsupported adaption %d\n", __func__,
+                      dlci->adaption);
+               return -EINVAL;
+       }
+
+       msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype);
+       if (!msg) {
+               pr_err("%s: gsm_data_alloc error\n", __func__);
+               return -ENOMEM;
+       }
+       dp = msg->data;
+       switch (dlci->adaption) {
+       case 1: /* Unstructured */
+               break;
+       case 2: /* Unstructured with modem bits. */
+               if (brk == 0) {
+                       *dp++ = (gsm_encode_modem(dlci) << 1) | EA;
+               } else {
+                       *dp++ = gsm_encode_modem(dlci) << 1;
+                       *dp++ = (brk << 4) | 2 | EA; /* Length, Break, EA */
+               }
+               break;
+       default:
+               /* Handled above */
+               break;
+       }
+
+       __gsm_data_queue(dlci, msg);
+       return size;
+}
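
A side note on the payload built above, since the same bit layout recurs in
gsm_modem_upd_via_msc() further down. The following sketch is illustrative
only, not part of the patch; it assumes EA is the TS 27.010 extension bit
(0x01) already defined in this file and that "modem" stands for the
gsm_encode_modem() result:

	/* Hedged sketch: encode an adaption-2 status payload, optional break. */
	static inline int encode_adaption2_status(u8 *dp, u8 modem, u8 brk)
	{
		if (brk == 0) {
			dp[0] = (modem << 1) | EA;   /* EA set: single, final octet */
			return 1;
		}
		dp[0] = modem << 1;                  /* EA clear: a break octet follows */
		dp[1] = (brk << 4) | 2 | EA;         /* break length, break-valid bit, EA */
		return 2;
	}
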
+
 /**
  *     gsm_dlci_data_sweep             -       look for data to send
  *     @gsm: the GSM mux
@@ -1093,7 +1153,6 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen)
 {
        unsigned int addr = 0;
        unsigned int modem = 0;
-       unsigned int brk = 0;
        struct gsm_dlci *dlci;
        int len = clen;
        int slen;
@@ -1123,17 +1182,8 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen)
                        return;
        }
        len--;
-       if (len > 0) {
-               while (gsm_read_ea(&brk, *dp++) == 0) {
-                       len--;
-                       if (len == 0)
-                               return;
-               }
-               modem <<= 7;
-               modem |= (brk & 0x7f);
-       }
        tty = tty_port_tty_get(&dlci->port);
-       gsm_process_modem(tty, dlci, modem, slen);
+       gsm_process_modem(tty, dlci, modem, slen - len);
        if (tty) {
                tty_wakeup(tty);
                tty_kref_put(tty);
@@ -1193,7 +1243,6 @@ static void gsm_control_rls(struct gsm_mux *gsm, const u8 *data, int clen)
 }
 
 static void gsm_dlci_begin_close(struct gsm_dlci *dlci);
-static void gsm_dlci_close(struct gsm_dlci *dlci);
 
 /**
  *     gsm_control_message     -       DLCI 0 control processing
@@ -1212,28 +1261,15 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command,
 {
        u8 buf[1];
        unsigned long flags;
-       struct gsm_dlci *dlci;
-       int i;
-       int address;
 
        switch (command) {
        case CMD_CLD: {
-               if (addr_cnt > 0) {
-                       for (i = 0; i < addr_cnt; i++) {
-                               address = addr_open[i];
-                               dlci = gsm->dlci[address];
-                               gsm_dlci_close(dlci);
-                               addr_open[i] = 0;
-                       }
-               }
+               struct gsm_dlci *dlci = gsm->dlci[0];
                /* Modem wishes to close down */
-               dlci = gsm->dlci[0];
                if (dlci) {
                        dlci->dead = true;
                        gsm->dead = true;
-                       gsm_dlci_close(dlci);
-                       addr_cnt = 0;
-                       gsm_response(gsm, 0, UA|PF);
+                       gsm_dlci_begin_close(dlci);
                }
                }
                break;
@@ -1326,11 +1362,12 @@ static void gsm_control_response(struct gsm_mux *gsm, unsigned int command,
 
 static void gsm_control_transmit(struct gsm_mux *gsm, struct gsm_control *ctrl)
 {
-       struct gsm_msg *msg = gsm_data_alloc(gsm, 0, ctrl->len + 1, gsm->ftype);
+       struct gsm_msg *msg = gsm_data_alloc(gsm, 0, ctrl->len + 2, gsm->ftype);
        if (msg == NULL)
                return;
-       msg->data[0] = (ctrl->cmd << 1) | 2 | EA;       /* command */
-       memcpy(msg->data + 1, ctrl->data, ctrl->len);
+       msg->data[0] = (ctrl->cmd << 1) | CR | EA;      /* command */
+       msg->data[1] = (ctrl->len << 1) | EA;
+       memcpy(msg->data + 2, ctrl->data, ctrl->len);
        gsm_data_queue(gsm->dlci[0], msg);
 }
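
For orientation, the control frame that gsm_control_transmit() now builds can
be sketched as follows (a reading of the code above, not wording from the
patch); the explicit length octet is why the allocation grew from
ctrl->len + 1 to ctrl->len + 2:

	/*
	 * data[0] = (cmd << 1) | CR | EA;  type octet, command direction
	 * data[1] = (len << 1) | EA;       explicit length of the value field
	 * data[2..len + 1]                 value octets
	 */
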
 
@@ -1353,7 +1390,6 @@ static void gsm_control_retransmit(struct timer_list *t)
        spin_lock_irqsave(&gsm->control_lock, flags);
        ctrl = gsm->pending_cmd;
        if (ctrl) {
-               gsm->cretries--;
                if (gsm->cretries == 0) {
                        gsm->pending_cmd = NULL;
                        ctrl->error = -ETIMEDOUT;
@@ -1362,6 +1398,7 @@ static void gsm_control_retransmit(struct timer_list *t)
                        wake_up(&gsm->event);
                        return;
                }
+               gsm->cretries--;
                gsm_control_transmit(gsm, ctrl);
                mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100);
        }
@@ -1402,7 +1439,7 @@ retry:
 
        /* If DLCI0 is in ADM mode skip retries, it won't respond */
        if (gsm->dlci[0]->mode == DLCI_MODE_ADM)
-               gsm->cretries = 1;
+               gsm->cretries = 0;
        else
                gsm->cretries = gsm->n2;
 
@@ -1450,20 +1487,22 @@ static int gsm_control_wait(struct gsm_mux *gsm, struct gsm_control *control)
 
 static void gsm_dlci_close(struct gsm_dlci *dlci)
 {
+       unsigned long flags;
+
        del_timer(&dlci->t1);
        if (debug & 8)
                pr_debug("DLCI %d goes closed.\n", dlci->addr);
        dlci->state = DLCI_CLOSED;
        if (dlci->addr != 0) {
                tty_port_tty_hangup(&dlci->port, false);
+               spin_lock_irqsave(&dlci->lock, flags);
                kfifo_reset(&dlci->fifo);
+               spin_unlock_irqrestore(&dlci->lock, flags);
                /* Ensure that gsmtty_open() can return. */
                tty_port_set_initialized(&dlci->port, 0);
                wake_up_interruptible(&dlci->port.open_wait);
        } else
                dlci->gsm->dead = true;
-       /* Unregister gsmtty driver,report gsmtty dev remove uevent for user */
-       tty_unregister_device(gsm_tty_driver, dlci->addr);
        wake_up(&dlci->gsm->event);
        /* A DLCI 0 close is a MUX termination so we need to kick that
           back to userspace somehow */
@@ -1485,8 +1524,9 @@ static void gsm_dlci_open(struct gsm_dlci *dlci)
        dlci->state = DLCI_OPEN;
        if (debug & 8)
                pr_debug("DLCI %d goes open.\n", dlci->addr);
-       /* Register gsmtty driver,report gsmtty dev add uevent for user */
-       tty_register_device(gsm_tty_driver, dlci->addr, NULL);
+       /* Send current modem state */
+       if (dlci->addr)
+               gsm_modem_update(dlci, 0);
        wake_up(&dlci->gsm->event);
 }
 
@@ -1619,10 +1659,12 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen)
                        if (len == 0)
                                return;
                }
+               len--;
                slen++;
                tty = tty_port_tty_get(port);
                if (tty) {
                        gsm_process_modem(tty, dlci, modem, slen);
+                       tty_wakeup(tty);
                        tty_kref_put(tty);
                }
                fallthrough;
@@ -1690,7 +1732,7 @@ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr)
                return NULL;
        spin_lock_init(&dlci->lock);
        mutex_init(&dlci->mutex);
-       if (kfifo_alloc(&dlci->fifo, 4096, GFP_KERNEL) < 0) {
+       if (kfifo_alloc(&dlci->fifo, TX_SIZE, GFP_KERNEL) < 0) {
                kfree(dlci);
                return NULL;
        }
@@ -1793,19 +1835,7 @@ static void gsm_queue(struct gsm_mux *gsm)
        struct gsm_dlci *dlci;
        u8 cr;
        int address;
-       int i, j, k, address_tmp;
-       /* We have to sneak a look at the packet body to do the FCS.
-          A somewhat layering violation in the spec */
 
-       if ((gsm->control & ~PF) == UI)
-               gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, gsm->len);
-       if (gsm->encoding == 0) {
-               /* WARNING: gsm->received_fcs is used for
-               gsm->encoding = 0 only.
-               In this case it contain the last piece of data
-               required to generate final CRC */
-               gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->received_fcs);
-       }
        if (gsm->fcs != GOOD_FCS) {
                gsm->bad_fcs++;
                if (debug & 4)
@@ -1836,11 +1866,6 @@ static void gsm_queue(struct gsm_mux *gsm)
                else {
                        gsm_response(gsm, address, UA|PF);
                        gsm_dlci_open(dlci);
-                       /* Save dlci open address */
-                       if (address) {
-                               addr_open[addr_cnt] = address;
-                               addr_cnt++;
-                       }
                }
                break;
        case DISC|PF:
@@ -1851,35 +1876,9 @@ static void gsm_queue(struct gsm_mux *gsm)
                        return;
                }
                /* Real close complete */
-               if (!address) {
-                       if (addr_cnt > 0) {
-                               for (i = 0; i < addr_cnt; i++) {
-                                       address = addr_open[i];
-                                       dlci = gsm->dlci[address];
-                                       gsm_dlci_close(dlci);
-                                       addr_open[i] = 0;
-                               }
-                       }
-                       dlci = gsm->dlci[0];
-                       gsm_dlci_close(dlci);
-                       addr_cnt = 0;
-                       gsm_response(gsm, 0, UA|PF);
-               } else {
-                       gsm_response(gsm, address, UA|PF);
-                       gsm_dlci_close(dlci);
-                       /* clear dlci address */
-                       for (j = 0; j < addr_cnt; j++) {
-                               address_tmp = addr_open[j];
-                               if (address_tmp == address) {
-                                       for (k = j; k < addr_cnt; k++)
-                                               addr_open[k] = addr_open[k+1];
-                                       addr_cnt--;
-                                       break;
-                               }
-                       }
-               }
+               gsm_response(gsm, address, UA|PF);
+               gsm_dlci_close(dlci);
                break;
-       case UA:
        case UA|PF:
                if (cr == 0 || dlci == NULL)
                        break;
@@ -1993,19 +1992,25 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c)
                break;
        case GSM_DATA:          /* Data */
                gsm->buf[gsm->count++] = c;
-               if (gsm->count == gsm->len)
+               if (gsm->count == gsm->len) {
+                       /* Calculate final FCS for UI frames over all data */
+                       if ((gsm->control & ~PF) != UIH) {
+                               gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf,
+                                                            gsm->count);
+                       }
                        gsm->state = GSM_FCS;
+               }
                break;
        case GSM_FCS:           /* FCS follows the packet */
-               gsm->received_fcs = c;
-               gsm_queue(gsm);
+               gsm->fcs = gsm_fcs_add(gsm->fcs, c);
                gsm->state = GSM_SSOF;
                break;
        case GSM_SSOF:
-               if (c == GSM0_SOF) {
-                       gsm->state = GSM_SEARCH;
-                       break;
-               }
+               gsm->state = GSM_SEARCH;
+               if (c == GSM0_SOF)
+                       gsm_queue(gsm);
+               else
+                       gsm->bad_size++;
                break;
        default:
                pr_debug("%s: unhandled state: %d\n", __func__, gsm->state);
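
Summarizing the FCS bookkeeping after this hunk as a sketch (INIT_FCS is 0xFF
elsewhere in this file; the residue constant GOOD_FCS 0xCF appears above):

	/*
	 * fcs = INIT_FCS;                              frame start
	 * fcs = gsm_fcs_add(fcs, header octet);        address/control/length
	 * if (UI frame)                                UIH covers the header only
	 *         fcs = gsm_fcs_add_block(fcs, buf, count);
	 * fcs = gsm_fcs_add(fcs, received FCS octet);  fold in the sender's FCS
	 * good = (fcs == GOOD_FCS);                    residue check in gsm_queue()
	 *
	 * Folding the received FCS into the running value as it arrives is what
	 * lets the gsm->received_fcs field be dropped earlier in this patch.
	 */
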
@@ -2023,12 +2028,35 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c)
 
 static void gsm1_receive(struct gsm_mux *gsm, unsigned char c)
 {
+       /* handle XON/XOFF */
+       if ((c & ISO_IEC_646_MASK) == XON) {
+               gsm->constipated = true;
+               return;
+       } else if ((c & ISO_IEC_646_MASK) == XOFF) {
+               gsm->constipated = false;
+               /* Kick the link in case it is idling */
+               gsm_data_kick(gsm, NULL);
+               return;
+       }
        if (c == GSM1_SOF) {
-               /* EOF is only valid in frame if we have got to the data state
-                  and received at least one byte (the FCS) */
-               if (gsm->state == GSM_DATA && gsm->count) {
-                       /* Extract the FCS */
+               /* EOF is only valid in frame if we have got to the data state */
+               if (gsm->state == GSM_DATA) {
+                       if (gsm->count < 1) {
+                               /* Missing FCS */
+                               gsm->malformed++;
+                               gsm->state = GSM_START;
+                               return;
+                       }
+                       /* Remove the FCS from data */
                        gsm->count--;
+                       if ((gsm->control & ~PF) != UIH) {
+                               /* Calculate final FCS for UI frames over all
+                                * data but FCS
+                                */
+                               gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf,
+                                                            gsm->count);
+                       }
+                       /* Add the FCS itself to test against GOOD_FCS */
                        gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->buf[gsm->count]);
                        gsm->len = gsm->count;
                        gsm_queue(gsm);
@@ -2037,7 +2065,8 @@ static void gsm1_receive(struct gsm_mux *gsm, unsigned char c)
                }
                /* Any partial frame was a runt so go back to start */
                if (gsm->state != GSM_START) {
-                       gsm->malformed++;
+                       if (gsm->state != GSM_SEARCH)
+                               gsm->malformed++;
                        gsm->state = GSM_START;
                }
                /* A SOF in GSM_START means we are still reading idling or
@@ -2106,74 +2135,43 @@ static void gsm_error(struct gsm_mux *gsm)
        gsm->io_error++;
 }
 
-static int gsm_disconnect(struct gsm_mux *gsm)
-{
-       struct gsm_dlci *dlci = gsm->dlci[0];
-       struct gsm_control *gc;
-
-       if (!dlci)
-               return 0;
-
-       /* In theory disconnecting DLCI 0 is sufficient but for some
-          modems this is apparently not the case. */
-       gc = gsm_control_send(gsm, CMD_CLD, NULL, 0);
-       if (gc)
-               gsm_control_wait(gsm, gc);
-
-       del_timer_sync(&gsm->t2_timer);
-       /* Now we are sure T2 has stopped */
-
-       gsm_dlci_begin_close(dlci);
-       wait_event_interruptible(gsm->event,
-                               dlci->state == DLCI_CLOSED);
-
-       if (signal_pending(current))
-               return -EINTR;
-
-       return 0;
-}
-
 /**
  *     gsm_cleanup_mux         -       generic GSM protocol cleanup
  *     @gsm: our mux
+ *     @disc: disconnect link?
  *
  *     Clean up the bits of the mux which are the same for all framing
  *     protocols. Remove the mux from the mux table, stop all the timers
  *     and then shut down each device hanging up the channels as we go.
  */
 
-static void gsm_cleanup_mux(struct gsm_mux *gsm)
+static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc)
 {
        int i;
        struct gsm_dlci *dlci = gsm->dlci[0];
        struct gsm_msg *txq, *ntxq;
 
        gsm->dead = true;
+       mutex_lock(&gsm->mutex);
 
-       spin_lock(&gsm_mux_lock);
-       for (i = 0; i < MAX_MUX; i++) {
-               if (gsm_mux[i] == gsm) {
-                       gsm_mux[i] = NULL;
-                       break;
+       if (dlci) {
+               if (disc && dlci->state != DLCI_CLOSED) {
+                       gsm_dlci_begin_close(dlci);
+                       wait_event(gsm->event, dlci->state == DLCI_CLOSED);
                }
+               dlci->dead = true;
        }
-       spin_unlock(&gsm_mux_lock);
-       /* open failed before registering => nothing to do */
-       if (i == MAX_MUX)
-               return;
 
+       /* Finish outstanding timers, making sure they are done */
        del_timer_sync(&gsm->t2_timer);
-       /* Now we are sure T2 has stopped */
-       if (dlci)
-               dlci->dead = true;
 
-       /* Free up any link layer users */
-       mutex_lock(&gsm->mutex);
-       for (i = 0; i < NUM_DLCI; i++)
+       /* Free up any link layer users and finally the control channel */
+       for (i = NUM_DLCI - 1; i >= 0; i--)
                if (gsm->dlci[i])
                        gsm_dlci_release(gsm->dlci[i]);
        mutex_unlock(&gsm->mutex);
        /* Now wipe the queues */
+       tty_ldisc_flush(gsm->tty);
        list_for_each_entry_safe(txq, ntxq, &gsm->tx_list, list)
                kfree(txq);
        INIT_LIST_HEAD(&gsm->tx_list);
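
A short map of the new "disc" argument, taken from the call sites later in
this patch:

	/*
	 * gsm_cleanup_mux(gsm, true)  - gsm_config(): reconfiguring, so a
	 *                               DISC on DLCI 0 is still possible
	 * gsm_cleanup_mux(gsm, false) - gsmld_close() and the gsmld_open()
	 *                               error path: the ldisc is already
	 *                               closed, no disconnect can be sent
	 */
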
@@ -2191,7 +2189,6 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm)
 static int gsm_activate_mux(struct gsm_mux *gsm)
 {
        struct gsm_dlci *dlci;
-       int i = 0;
 
        timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0);
        init_waitqueue_head(&gsm->event);
@@ -2203,18 +2200,6 @@ static int gsm_activate_mux(struct gsm_mux *gsm)
        else
                gsm->receive = gsm1_receive;
 
-       spin_lock(&gsm_mux_lock);
-       for (i = 0; i < MAX_MUX; i++) {
-               if (gsm_mux[i] == NULL) {
-                       gsm->num = i;
-                       gsm_mux[i] = gsm;
-                       break;
-               }
-       }
-       spin_unlock(&gsm_mux_lock);
-       if (i == MAX_MUX)
-               return -EBUSY;
-
        dlci = gsm_dlci_alloc(gsm, 0);
        if (dlci == NULL)
                return -ENOMEM;
@@ -2230,6 +2215,15 @@ static int gsm_activate_mux(struct gsm_mux *gsm)
  */
 static void gsm_free_mux(struct gsm_mux *gsm)
 {
+       int i;
+
+       for (i = 0; i < MAX_MUX; i++) {
+               if (gsm == gsm_mux[i]) {
+                       gsm_mux[i] = NULL;
+                       break;
+               }
+       }
+       mutex_destroy(&gsm->mutex);
        kfree(gsm->txframe);
        kfree(gsm->buf);
        kfree(gsm);
@@ -2249,12 +2243,20 @@ static void gsm_free_muxr(struct kref *ref)
 
 static inline void mux_get(struct gsm_mux *gsm)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&gsm_mux_lock, flags);
        kref_get(&gsm->ref);
+       spin_unlock_irqrestore(&gsm_mux_lock, flags);
 }
 
 static inline void mux_put(struct gsm_mux *gsm)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&gsm_mux_lock, flags);
        kref_put(&gsm->ref, gsm_free_muxr);
+       spin_unlock_irqrestore(&gsm_mux_lock, flags);
 }
 
 static inline unsigned int mux_num_to_base(struct gsm_mux *gsm)
@@ -2275,6 +2277,7 @@ static inline unsigned int mux_line_to_num(unsigned int line)
 
 static struct gsm_mux *gsm_alloc_mux(void)
 {
+       int i;
        struct gsm_mux *gsm = kzalloc(sizeof(struct gsm_mux), GFP_KERNEL);
        if (gsm == NULL)
                return NULL;
@@ -2283,7 +2286,7 @@ static struct gsm_mux *gsm_alloc_mux(void)
                kfree(gsm);
                return NULL;
        }
-       gsm->txframe = kmalloc(2 * MAX_MRU + 2, GFP_KERNEL);
+       gsm->txframe = kmalloc(2 * (MAX_MTU + PROT_OVERHEAD - 1), GFP_KERNEL);
        if (gsm->txframe == NULL) {
                kfree(gsm->buf);
                kfree(gsm);
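
The new txframe size can be checked with a little worst-case arithmetic (a
plausible reading; the patch itself does not spell it out): a basic-mode
frame is SOF + ADDR + CTRL + LEN1 + LEN2 + data + FCS + EOF, i.e. MAX_MTU +
PROT_OVERHEAD bytes, and in advanced-option mode every byte between the two
flag bytes may be doubled by escaping:

	/*
	 * 2 * (MAX_MTU + PROT_OVERHEAD - 2) + 2 == 2 * (MAX_MTU + PROT_OVERHEAD - 1)
	 *                                       == 2 * (1500 + 6) == 3012 bytes,
	 * whereas the old 2 * MAX_MRU + 2 == 3002 could not hold a fully
	 * escaped maximum-size frame.
	 */
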
@@ -2304,6 +2307,26 @@ static struct gsm_mux *gsm_alloc_mux(void)
        gsm->mtu = 64;
        gsm->dead = true;       /* Avoid early tty opens */
 
+       /* Store the instance to the mux array or abort if no space is
+        * available.
+        */
+       spin_lock(&gsm_mux_lock);
+       for (i = 0; i < MAX_MUX; i++) {
+               if (!gsm_mux[i]) {
+                       gsm_mux[i] = gsm;
+                       gsm->num = i;
+                       break;
+               }
+       }
+       spin_unlock(&gsm_mux_lock);
+       if (i == MAX_MUX) {
+               mutex_destroy(&gsm->mutex);
+               kfree(gsm->txframe);
+               kfree(gsm->buf);
+               kfree(gsm);
+               return NULL;
+       }
+
        return gsm;
 }
 
@@ -2330,6 +2353,7 @@ static void gsm_copy_config_values(struct gsm_mux *gsm,
 
 static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
 {
+       int ret = 0;
        int need_close = 0;
        int need_restart = 0;
 
@@ -2339,7 +2363,7 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
        /* Check the MRU/MTU range looks sane */
        if (c->mru > MAX_MRU || c->mtu > MAX_MTU || c->mru < 8 || c->mtu < 8)
                return -EINVAL;
-       if (c->n2 < 3)
+       if (c->n2 > 255)
                return -EINVAL;
        if (c->encapsulation > 1)       /* Basic, advanced, no I */
                return -EINVAL;
@@ -2370,19 +2394,11 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
 
        /*
         * Close down what is needed, restart and initiate the new
-        * configuration
+        * configuration. On the first pass there is no DLCI[0] yet,
+        * so closing or cleaning up is not necessary.
         */
-
-       if (gsm->initiator && (need_close || need_restart)) {
-               int ret;
-
-               ret = gsm_disconnect(gsm);
-
-               if (ret)
-                       return ret;
-       }
-       if (need_restart)
-               gsm_cleanup_mux(gsm);
+       if (need_close || need_restart)
+               gsm_cleanup_mux(gsm, true);
 
        gsm->initiator = c->initiator;
        gsm->mru = c->mru;
@@ -2405,10 +2421,13 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
         * FIXME: We need to separate activation/deactivation from adding
         * and removing from the mux array
         */
-       if (need_restart)
-               gsm_activate_mux(gsm);
-       if (gsm->initiator && need_close)
-               gsm_dlci_begin_open(gsm->dlci[0]);
+       if (gsm->dead) {
+               ret = gsm_activate_mux(gsm);
+               if (ret)
+                       return ret;
+               if (gsm->initiator)
+                       gsm_dlci_begin_open(gsm->dlci[0]);
+       }
        return 0;
 }
 
@@ -2450,25 +2469,26 @@ static int gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm)
        int ret, i;
 
        gsm->tty = tty_kref_get(tty);
+       /* Turn off tty XON/XOFF handling to handle it explicitly. */
+       gsm->old_c_iflag = tty->termios.c_iflag;
+       tty->termios.c_iflag &= (IXON | IXOFF);
        ret =  gsm_activate_mux(gsm);
        if (ret != 0)
                tty_kref_put(gsm->tty);
        else {
                /* Don't register device 0 - this is the control channel and not
                   a usable tty interface */
-               if (gsm->initiator) {
-                       base = mux_num_to_base(gsm); /* Base for this MUX */
-                       for (i = 1; i < NUM_DLCI; i++) {
-                               struct device *dev;
+               base = mux_num_to_base(gsm); /* Base for this MUX */
+               for (i = 1; i < NUM_DLCI; i++) {
+                       struct device *dev;
 
-                               dev = tty_register_device(gsm_tty_driver,
+                       dev = tty_register_device(gsm_tty_driver,
                                                        base + i, NULL);
-                               if (IS_ERR(dev)) {
-                                       for (i--; i >= 1; i--)
-                                               tty_unregister_device(gsm_tty_driver,
-                                                                       base + i);
-                                       return PTR_ERR(dev);
-                               }
+                       if (IS_ERR(dev)) {
+                               for (i--; i >= 1; i--)
+                                       tty_unregister_device(gsm_tty_driver,
+                                                               base + i);
+                               return PTR_ERR(dev);
                        }
                }
        }
@@ -2490,11 +2510,10 @@ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm)
        int i;
 
        WARN_ON(tty != gsm->tty);
-       if (gsm->initiator) {
-               for (i = 1; i < NUM_DLCI; i++)
-                       tty_unregister_device(gsm_tty_driver, base + i);
-       }
-       gsm_cleanup_mux(gsm);
+       for (i = 1; i < NUM_DLCI; i++)
+               tty_unregister_device(gsm_tty_driver, base + i);
+       /* Restore tty XON/XOFF handling. */
+       gsm->tty->termios.c_iflag = gsm->old_c_iflag;
        tty_kref_put(gsm->tty);
        gsm->tty = NULL;
 }
@@ -2559,6 +2578,12 @@ static void gsmld_close(struct tty_struct *tty)
 {
        struct gsm_mux *gsm = tty->disc_data;
 
+       /* The ldisc locks and closes the port before calling our close. This
+        * means we have no way to do a proper disconnect. We will not bother
+        * to do one.
+        */
+       gsm_cleanup_mux(gsm, false);
+
        gsmld_detach_gsm(tty, gsm);
 
        gsmld_flush_buffer(tty);
@@ -2597,7 +2622,7 @@ static int gsmld_open(struct tty_struct *tty)
 
        ret = gsmld_attach_gsm(tty, gsm);
        if (ret != 0) {
-               gsm_cleanup_mux(gsm);
+               gsm_cleanup_mux(gsm, false);
                mux_put(gsm);
        }
        return ret;
@@ -2952,28 +2977,78 @@ static struct tty_ldisc_ops tty_ldisc_packet = {
  *     Virtual tty side
  */
 
-#define TX_SIZE                512
+/**
+ *     gsm_modem_upd_via_data  -       send modem bits via convergence layer
+ *     @dlci: channel
+ *     @brk: break signal
+ *
+ *     Send an empty frame to signal mobile state changes and to transmit the
+ *     break signal for adaption 2.
+ */
+
+static void gsm_modem_upd_via_data(struct gsm_dlci *dlci, u8 brk)
+{
+       struct gsm_mux *gsm = dlci->gsm;
+       unsigned long flags;
+
+       if (dlci->state != DLCI_OPEN || dlci->adaption != 2)
+               return;
+
+       spin_lock_irqsave(&gsm->tx_lock, flags);
+       gsm_dlci_modem_output(gsm, dlci, brk);
+       spin_unlock_irqrestore(&gsm->tx_lock, flags);
+}
+
+/**
+ *     gsm_modem_upd_via_msc   -       send modem bits via control frame
+ *     @dlci: channel
+ *     @brk: break signal
+ */
 
-static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk)
+static int gsm_modem_upd_via_msc(struct gsm_dlci *dlci, u8 brk)
 {
-       u8 modembits[5];
+       u8 modembits[3];
        struct gsm_control *ctrl;
        int len = 2;
 
-       if (brk)
-               len++;
+       if (dlci->gsm->encoding != 0)
+               return 0;
 
-       modembits[0] = len << 1 | EA;           /* Data bytes */
-       modembits[1] = dlci->addr << 2 | 3;     /* DLCI, EA, 1 */
-       modembits[2] = gsm_encode_modem(dlci) << 1 | EA;
-       if (brk)
-               modembits[3] = brk << 4 | 2 | EA;       /* Valid, EA */
-       ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len + 1);
+       modembits[0] = (dlci->addr << 2) | 2 | EA;  /* DLCI, Valid, EA */
+       if (!brk) {
+               modembits[1] = (gsm_encode_modem(dlci) << 1) | EA;
+       } else {
+               modembits[1] = gsm_encode_modem(dlci) << 1;
+               modembits[2] = (brk << 4) | 2 | EA; /* Length, Break, EA */
+               len++;
+       }
+       ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len);
        if (ctrl == NULL)
                return -ENOMEM;
        return gsm_control_wait(dlci->gsm, ctrl);
 }
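
The MSC value field built above, as a byte-layout sketch (an assumption based
on the code, with EA being 0x01):

	/*
	 * modembits[0] = (dlci->addr << 2) | 2 | EA;  DLCI, valid bit, final
	 * modembits[1] = modem << 1 [| EA];           V.24 signals; EA stays
	 *                                             clear only when a break
	 *                                             octet follows
	 * modembits[2] = (brk << 4) | 2 | EA;         break length, break-valid
	 *
	 * The old code prefixed its own length octet here; that duplicated the
	 * one now emitted centrally by gsm_control_transmit().
	 */
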
 
+/**
+ *     gsm_modem_update        -       send modem status line state
+ *     @dlci: channel
+ *     @brk: break signal
+ */
+
+static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk)
+{
+       if (dlci->adaption == 2) {
+               /* Send convergence layer type 2 empty data frame. */
+               gsm_modem_upd_via_data(dlci, brk);
+               return 0;
+       } else if (dlci->gsm->encoding == 0) {
+               /* Send as MSC control message. */
+               return gsm_modem_upd_via_msc(dlci, brk);
+       }
+
+       /* Modem status lines are not supported. */
+       return -EPROTONOSUPPORT;
+}
+
 static int gsm_carrier_raised(struct tty_port *port)
 {
        struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port);
@@ -3006,7 +3081,7 @@ static void gsm_dtr_rts(struct tty_port *port, int onoff)
                modem_tx &= ~(TIOCM_DTR | TIOCM_RTS);
        if (modem_tx != dlci->modem_tx) {
                dlci->modem_tx = modem_tx;
-               gsmtty_modem_update(dlci, 0);
+               gsm_modem_update(dlci, 0);
        }
 }
 
@@ -3141,7 +3216,7 @@ static unsigned int gsmtty_write_room(struct tty_struct *tty)
        struct gsm_dlci *dlci = tty->driver_data;
        if (dlci->state == DLCI_CLOSED)
                return 0;
-       return TX_SIZE - kfifo_len(&dlci->fifo);
+       return kfifo_avail(&dlci->fifo);
 }
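
Two kfifo facts make this rewrite safe (standard kernel kfifo semantics, not
something the patch states): kfifo_alloc() rounds the requested size up to a
power of two, which is what the "Must be power of 2" note on TX_SIZE is
about, and kfifo_avail() returns the free space left in the fifo, so the
write room no longer has to be derived from a hard-coded constant:

	/* e.g. kfifo_avail() == TX_SIZE - kfifo_len() for the power-of-2 fifo */
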
 
 static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty)
@@ -3155,13 +3230,17 @@ static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty)
 static void gsmtty_flush_buffer(struct tty_struct *tty)
 {
        struct gsm_dlci *dlci = tty->driver_data;
+       unsigned long flags;
+
        if (dlci->state == DLCI_CLOSED)
                return;
        /* Caution needed: If we implement reliable transport classes
           then the data being transmitted can't simply be junked once
           it has first hit the stack. Until then we can just blow it
           away */
+       spin_lock_irqsave(&dlci->lock, flags);
        kfifo_reset(&dlci->fifo);
+       spin_unlock_irqrestore(&dlci->lock, flags);
        /* Need to unhook this DLCI from the transmit queue logic */
 }
 
@@ -3193,7 +3272,7 @@ static int gsmtty_tiocmset(struct tty_struct *tty,
 
        if (modem_tx != dlci->modem_tx) {
                dlci->modem_tx = modem_tx;
-               return gsmtty_modem_update(dlci, 0);
+               return gsm_modem_update(dlci, 0);
        }
        return 0;
 }
@@ -3254,7 +3333,7 @@ static void gsmtty_throttle(struct tty_struct *tty)
                dlci->modem_tx &= ~TIOCM_RTS;
        dlci->throttled = true;
        /* Send an MSC with RTS cleared */
-       gsmtty_modem_update(dlci, 0);
+       gsm_modem_update(dlci, 0);
 }
 
 static void gsmtty_unthrottle(struct tty_struct *tty)
@@ -3266,7 +3345,7 @@ static void gsmtty_unthrottle(struct tty_struct *tty)
                dlci->modem_tx |= TIOCM_RTS;
        dlci->throttled = false;
        /* Send an MSC with RTS set */
-       gsmtty_modem_update(dlci, 0);
+       gsm_modem_update(dlci, 0);
 }
 
 static int gsmtty_break_ctl(struct tty_struct *tty, int state)
@@ -3284,7 +3363,7 @@ static int gsmtty_break_ctl(struct tty_struct *tty, int state)
                if (encode > 0x0F)
                        encode = 0x0F;  /* Best effort */
        }
-       return gsmtty_modem_update(dlci, encode);
+       return gsm_modem_update(dlci, encode);
 }
 
 static void gsmtty_cleanup(struct tty_struct *tty)
index f4a0caa56f84ace5cb1b251dc1518fd502ad5e05..21053db93ff1e5ad546a21e79b9485dee2213b89 100644
@@ -37,6 +37,7 @@
 #define MTK_UART_IER_RTSI      0x40    /* Enable RTS Modem status interrupt */
 #define MTK_UART_IER_CTSI      0x80    /* Enable CTS Modem status interrupt */
 
+#define MTK_UART_EFR           38      /* I/O: Extended Features Register */
 #define MTK_UART_EFR_EN                0x10    /* Enable enhancement feature */
 #define MTK_UART_EFR_RTS       0x40    /* Enable hardware rx flow control */
 #define MTK_UART_EFR_CTS       0x80    /* Enable hardware tx flow control */
 #define MTK_UART_TX_TRIGGER    1
 #define MTK_UART_RX_TRIGGER    MTK_UART_RX_SIZE
 
+#define MTK_UART_FEATURE_SEL   39      /* Feature Selection register */
+#define MTK_UART_FEAT_NEWRMAP  BIT(0)  /* Use new register map */
+
+#define MTK_UART_XON1          40      /* I/O: Xon character 1 */
+#define MTK_UART_XOFF1         42      /* I/O: Xoff character 1 */
+
 #ifdef CONFIG_SERIAL_8250_DMA
 enum dma_rx_status {
        DMA_RX_START = 0,
@@ -169,7 +176,7 @@ static void mtk8250_dma_enable(struct uart_8250_port *up)
                   MTK_UART_DMA_EN_RX | MTK_UART_DMA_EN_TX);
 
        serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
-       serial_out(up, UART_EFR, UART_EFR_ECB);
+       serial_out(up, MTK_UART_EFR, UART_EFR_ECB);
        serial_out(up, UART_LCR, lcr);
 
        if (dmaengine_slave_config(dma->rxchan, &dma->rxconf) != 0)
@@ -232,7 +239,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
        int lcr = serial_in(up, UART_LCR);
 
        serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
-       serial_out(up, UART_EFR, UART_EFR_ECB);
+       serial_out(up, MTK_UART_EFR, UART_EFR_ECB);
        serial_out(up, UART_LCR, lcr);
        lcr = serial_in(up, UART_LCR);
 
@@ -241,7 +248,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
                serial_out(up, MTK_UART_ESCAPE_DAT, MTK_UART_ESCAPE_CHAR);
                serial_out(up, MTK_UART_ESCAPE_EN, 0x00);
                serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
-               serial_out(up, UART_EFR, serial_in(up, UART_EFR) &
+               serial_out(up, MTK_UART_EFR, serial_in(up, MTK_UART_EFR) &
                        (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK)));
                serial_out(up, UART_LCR, lcr);
                mtk8250_disable_intrs(up, MTK_UART_IER_XOFFI |
@@ -255,8 +262,8 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
                serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 
                /* enable hw flow control */
-               serial_out(up, UART_EFR, MTK_UART_EFR_HW_FC |
-                       (serial_in(up, UART_EFR) &
+               serial_out(up, MTK_UART_EFR, MTK_UART_EFR_HW_FC |
+                       (serial_in(up, MTK_UART_EFR) &
                        (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK))));
 
                serial_out(up, UART_LCR, lcr);
@@ -270,12 +277,12 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
                serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 
                /* enable sw flow control */
-               serial_out(up, UART_EFR, MTK_UART_EFR_XON1_XOFF1 |
-                       (serial_in(up, UART_EFR) &
+               serial_out(up, MTK_UART_EFR, MTK_UART_EFR_XON1_XOFF1 |
+                       (serial_in(up, MTK_UART_EFR) &
                        (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK))));
 
-               serial_out(up, UART_XON1, START_CHAR(port->state->port.tty));
-               serial_out(up, UART_XOFF1, STOP_CHAR(port->state->port.tty));
+               serial_out(up, MTK_UART_XON1, START_CHAR(port->state->port.tty));
+               serial_out(up, MTK_UART_XOFF1, STOP_CHAR(port->state->port.tty));
                serial_out(up, UART_LCR, lcr);
                mtk8250_disable_intrs(up, MTK_UART_IER_CTSI|MTK_UART_IER_RTSI);
                mtk8250_enable_intrs(up, MTK_UART_IER_XOFFI);
@@ -568,6 +575,10 @@ static int mtk8250_probe(struct platform_device *pdev)
                uart.dma = data->dma;
 #endif
 
+       /* Set AP UART new register map */
+       writel(MTK_UART_FEAT_NEWRMAP, uart.port.membase +
+              (MTK_UART_FEATURE_SEL << uart.port.regshift));
+
        /* Disable Rate Fix function */
        writel(0x0, uart.port.membase +
                        (MTK_UART_RATE_FIX << uart.port.regshift));
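
A sketch of why the driver switches to its own register indices here (an
assumption drawn from the code, not from commit text): 8250-style accessors
address a register R at membase + (R << regshift), and with
MTK_UART_FEAT_NEWRMAP selected the enhanced-feature registers sit at the high
MediaTek indices (EFR 38, FEATURE_SEL 39, XON1 40, XOFF1 42) rather than at
the standard 16550 UART_EFR/UART_XON1/UART_XOFF1 positions, so the accesses
above had to be converted to the MTK_UART_* defines:

	/* e.g., assuming regshift == 2: MTK_UART_XON1 -> membase + (40 << 2) */
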
index e17e97ea86fadebf7b488c06e501697cee80952f..a293e9f107d0f241de60bdc1d191e42bf46908eb 100644
@@ -2667,7 +2667,7 @@ enum pci_board_num_t {
        pbn_panacom2,
        pbn_panacom4,
        pbn_plx_romulus,
-       pbn_endrun_2_4000000,
+       pbn_endrun_2_3906250,
        pbn_oxsemi,
        pbn_oxsemi_1_3906250,
        pbn_oxsemi_2_3906250,
@@ -3195,10 +3195,10 @@ static struct pciserial_board pci_boards[] = {
        * signal how many ports are available
        * 2 port 952 Uart support
        */
-       [pbn_endrun_2_4000000] = {
+       [pbn_endrun_2_3906250] = {
                .flags          = FL_BASE0,
                .num_ports      = 2,
-               .base_baud      = 4000000,
+               .base_baud      = 3906250,
                .uart_offset    = 0x200,
                .first_offset   = 0x1000,
        },
@@ -4115,7 +4115,7 @@ static const struct pci_device_id serial_pci_tbl[] = {
        */
        {       PCI_VENDOR_ID_ENDRUN, PCI_DEVICE_ID_ENDRUN_1588,
                PCI_ANY_ID, PCI_ANY_ID, 0, 0,
-               pbn_endrun_2_4000000 },
+               pbn_endrun_2_3906250 },
        /*
         * Quatech cards. These actually have configurable clocks but for
         * now we just use the default.
index 318af6f1360504b6222cd3483b02fd83f3790e13..1fbd5bf264bec9ed8c3f43f3f05b85641da0e163 100644
@@ -1675,11 +1675,11 @@ static void serial8250_start_tx(struct uart_port *port)
        struct uart_8250_port *up = up_to_u8250p(port);
        struct uart_8250_em485 *em485 = up->em485;
 
-       serial8250_rpm_get_tx(up);
-
        if (!port->x_char && uart_circ_empty(&port->state->xmit))
                return;
 
+       serial8250_rpm_get_tx(up);
+
        if (em485 &&
            em485->active_timer == &em485->start_tx_timer)
                return;
@@ -3329,7 +3329,7 @@ static void serial8250_console_restore(struct uart_8250_port *up)
 
        serial8250_set_divisor(port, baud, quot, frac);
        serial_port_out(port, UART_LCR, up->lcr);
-       serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS);
+       serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
 }
 
 /*
index 51ecb050ae40fd97325c109dcbdea421889ad671..4d11a3e547f946e8d4160e7bf7bb6fb56600dcb0 100644
@@ -1255,13 +1255,18 @@ static inline bool pl011_dma_rx_running(struct uart_amba_port *uap)
 
 static void pl011_rs485_tx_stop(struct uart_amba_port *uap)
 {
+       /*
+        * To be on the safe side, only time out after twice as many
+        * iterations as the FIFO size.
+        */
+       const int MAX_TX_DRAIN_ITERS = uap->port.fifosize * 2;
        struct uart_port *port = &uap->port;
        int i = 0;
        u32 cr;
 
        /* Wait until hardware tx queue is empty */
        while (!pl011_tx_empty(port)) {
-               if (i == port->fifosize) {
+               if (i > MAX_TX_DRAIN_ITERS) {
                        dev_warn(port->dev,
                                 "timeout while draining hardware tx queue\n");
                        break;
@@ -2052,7 +2057,7 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios,
         * with the given baud rate. We use this as the poll interval when we
         * wait for the tx queue to empty.
         */
-       uap->rs485_tx_drain_interval = (bits * 1000 * 1000) / baud;
+       uap->rs485_tx_drain_interval = DIV_ROUND_UP(bits * 1000 * 1000, baud);
 
        pl011_setup_status_masks(port, termios);
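
A worked example of the rounding change above (illustrative numbers, assuming
roughly ten bit times per character):

	/*
	 * baud 9600:      10 * 1000 * 1000 / 9600     -> 1041 us (old), 1042 us (new)
	 * baud 12000000:  10 * 1000 * 1000 / 12000000 -> 0 us (old),    1 us (new)
	 *
	 * Plain integer division can truncate the poll interval to zero at
	 * high baud rates; DIV_ROUND_UP() keeps it at least one microsecond.
	 */
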
 
index 6d70fea76bb3e55a39a7838d10d63a29645beb05..e37a917b9dbbc77f6d69045f8d851fe5a79c7c65 100644
@@ -471,11 +471,10 @@ static int digicolor_uart_probe(struct platform_device *pdev)
        if (IS_ERR(uart_clk))
                return PTR_ERR(uart_clk);
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       dp->port.mapbase = res->start;
-       dp->port.membase = devm_ioremap_resource(&pdev->dev, res);
+       dp->port.membase = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(dp->port.membase))
                return PTR_ERR(dp->port.membase);
+       dp->port.mapbase = res->start;
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
index 87789872f400660cc917331703d787662e35a490..be12fee94db55a8ee1db85836da3f576a5818b7d 100644
@@ -2664,6 +2664,7 @@ static int lpuart_probe(struct platform_device *pdev)
        struct device_node *np = pdev->dev.of_node;
        struct lpuart_port *sport;
        struct resource *res;
+       irq_handler_t handler;
        int ret;
 
        sport = devm_kzalloc(&pdev->dev, sizeof(*sport), GFP_KERNEL);
@@ -2741,17 +2742,11 @@ static int lpuart_probe(struct platform_device *pdev)
 
        if (lpuart_is_32(sport)) {
                lpuart_reg.cons = LPUART32_CONSOLE;
-               ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart32_int, 0,
-                                       DRIVER_NAME, sport);
+               handler = lpuart32_int;
        } else {
                lpuart_reg.cons = LPUART_CONSOLE;
-               ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart_int, 0,
-                                       DRIVER_NAME, sport);
+               handler = lpuart_int;
        }
-
-       if (ret)
-               goto failed_irq_request;
-
        ret = uart_add_one_port(&lpuart_reg, &sport->port);
        if (ret)
                goto failed_attach_port;
@@ -2773,13 +2768,18 @@ static int lpuart_probe(struct platform_device *pdev)
 
        sport->port.rs485_config(&sport->port, &sport->port.rs485);
 
+       ret = devm_request_irq(&pdev->dev, sport->port.irq, handler, 0,
+                               DRIVER_NAME, sport);
+       if (ret)
+               goto failed_irq_request;
+
        return 0;
 
+failed_irq_request:
 failed_get_rs485:
 failed_reset:
        uart_remove_one_port(&lpuart_reg, &sport->port);
 failed_attach_port:
-failed_irq_request:
        lpuart_disable_clks(sport);
 failed_clock_enable:
 failed_out_of_range:
index fd38e6ed4fdab7d99343c79e29d4d9b4d7e23a64..a2100be8d5542ed6f8419be4c31b560f2a9a833e 100644
@@ -1448,7 +1448,7 @@ static int imx_uart_startup(struct uart_port *port)
        imx_uart_writel(sport, ucr1, UCR1);
 
        ucr4 = imx_uart_readl(sport, UCR4) & ~(UCR4_OREN | UCR4_INVR);
-       if (!sport->dma_is_enabled)
+       if (!dma_is_inited)
                ucr4 |= UCR4_OREN;
        if (sport->inverted_rx)
                ucr4 |= UCR4_INVR;
index 8a69583777644cda1102c66fff3d9065f9cfcba6..3acc0f1857629cfd9ce92caec03bfbf70222ccc7 100644
@@ -436,31 +436,31 @@ static void mpc512x_psc_fifo_init(struct uart_port *port)
        out_be32(&FIFO_512x(port)->rximr, MPC512x_PSC_FIFO_ALARM);
 }
 
-static int mpc512x_psc_raw_rx_rdy(struct uart_port *port)
+static unsigned int mpc512x_psc_raw_rx_rdy(struct uart_port *port)
 {
        return !(in_be32(&FIFO_512x(port)->rxsr) & MPC512x_PSC_FIFO_EMPTY);
 }
 
-static int mpc512x_psc_raw_tx_rdy(struct uart_port *port)
+static unsigned int mpc512x_psc_raw_tx_rdy(struct uart_port *port)
 {
        return !(in_be32(&FIFO_512x(port)->txsr) & MPC512x_PSC_FIFO_FULL);
 }
 
-static int mpc512x_psc_rx_rdy(struct uart_port *port)
+static unsigned int mpc512x_psc_rx_rdy(struct uart_port *port)
 {
        return in_be32(&FIFO_512x(port)->rxsr)
            & in_be32(&FIFO_512x(port)->rximr)
            & MPC512x_PSC_FIFO_ALARM;
 }
 
-static int mpc512x_psc_tx_rdy(struct uart_port *port)
+static unsigned int mpc512x_psc_tx_rdy(struct uart_port *port)
 {
        return in_be32(&FIFO_512x(port)->txsr)
            & in_be32(&FIFO_512x(port)->tximr)
            & MPC512x_PSC_FIFO_ALARM;
 }
 
-static int mpc512x_psc_tx_empty(struct uart_port *port)
+static unsigned int mpc512x_psc_tx_empty(struct uart_port *port)
 {
        return in_be32(&FIFO_512x(port)->txsr)
            & MPC512x_PSC_FIFO_EMPTY;
@@ -780,29 +780,29 @@ static void mpc5125_psc_fifo_init(struct uart_port *port)
        out_be32(&FIFO_5125(port)->rximr, MPC512x_PSC_FIFO_ALARM);
 }
 
-static int mpc5125_psc_raw_rx_rdy(struct uart_port *port)
+static unsigned int mpc5125_psc_raw_rx_rdy(struct uart_port *port)
 {
        return !(in_be32(&FIFO_5125(port)->rxsr) & MPC512x_PSC_FIFO_EMPTY);
 }
 
-static int mpc5125_psc_raw_tx_rdy(struct uart_port *port)
+static unsigned int mpc5125_psc_raw_tx_rdy(struct uart_port *port)
 {
        return !(in_be32(&FIFO_5125(port)->txsr) & MPC512x_PSC_FIFO_FULL);
 }
 
-static int mpc5125_psc_rx_rdy(struct uart_port *port)
+static unsigned int mpc5125_psc_rx_rdy(struct uart_port *port)
 {
        return in_be32(&FIFO_5125(port)->rxsr) &
               in_be32(&FIFO_5125(port)->rximr) & MPC512x_PSC_FIFO_ALARM;
 }
 
-static int mpc5125_psc_tx_rdy(struct uart_port *port)
+static unsigned int mpc5125_psc_tx_rdy(struct uart_port *port)
 {
        return in_be32(&FIFO_5125(port)->txsr) &
               in_be32(&FIFO_5125(port)->tximr) & MPC512x_PSC_FIFO_ALARM;
 }
 
-static int mpc5125_psc_tx_empty(struct uart_port *port)
+static unsigned int mpc5125_psc_tx_empty(struct uart_port *port)
 {
        return in_be32(&FIFO_5125(port)->txsr) & MPC512x_PSC_FIFO_EMPTY;
 }
index e857fb61efbfc4207fffee2b50324f66ad60a768..5fb201c1b563b3550f4caf4cb3e48070a1d5e9a6 100644
@@ -1238,12 +1238,10 @@ static void sc16is7xx_shutdown(struct uart_port *port)
 
        /* Disable all interrupts */
        sc16is7xx_port_write(port, SC16IS7XX_IER_REG, 0);
-       /* Disable TX/RX, clear auto RS485 and RTS invert */
+       /* Disable TX/RX */
        sc16is7xx_port_update(port, SC16IS7XX_EFCR_REG,
                              SC16IS7XX_EFCR_RXDISABLE_BIT |
-                             SC16IS7XX_EFCR_TXDISABLE_BIT |
-                             SC16IS7XX_EFCR_AUTO_RS485_BIT |
-                             SC16IS7XX_EFCR_RTS_INVERT_BIT,
+                             SC16IS7XX_EFCR_TXDISABLE_BIT,
                              SC16IS7XX_EFCR_RXDISABLE_BIT |
                              SC16IS7XX_EFCR_TXDISABLE_BIT);
 
index f9af7ebe003d7b917c24cc83f4b8a47ee4a67263..d6d515d598dc0c22dcb75a905641cdda3ccf7c31 100644
@@ -2684,6 +2684,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep)
        struct usb_request *request;
        struct cdns3_request *priv_req;
        struct cdns3_trb *trb = NULL;
+       struct cdns3_trb trb_tmp;
        int ret;
        int val;
 
@@ -2693,8 +2694,10 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep)
        if (request) {
                priv_req = to_cdns3_request(request);
                trb = priv_req->trb;
-               if (trb)
+               if (trb) {
+                       trb_tmp = *trb;
                        trb->control = trb->control ^ cpu_to_le32(TRB_CYCLE);
+               }
        }
 
        writel(EP_CMD_CSTALL | EP_CMD_EPRST, &priv_dev->regs->ep_cmd);
@@ -2709,7 +2712,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep)
 
        if (request) {
                if (trb)
-                       trb->control = trb->control ^ cpu_to_le32(TRB_CYCLE);
+                       *trb = trb_tmp;
 
                cdns3_rearm_transfer(priv_ep, 1);
        }
index 7f2c83f299d3283db6ee3a8f9958b6e33d6c5c5f..eebe782380fb9b55f1efb199615ea07524ca43f2 100644
@@ -774,6 +774,7 @@ static int wdm_release(struct inode *inode, struct file *file)
                        poison_urbs(desc);
                        spin_lock_irq(&desc->iuspin);
                        desc->resp_count = 0;
+                       clear_bit(WDM_RESPONDING, &desc->flags);
                        spin_unlock_irq(&desc->iuspin);
                        desc->manage_power(desc->intf, 0);
                        unpoison_urbs(desc);
index 6abb7294e9192b4dea9a1afd9e4831cfe2b84b08..b5b85bf80329888b6b3fc60d200d5bddf1bb1af3 100644
@@ -1209,12 +1209,16 @@ static int do_proc_control(struct usb_dev_state *ps,
 
                usb_unlock_device(dev);
                i = usbfs_start_wait_urb(urb, tmo, &actlen);
+
+               /* Linger a bit, prior to the next control message. */
+               if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG)
+                       msleep(200);
                usb_lock_device(dev);
                snoop_urb(dev, NULL, pipe, actlen, i, COMPLETE, tbuf, actlen);
                if (!i && actlen) {
                        if (copy_to_user(ctrl->data, tbuf, actlen)) {
                                ret = -EFAULT;
-                               goto recv_fault;
+                               goto done;
                        }
                }
        } else {
@@ -1231,6 +1235,10 @@ static int do_proc_control(struct usb_dev_state *ps,
 
                usb_unlock_device(dev);
                i = usbfs_start_wait_urb(urb, tmo, &actlen);
+
+               /* Linger a bit, prior to the next control message. */
+               if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG)
+                       msleep(200);
                usb_lock_device(dev);
                snoop_urb(dev, NULL, pipe, actlen, i, COMPLETE, NULL, 0);
        }
@@ -1242,10 +1250,6 @@ static int do_proc_control(struct usb_dev_state *ps,
        }
        ret = (i < 0 ? i : actlen);
 
- recv_fault:
-       /* Linger a bit, prior to the next control message. */
-       if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG)
-               msleep(200);
  done:
        kfree(dr);
        usb_free_urb(urb);
index d3c14b5ed4a1f9fc438ce917390234b9d39f2275..97b44a68668a5455cbe390ff8a7fb66ce4d0eedf 100644
@@ -404,6 +404,9 @@ static const struct usb_device_id usb_quirk_list[] = {
        { USB_DEVICE(0x0b05, 0x17e0), .driver_info =
                        USB_QUIRK_IGNORE_REMOTE_WAKEUP },
 
+       /* Realtek Semiconductor Corp. Mass Storage Device (Multicard Reader) */
+       { USB_DEVICE(0x0bda, 0x0151), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS },
+
        /* Realtek hub in Dell WD19 (Type-C) */
        { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM },
 
@@ -507,6 +510,9 @@ static const struct usb_device_id usb_quirk_list[] = {
        /* DJI CineSSD */
        { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM },
 
+       /* VCOM device */
+       { USB_DEVICE(0x4296, 0x7570), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS },
+
        /* INTEL VALUE SSD */
        { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
 
index 1170b800acdceb342f84c7a1743c61ca5969af55..d28cd1a6709bbe399f7c843ebae74b6cf6c77522 100644
@@ -274,7 +274,8 @@ int dwc3_core_soft_reset(struct dwc3 *dwc)
 
        reg = dwc3_readl(dwc->regs, DWC3_DCTL);
        reg |= DWC3_DCTL_CSFTRST;
-       dwc3_writel(dwc->regs, DWC3_DCTL, reg);
+       reg &= ~DWC3_DCTL_RUN_STOP;
+       dwc3_gadget_dctl_write_safe(dwc, reg);
 
        /*
         * For DWC_usb31 controller 1.90a and later, the DCTL.CSFRST bit
@@ -1377,10 +1378,10 @@ static void dwc3_get_properties(struct dwc3 *dwc)
        u8                      lpm_nyet_threshold;
        u8                      tx_de_emphasis;
        u8                      hird_threshold;
-       u8                      rx_thr_num_pkt_prd;
-       u8                      rx_max_burst_prd;
-       u8                      tx_thr_num_pkt_prd;
-       u8                      tx_max_burst_prd;
+       u8                      rx_thr_num_pkt_prd = 0;
+       u8                      rx_max_burst_prd = 0;
+       u8                      tx_thr_num_pkt_prd = 0;
+       u8                      tx_max_burst_prd = 0;
        u8                      tx_fifo_resize_max_num;
        const char              *usb_psy_name;
        int                     ret;
@@ -1690,21 +1691,44 @@ static int dwc3_probe(struct platform_device *pdev)
                /*
                 * Clocks are optional, but new DT platforms should support all
                 * clocks as required by the DT-binding.
+                * Some devices have different clock names in legacy device trees,
+                * check for them to retain backwards compatibility.
                 */
                dwc->bus_clk = devm_clk_get_optional(dev, "bus_early");
                if (IS_ERR(dwc->bus_clk))
                        return dev_err_probe(dev, PTR_ERR(dwc->bus_clk),
                                             "could not get bus clock\n");
 
+               if (dwc->bus_clk == NULL) {
+                       dwc->bus_clk = devm_clk_get_optional(dev, "bus_clk");
+                       if (IS_ERR(dwc->bus_clk))
+                               return dev_err_probe(dev, PTR_ERR(dwc->bus_clk),
+                                                    "could not get bus clock\n");
+               }
+
                dwc->ref_clk = devm_clk_get_optional(dev, "ref");
                if (IS_ERR(dwc->ref_clk))
                        return dev_err_probe(dev, PTR_ERR(dwc->ref_clk),
                                             "could not get ref clock\n");
 
+               if (dwc->ref_clk == NULL) {
+                       dwc->ref_clk = devm_clk_get_optional(dev, "ref_clk");
+                       if (IS_ERR(dwc->ref_clk))
+                               return dev_err_probe(dev, PTR_ERR(dwc->ref_clk),
+                                                    "could not get ref clock\n");
+               }
+
                dwc->susp_clk = devm_clk_get_optional(dev, "suspend");
                if (IS_ERR(dwc->susp_clk))
                        return dev_err_probe(dev, PTR_ERR(dwc->susp_clk),
                                             "could not get suspend clock\n");
+
+               if (dwc->susp_clk == NULL) {
+                       dwc->susp_clk = devm_clk_get_optional(dev, "suspend_clk");
+                       if (IS_ERR(dwc->susp_clk))
+                               return dev_err_probe(dev, PTR_ERR(dwc->susp_clk),
+                                                    "could not get suspend clock\n");
+               }
        }
 
        ret = reset_control_deassert(dwc->reset);
index b60b5f7b6dff469b4376080a31c7061b7102b2b6..8cad9e7d3368725145fc27457c70efa641764a13 100644
@@ -584,16 +584,15 @@ int dwc3_drd_init(struct dwc3 *dwc)
 {
        int ret, irq;
 
+       if (ROLE_SWITCH &&
+           device_property_read_bool(dwc->dev, "usb-role-switch"))
+               return dwc3_setup_role_switch(dwc);
+
        dwc->edev = dwc3_get_extcon(dwc);
        if (IS_ERR(dwc->edev))
                return PTR_ERR(dwc->edev);
 
-       if (ROLE_SWITCH &&
-           device_property_read_bool(dwc->dev, "usb-role-switch")) {
-               ret = dwc3_setup_role_switch(dwc);
-               if (ret < 0)
-                       return ret;
-       } else if (dwc->edev) {
+       if (dwc->edev) {
                dwc->edev_nb.notifier_call = dwc3_drd_notifier;
                ret = extcon_register_notifier(dwc->edev, EXTCON_USB_HOST,
                                               &dwc->edev_nb);
index 33f657d83246066766b14014e1f3eeca655118b6..2e19e0e4ea538fcc3d759f1c27a01b6fd35c4564 100644
@@ -45,6 +45,8 @@
 #define PCI_DEVICE_ID_INTEL_ADLM               0x54ee
 #define PCI_DEVICE_ID_INTEL_ADLS               0x7ae1
 #define PCI_DEVICE_ID_INTEL_RPLS               0x7a61
+#define PCI_DEVICE_ID_INTEL_MTLP               0x7ec1
+#define PCI_DEVICE_ID_INTEL_MTL                        0x7e7e
 #define PCI_DEVICE_ID_INTEL_TGL                        0x9a15
 #define PCI_DEVICE_ID_AMD_MR                   0x163a
 
@@ -456,6 +458,12 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
        { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS),
          (kernel_ulong_t) &dwc3_pci_intel_swnode, },
 
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP),
+         (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTL),
+         (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
        { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL),
          (kernel_ulong_t) &dwc3_pci_intel_swnode, },
 
index ab725d2262d65e067f14ad6d6410942b4870b79c..0b9c2493844a8f6f833cd7a4cd162be4500ff7b9 100644 (file)
@@ -3274,6 +3274,7 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep,
                const struct dwc3_event_depevt *event,
                struct dwc3_request *req, int status)
 {
+       int request_status;
        int ret;
 
        if (req->request.num_mapped_sgs)
@@ -3294,7 +3295,35 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep,
                req->needs_extra_trb = false;
        }
 
-       dwc3_gadget_giveback(dep, req, status);
+       /*
+        * The event status only reflects the status of the TRB with IOC set.
+        * For the requests that don't set interrupt on completion, the driver
+        * needs to check and return the status of the completed TRBs associated
+        * with the request. Use the status of the last TRB of the request.
+        */
+       if (req->request.no_interrupt) {
+               struct dwc3_trb *trb;
+
+               trb = dwc3_ep_prev_trb(dep, dep->trb_dequeue);
+               switch (DWC3_TRB_SIZE_TRBSTS(trb->size)) {
+               case DWC3_TRBSTS_MISSED_ISOC:
+                       /* Isoc endpoint only */
+                       request_status = -EXDEV;
+                       break;
+               case DWC3_TRB_STS_XFER_IN_PROG:
+                       /* Applicable when End Transfer is issued with ForceRM=0 */
+               case DWC3_TRBSTS_SETUP_PENDING:
+                       /* Control endpoint only */
+               case DWC3_TRBSTS_OK:
+               default:
+                       request_status = 0;
+                       break;
+               }
+       } else {
+               request_status = status;
+       }
+
+       dwc3_gadget_giveback(dep, req, request_status);
 
 out:
        return ret;
index 1fb837d9271e155cfeb89987feb0ebc855877a30..84b73cb03f878714ebd844f288f2e5e4d2b23004 100644 (file)
@@ -1438,6 +1438,8 @@ static void configfs_composite_unbind(struct usb_gadget *gadget)
        usb_ep_autoconfig_reset(cdev->gadget);
        spin_lock_irqsave(&gi->spinlock, flags);
        cdev->gadget = NULL;
+       cdev->deactivations = 0;
+       gadget->deactivated = false;
        set_gadget_data(gadget, NULL);
        spin_unlock_irqrestore(&gi->spinlock, flags);
 }
index 71bb5e477dbad77d638dfda9bd0b507c0ae41743..d37965867b230ebfb5e10bfa1e0cc2bd43b132c6 100644 (file)
@@ -890,13 +890,37 @@ static void uvc_function_unbind(struct usb_configuration *c,
 {
        struct usb_composite_dev *cdev = c->cdev;
        struct uvc_device *uvc = to_uvc(f);
+       long wait_ret = 1;
 
        uvcg_info(f, "%s()\n", __func__);
 
+       /* If we know we're connected via v4l2, then there should be a cleanup
+        * of the device from userspace either via UVC_EVENT_DISCONNECT or
+        * through the video device removal uevent. Allow some time for the
+        * application to close out before things get deleted.
+        */
+       if (uvc->func_connected) {
+               uvcg_dbg(f, "waiting for clean disconnect\n");
+               wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue,
+                               uvc->func_connected == false, msecs_to_jiffies(500));
+               uvcg_dbg(f, "done waiting with ret: %ld\n", wait_ret);
+       }
+
        device_remove_file(&uvc->vdev.dev, &dev_attr_function_name);
        video_unregister_device(&uvc->vdev);
        v4l2_device_unregister(&uvc->v4l2_dev);
 
+       if (uvc->func_connected) {
+               /* Wait for the release to occur to ensure there are no longer any
+                * pending operations that may cause panics when resources are cleaned
+                * up.
+                */
+               uvcg_warn(f, "%s no clean disconnect, wait for release\n", __func__);
+               wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue,
+                               uvc->func_connected == false, msecs_to_jiffies(1000));
+               uvcg_dbg(f, "done waiting for release with ret: %ld\n", wait_ret);
+       }
+
        usb_ep_free_request(cdev->gadget->ep0, uvc->control_req);
        kfree(uvc->control_buf);
 
@@ -915,6 +939,7 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi)
 
        mutex_init(&uvc->video.mutex);
        uvc->state = UVC_STATE_DISCONNECTED;
+       init_waitqueue_head(&uvc->func_connected_queue);
        opts = fi_to_f_uvc_opts(fi);
 
        mutex_lock(&opts->lock);
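The unbind path above pairs with the disconnect path further down (uvc_v4l2_disable()): the waiter sleeps on func_connected_queue until func_connected goes false, and the waker clears the flag before calling wake_up_interruptible() so the condition is already true when the sleeper re-checks it. A self-contained sketch of that handshake, with illustrative names:

#include <linux/jiffies.h>
#include <linux/wait.h>

struct example_dev {
	bool connected;
	wait_queue_head_t connected_queue;	/* init_waitqueue_head() at alloc */
};

/* Returns 0 on timeout, -ERESTARTSYS on signal, else remaining jiffies. */
static long example_wait_disconnect(struct example_dev *d, unsigned int ms)
{
	return wait_event_interruptible_timeout(d->connected_queue,
						!d->connected,
						msecs_to_jiffies(ms));
}

static void example_disconnect(struct example_dev *d)
{
	/* Update the condition first, then wake the sleepers. */
	d->connected = false;
	wake_up_interruptible(&d->connected_queue);
}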
index c3607a32b986247ff56a22191eab5f0382d9bb39..886103a1fe9b7d21651da2ecd28c5fd041beb291 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/spinlock.h>
 #include <linux/usb/composite.h>
 #include <linux/videodev2.h>
+#include <linux/wait.h>
 
 #include <media/v4l2-device.h>
 #include <media/v4l2-dev.h>
@@ -129,6 +130,7 @@ struct uvc_device {
        struct usb_function func;
        struct uvc_video video;
        bool func_connected;
+       wait_queue_head_t func_connected_queue;
 
        /* Descriptors */
        struct {
index d852ac9e47e72c65218bc7649c237a592578cee4..2cda982f376502ee3075942ee43d2542c4a8181b 100644 (file)
@@ -264,6 +264,8 @@ void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect)
                buf->state = UVC_BUF_STATE_ERROR;
                vb2_buffer_done(&buf->buf.vb2_buf, VB2_BUF_STATE_ERROR);
        }
+       queue->buf_used = 0;
+
        /* This must be protected by the irqlock spinlock to avoid race
         * conditions between uvc_queue_buffer and the disconnection event that
         * could result in an interruptible wait in uvc_dequeue_buffer. Do not
index a2c78690c5c288a3229aa0a87e79ab04e9f50b9e..fd8f73bb726dd1589f885b096562934af511df1f 100644 (file)
@@ -253,10 +253,11 @@ uvc_v4l2_subscribe_event(struct v4l2_fh *fh,
 
 static void uvc_v4l2_disable(struct uvc_device *uvc)
 {
-       uvc->func_connected = false;
        uvc_function_disconnect(uvc);
        uvcg_video_enable(&uvc->video, 0);
        uvcg_free_buffers(&uvc->video.queue);
+       uvc->func_connected = false;
+       wake_up_interruptible(&uvc->func_connected_queue);
 }
 
 static int
index 8d40a1f2ec57d343b0c7f1f68ad3dfe0f377467c..e9440f7bf019dbe0552a80efc4922c6c7b66b199 100644 (file)
@@ -145,6 +145,7 @@ enum dev_state {
        STATE_DEV_INVALID = 0,
        STATE_DEV_OPENED,
        STATE_DEV_INITIALIZED,
+       STATE_DEV_REGISTERING,
        STATE_DEV_RUNNING,
        STATE_DEV_CLOSED,
        STATE_DEV_FAILED
@@ -508,6 +509,7 @@ static int raw_ioctl_run(struct raw_dev *dev, unsigned long value)
                ret = -EINVAL;
                goto out_unlock;
        }
+       dev->state = STATE_DEV_REGISTERING;
        spin_unlock_irqrestore(&dev->lock, flags);
 
        ret = usb_gadget_probe_driver(&dev->driver);
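STATE_DEV_REGISTERING closes a window where the lock is dropped around the sleeping usb_gadget_probe_driver() call: publishing an intermediate state under the lock stops a concurrent ioctl from seeing the device as still merely initialized. A simplified sketch of the pattern, with hypothetical names:

#include <linux/errno.h>
#include <linux/spinlock.h>

enum example_state { EX_INIT, EX_REGISTERING, EX_RUNNING };

struct example_dev {
	spinlock_t lock;
	enum example_state state;
};

static int example_run(struct example_dev *d)
{
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	if (d->state != EX_INIT) {
		spin_unlock_irqrestore(&d->lock, flags);
		return -EINVAL;
	}
	/* Publish the transition before dropping the lock. */
	d->state = EX_REGISTERING;
	spin_unlock_irqrestore(&d->lock, flags);

	/* ... slow registration work that may sleep ... */

	spin_lock_irqsave(&d->lock, flags);
	d->state = EX_RUNNING;
	spin_unlock_irqrestore(&d->lock, flags);
	return 0;
}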
index 3d82e0b853be521774827f1302fdab616252d797..684164fa97169d3da8f8c50e65c0cc86b089797b 100644 (file)
@@ -1103,6 +1103,26 @@ static void ehci_remove_device(struct usb_hcd *hcd, struct usb_device *udev)
 
 #ifdef CONFIG_PM
 
+/* Clear the wakeup signal locked in the Zhaoxin platform when a device is plugged in. */
+static void ehci_zx_wakeup_clear(struct ehci_hcd *ehci)
+{
+       u32 __iomem     *reg = &ehci->regs->port_status[4];
+       u32             t1 = ehci_readl(ehci, reg);
+
+       t1 &= (u32)~0xf0000;
+       t1 |= PORT_TEST_FORCE;
+       ehci_writel(ehci, t1, reg);
+       t1 = ehci_readl(ehci, reg);
+       msleep(1);
+       t1 &= (u32)~0xf0000;
+       ehci_writel(ehci, t1, reg);
+       ehci_readl(ehci, reg);
+       msleep(1);
+       t1 = ehci_readl(ehci, reg);
+       ehci_writel(ehci, t1 | PORT_CSC, reg);
+       ehci_readl(ehci, reg);
+}
+
 /* suspend/resume, section 4.3 */
 
 /* These routines handle the generic parts of controller suspend/resume */
@@ -1154,6 +1174,9 @@ int ehci_resume(struct usb_hcd *hcd, bool force_reset)
        if (ehci->shutdown)
                return 0;               /* Controller is dead */
 
+       if (ehci->zx_wakeup_clear_needed)
+               ehci_zx_wakeup_clear(ehci);
+
        /*
         * If CF is still set and reset isn't forced
         * then we maintained suspend power.
index 638f03b8973948171dbf70906a6493e086307d41..9937c5a7efc2d139cf759355fa0530f0dfc0b180 100644 (file)
@@ -231,6 +231,10 @@ static int ehci_pci_setup(struct usb_hcd *hcd)
                        ehci->is_aspeed = 1;
                }
                break;
+       case PCI_VENDOR_ID_ZHAOXIN:
+               if (pdev->device == 0x3104 && (pdev->revision & 0xf0) == 0x90)
+                       ehci->zx_wakeup_clear_needed = 1;
+               break;
        }
 
        /* optional debug port, normally in the first BAR */
index fdd073cc053b89e777701a8e48e04f69be3581ca..ad3f13a3eaf1b729a97e125a736b76b04078cfd3 100644 (file)
@@ -220,6 +220,7 @@ struct ehci_hcd {                   /* one per controller */
        unsigned                imx28_write_fix:1; /* For Freescale i.MX28 */
        unsigned                spurious_oc:1;
        unsigned                is_aspeed:1;
+       unsigned                zx_wakeup_clear_needed:1;
 
        /* required for usb32 quirk */
        #define OHCI_CTRL_HCFS          (3 << 6)
index 1e7dc130c39a657dcb82d3faa18679b5f61e4ee1..f65f1ba2b59292e16bc4fd1ef3f9a68233cbe414 100644 (file)
@@ -1434,7 +1434,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                                }
                                spin_unlock_irqrestore(&xhci->lock, flags);
                                if (!wait_for_completion_timeout(&bus_state->u3exit_done[wIndex],
-                                                                msecs_to_jiffies(100)))
+                                                                msecs_to_jiffies(500)))
                                        xhci_dbg(xhci, "missing U0 port change event for port %d-%d\n",
                                                 hcd->self.busnum, wIndex + 1);
                                spin_lock_irqsave(&xhci->lock, flags);
index f3139ce7b0a93df35d7bae8145247fbc15abb507..06a6b19acaae6869f045197aca926d744c645d5b 100644 (file)
 #define HS_BW_BOUNDARY 6144
 /* usb2 spec section11.18.1: at most 188 FS bytes per microframe */
 #define FS_PAYLOAD_MAX 188
-/*
- * max number of microframes for split transfer,
- * for fs isoc in : 1 ss + 1 idle + 7 cs
- */
-#define TT_MICROFRAMES_MAX 9
 
 #define DBG_BUF_EN     64
 
@@ -242,28 +237,17 @@ static void drop_tt(struct usb_device *udev)
 
 static struct mu3h_sch_ep_info *
 create_sch_ep(struct xhci_hcd_mtk *mtk, struct usb_device *udev,
-             struct usb_host_endpoint *ep, struct xhci_ep_ctx *ep_ctx)
+             struct usb_host_endpoint *ep)
 {
        struct mu3h_sch_ep_info *sch_ep;
        struct mu3h_sch_bw_info *bw_info;
        struct mu3h_sch_tt *tt = NULL;
-       u32 len_bw_budget_table;
 
        bw_info = get_bw_info(mtk, udev, ep);
        if (!bw_info)
                return ERR_PTR(-ENODEV);
 
-       if (is_fs_or_ls(udev->speed))
-               len_bw_budget_table = TT_MICROFRAMES_MAX;
-       else if ((udev->speed >= USB_SPEED_SUPER)
-                       && usb_endpoint_xfer_isoc(&ep->desc))
-               len_bw_budget_table = get_esit(ep_ctx);
-       else
-               len_bw_budget_table = 1;
-
-       sch_ep = kzalloc(struct_size(sch_ep, bw_budget_table,
-                                    len_bw_budget_table),
-                        GFP_KERNEL);
+       sch_ep = kzalloc(sizeof(*sch_ep), GFP_KERNEL);
        if (!sch_ep)
                return ERR_PTR(-ENOMEM);
 
@@ -295,8 +279,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
        u32 mult;
        u32 esit_pkts;
        u32 max_esit_payload;
-       u32 *bwb_table = sch_ep->bw_budget_table;
-       int i;
 
        ep_type = CTX_TO_EP_TYPE(le32_to_cpu(ep_ctx->ep_info2));
        maxpkt = MAX_PACKET_DECODED(le32_to_cpu(ep_ctx->ep_info2));
@@ -332,7 +314,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
                 */
                sch_ep->pkts = max_burst + 1;
                sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts;
-               bwb_table[0] = sch_ep->bw_cost_per_microframe;
        } else if (sch_ep->speed >= USB_SPEED_SUPER) {
                /* usb3_r1 spec section4.4.7 & 4.4.8 */
                sch_ep->cs_count = 0;
@@ -349,7 +330,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
                if (ep_type == INT_IN_EP || ep_type == INT_OUT_EP) {
                        sch_ep->pkts = esit_pkts;
                        sch_ep->num_budget_microframes = 1;
-                       bwb_table[0] = maxpkt * sch_ep->pkts;
                }
 
                if (ep_type == ISOC_IN_EP || ep_type == ISOC_OUT_EP) {
@@ -366,15 +346,8 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
                                DIV_ROUND_UP(esit_pkts, sch_ep->pkts);
 
                        sch_ep->repeat = !!(sch_ep->num_budget_microframes > 1);
-                       sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts;
-
-                       for (i = 0; i < sch_ep->num_budget_microframes - 1; i++)
-                               bwb_table[i] = sch_ep->bw_cost_per_microframe;
-
-                       /* last one <= bw_cost_per_microframe */
-                       bwb_table[i] = maxpkt * esit_pkts
-                                      - i * sch_ep->bw_cost_per_microframe;
                }
+               sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts;
        } else if (is_fs_or_ls(sch_ep->speed)) {
                sch_ep->pkts = 1; /* at most one packet for each microframe */
 
@@ -384,28 +357,7 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
                 */
                sch_ep->cs_count = DIV_ROUND_UP(maxpkt, FS_PAYLOAD_MAX);
                sch_ep->num_budget_microframes = sch_ep->cs_count;
-               sch_ep->bw_cost_per_microframe =
-                       (maxpkt < FS_PAYLOAD_MAX) ? maxpkt : FS_PAYLOAD_MAX;
-
-               /* init budget table */
-               if (ep_type == ISOC_OUT_EP) {
-                       for (i = 0; i < sch_ep->num_budget_microframes; i++)
-                               bwb_table[i] =  sch_ep->bw_cost_per_microframe;
-               } else if (ep_type == INT_OUT_EP) {
-                       /* only first one consumes bandwidth, others as zero */
-                       bwb_table[0] = sch_ep->bw_cost_per_microframe;
-               } else { /* INT_IN_EP or ISOC_IN_EP */
-                       bwb_table[0] = 0; /* start split */
-                       bwb_table[1] = 0; /* idle */
-                       /*
-                        * due to cs_count will be updated according to cs
-                        * position, assign all remainder budget array
-                        * elements as @bw_cost_per_microframe, but only first
-                        * @num_budget_microframes elements will be used later
-                        */
-                       for (i = 2; i < TT_MICROFRAMES_MAX; i++)
-                               bwb_table[i] =  sch_ep->bw_cost_per_microframe;
-               }
+               sch_ep->bw_cost_per_microframe = min_t(u32, maxpkt, FS_PAYLOAD_MAX);
        }
 }
 
@@ -422,7 +374,7 @@ static u32 get_max_bw(struct mu3h_sch_bw_info *sch_bw,
 
                for (j = 0; j < sch_ep->num_budget_microframes; j++) {
                        k = XHCI_MTK_BW_INDEX(base + j);
-                       bw = sch_bw->bus_bw[k] + sch_ep->bw_budget_table[j];
+                       bw = sch_bw->bus_bw[k] + sch_ep->bw_cost_per_microframe;
                        if (bw > max_bw)
                                max_bw = bw;
                }
@@ -433,18 +385,16 @@ static u32 get_max_bw(struct mu3h_sch_bw_info *sch_bw,
 static void update_bus_bw(struct mu3h_sch_bw_info *sch_bw,
        struct mu3h_sch_ep_info *sch_ep, bool used)
 {
+       int bw_updated;
        u32 base;
-       int i, j, k;
+       int i, j;
+
+       bw_updated = sch_ep->bw_cost_per_microframe * (used ? 1 : -1);
 
        for (i = 0; i < sch_ep->num_esit; i++) {
                base = sch_ep->offset + i * sch_ep->esit;
-               for (j = 0; j < sch_ep->num_budget_microframes; j++) {
-                       k = XHCI_MTK_BW_INDEX(base + j);
-                       if (used)
-                               sch_bw->bus_bw[k] += sch_ep->bw_budget_table[j];
-                       else
-                               sch_bw->bus_bw[k] -= sch_ep->bw_budget_table[j];
-               }
+               for (j = 0; j < sch_ep->num_budget_microframes; j++)
+                       sch_bw->bus_bw[XHCI_MTK_BW_INDEX(base + j)] += bw_updated;
        }
 }
 
@@ -464,7 +414,7 @@ static int check_fs_bus_bw(struct mu3h_sch_ep_info *sch_ep, int offset)
                 */
                for (j = 0; j < sch_ep->num_budget_microframes; j++) {
                        k = XHCI_MTK_BW_INDEX(base + j);
-                       tmp = tt->fs_bus_bw[k] + sch_ep->bw_budget_table[j];
+                       tmp = tt->fs_bus_bw[k] + sch_ep->bw_cost_per_microframe;
                        if (tmp > FS_PAYLOAD_MAX)
                                return -ESCH_BW_OVERFLOW;
                }
@@ -538,19 +488,17 @@ static int check_sch_tt(struct mu3h_sch_ep_info *sch_ep, u32 offset)
 static void update_sch_tt(struct mu3h_sch_ep_info *sch_ep, bool used)
 {
        struct mu3h_sch_tt *tt = sch_ep->sch_tt;
+       int bw_updated;
        u32 base;
-       int i, j, k;
+       int i, j;
+
+       bw_updated = sch_ep->bw_cost_per_microframe * (used ? 1 : -1);
 
        for (i = 0; i < sch_ep->num_esit; i++) {
                base = sch_ep->offset + i * sch_ep->esit;
 
-               for (j = 0; j < sch_ep->num_budget_microframes; j++) {
-                       k = XHCI_MTK_BW_INDEX(base + j);
-                       if (used)
-                               tt->fs_bus_bw[k] += sch_ep->bw_budget_table[j];
-                       else
-                               tt->fs_bus_bw[k] -= sch_ep->bw_budget_table[j];
-               }
+               for (j = 0; j < sch_ep->num_budget_microframes; j++)
+                       tt->fs_bus_bw[XHCI_MTK_BW_INDEX(base + j)] += bw_updated;
        }
 
        if (used)
@@ -710,7 +658,7 @@ static int add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev,
 
        xhci_dbg(xhci, "%s %s\n", __func__, decode_ep(ep, udev->speed));
 
-       sch_ep = create_sch_ep(mtk, udev, ep, ep_ctx);
+       sch_ep = create_sch_ep(mtk, udev, ep);
        if (IS_ERR_OR_NULL(sch_ep))
                return -ENOMEM;
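The net effect of the scheduler rework above is that every budgeted microframe now costs the same bw_cost_per_microframe, so the bw_budget_table and its per-slot bookkeeping can go away and both accounting loops collapse into adding one signed delta. A stripped-down sketch of the resulting loop shape (names illustrative):

#include <linux/types.h>

static void example_update_budget(u32 *bus_bw, unsigned int nr_slots,
				  u32 cost, bool used)
{
	/* Fold the add/subtract branch into a delta computed once. */
	int delta = cost * (used ? 1 : -1);
	unsigned int i;

	for (i = 0; i < nr_slots; i++)
		bus_bw[i] += delta;
}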
 
index ffd4b493b4ba74e6802bde3be624cf064d61c3d2..1174a510dd38810fd7403a909024d21cfc8b8713 100644 (file)
@@ -83,7 +83,6 @@ struct mu3h_sch_bw_info {
  *             times; 1: distribute the (bMaxBurst+1)*(Mult+1) packets
  *             according to @pkts and @repeat. normal mode is used by
  *             default
- * @bw_budget_table: table to record bandwidth budget per microframe
  */
 struct mu3h_sch_ep_info {
        u32 esit;
@@ -109,7 +108,6 @@ struct mu3h_sch_ep_info {
        u32 pkts;
        u32 cs_count;
        u32 burst_mode;
-       u32 bw_budget_table[];
 };
 
 #define MU3C_U3_PORT_MAX 4
index 5c351970cdf1ce7eb161dce7655897e9e6cb4274..d7e0e6ebf0800e8e1e23d35d1cd6cc76bf7ed613 100644 (file)
@@ -59,6 +59,7 @@
 #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI            0x9a13
 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI           0x1138
 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI            0x461e
+#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI        0x51ed
 
 #define PCI_DEVICE_ID_AMD_RENOIR_XHCI                  0x1639
 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4                        0x43b9
@@ -266,7 +267,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
             pdev->device == PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI ||
             pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI ||
             pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI ||
-            pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI))
+            pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI ||
+            pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI))
                xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
 
        if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
index d0b6806275e01a92e21fa79e575fc4eb3fea72a7..f9707997969d43ecf52021cfcb4dfb3a90bf035a 100644 (file)
@@ -3141,6 +3141,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
                if (event_loop++ < TRBS_PER_SEGMENT / 2)
                        continue;
                xhci_update_erst_dequeue(xhci, event_ring_deq);
+               event_ring_deq = xhci->event_ring->dequeue;
 
                /* ring is half-full, force isoc trbs to interrupt more often */
                if (xhci->isoc_bei_interval > AVOID_BEI_INTERVAL_MIN)
index c8af2cd2216d6012baa82cdcaa547851294c288c..996958a6565c3ade1ea02fed5f3dde94dc116a28 100644 (file)
@@ -1034,13 +1034,13 @@ static int tegra_xusb_unpowergate_partitions(struct tegra_xusb *tegra)
        int rc;
 
        if (tegra->use_genpd) {
-               rc = pm_runtime_get_sync(tegra->genpd_dev_ss);
+               rc = pm_runtime_resume_and_get(tegra->genpd_dev_ss);
                if (rc < 0) {
                        dev_err(dev, "failed to enable XUSB SS partition\n");
                        return rc;
                }
 
-               rc = pm_runtime_get_sync(tegra->genpd_dev_host);
+               rc = pm_runtime_resume_and_get(tegra->genpd_dev_host);
                if (rc < 0) {
                        dev_err(dev, "failed to enable XUSB Host partition\n");
                        pm_runtime_put_sync(tegra->genpd_dev_ss);
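pm_runtime_resume_and_get() differs from pm_runtime_get_sync() in exactly the way these error paths need: on failure it drops the usage count it took, so the caller does not have to remember a compensating pm_runtime_put_noidle(). A minimal usage sketch:

#include <linux/pm_runtime.h>

static int example_use_device(struct device *dev)
{
	int rc = pm_runtime_resume_and_get(dev);

	if (rc < 0)
		return rc;	/* usage count already balanced on failure */

	/* ... access the device ... */

	pm_runtime_put_sync(dev);
	return 0;
}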
index 642610c78f58b910d46d65fc3b34a046e383d804..25b87e99b4dd4619380922cee8025ecfed248a5e 100644 (file)
@@ -781,6 +781,17 @@ void xhci_shutdown(struct usb_hcd *hcd)
        if (xhci->quirks & XHCI_SPURIOUS_REBOOT)
                usb_disable_xhci_ports(to_pci_dev(hcd->self.sysdev));
 
+       /* Don't poll the roothubs after shutdown. */
+       xhci_dbg(xhci, "%s: stopping usb%d port polling.\n",
+                       __func__, hcd->self.busnum);
+       clear_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+       del_timer_sync(&hcd->rh_timer);
+
+       if (xhci->shared_hcd) {
+               clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
+               del_timer_sync(&xhci->shared_hcd->rh_timer);
+       }
+
        spin_lock_irq(&xhci->lock);
        xhci_halt(xhci);
        /* Workaround for spurious wakeups at shutdown with HSW */
index f929bffdc5d145e05fc473246cb8e4659c40c574..b7f13df007646f58ab14d8a9729d73e88b8bac8b 100644 (file)
@@ -186,16 +186,16 @@ static int eud_probe(struct platform_device *pdev)
 
        chip->dev = &pdev->dev;
 
-       ret = devm_add_action_or_reset(chip->dev, eud_role_switch_release, chip);
-       if (ret)
-               return dev_err_probe(chip->dev, ret,
-                               "failed to add role switch release action\n");
-
        chip->role_sw = usb_role_switch_get(&pdev->dev);
        if (IS_ERR(chip->role_sw))
                return dev_err_probe(chip->dev, PTR_ERR(chip->role_sw),
                                        "failed to get role switch\n");
 
+       ret = devm_add_action_or_reset(chip->dev, eud_role_switch_release, chip);
+       if (ret)
+               return dev_err_probe(chip->dev, ret,
+                               "failed to add role switch release action\n");
+
        chip->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(chip->base))
                return PTR_ERR(chip->base);
index 748139d262633cb1a8bec5c3a5843a2346c6614b..0be8efcda15d5791aafd7cf48e81440afb3744b5 100644 (file)
@@ -71,6 +71,7 @@ static void destroy_priv(struct kref *kref)
 
        dev_dbg(&priv->usbdev->dev, "destroying priv datastructure\n");
        usb_put_dev(priv->usbdev);
+       priv->usbdev = NULL;
        kfree(priv);
 }
 
@@ -736,7 +737,6 @@ static int uss720_probe(struct usb_interface *intf,
        parport_announce_port(pp);
 
        usb_set_intfdata(intf, pp);
-       usb_put_dev(usbdev);
        return 0;
 
 probe_abort:
@@ -754,7 +754,6 @@ static void uss720_disconnect(struct usb_interface *intf)
        usb_set_intfdata(intf, NULL);
        if (pp) {
                priv = pp->private_data;
-               priv->usbdev = NULL;
                priv->pp = NULL;
                dev_dbg(&intf->dev, "parport_remove_port\n");
                parport_remove_port(pp);
index a6b04831b20bf2943974f0bd54bfa5679e5d1ae0..9b8aded3d95e902e044b2dc1bf0bd9639a1e32c4 100644 (file)
@@ -21,10 +21,8 @@ static inline struct ssusb_mtk *otg_sx_to_ssusb(struct otg_switch_mtk *otg_sx)
 
 static void toggle_opstate(struct ssusb_mtk *ssusb)
 {
-       if (!ssusb->otg_switch.is_u3_drd) {
-               mtu3_setbits(ssusb->mac_base, U3D_DEVICE_CONTROL, DC_SESSION);
-               mtu3_setbits(ssusb->mac_base, U3D_POWER_MANAGEMENT, SOFT_CONN);
-       }
+       mtu3_setbits(ssusb->mac_base, U3D_DEVICE_CONTROL, DC_SESSION);
+       mtu3_setbits(ssusb->mac_base, U3D_POWER_MANAGEMENT, SOFT_CONN);
 }
 
 /* only port0 supports dual-role mode */
index 661a229c105ddd171438c71e3ef7f5964cab6d2d..34b9f8140187182f334014125c1f9eb745c627b0 100644 (file)
@@ -268,6 +268,13 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop)
                        return -EPROBE_DEFER;
        }
 
+       nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus");
+       if (PTR_ERR(nop->vbus_draw) == -ENODEV)
+               nop->vbus_draw = NULL;
+       if (IS_ERR(nop->vbus_draw))
+               return dev_err_probe(dev, PTR_ERR(nop->vbus_draw),
+                                    "could not get vbus regulator\n");
+
        nop->dev                = dev;
        nop->phy.dev            = nop->dev;
        nop->phy.label          = "nop-xceiv";
index a27f7efcec6a8ff8f9ae6ed670f5c8754fd7e78f..c374620a486f0a53f3772fb34effb11220fe3624 100644 (file)
@@ -194,6 +194,8 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */
        { USB_DEVICE(0x17A8, 0x0001) }, /* Kamstrup Optical Eye/3-wire */
        { USB_DEVICE(0x17A8, 0x0005) }, /* Kamstrup M-Bus Master MultiPort 250D */
+       { USB_DEVICE(0x17A8, 0x0101) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Int Ant) */
+       { USB_DEVICE(0x17A8, 0x0102) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Ext Ant) */
        { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */
        { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
        { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
index e7755d9cfc61a2cc9222ab6d8a8afa54ad087fc0..152ad882657d7dcc23a8b46da1a4aca764451382 100644 (file)
@@ -432,6 +432,8 @@ static void option_instat_callback(struct urb *urb);
 #define CINTERION_PRODUCT_CLS8                 0x00b0
 #define CINTERION_PRODUCT_MV31_MBIM            0x00b3
 #define CINTERION_PRODUCT_MV31_RMNET           0x00b7
+#define CINTERION_PRODUCT_MV32_WA              0x00f1
+#define CINTERION_PRODUCT_MV32_WB              0x00f2
 
 /* Olivetti products */
 #define OLIVETTI_VENDOR_ID                     0x0b3c
@@ -1217,6 +1219,10 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = NCTRL(0) | RSVD(1) },
        { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1056, 0xff),    /* Telit FD980 */
          .driver_info = NCTRL(2) | RSVD(3) },
+       { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1057, 0xff),    /* Telit FN980 */
+         .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
+       { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1058, 0xff),    /* Telit FN980 (PCIe) */
+         .driver_info = NCTRL(0) | RSVD(1) },
        { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1060, 0xff),    /* Telit LN920 (rmnet) */
          .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
        { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1061, 0xff),    /* Telit LN920 (MBIM) */
@@ -1233,6 +1239,8 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = NCTRL(2) | RSVD(3) },
        { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff),    /* Telit FN990 (ECM) */
          .driver_info = NCTRL(0) | RSVD(1) },
+       { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff),    /* Telit FN990 (PCIe) */
+         .driver_info = RSVD(0) },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
          .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
@@ -1969,6 +1977,10 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = RSVD(3)},
        { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff),
          .driver_info = RSVD(0)},
+       { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA, 0xff),
+         .driver_info = RSVD(3)},
+       { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB, 0xff),
+         .driver_info = RSVD(3)},
        { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100),
          .driver_info = RSVD(4) },
        { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD120),
@@ -2111,10 +2123,14 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = RSVD(3) },
        { USB_DEVICE(0x1508, 0x1001),                                           /* Fibocom NL668 (IOT version) */
          .driver_info = RSVD(4) | RSVD(5) | RSVD(6) },
+       { USB_DEVICE(0x1782, 0x4d10) },                                         /* Fibocom L610 (AT mode) */
+       { USB_DEVICE_INTERFACE_CLASS(0x1782, 0x4d11, 0xff) },                   /* Fibocom L610 (ECM/RNDIS mode) */
        { USB_DEVICE(0x2cb7, 0x0104),                                           /* Fibocom NL678 series */
          .driver_info = RSVD(4) | RSVD(5) },
        { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff),                     /* Fibocom NL678 series */
          .driver_info = RSVD(6) },
+       { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0106, 0xff) },                   /* Fibocom MA510 (ECM mode w/ diag intf.) */
+       { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x010a, 0xff) },                   /* Fibocom MA510 (ECM mode) */
        { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) },    /* Fibocom FG150 Diag */
        { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) },          /* Fibocom FG150 AT */
        { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) },                   /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */
index 88b284d61681ae76d0eec83f32d7eca0c53d662d..1d878d05a6584d473ab143fba8d39ce9269baaaa 100644 (file)
@@ -106,6 +106,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) },
        { USB_DEVICE(HP_VENDOR_ID, HP_LCM960_PRODUCT_ID) },
        { USB_DEVICE(HP_VENDOR_ID, HP_LM920_PRODUCT_ID) },
+       { USB_DEVICE(HP_VENDOR_ID, HP_LM930_PRODUCT_ID) },
        { USB_DEVICE(HP_VENDOR_ID, HP_LM940_PRODUCT_ID) },
        { USB_DEVICE(HP_VENDOR_ID, HP_TD620_PRODUCT_ID) },
        { USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) },
index c5406452b774ef9dc6cdc986f0aa4ce6ead1b866..732f9b13ad5d59cfbff2edb9c6e4dc266eeb63df 100644 (file)
 #define HP_TD620_PRODUCT_ID    0x0956
 #define HP_LD960_PRODUCT_ID    0x0b39
 #define HP_LD381_PRODUCT_ID    0x0f7f
+#define HP_LM930_PRODUCT_ID    0x0f9b
 #define HP_LCM220_PRODUCT_ID   0x3139
 #define HP_LCM960_PRODUCT_ID   0x3239
 #define HP_LD220_PRODUCT_ID    0x3524
index c18bf8164bc2e9676247f36e672f2c9cea489e30..586ef5551e76e41e5ac0a552429e3979bb03185b 100644 (file)
@@ -166,6 +166,8 @@ static const struct usb_device_id id_table[] = {
        {DEVICE_SWI(0x1199, 0x9090)},   /* Sierra Wireless EM7565 QDL */
        {DEVICE_SWI(0x1199, 0x9091)},   /* Sierra Wireless EM7565 */
        {DEVICE_SWI(0x1199, 0x90d2)},   /* Sierra Wireless EM9191 QDL */
+       {DEVICE_SWI(0x1199, 0xc080)},   /* Sierra Wireless EM7590 QDL */
+       {DEVICE_SWI(0x1199, 0xc081)},   /* Sierra Wireless EM7590 */
        {DEVICE_SWI(0x413c, 0x81a2)},   /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
        {DEVICE_SWI(0x413c, 0x81a3)},   /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
        {DEVICE_SWI(0x413c, 0x81a4)},   /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
index da65d14c9ed5e2a8c1851ceac939108fb7b0041c..06aad0d727ddcdd57962f08acbc666d2da9fbff6 100644 (file)
@@ -584,9 +584,8 @@ static int firm_send_command(struct usb_serial_port *port, __u8 command,
                switch (command) {
                case WHITEHEAT_GET_DTR_RTS:
                        info = usb_get_serial_port_data(port);
-                       memcpy(&info->mcr, command_info->result_buffer,
-                                       sizeof(struct whiteheat_dr_info));
-                               break;
+                       info->mcr = command_info->result_buffer[0];
+                       break;
                }
        }
 exit:
index 8f921213b17df324bda50a6bb2fc5afdf6331e72..ba24847fb2450a4b07d591185b6b7b3102a36cde 100644 (file)
@@ -56,6 +56,7 @@ config TYPEC_RT1719
        tristate "Richtek RT1719 Sink Only Type-C controller driver"
        depends on USB_ROLE_SWITCH || !USB_ROLE_SWITCH
        depends on I2C
+       depends on POWER_SUPPLY
        select REGMAP_I2C
        help
          Say Y or M here if your system has Richtek RT1719 sink only
index e07d26a3cd8e1d5d75bbdb50cd11243137439409..f33e08eb767094cad85b499e0f283cbd803c6848 100644 (file)
@@ -877,7 +877,7 @@ static int tcpci_remove(struct i2c_client *client)
        /* Disable chip interrupts before unregistering port */
        err = tcpci_write16(chip->tcpci, TCPC_ALERT_MASK, 0);
        if (err < 0)
-               return err;
+               dev_warn(&client->dev, "Failed to disable irqs (%pe)\n", ERR_PTR(err));
 
        tcpci_unregister_port(chip->tcpci);
 
index f1bd9e09bc87f191962c65b43b904d32ffabe796..8a952eaf9016323aa95af33151e6776d0340c827 100644 (file)
@@ -15,6 +15,9 @@
 
 #include "tcpci.h"
 
+#define MT6360_REG_PHYCTRL1    0x80
+#define MT6360_REG_PHYCTRL3    0x82
+#define MT6360_REG_PHYCTRL7    0x86
 #define MT6360_REG_VCONNCTRL1  0x8C
 #define MT6360_REG_MODECTRL2   0x8F
 #define MT6360_REG_SWRESET     0xA0
@@ -22,6 +25,8 @@
 #define MT6360_REG_DRPCTRL1    0xA2
 #define MT6360_REG_DRPCTRL2    0xA3
 #define MT6360_REG_I2CTORST    0xBF
+#define MT6360_REG_PHYCTRL11   0xCA
+#define MT6360_REG_RXCTRL1     0xCE
 #define MT6360_REG_RXCTRL2     0xCF
 #define MT6360_REG_CTDCTRL2    0xEC
 
@@ -106,6 +111,27 @@ static int mt6360_tcpc_init(struct tcpci *tcpci, struct tcpci_data *tdata)
        if (ret)
                return ret;
 
+       /* BMC PHY */
+       ret = mt6360_tcpc_write16(regmap, MT6360_REG_PHYCTRL1, 0x3A70);
+       if (ret)
+               return ret;
+
+       ret = regmap_write(regmap, MT6360_REG_PHYCTRL3,  0x82);
+       if (ret)
+               return ret;
+
+       ret = regmap_write(regmap, MT6360_REG_PHYCTRL7, 0x36);
+       if (ret)
+               return ret;
+
+       ret = mt6360_tcpc_write16(regmap, MT6360_REG_PHYCTRL11, 0x3C60);
+       if (ret)
+               return ret;
+
+       ret = regmap_write(regmap, MT6360_REG_RXCTRL1, 0xE8);
+       if (ret)
+               return ret;
+
        /* Set shipping mode off, AUTOIDLE on */
        return regmap_write(regmap, MT6360_REG_MODECTRL2, 0x7A);
 }
index f0c2fa19f3e0f6e9ab4ac0cade47bbce7af8e5db..a6045aef0d04f5d6acf92ea4bc680c66e019f4a2 100644 (file)
@@ -949,6 +949,8 @@ static int ucsi_dr_swap(struct typec_port *port, enum typec_data_role role)
             role == TYPEC_HOST))
                goto out_unlock;
 
+       reinit_completion(&con->complete);
+
        command = UCSI_SET_UOR | UCSI_CONNECTOR_NUMBER(con->num);
        command |= UCSI_SET_UOR_ROLE(role);
        command |= UCSI_SET_UOR_ACCEPT_ROLE_SWAPS;
@@ -956,14 +958,18 @@ static int ucsi_dr_swap(struct typec_port *port, enum typec_data_role role)
        if (ret < 0)
                goto out_unlock;
 
+       mutex_unlock(&con->lock);
+
        if (!wait_for_completion_timeout(&con->complete,
-                                       msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS)))
-               ret = -ETIMEDOUT;
+                                        msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS)))
+               return -ETIMEDOUT;
+
+       return 0;
 
 out_unlock:
        mutex_unlock(&con->lock);
 
-       return ret < 0 ? ret : 0;
+       return ret;
 }
 
 static int ucsi_pr_swap(struct typec_port *port, enum typec_role role)
@@ -985,6 +991,8 @@ static int ucsi_pr_swap(struct typec_port *port, enum typec_role role)
        if (cur_role == role)
                goto out_unlock;
 
+       reinit_completion(&con->complete);
+
        command = UCSI_SET_PDR | UCSI_CONNECTOR_NUMBER(con->num);
        command |= UCSI_SET_PDR_ROLE(role);
        command |= UCSI_SET_PDR_ACCEPT_ROLE_SWAPS;
@@ -992,11 +1000,13 @@ static int ucsi_pr_swap(struct typec_port *port, enum typec_role role)
        if (ret < 0)
                goto out_unlock;
 
+       mutex_unlock(&con->lock);
+
        if (!wait_for_completion_timeout(&con->complete,
-                               msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) {
-               ret = -ETIMEDOUT;
-               goto out_unlock;
-       }
+                                        msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS)))
+               return -ETIMEDOUT;
+
+       mutex_lock(&con->lock);
 
        /* Something has gone wrong while swapping the role */
        if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) !=
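Both swap paths above now rearm the completion under con->lock but release the lock before sleeping, because the path that signals the completion may itself need the connector lock; waiting with it held risks a guaranteed timeout. A condensed, self-contained sketch of the pattern (all names hypothetical):

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/mutex.h>

static int example_send_command(void)
{
	return 0;	/* stand-in for the real command submission */
}

static int example_swap(struct mutex *lock, struct completion *done,
			unsigned int timeout_ms)
{
	int ret;

	mutex_lock(lock);
	reinit_completion(done);	/* rearm before the command can complete */
	ret = example_send_command();
	mutex_unlock(lock);
	if (ret < 0)
		return ret;

	/* Sleep unlocked so the completing path can take the lock. */
	if (!wait_for_completion_timeout(done, msecs_to_jiffies(timeout_ms)))
		return -ETIMEDOUT;

	return 0;
}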
index 2f4fb09f1e89fd96b35e2700894dcf47698bf88a..e0de44000d92d177a8155174b49061a73b13002f 100644 (file)
@@ -161,8 +161,10 @@ struct mlx5_vdpa_net {
        struct mlx5_flow_handle *rx_rule_mcast;
        bool setup;
        u32 cur_num_vqs;
+       u32 rqt_size;
        struct notifier_block nb;
        struct vdpa_callback config_cb;
+       struct mlx5_vdpa_wq_ent cvq_ent;
 };
 
 static void free_resources(struct mlx5_vdpa_net *ndev);
@@ -203,17 +205,12 @@ static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
        return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
 }
 
-static inline u32 mlx5_vdpa_max_qps(int max_vqs)
-{
-       return max_vqs / 2;
-}
-
 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
 {
        if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
                return 2;
 
-       return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
+       return mvdev->max_vqs;
 }
 
 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
@@ -1235,25 +1232,13 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
 static int create_rqt(struct mlx5_vdpa_net *ndev)
 {
        __be32 *list;
-       int max_rqt;
        void *rqtc;
        int inlen;
        void *in;
        int i, j;
        int err;
-       int num;
-
-       if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
-               num = 1;
-       else
-               num = ndev->cur_num_vqs / 2;
-
-       max_rqt = min_t(int, roundup_pow_of_two(num),
-                       1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
-       if (max_rqt < 1)
-               return -EOPNOTSUPP;
 
-       inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
+       inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num);
        in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;
@@ -1262,12 +1247,12 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
 
        MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
-       MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
+       MLX5_SET(rqtc, rqtc, rqt_max_size, ndev->rqt_size);
        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
-       for (i = 0, j = 0; i < max_rqt; i++, j += 2)
-               list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
+       for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2)
+               list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
 
-       MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
+       MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size);
        err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
        kfree(in);
        if (err)
@@ -1281,19 +1266,13 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
 {
        __be32 *list;
-       int max_rqt;
        void *rqtc;
        int inlen;
        void *in;
        int i, j;
        int err;
 
-       max_rqt = min_t(int, roundup_pow_of_two(ndev->cur_num_vqs / 2),
-                       1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
-       if (max_rqt < 1)
-               return -EOPNOTSUPP;
-
-       inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
+       inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num);
        in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;
@@ -1304,10 +1283,10 @@ static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
        MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
 
        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
-       for (i = 0, j = 0; i < max_rqt; i++, j += 2)
+       for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2)
                list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
 
-       MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
+       MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size);
        err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
        kfree(in);
        if (err)
@@ -1624,7 +1603,7 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
 
                newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
                if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
-                   newqps > mlx5_vdpa_max_qps(mvdev->max_vqs))
+                   newqps > ndev->rqt_size)
                        break;
 
                if (ndev->cur_num_vqs == 2 * newqps) {
@@ -1658,6 +1637,12 @@ static void mlx5_cvq_kick_handler(struct work_struct *work)
        mvdev = wqent->mvdev;
        ndev = to_mlx5_vdpa_ndev(mvdev);
        cvq = &mvdev->cvq;
+
+       mutex_lock(&ndev->reslock);
+
+       if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
+               goto out;
+
        if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
                goto out;
 
@@ -1696,9 +1681,13 @@ static void mlx5_cvq_kick_handler(struct work_struct *work)
 
                if (vringh_need_notify_iotlb(&cvq->vring))
                        vringh_notify(&cvq->vring);
+
+               queue_work(mvdev->wq, &wqent->work);
+               break;
        }
+
 out:
-       kfree(wqent);
+       mutex_unlock(&ndev->reslock);
 }
 
 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
@@ -1706,7 +1695,6 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq;
-       struct mlx5_vdpa_wq_ent *wqent;
 
        if (!is_index_valid(mvdev, idx))
                return;
@@ -1715,13 +1703,7 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
                if (!mvdev->wq || !mvdev->cvq.ready)
                        return;
 
-               wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
-               if (!wqent)
-                       return;
-
-               wqent->mvdev = mvdev;
-               INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
-               queue_work(mvdev->wq, &wqent->work);
+               queue_work(mvdev->wq, &ndev->cvq_ent.work);
                return;
        }
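The kick path above no longer allocates a work entry with GFP_ATOMIC on every doorbell (an allocation that could fail and silently drop the kick); it queues a single work_struct embedded in the device, and the handler in the earlier hunk requeues itself to keep draining commands. A stripped-down sketch of the preallocated-work pattern:

#include <linux/workqueue.h>

struct example_ndev {
	struct work_struct cvq_work;	/* initialized once at device add */
};

static void example_cvq_handler(struct work_struct *work)
{
	/* ... process one batch of control commands, requeue if more ... */
}

static void example_setup(struct example_ndev *n)
{
	INIT_WORK(&n->cvq_work, example_cvq_handler);
}

static void example_kick(struct example_ndev *n, struct workqueue_struct *wq)
{
	/* No allocation on the hot path: queue_work() is a no-op if pending. */
	queue_work(wq, &n->cvq_work);
}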
 
@@ -1985,7 +1967,7 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
        int err;
        int i;
 
-       for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) {
+       for (i = 0; i < mvdev->max_vqs; i++) {
                err = setup_vq(ndev, &ndev->vqs[i]);
                if (err)
                        goto err_vq;
@@ -2056,9 +2038,11 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
 
        ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
        if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
-               ndev->cur_num_vqs = 2 * mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
+               ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
        else
-               ndev->cur_num_vqs = 2;
+               ndev->rqt_size = 1;
+
+       ndev->cur_num_vqs = 2 * ndev->rqt_size;
 
        update_cvq_info(mvdev);
        return err;
@@ -2180,7 +2164,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
                goto err_mr;
 
        if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
-               return 0;
+               goto err_mr;
 
        restore_channels_info(ndev);
        err = setup_driver(mvdev);
@@ -2195,12 +2179,14 @@ err_mr:
        return err;
 }
 
+/* reslock must be held for this function */
 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
 {
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        int err;
 
-       mutex_lock(&ndev->reslock);
+       WARN_ON(!mutex_is_locked(&ndev->reslock));
+
        if (ndev->setup) {
                mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
                err = 0;
@@ -2230,7 +2216,6 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev)
                goto err_fwd;
        }
        ndev->setup = true;
-       mutex_unlock(&ndev->reslock);
 
        return 0;
 
@@ -2241,23 +2226,23 @@ err_tir:
 err_rqt:
        teardown_virtqueues(ndev);
 out:
-       mutex_unlock(&ndev->reslock);
        return err;
 }
 
+/* reslock must be held for this function */
 static void teardown_driver(struct mlx5_vdpa_net *ndev)
 {
-       mutex_lock(&ndev->reslock);
+
+       WARN_ON(!mutex_is_locked(&ndev->reslock));
+
        if (!ndev->setup)
-               goto out;
+               return;
 
        remove_fwd_to_tir(ndev);
        destroy_tir(ndev);
        destroy_rqt(ndev);
        teardown_virtqueues(ndev);
        ndev->setup = false;
-out:
-       mutex_unlock(&ndev->reslock);
 }
 
 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
@@ -2278,6 +2263,8 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
 
        print_status(mvdev, status, true);
 
+       mutex_lock(&ndev->reslock);
+
        if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
                if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
                        err = setup_driver(mvdev);
@@ -2287,16 +2274,19 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
                        }
                } else {
                        mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
-                       return;
+                       goto err_clear;
                }
        }
 
        ndev->mvdev.status = status;
+       mutex_unlock(&ndev->reslock);
        return;
 
 err_setup:
        mlx5_vdpa_destroy_mr(&ndev->mvdev);
        ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
+err_clear:
+       mutex_unlock(&ndev->reslock);
 }
 
 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
@@ -2306,6 +2296,8 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
 
        print_status(mvdev, 0, true);
        mlx5_vdpa_info(mvdev, "performing device reset\n");
+
+       mutex_lock(&ndev->reslock);
        teardown_driver(ndev);
        clear_vqs_ready(ndev);
        mlx5_vdpa_destroy_mr(&ndev->mvdev);
@@ -2318,6 +2310,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
                if (mlx5_vdpa_create_mr(mvdev, NULL))
                        mlx5_vdpa_warn(mvdev, "create MR failed\n");
        }
+       mutex_unlock(&ndev->reslock);
 
        return 0;
 }
@@ -2353,19 +2346,24 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
 {
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        bool change_map;
        int err;
 
+       mutex_lock(&ndev->reslock);
+
        err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
        if (err) {
                mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
-               return err;
+               goto err;
        }
 
        if (change_map)
-               return mlx5_vdpa_change_map(mvdev, iotlb);
+               err = mlx5_vdpa_change_map(mvdev, iotlb);
 
-       return 0;
+err:
+       mutex_unlock(&ndev->reslock);
+       return err;
 }
 
 static void mlx5_vdpa_free(struct vdpa_device *vdev)
@@ -2511,7 +2509,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
        struct mlx5_vdpa_virtqueue *mvq;
        int i;
 
-       for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
+       for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
                mvq = &ndev->vqs[i];
                memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
                mvq->index = i;
@@ -2653,7 +2651,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
                return -EOPNOTSUPP;
        }
 
-       max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
+       max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
+                       1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
        if (max_vqs < 2) {
                dev_warn(mdev->device,
                         "%d virtqueues are supported. At least 2 are required\n",
@@ -2724,7 +2723,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
                ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
        }
 
-       config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
+       config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
        mvdev->vdev.dma_dev = &mdev->pdev->dev;
        err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
        if (err)
@@ -2740,6 +2739,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
        if (err)
                goto err_mr;
 
+       ndev->cvq_ent.mvdev = mvdev;
+       INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
        mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
        if (!mvdev->wq) {
                err = -ENOMEM;
@@ -2749,7 +2750,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
        ndev->nb.notifier_call = event_handler;
        mlx5_notifier_register(mdev, &ndev->nb);
        mvdev->vdev.mdev = &mgtdev->mgtdev;
-       err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs) + 1);
+       err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
        if (err)
                goto err_reg;
 
index b7bb16f92ac6282a9965a5aa3b6709b4e382c999..06b6f3594a1316e020dc294a8ac102d009504e31 100644 (file)
@@ -36,6 +36,10 @@ static bool nointxmask;
 static bool disable_vga;
 static bool disable_idle_d3;
 
+/* List of PFs that vfio_pci_core_sriov_configure() has been called on */
+static DEFINE_MUTEX(vfio_pci_sriov_pfs_mutex);
+static LIST_HEAD(vfio_pci_sriov_pfs);
+
 static inline bool vfio_vga_disabled(void)
 {
 #ifdef CONFIG_VFIO_PCI_VGA
@@ -434,47 +438,17 @@ out:
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_disable);
 
-static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vdev)
-{
-       struct pci_dev *physfn = pci_physfn(vdev->pdev);
-       struct vfio_device *pf_dev;
-
-       if (!vdev->pdev->is_virtfn)
-               return NULL;
-
-       pf_dev = vfio_device_get_from_dev(&physfn->dev);
-       if (!pf_dev)
-               return NULL;
-
-       if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) {
-               vfio_device_put(pf_dev);
-               return NULL;
-       }
-
-       return container_of(pf_dev, struct vfio_pci_core_device, vdev);
-}
-
-static void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int val)
-{
-       struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev);
-
-       if (!pf_vdev)
-               return;
-
-       mutex_lock(&pf_vdev->vf_token->lock);
-       pf_vdev->vf_token->users += val;
-       WARN_ON(pf_vdev->vf_token->users < 0);
-       mutex_unlock(&pf_vdev->vf_token->lock);
-
-       vfio_device_put(&pf_vdev->vdev);
-}
-
 void vfio_pci_core_close_device(struct vfio_device *core_vdev)
 {
        struct vfio_pci_core_device *vdev =
                container_of(core_vdev, struct vfio_pci_core_device, vdev);
 
-       vfio_pci_vf_token_user_add(vdev, -1);
+       if (vdev->sriov_pf_core_dev) {
+               mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock);
+               WARN_ON(!vdev->sriov_pf_core_dev->vf_token->users);
+               vdev->sriov_pf_core_dev->vf_token->users--;
+               mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock);
+       }
        vfio_spapr_pci_eeh_release(vdev->pdev);
        vfio_pci_core_disable(vdev);
 
@@ -495,7 +469,12 @@ void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev)
 {
        vfio_pci_probe_mmaps(vdev);
        vfio_spapr_pci_eeh_open(vdev->pdev);
-       vfio_pci_vf_token_user_add(vdev, 1);
+
+       if (vdev->sriov_pf_core_dev) {
+               mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock);
+               vdev->sriov_pf_core_dev->vf_token->users++;
+               mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock);
+       }
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable);
 
@@ -1583,11 +1562,8 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev,
         *
         * If the VF token is provided but unused, an error is generated.
         */
-       if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token)
-               return 0; /* No VF token provided or required */
-
        if (vdev->pdev->is_virtfn) {
-               struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev);
+               struct vfio_pci_core_device *pf_vdev = vdev->sriov_pf_core_dev;
                bool match;
 
                if (!pf_vdev) {
@@ -1600,7 +1576,6 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev,
                }
 
                if (!vf_token) {
-                       vfio_device_put(&pf_vdev->vdev);
                        pci_info_ratelimited(vdev->pdev,
                                "VF token required to access device\n");
                        return -EACCES;
@@ -1610,8 +1585,6 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev,
                match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
                mutex_unlock(&pf_vdev->vf_token->lock);
 
-               vfio_device_put(&pf_vdev->vdev);
-
                if (!match) {
                        pci_info_ratelimited(vdev->pdev,
                                "Incorrect VF token provided for device\n");
@@ -1732,8 +1705,30 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb,
 static int vfio_pci_vf_init(struct vfio_pci_core_device *vdev)
 {
        struct pci_dev *pdev = vdev->pdev;
+       struct vfio_pci_core_device *cur;
+       struct pci_dev *physfn;
        int ret;
 
+       if (pdev->is_virtfn) {
+               /*
+                * If this VF was created by our vfio_pci_core_sriov_configure()
+                * then we can find the PF vfio_pci_core_device now, and due to
+                * the locking in pci_disable_sriov() it cannot change until
+                * this VF device driver is removed.
+                */
+               physfn = pci_physfn(vdev->pdev);
+               mutex_lock(&vfio_pci_sriov_pfs_mutex);
+               list_for_each_entry(cur, &vfio_pci_sriov_pfs, sriov_pfs_item) {
+                       if (cur->pdev == physfn) {
+                               vdev->sriov_pf_core_dev = cur;
+                               break;
+                       }
+               }
+               mutex_unlock(&vfio_pci_sriov_pfs_mutex);
+               return 0;
+       }
+
+       /* Not a SRIOV PF */
        if (!pdev->is_physfn)
                return 0;
 
@@ -1805,6 +1800,7 @@ void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev,
        INIT_LIST_HEAD(&vdev->ioeventfds_list);
        mutex_init(&vdev->vma_lock);
        INIT_LIST_HEAD(&vdev->vma_list);
+       INIT_LIST_HEAD(&vdev->sriov_pfs_item);
        init_rwsem(&vdev->memory_lock);
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_init_device);
@@ -1896,7 +1892,7 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev)
 {
        struct pci_dev *pdev = vdev->pdev;
 
-       pci_disable_sriov(pdev);
+       vfio_pci_core_sriov_configure(pdev, 0);
 
        vfio_unregister_group_dev(&vdev->vdev);
 
@@ -1935,21 +1931,49 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_aer_err_detected);
 
 int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
 {
+       struct vfio_pci_core_device *vdev;
        struct vfio_device *device;
        int ret = 0;
 
+       device_lock_assert(&pdev->dev);
+
        device = vfio_device_get_from_dev(&pdev->dev);
        if (!device)
                return -ENODEV;
 
-       if (nr_virtfn == 0)
-               pci_disable_sriov(pdev);
-       else
+       vdev = container_of(device, struct vfio_pci_core_device, vdev);
+
+       if (nr_virtfn) {
+               mutex_lock(&vfio_pci_sriov_pfs_mutex);
+               /*
+                * The thread that adds the vdev to the list is the only thread
+                * that gets to call pci_enable_sriov(), and we will only allow
+                * it to be called once without going through
+                * pci_disable_sriov().
+                */
+               if (!list_empty(&vdev->sriov_pfs_item)) {
+                       ret = -EINVAL;
+                       goto out_unlock;
+               }
+               list_add_tail(&vdev->sriov_pfs_item, &vfio_pci_sriov_pfs);
+               mutex_unlock(&vfio_pci_sriov_pfs_mutex);
                ret = pci_enable_sriov(pdev, nr_virtfn);
+               if (ret)
+                       goto out_del;
+               ret = nr_virtfn;
+               goto out_put;
+       }
 
-       vfio_device_put(device);
+       pci_disable_sriov(pdev);
 
-       return ret < 0 ? ret : nr_virtfn;
+out_del:
+       mutex_lock(&vfio_pci_sriov_pfs_mutex);
+       list_del_init(&vdev->sriov_pfs_item);
+out_unlock:
+       mutex_unlock(&vfio_pci_sriov_pfs_mutex);
+out_put:
+       vfio_device_put(device);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure);
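
The device_lock_assert() added above encodes the calling contract: the PCI
core takes the device lock before invoking a driver's sriov_configure callback
from the sriov_numvfs sysfs store, which is what serializes the list add/del
against a concurrent disable. A hedged sketch of that contract, with
illustrative names:

#include <linux/device.h>
#include <linux/pci.h>
#include <linux/vfio_pci_core.h>

/* Simplified model of the sysfs store path (not the real PCI core code). */
static int example_set_numvfs(struct pci_dev *pdev, int nr_virtfn)
{
	int ret;

	device_lock(&pdev->dev);
	ret = vfio_pci_core_sriov_configure(pdev, nr_virtfn);
	device_unlock(&pdev->dev);
	return ret;
}
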
 
index 792ab5f2364713117914e8e42ccbcb326a194d08..297b5db474545e37619181549618c845a19ba51a 100644 (file)
@@ -1450,13 +1450,9 @@ err:
        return ERR_PTR(r);
 }
 
-static struct ptr_ring *get_tap_ptr_ring(int fd)
+static struct ptr_ring *get_tap_ptr_ring(struct file *file)
 {
        struct ptr_ring *ring;
-       struct file *file = fget(fd);
-
-       if (!file)
-               return NULL;
        ring = tun_get_tx_ring(file);
        if (!IS_ERR(ring))
                goto out;
@@ -1465,7 +1461,6 @@ static struct ptr_ring *get_tap_ptr_ring(int fd)
                goto out;
        ring = NULL;
 out:
-       fput(file);
        return ring;
 }
 
@@ -1552,8 +1547,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
                r = vhost_net_enable_vq(n, vq);
                if (r)
                        goto err_used;
-               if (index == VHOST_NET_VQ_RX)
-                       nvq->rx_ring = get_tap_ptr_ring(fd);
+               if (index == VHOST_NET_VQ_RX) {
+                       if (sock)
+                               nvq->rx_ring = get_tap_ptr_ring(sock->file);
+                       else
+                               nvq->rx_ring = NULL;
+               }
 
                oldubufs = nvq->ubufs;
                nvq->ubufs = ubufs;
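
These vhost hunks derive the RX ring from the sock->file reference the
set-backend path already holds instead of doing a second fget(fd). A
standalone userspace model of the descriptor-reuse hazard that a second
lookup invites (illustrative only):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/null", O_RDONLY);
	int other = open("/dev/zero", O_RDONLY);

	/* first lookup of fd happens here */
	dup2(other, fd);	/* fd now names a different file */
	/* a second lookup of fd would see /dev/zero, not /dev/null */
	printf("fd %d was retargeted between lookups\n", fd);
	close(fd);
	close(other);
	return 0;
}
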
index edf169d0816e6290f823729cad218642debb5b2b..eb3e47c58c5f7f8c7e1b4a698577d2938327e314 100644 (file)
@@ -566,6 +566,9 @@ static int arkfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 {
        int rv, mem, step;
 
+       if (!var->pixclock)
+               return -EINVAL;
+
        /* Find appropriate format */
        rv = svga_match_format (arkfb_formats, var, NULL);
        if (rv < 0)
index 6ff16d3132e5f1a16af0459c1701ce8d8c18ed05..b26c81233b6b7c4ff3a570435486cb4c353611fd 100644 (file)
@@ -68,7 +68,6 @@
 #ifdef CONFIG_PPC_PMAC
 #include <asm/machdep.h>
 #include <asm/pmac_feature.h>
-#include <asm/prom.h>
 #include "../macmodes.h"
 #endif
 
index 1aef3d6ebd8809f5134381ab2d5b9878141a4c0d..a3e6faed7745a4b2c5ab877b152ecce97a777091 100644 (file)
@@ -79,7 +79,6 @@
 
 #ifdef __powerpc__
 #include <asm/machdep.h>
-#include <asm/prom.h>
 #include "../macmodes.h"
 #endif
 #ifdef __sparc__
index b5fbd5329652865b573f85fa9b71f190490a6502..97a5972f5b1fb8a0f36228e7e1d2f56baf96d074 100644 (file)
@@ -22,7 +22,6 @@
 
 #ifdef CONFIG_PPC_PMAC
 #include <asm/machdep.h>
-#include <asm/prom.h>
 #include <asm/pmac_feature.h>
 #endif
 
index 93f403cbb41507963a34f293fbd564f3d5694fe8..91d81b5762313ee258b088659c91d6e288227d89 100644 (file)
@@ -21,7 +21,7 @@
 
 #include <asm/io.h>
 
-#if defined(CONFIG_PPC) || defined(CONFIG_SPARC)
+#ifdef CONFIG_SPARC
 #include <asm/prom.h>
 #endif
 
index c5d15c6db28767d04dd7a0d3ab55fbebd78adabb..771ce1f769515554ee65604cd63e5fc66410916d 100644 (file)
@@ -268,8 +268,7 @@ static int clps711x_fb_probe(struct platform_device *pdev)
                goto out_fb_release;
        }
 
-       cfb->syscon =
-               syscon_regmap_lookup_by_compatible("cirrus,ep7209-syscon1");
+       cfb->syscon = syscon_regmap_lookup_by_phandle(np, "syscon");
        if (IS_ERR(cfb->syscon)) {
                ret = PTR_ERR(cfb->syscon);
                goto out_fb_release;
index bd59e7b11ed5305d922b42d33d0dd01c98edb09f..aba46118b208be14b26ea6b8d1b5e7c4cc26b69e 100644 (file)
@@ -47,9 +47,6 @@
 #include <linux/nvram.h>
 #include <linux/adb.h>
 #include <linux/cuda.h>
-#ifdef CONFIG_PPC_PMAC
-#include <asm/prom.h>
-#endif
 #ifdef CONFIG_BOOTX_TEXT
 #include <asm/btext.h>
 #endif
index 34d6bb1bf82eed3717eb78a3997e95408e2eb4bd..a6bb0e4382167e121c49bfeb0445197eab5ecea4 100644 (file)
@@ -1579,7 +1579,14 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
                         * If it's not a platform device, at least print a warning. A
                         * fix would add code to remove the device from the system.
                         */
-                       if (dev_is_platform(device)) {
+                       if (!device) {
+                               /* TODO: Represent each OF framebuffer as its own
+                                * device in the device hierarchy. For now, offb
+                                * doesn't have such a device, so unregister the
+                                * framebuffer as before without warning.
+                                */
+                               do_unregister_framebuffer(registered_fb[i]);
+                       } else if (dev_is_platform(device)) {
                                registered_fb[i]->forced_out = true;
                                platform_device_unregister(to_platform_device(device));
                        } else {
index 26892940c21369bb3f62eb1ae863960f7d348d45..82e31a2d845e1925eca50a26c25d474001a57c18 100644 (file)
@@ -80,6 +80,10 @@ void framebuffer_release(struct fb_info *info)
 {
        if (!info)
                return;
+
+       if (WARN_ON(refcount_read(&info->count)))
+               return;
+
        kfree(info->apertures);
        kfree(info);
 }
index ea42ba6445b2ddaad7cd1a699ff793dec1bceb4c..b3d5f884c5445b68759b8f18bc7ef8047e10ec30 100644 (file)
@@ -243,6 +243,10 @@ error:
 static inline void efifb_show_boot_graphics(struct fb_info *info) {}
 #endif
 
+/*
+ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
+ * of unregister_framebuffer() or fb_release(). Do any cleanup here.
+ */
 static void efifb_destroy(struct fb_info *info)
 {
        if (efifb_pci_dev)
@@ -254,10 +258,13 @@ static void efifb_destroy(struct fb_info *info)
                else
                        memunmap(info->screen_base);
        }
+
        if (request_mem_succeeded)
                release_mem_region(info->apertures->ranges[0].base,
                                   info->apertures->ranges[0].size);
        fb_dealloc_cmap(&info->cmap);
+
+       framebuffer_release(info);
 }
 
 static const struct fb_ops efifb_ops = {
@@ -620,9 +627,9 @@ static int efifb_remove(struct platform_device *pdev)
 {
        struct fb_info *info = platform_get_drvdata(pdev);
 
+       /* efifb_destroy takes care of info cleanup */
        unregister_framebuffer(info);
        sysfs_remove_groups(&pdev->dev.kobj, efifb_groups);
-       framebuffer_release(info);
 
        return 0;
 }
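
This efifb change, together with the simplefb and vesafb hunks below, moves
the final framebuffer_release() into fb_ops.fb_destroy so fb_info is freed
only when the last reference is dropped. A simplified sketch of the resulting
pattern (hypothetical driver names):

#include <linux/fb.h>
#include <linux/platform_device.h>

static void example_fb_destroy(struct fb_info *info)
{
	/* ...unmap screen memory, release regions, free the cmap... */
	framebuffer_release(info);	/* the last put_fb_info() frees fb_info */
}

static int example_fb_remove(struct platform_device *pdev)
{
	struct fb_info *info = platform_get_drvdata(pdev);

	unregister_framebuffer(info);
	/*
	 * No framebuffer_release() here: a reference may still be live,
	 * which is exactly what the new WARN_ON(refcount_read(&info->count))
	 * in framebuffer_release() would catch.
	 */
	return 0;
}
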
index 52cce0db8bd344a87d01ed0d9433916e955d9555..09dd85553d4f3cf95884eb81851555638162a2a0 100644 (file)
@@ -657,6 +657,9 @@ static int i740fb_decode_var(const struct fb_var_screeninfo *var,
 
 static int i740fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 {
+       if (!var->pixclock)
+               return -EINVAL;
+
        switch (var->bits_per_pixel) {
        case 8:
                var->red.offset = var->green.offset = var->blue.offset = 0;
@@ -740,7 +743,7 @@ static int i740fb_set_par(struct fb_info *info)
        if (i)
                return i;
 
-       memset(info->screen_base, 0, info->screen_size);
+       memset_io(info->screen_base, 0, info->screen_size);
 
        vga_protect(par);
 
index 68288756ffff376fec44c592ef0a913c58e30034..a2f644c97f28026db0bde451e300aa1bf8840001 100644 (file)
@@ -925,10 +925,12 @@ static int imxfb_probe(struct platform_device *pdev)
                                sizeof(struct imx_fb_videomode), GFP_KERNEL);
                if (!fbi->mode) {
                        ret = -ENOMEM;
+                       of_node_put(display_np);
                        goto failed_of_parse;
                }
 
                ret = imxfb_of_read_mode(&pdev->dev, display_np, fbi->mode);
+               of_node_put(display_np);
                if (ret)
                        goto failed_of_parse;
        }
index 25801e8e3f74a9ff0ba1cbd59913dcab6f76e3c4..d57772f96ad26718eb01c0ab83dd6f870364c889 100644 (file)
@@ -494,6 +494,8 @@ static int kyrofb_set_par(struct fb_info *info)
                                    info->var.hsync_len +
                                    info->var.left_margin)) / 1000;
 
+       if (!lineclock)
+               return -EINVAL;
 
        /* time for a frame in ns (precision in 32bpp) */
        frameclock = lineclock * (info->var.yres +
index 759dee996af1b0cc3ad4a94b6b174bde5c33c68a..958be6805f876deecd461a42b0d86982f151054f 100644 (file)
@@ -47,7 +47,6 @@
 #include <asm/unaligned.h>
 
 #if defined(CONFIG_PPC_PMAC)
-#include <asm/prom.h>
 #include "../macmodes.h"
 #endif
 
index 63721337a37787b74dfcb61998f42fe8686c8e1b..a7508f5be343a654e615b45b17308ac888047656 100644 (file)
@@ -18,6 +18,8 @@
 #include <linux/interrupt.h>
 #include <linux/pci.h>
 #if defined(CONFIG_OF)
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #endif
 #include "mb862xxfb.h"
index 154127256a2c17a8621469b38dbda60584320238..03707461eced633b65effb9ed108c9a8e5f17834 100644 (file)
@@ -127,19 +127,18 @@ EXPORT_SYMBOL_GPL(mmp_unregister_panel);
  */
 struct mmp_path *mmp_get_path(const char *name)
 {
-       struct mmp_path *path;
-       int found = 0;
+       struct mmp_path *path = NULL, *iter;
 
        mutex_lock(&disp_lock);
-       list_for_each_entry(path, &path_list, node) {
-               if (!strcmp(name, path->name)) {
-                       found = 1;
+       list_for_each_entry(iter, &path_list, node) {
+               if (!strcmp(name, iter->name)) {
+                       path = iter;
                        break;
                }
        }
        mutex_unlock(&disp_lock);
 
-       return found ? path : NULL;
+       return path;
 }
 EXPORT_SYMBOL_GPL(mmp_get_path);
 
index 966df2a0736068fdd3e1e786d03fb51087e6184d..28d32cbf496b5554416ed5bfef95f05cfa55e9ee 100644 (file)
@@ -585,7 +585,7 @@ neofb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 
        DBG("neofb_check_var");
 
-       if (var->pixclock && PICOS2KHZ(var->pixclock) > par->maxClock)
+       if (!var->pixclock || PICOS2KHZ(var->pixclock) > par->maxClock)
                return -EINVAL;
 
        /* Is the mode larger than the LCD panel? */
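
neofb, like the arkfb, i740fb, kyrofb, s3fb, tridentfb and vt8623fb hunks in
this series, now rejects a zero pixclock before it reaches a division. A small
standalone demo of why, using the PICOS2KHZ definition from <linux/fb.h>:

#include <stdio.h>

#define PICOS2KHZ(a) (1000000000UL / (a))	/* as in <linux/fb.h> */

int main(void)
{
	unsigned long pixclock = 39722;	/* ~25.175 MHz, the classic VGA dot clock */

	printf("%lu ps -> %lu kHz\n", pixclock, PICOS2KHZ(pixclock));
	/* PICOS2KHZ(0) divides by zero: hence "if (!var->pixclock) return -EINVAL;" */
	return 0;
}
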
index b191bef22d9845453b1438454087abf84daa61b9..9d9fe5c3a7a1ac41ebd8d39ab1eb7979a81894b7 100644 (file)
@@ -964,7 +964,7 @@ static int hwa742_init(struct omapfb_device *fbdev, int ext_mode,
        if ((r = calc_extif_timings(ext_clk, &extif_mem_div)) < 0)
                goto err3;
        hwa742.extif->set_timings(&hwa742.reg_timings);
-       clk_enable(hwa742.sys_ck);
+       clk_prepare_enable(hwa742.sys_ck);
 
        calc_hwa742_clk_rates(ext_clk, &sys_clk, &pix_clk);
        if ((r = calc_extif_timings(sys_clk, &extif_mem_div)) < 0)
@@ -1023,7 +1023,7 @@ static int hwa742_init(struct omapfb_device *fbdev, int ext_mode,
 
        return 0;
 err4:
-       clk_disable(hwa742.sys_ck);
+       clk_disable_unprepare(hwa742.sys_ck);
 err3:
        hwa742.extif->cleanup();
 err2:
@@ -1037,7 +1037,7 @@ static void hwa742_cleanup(void)
        hwa742_set_update_mode(OMAPFB_UPDATE_DISABLED);
        hwa742.extif->cleanup();
        hwa742.int_ctrl->cleanup();
-       clk_disable(hwa742.sys_ck);
+       clk_disable_unprepare(hwa742.sys_ck);
 }
 
 struct lcd_ctrl hwa742_ctrl = {
index 7317c9aad677232545e75b7dbc78ad648c674fcb..97d20dc0d1d0276ea704ad27e2454a38250ae085 100644 (file)
@@ -711,7 +711,7 @@ static int omap_lcdc_init(struct omapfb_device *fbdev, int ext_mode,
                dev_err(fbdev->dev, "failed to adjust LCD rate\n");
                goto fail1;
        }
-       clk_enable(lcdc.lcd_ck);
+       clk_prepare_enable(lcdc.lcd_ck);
 
        r = request_irq(OMAP_LCDC_IRQ, lcdc_irq_handler, 0, MODULE_NAME, fbdev);
        if (r) {
@@ -746,7 +746,7 @@ fail4:
 fail3:
        free_irq(OMAP_LCDC_IRQ, lcdc.fbdev);
 fail2:
-       clk_disable(lcdc.lcd_ck);
+       clk_disable_unprepare(lcdc.lcd_ck);
 fail1:
        clk_put(lcdc.lcd_ck);
 fail0:
@@ -760,7 +760,7 @@ static void omap_lcdc_cleanup(void)
        free_fbmem();
        omap_free_lcd_dma();
        free_irq(OMAP_LCDC_IRQ, lcdc.fbdev);
-       clk_disable(lcdc.lcd_ck);
+       clk_disable_unprepare(lcdc.lcd_ck);
        clk_put(lcdc.lcd_ck);
 }
 
index 80ac67f27f0da11223183efb1b87d5df8d965622..b9cb8b386627650720b08d09f7edb56c2634b161 100644 (file)
@@ -598,7 +598,7 @@ static int sossi_init(struct omapfb_device *fbdev)
        l &= ~CONF_SOSSI_RESET_R;
        omap_writel(l, MOD_CONF_CTRL_1);
 
-       clk_enable(sossi.fck);
+       clk_prepare_enable(sossi.fck);
        l = omap_readl(ARM_IDLECT2);
        l &= ~(1 << 8);                 /* DMACK_REQ */
        omap_writel(l, ARM_IDLECT2);
@@ -649,7 +649,7 @@ static int sossi_init(struct omapfb_device *fbdev)
        return 0;
 
 err:
-       clk_disable(sossi.fck);
+       clk_disable_unprepare(sossi.fck);
        clk_put(sossi.fck);
        return r;
 }
@@ -657,6 +657,7 @@ err:
 static void sossi_cleanup(void)
 {
        omap_lcdc_free_dma_callback();
+       clk_unprepare(sossi.fck);
        clk_put(sossi.fck);
        iounmap(sossi.base);
 }
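
The omapfb conversions above swap clk_enable()/clk_disable() for their
prepare-aware counterparts: on the common clock framework a clock must also be
prepared, and clk_prepare() may sleep, so process-context code uses the
combined helpers. A short sketch of the pairing rule (illustrative helpers):

#include <linux/clk.h>

static int example_clk_on(struct clk *ck)
{
	return clk_prepare_enable(ck);	/* prepare (may sleep) + enable */
}

static void example_clk_off(struct clk *ck)
{
	clk_disable_unprepare(ck);	/* balances the call above */
}
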
index ce413a9df06e4be8ca0b6b1915e5548047c616e5..5b9e26ea6449484ddccdb407760815d1a4574b4c 100644 (file)
@@ -30,9 +30,9 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/nvram.h>
+#include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
-#include <asm/prom.h>
 
 #include "macmodes.h"
 #include "platinumfb.h"
index c68725eebee3bd0cc1c6fa502d9b694a3668f74b..d3be2c64f1c08dce2b1c10eb0be7ee397e030bff 100644 (file)
@@ -1504,9 +1504,7 @@ static const struct fb_ops pm2fb_ops = {
 
 
 /**
- * Device initialisation
- *
- * Initialise and allocate resource for PCI device.
+ * pm2fb_probe - Initialise and allocate resource for PCI device.
  *
  * @pdev:      PCI device.
  * @id:                PCI device ID.
@@ -1711,9 +1709,7 @@ static int pm2fb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 }
 
 /**
- * Device removal.
- *
- * Release all device resources.
+ * pm2fb_remove - Release all device resources.
  *
  * @pdev:      PCI device to clean up.
  */
index f1551e00eb12f149e94857d94d1c4747a2d0adb8..8ad91c251fe6a171bbf455d1277c8b664b033b37 100644 (file)
@@ -2256,10 +2256,10 @@ static int pxafb_probe(struct platform_device *dev)
                        goto failed;
                for (i = 0; i < inf->num_modes; i++)
                        inf->modes[i] = pdata->modes[i];
+       } else {
+               inf = of_pxafb_of_mach_info(&dev->dev);
        }
 
-       if (!pdata)
-               inf = of_pxafb_of_mach_info(&dev->dev);
        if (IS_ERR_OR_NULL(inf))
                goto failed;
 
index 5c74253e7b2c03222febf7f46ae0e4de810db6f1..b93c8eb0233692e1c5f096bfa9d7c197227c667d 100644 (file)
@@ -549,6 +549,9 @@ static int s3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
        int rv, mem, step;
        u16 m, n, r;
 
+       if (!var->pixclock)
+               return -EINVAL;
+
        /* Find appropriate format */
        rv = svga_match_format (s3fb_formats, var, NULL);
 
index aa4ebe3192ec973512279389911c550d4a648296..9a4417430b4e948f1f82e60ea58cb733225d379f 100644 (file)
@@ -531,9 +531,6 @@ static void sh_mobile_lcdc_display_off(struct sh_mobile_lcdc_chan *ch)
                ch->tx_dev->ops->display_off(ch->tx_dev);
 }
 
-static int sh_mobile_lcdc_check_var(struct fb_var_screeninfo *var,
-                                   struct fb_info *info);
-
 /* -----------------------------------------------------------------------------
  * Format helpers
  */
index 94fc9c6d04113661e57a7f5f7b43f88f17beb5b8..2c198561c338f74fbb2e34a85aa22f70b36faa3b 100644 (file)
@@ -84,6 +84,10 @@ struct simplefb_par {
 static void simplefb_clocks_destroy(struct simplefb_par *par);
 static void simplefb_regulators_destroy(struct simplefb_par *par);
 
+/*
+ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
+ * of unregister_framebuffer() or fb_release(). Do any cleanup here.
+ */
 static void simplefb_destroy(struct fb_info *info)
 {
        struct simplefb_par *par = info->par;
@@ -94,6 +98,8 @@ static void simplefb_destroy(struct fb_info *info)
        if (info->screen_base)
                iounmap(info->screen_base);
 
+       framebuffer_release(info);
+
        if (mem)
                release_mem_region(mem->start, resource_size(mem));
 }
@@ -545,8 +551,8 @@ static int simplefb_remove(struct platform_device *pdev)
 {
        struct fb_info *info = platform_get_drvdata(pdev);
 
+       /* simplefb_destroy takes care of info cleanup */
        unregister_framebuffer(info);
-       framebuffer_release(info);
 
        return 0;
 }
index 742f62986b80b9ea009b5c316ce716c469f24cbc..f28fd69d5eb75919920465ae533ce532633dd7cf 100644 (file)
@@ -4463,7 +4463,7 @@ static void sisfb_post_sis300(struct pci_dev *pdev)
                SiS_SetReg(SISCR, 0x37, 0x02);
                SiS_SetReg(SISPART2, 0x00, 0x1c);
                v4 = 0x00; v5 = 0x00; v6 = 0x10;
-               if(ivideo->SiS_Pr.UseROM) {
+               if (ivideo->SiS_Pr.UseROM && bios) {
                        v4 = bios[0xf5];
                        v5 = bios[0xf6];
                        v6 = bios[0xf7];
index 4d20cb557ff0f6e3f2bf959368828818ad2804c1..319131bd72cffa11a965f69588102012942d40fd 100644 (file)
@@ -996,6 +996,9 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
        int ramdac = 230000; /* 230MHz for most 3D chips */
        debug("enter\n");
 
+       if (!var->pixclock)
+               return -EINVAL;
+
        /* check color depth */
        if (bpp == 24)
                bpp = var->bits_per_pixel = 32;
index b6ec0b8e2b7252d4062e0c7a3f1727f74eb550eb..d280733f283b1250a4902caf71a3dbf8b7363561 100644 (file)
@@ -1650,8 +1650,9 @@ static int dlfb_usb_probe(struct usb_interface *intf,
        const struct device_attribute *attr;
        struct dlfb_data *dlfb;
        struct fb_info *info;
-       int retval = -ENOMEM;
+       int retval;
        struct usb_device *usbdev = interface_to_usbdev(intf);
+       struct usb_endpoint_descriptor *out;
 
        /* usb initialization */
        dlfb = kzalloc(sizeof(*dlfb), GFP_KERNEL);
@@ -1665,6 +1666,12 @@ static int dlfb_usb_probe(struct usb_interface *intf,
        dlfb->udev = usb_get_dev(usbdev);
        usb_set_intfdata(intf, dlfb);
 
+       retval = usb_find_common_endpoints(intf->cur_altsetting, NULL, &out, NULL, NULL);
+       if (retval) {
+               dev_err(&intf->dev, "Device should have at least 1 bulk endpoint!\n");
+               goto error;
+       }
+
        dev_dbg(&intf->dev, "console enable=%d\n", console);
        dev_dbg(&intf->dev, "fb_defio enable=%d\n", fb_defio);
        dev_dbg(&intf->dev, "shadow enable=%d\n", shadow);
@@ -1674,6 +1681,7 @@ static int dlfb_usb_probe(struct usb_interface *intf,
        if (!dlfb_parse_vendor_descriptor(dlfb, intf)) {
                dev_err(&intf->dev,
                        "firmware not recognized, incompatible device?\n");
+               retval = -ENODEV;
                goto error;
        }
 
@@ -1687,8 +1695,10 @@ static int dlfb_usb_probe(struct usb_interface *intf,
 
        /* allocates framebuffer driver structure, not framebuffer memory */
        info = framebuffer_alloc(0, &dlfb->udev->dev);
-       if (!info)
+       if (!info) {
+               retval = -ENOMEM;
                goto error;
+       }
 
        dlfb->info = info;
        info->par = dlfb;
index 8425afe37d7c08890491eb72017fa32b88f71feb..a6c9d4f26669564655270315402871b25601c4ed 100644 (file)
 #include <linux/nvram.h>
 #include <linux/adb.h>
 #include <linux/cuda.h>
+#include <linux/of_address.h>
 #ifdef CONFIG_MAC
 #include <asm/macintosh.h>
-#else
-#include <asm/prom.h>
 #endif
 
 #include "macmodes.h"
index df6de5a9dd4cd9981ac11cc70e38e4309dde7cc0..e25e8de5ff672de983837762f39ef9f8034a7a6f 100644 (file)
@@ -179,6 +179,10 @@ static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green,
        return err;
 }
 
+/*
+ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
+ * of unregister_framebuffer() or fb_release(). Do any cleanup here.
+ */
 static void vesafb_destroy(struct fb_info *info)
 {
        struct vesafb_par *par = info->par;
@@ -188,6 +192,8 @@ static void vesafb_destroy(struct fb_info *info)
        if (info->screen_base)
                iounmap(info->screen_base);
        release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size);
+
+       framebuffer_release(info);
 }
 
 static struct fb_ops vesafb_ops = {
@@ -484,10 +490,10 @@ static int vesafb_remove(struct platform_device *pdev)
 {
        struct fb_info *info = platform_get_drvdata(pdev);
 
+       /* vesafb_destroy takes care of info cleanup */
        unregister_framebuffer(info);
        if (((struct vesafb_par *)(info->par))->region)
                release_region(0x3c0, 32);
-       framebuffer_release(info);
 
        return 0;
 }
index 7a959e5ba90b83b9a3cf7a3f844274f094019b6b..a92a8c670cf0f28ffda9c391578bd5d51ba69ee0 100644 (file)
@@ -321,6 +321,9 @@ static int vt8623fb_check_var(struct fb_var_screeninfo *var, struct fb_info *inf
 {
        int rv, mem, step;
 
+       if (!var->pixclock)
+               return -EINVAL;
+
        /* Find appropriate format */
        rv = svga_match_format (vt8623fb_formats, var, NULL);
        if (rv < 0)
index f93b6abbe258193a64e2bfaada6d6554196b2e22..bebd371c6b93eaea9f93239854d653fffbedff1c 100644 (file)
@@ -199,7 +199,7 @@ struct display_timings *of_get_display_timings(const struct device_node *np)
                struct display_timing *dt;
                int r;
 
-               dt = kzalloc(sizeof(*dt), GFP_KERNEL);
+               dt = kmalloc(sizeof(*dt), GFP_KERNEL);
                if (!dt) {
                        pr_err("%pOF: could not allocate display_timing struct\n",
                                np);
index 121b9293c7375f963f8f85cab6086c2e5688628e..87ef258cec64839802079305a79bcd167f2c0df3 100644 (file)
@@ -47,4 +47,9 @@ source "drivers/virt/vboxguest/Kconfig"
 source "drivers/virt/nitro_enclaves/Kconfig"
 
 source "drivers/virt/acrn/Kconfig"
+
+source "drivers/virt/coco/efi_secret/Kconfig"
+
+source "drivers/virt/coco/sev-guest/Kconfig"
+
 endif
index 108d0ffcc9aa98ea5de1f6095f6cf6f41b9f007e..093674e05c40f2b5ba5fd9629e8c6a306d678cd1 100644 (file)
@@ -9,3 +9,5 @@ obj-y                           += vboxguest/
 
 obj-$(CONFIG_NITRO_ENCLAVES)   += nitro_enclaves/
 obj-$(CONFIG_ACRN_HSM)         += acrn/
+obj-$(CONFIG_EFI_SECRET)       += coco/efi_secret/
+obj-$(CONFIG_SEV_GUEST)                += coco/sev-guest/
diff --git a/drivers/virt/coco/efi_secret/Kconfig b/drivers/virt/coco/efi_secret/Kconfig
new file mode 100644 (file)
index 0000000..4404d19
--- /dev/null
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config EFI_SECRET
+       tristate "EFI secret area securityfs support"
+       depends on EFI && X86_64
+       select EFI_COCO_SECRET
+       select SECURITYFS
+       help
+         This is a driver for accessing the EFI secret area via securityfs.
+         The EFI secret area is a memory area designated by the firmware for
+         confidential computing secret injection (for example for AMD SEV
+         guests).  The driver exposes the secrets as files in
+         <securityfs>/secrets/coco.  Files can be read and deleted (deleting
+         a file wipes the secret from memory).
+
+         To compile this driver as a module, choose M here.
+         The module will be called efi_secret.
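
Once the module is loaded, each secret appears as a GUID-named file. A hedged
userspace sketch that lists them, assuming securityfs is mounted at the
conventional /sys/kernel/security:

#include <stdio.h>
#include <dirent.h>

int main(void)
{
	const char *dir = "/sys/kernel/security/secrets/coco";
	struct dirent *ent;
	DIR *d = opendir(dir);

	if (!d) {
		perror(dir);
		return 1;
	}
	while ((ent = readdir(d)))
		if (ent->d_name[0] != '.')
			printf("secret entry: %s\n", ent->d_name);
	closedir(d);
	return 0;
}
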
diff --git a/drivers/virt/coco/efi_secret/Makefile b/drivers/virt/coco/efi_secret/Makefile
new file mode 100644 (file)
index 0000000..c7047ce
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_EFI_SECRET) += efi_secret.o
diff --git a/drivers/virt/coco/efi_secret/efi_secret.c b/drivers/virt/coco/efi_secret/efi_secret.c
new file mode 100644 (file)
index 0000000..e700a5e
--- /dev/null
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * efi_secret module
+ *
+ * Copyright (C) 2022 IBM Corporation
+ * Author: Dov Murik <dovmurik@linux.ibm.com>
+ */
+
+/**
+ * DOC: efi_secret: Allow reading EFI confidential computing (coco) secret area
+ * via securityfs interface.
+ *
+ * When the module is loaded (and securityfs is mounted, typically under
+ * /sys/kernel/security), a "secrets/coco" directory is created in securityfs.
+ * In it, a file is created for each secret entry.  The name of each such file
+ * is the GUID of the secret entry, and its content is the secret data.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/security.h>
+#include <linux/efi.h>
+#include <linux/cacheflush.h>
+
+#define EFI_SECRET_NUM_FILES 64
+
+struct efi_secret {
+       struct dentry *secrets_dir;
+       struct dentry *fs_dir;
+       struct dentry *fs_files[EFI_SECRET_NUM_FILES];
+       void __iomem *secret_data;
+       u64 secret_data_len;
+};
+
+/*
+ * Structure of the EFI secret area
+ *
+ * Offset   Length
+ * (bytes)  (bytes)  Usage
+ * -------  -------  -----
+ *       0       16  Secret table header GUID (must be 1e74f542-71dd-4d66-963e-ef4287ff173b)
+ *      16        4  Length of bytes of the entire secret area
+ *
+ *      20       16  First secret entry's GUID
+ *      36        4  First secret entry's length in bytes (= 16 + 4 + x)
+ *      40        x  First secret entry's data
+ *
+ *    40+x       16  Second secret entry's GUID
+ *    56+x        4  Second secret entry's length in bytes (= 16 + 4 + y)
+ *    60+x        y  Second secret entry's data
+ *
+ * (... and so on for additional entries)
+ *
+ * The GUID of each secret entry designates the usage of the secret data.
+ */
+
+/**
+ * struct secret_header - Header of entire secret area; this should be followed
+ * by instances of struct secret_entry.
+ * @guid:      Must be EFI_SECRET_TABLE_HEADER_GUID
+ * @len:       Length in bytes of entire secret area, including header
+ */
+struct secret_header {
+       efi_guid_t guid;
+       u32 len;
+} __attribute((packed));
+
+/**
+ * struct secret_entry - Holds one secret entry
+ * @guid:      Secret-specific GUID (or NULL_GUID if this secret entry was deleted)
+ * @len:       Length of secret entry, including its guid and len fields
+ * @data:      The secret data (full of zeros if this secret entry was deleted)
+ */
+struct secret_entry {
+       efi_guid_t guid;
+       u32 len;
+       u8 data[];
+} __attribute((packed));
+
+static size_t secret_entry_data_len(struct secret_entry *e)
+{
+       return e->len - sizeof(*e);
+}
+
+static struct efi_secret the_efi_secret;
+
+static inline struct efi_secret *efi_secret_get(void)
+{
+       return &the_efi_secret;
+}
+
+static int efi_secret_bin_file_show(struct seq_file *file, void *data)
+{
+       struct secret_entry *e = file->private;
+
+       if (e)
+               seq_write(file, e->data, secret_entry_data_len(e));
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(efi_secret_bin_file);
+
+/*
+ * Overwrite memory content with zeroes, and ensure that dirty cache lines are
+ * actually written back to memory, to clear out the secret.
+ */
+static void wipe_memory(void *addr, size_t size)
+{
+       memzero_explicit(addr, size);
+#ifdef CONFIG_X86
+       clflush_cache_range(addr, size);
+#endif
+}
+
+static int efi_secret_unlink(struct inode *dir, struct dentry *dentry)
+{
+       struct efi_secret *s = efi_secret_get();
+       struct inode *inode = d_inode(dentry);
+       struct secret_entry *e = (struct secret_entry *)inode->i_private;
+       int i;
+
+       if (e) {
+               /* Zero out the secret data */
+               wipe_memory(e->data, secret_entry_data_len(e));
+               e->guid = NULL_GUID;
+       }
+
+       inode->i_private = NULL;
+
+       for (i = 0; i < EFI_SECRET_NUM_FILES; i++)
+               if (s->fs_files[i] == dentry)
+                       s->fs_files[i] = NULL;
+
+       /*
+        * securityfs_remove tries to lock the directory's inode, but we reach
+        * the unlink callback when it's already locked.
+        */
+       inode_unlock(dir);
+       securityfs_remove(dentry);
+       inode_lock(dir);
+
+       return 0;
+}
+
+static const struct inode_operations efi_secret_dir_inode_operations = {
+       .lookup         = simple_lookup,
+       .unlink         = efi_secret_unlink,
+};
+
+static int efi_secret_map_area(struct platform_device *dev)
+{
+       int ret;
+       struct efi_secret *s = efi_secret_get();
+       struct linux_efi_coco_secret_area *secret_area;
+
+       if (efi.coco_secret == EFI_INVALID_TABLE_ADDR) {
+               dev_err(&dev->dev, "Secret area address is not available\n");
+               return -EINVAL;
+       }
+
+       secret_area = memremap(efi.coco_secret, sizeof(*secret_area), MEMREMAP_WB);
+       if (secret_area == NULL) {
+               dev_err(&dev->dev, "Could not map secret area EFI config entry\n");
+               return -ENOMEM;
+       }
+       if (!secret_area->base_pa || secret_area->size < sizeof(struct secret_header)) {
+               dev_err(&dev->dev,
+                       "Invalid secret area memory location (base_pa=0x%llx size=0x%llx)\n",
+                       secret_area->base_pa, secret_area->size);
+               ret = -EINVAL;
+               goto unmap;
+       }
+
+       s->secret_data = ioremap_encrypted(secret_area->base_pa, secret_area->size);
+       if (s->secret_data == NULL) {
+               dev_err(&dev->dev, "Could not map secret area\n");
+               ret = -ENOMEM;
+               goto unmap;
+       }
+
+       s->secret_data_len = secret_area->size;
+       ret = 0;
+
+unmap:
+       memunmap(secret_area);
+       return ret;
+}
+
+static void efi_secret_securityfs_teardown(struct platform_device *dev)
+{
+       struct efi_secret *s = efi_secret_get();
+       int i;
+
+       for (i = (EFI_SECRET_NUM_FILES - 1); i >= 0; i--) {
+               securityfs_remove(s->fs_files[i]);
+               s->fs_files[i] = NULL;
+       }
+
+       securityfs_remove(s->fs_dir);
+       s->fs_dir = NULL;
+
+       securityfs_remove(s->secrets_dir);
+       s->secrets_dir = NULL;
+
+       dev_dbg(&dev->dev, "Removed securityfs entries\n");
+}
+
+static int efi_secret_securityfs_setup(struct platform_device *dev)
+{
+       struct efi_secret *s = efi_secret_get();
+       int ret = 0, i = 0, bytes_left;
+       unsigned char *ptr;
+       struct secret_header *h;
+       struct secret_entry *e;
+       struct dentry *dent;
+       char guid_str[EFI_VARIABLE_GUID_LEN + 1];
+
+       ptr = (void __force *)s->secret_data;
+       h = (struct secret_header *)ptr;
+       if (efi_guidcmp(h->guid, EFI_SECRET_TABLE_HEADER_GUID)) {
+               /*
+                * This is not an error: it just means that EFI defines secret
+                * area but it was not populated by the Guest Owner.
+                */
+               dev_dbg(&dev->dev, "EFI secret area does not start with correct GUID\n");
+               return -ENODEV;
+       }
+       if (h->len < sizeof(*h)) {
+               dev_err(&dev->dev, "EFI secret area reported length is too small\n");
+               return -EINVAL;
+       }
+       if (h->len > s->secret_data_len) {
+               dev_err(&dev->dev, "EFI secret area reported length is too big\n");
+               return -EINVAL;
+       }
+
+       s->secrets_dir = NULL;
+       s->fs_dir = NULL;
+       memset(s->fs_files, 0, sizeof(s->fs_files));
+
+       dent = securityfs_create_dir("secrets", NULL);
+       if (IS_ERR(dent)) {
+               dev_err(&dev->dev, "Error creating secrets securityfs directory entry err=%ld\n",
+                       PTR_ERR(dent));
+               return PTR_ERR(dent);
+       }
+       s->secrets_dir = dent;
+
+       dent = securityfs_create_dir("coco", s->secrets_dir);
+       if (IS_ERR(dent)) {
+               dev_err(&dev->dev, "Error creating coco securityfs directory entry err=%ld\n",
+                       PTR_ERR(dent));
+               return PTR_ERR(dent);
+       }
+       d_inode(dent)->i_op = &efi_secret_dir_inode_operations;
+       s->fs_dir = dent;
+
+       bytes_left = h->len - sizeof(*h);
+       ptr += sizeof(*h);
+       while (bytes_left >= (int)sizeof(*e) && i < EFI_SECRET_NUM_FILES) {
+               e = (struct secret_entry *)ptr;
+               if (e->len < sizeof(*e) || e->len > (unsigned int)bytes_left) {
+                       dev_err(&dev->dev, "EFI secret area is corrupted\n");
+                       ret = -EINVAL;
+                       goto err_cleanup;
+               }
+
+               /* Skip deleted entries (which will have NULL_GUID) */
+               if (efi_guidcmp(e->guid, NULL_GUID)) {
+                       efi_guid_to_str(&e->guid, guid_str);
+
+                       dent = securityfs_create_file(guid_str, 0440, s->fs_dir, (void *)e,
+                                                     &efi_secret_bin_file_fops);
+                       if (IS_ERR(dent)) {
+                               dev_err(&dev->dev, "Error creating efi_secret securityfs entry\n");
+                               ret = PTR_ERR(dent);
+                               goto err_cleanup;
+                       }
+
+                       s->fs_files[i++] = dent;
+               }
+               ptr += e->len;
+               bytes_left -= e->len;
+       }
+
+       dev_info(&dev->dev, "Created %d entries in securityfs secrets/coco\n", i);
+       return 0;
+
+err_cleanup:
+       efi_secret_securityfs_teardown(dev);
+       return ret;
+}
+
+static void efi_secret_unmap_area(void)
+{
+       struct efi_secret *s = efi_secret_get();
+
+       if (s->secret_data) {
+               iounmap(s->secret_data);
+               s->secret_data = NULL;
+               s->secret_data_len = 0;
+       }
+}
+
+static int efi_secret_probe(struct platform_device *dev)
+{
+       int ret;
+
+       ret = efi_secret_map_area(dev);
+       if (ret)
+               return ret;
+
+       ret = efi_secret_securityfs_setup(dev);
+       if (ret)
+               goto err_unmap;
+
+       return ret;
+
+err_unmap:
+       efi_secret_unmap_area();
+       return ret;
+}
+
+static int efi_secret_remove(struct platform_device *dev)
+{
+       efi_secret_securityfs_teardown(dev);
+       efi_secret_unmap_area();
+       return 0;
+}
+
+static struct platform_driver efi_secret_driver = {
+       .probe = efi_secret_probe,
+       .remove = efi_secret_remove,
+       .driver = {
+               .name = "efi_secret",
+       },
+};
+
+module_platform_driver(efi_secret_driver);
+
+MODULE_DESCRIPTION("Confidential computing EFI secret area access");
+MODULE_AUTHOR("IBM");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:efi_secret");
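
A worked example of the secret-area layout documented in the source above,
using hypothetical entry data sizes x = 32 and y = 8:

#include <stdio.h>

int main(void)
{
	unsigned int x = 32, y = 8;
	unsigned int first_entry = 16 + 4;	/* header GUID + length = offset 20 */
	unsigned int second_entry = first_entry + 16 + 4 + x;	/* offset 40+x */

	printf("first entry at %u, second entry at %u\n", first_entry, second_entry);
	printf("total area length = %u bytes\n", second_entry + 16 + 4 + y);
	return 0;
}
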
diff --git a/drivers/virt/coco/sev-guest/Kconfig b/drivers/virt/coco/sev-guest/Kconfig
new file mode 100644 (file)
index 0000000..f9db079
--- /dev/null
@@ -0,0 +1,14 @@
+config SEV_GUEST
+       tristate "AMD SEV Guest driver"
+       default m
+       depends on AMD_MEM_ENCRYPT
+       select CRYPTO_AEAD2
+       select CRYPTO_GCM
+       help
+         SEV-SNP firmware provides the guest a mechanism to communicate with
+         the PSP without risk from a malicious hypervisor who wishes to read,
+         alter, drop or replay the messages sent. The driver provides a
+         userspace interface to communicate with the PSP to request the
+         attestation report and more.
+
+         If you choose 'M' here, this module will be called sev-guest.
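
A hedged userspace sketch of the ioctl interface this driver exposes; the
struct and ioctl names follow the uapi header added by this series
(include/uapi/linux/sev-guest.h), and the zeroed request doubles as an
all-zero report nonce:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/sev-guest.h>

int main(void)
{
	struct snp_report_req req = {};
	struct snp_report_resp resp = {};
	struct snp_guest_request_ioctl input = {
		.msg_version = 1,	/* must be non-zero */
		.req_data = (__u64)&req,
		.resp_data = (__u64)&resp,
	};
	int fd = open("/dev/sev-guest", O_RDWR);

	if (fd < 0)
		return 1;
	if (ioctl(fd, SNP_GET_REPORT, &input) < 0)
		fprintf(stderr, "SNP_GET_REPORT failed, fw_err=%llx\n",
			(unsigned long long)input.fw_err);
	close(fd);
	return 0;
}
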
diff --git a/drivers/virt/coco/sev-guest/Makefile b/drivers/virt/coco/sev-guest/Makefile
new file mode 100644 (file)
index 0000000..63d67c2
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_SEV_GUEST) += sev-guest.o
diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
new file mode 100644 (file)
index 0000000..90ce16b
--- /dev/null
@@ -0,0 +1,743 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Secure Encrypted Virtualization (SEV) guest driver interface
+ *
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/set_memory.h>
+#include <linux/fs.h>
+#include <crypto/aead.h>
+#include <linux/scatterlist.h>
+#include <linux/psp-sev.h>
+#include <uapi/linux/sev-guest.h>
+#include <uapi/linux/psp-sev.h>
+
+#include <asm/svm.h>
+#include <asm/sev.h>
+
+#include "sev-guest.h"
+
+#define DEVICE_NAME    "sev-guest"
+#define AAD_LEN                48
+#define MSG_HDR_VER    1
+
+struct snp_guest_crypto {
+       struct crypto_aead *tfm;
+       u8 *iv, *authtag;
+       int iv_len, a_len;
+};
+
+struct snp_guest_dev {
+       struct device *dev;
+       struct miscdevice misc;
+
+       void *certs_data;
+       struct snp_guest_crypto *crypto;
+       struct snp_guest_msg *request, *response;
+       struct snp_secrets_page_layout *layout;
+       struct snp_req_data input;
+       u32 *os_area_msg_seqno;
+       u8 *vmpck;
+};
+
+static u32 vmpck_id;
+module_param(vmpck_id, uint, 0444);
+MODULE_PARM_DESC(vmpck_id, "The VMPCK ID to use when communicating with the PSP.");
+
+/* Mutex to serialize the shared buffer access and command handling. */
+static DEFINE_MUTEX(snp_cmd_mutex);
+
+static bool is_vmpck_empty(struct snp_guest_dev *snp_dev)
+{
+       char zero_key[VMPCK_KEY_LEN] = {0};
+
+       if (snp_dev->vmpck)
+               return !memcmp(snp_dev->vmpck, zero_key, VMPCK_KEY_LEN);
+
+       return true;
+}
+
+static void snp_disable_vmpck(struct snp_guest_dev *snp_dev)
+{
+       memzero_explicit(snp_dev->vmpck, VMPCK_KEY_LEN);
+       snp_dev->vmpck = NULL;
+}
+
+static inline u64 __snp_get_msg_seqno(struct snp_guest_dev *snp_dev)
+{
+       u64 count;
+
+       lockdep_assert_held(&snp_cmd_mutex);
+
+       /* Read the current message sequence counter from the secrets page */
+       count = *snp_dev->os_area_msg_seqno;
+
+       return count + 1;
+}
+
+/* Return a non-zero sequence number on success */
+static u64 snp_get_msg_seqno(struct snp_guest_dev *snp_dev)
+{
+       u64 count = __snp_get_msg_seqno(snp_dev);
+
+       /*
+        * The message sequence counter for the SNP guest request is a 64-bit
+        * value, but version 2 of the GHCB specification defines a 32-bit
+        * storage for it. If the counter exceeds the 32-bit value, then return
+        * zero. The caller should check the return value, but if the caller
+        * happens to not check the value and uses it anyway, then the firmware
+        * treats zero as an invalid number and will fail the message request.
+        */
+       if (count >= UINT_MAX) {
+               dev_err(snp_dev->dev, "request message sequence counter overflow\n");
+               return 0;
+       }
+
+       return count;
+}
+
+static void snp_inc_msg_seqno(struct snp_guest_dev *snp_dev)
+{
+       /*
+        * The counter is also incremented by the PSP, so increment it by 2
+        * and save in secrets page.
+        */
+       *snp_dev->os_area_msg_seqno += 2;
+}
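
A standalone model of the sequence-number scheme implemented above
(illustrative): the secrets page stores the last used value, a request
consumes count + 1, the PSP's reply consumes count + 2, so the stored counter
advances by 2 per successful exchange:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t os_area_msg_seqno = 0;

	for (int i = 0; i < 3; i++) {
		uint64_t req = os_area_msg_seqno + 1;

		printf("request seqno %llu, response seqno %llu\n",
		       (unsigned long long)req, (unsigned long long)(req + 1));
		os_area_msg_seqno += 2;	/* guest and PSP each consumed one value */
	}
	return 0;
}
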
+
+static inline struct snp_guest_dev *to_snp_dev(struct file *file)
+{
+       struct miscdevice *dev = file->private_data;
+
+       return container_of(dev, struct snp_guest_dev, misc);
+}
+
+static struct snp_guest_crypto *init_crypto(struct snp_guest_dev *snp_dev, u8 *key, size_t keylen)
+{
+       struct snp_guest_crypto *crypto;
+
+       crypto = kzalloc(sizeof(*crypto), GFP_KERNEL_ACCOUNT);
+       if (!crypto)
+               return NULL;
+
+       crypto->tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
+       if (IS_ERR(crypto->tfm))
+               goto e_free;
+
+       if (crypto_aead_setkey(crypto->tfm, key, keylen))
+               goto e_free_crypto;
+
+       crypto->iv_len = crypto_aead_ivsize(crypto->tfm);
+       crypto->iv = kmalloc(crypto->iv_len, GFP_KERNEL_ACCOUNT);
+       if (!crypto->iv)
+               goto e_free_crypto;
+
+       if (crypto_aead_authsize(crypto->tfm) > MAX_AUTHTAG_LEN) {
+               if (crypto_aead_setauthsize(crypto->tfm, MAX_AUTHTAG_LEN)) {
+                       dev_err(snp_dev->dev, "failed to set authsize to %d\n", MAX_AUTHTAG_LEN);
+                       goto e_free_iv;
+               }
+       }
+
+       crypto->a_len = crypto_aead_authsize(crypto->tfm);
+       crypto->authtag = kmalloc(crypto->a_len, GFP_KERNEL_ACCOUNT);
+       if (!crypto->authtag)
+               goto e_free_auth;
+
+       return crypto;
+
+e_free_auth:
+       kfree(crypto->authtag);
+e_free_iv:
+       kfree(crypto->iv);
+e_free_crypto:
+       crypto_free_aead(crypto->tfm);
+e_free:
+       kfree(crypto);
+
+       return NULL;
+}
+
+static void deinit_crypto(struct snp_guest_crypto *crypto)
+{
+       crypto_free_aead(crypto->tfm);
+       kfree(crypto->iv);
+       kfree(crypto->authtag);
+       kfree(crypto);
+}
+
+static int enc_dec_message(struct snp_guest_crypto *crypto, struct snp_guest_msg *msg,
+                          u8 *src_buf, u8 *dst_buf, size_t len, bool enc)
+{
+       struct snp_guest_msg_hdr *hdr = &msg->hdr;
+       struct scatterlist src[3], dst[3];
+       DECLARE_CRYPTO_WAIT(wait);
+       struct aead_request *req;
+       int ret;
+
+       req = aead_request_alloc(crypto->tfm, GFP_KERNEL);
+       if (!req)
+               return -ENOMEM;
+
+       /*
+        * AEAD memory operations:
+        * +------ AAD -------+------- DATA -----+---- AUTHTAG----+
+        * |  msg header      |  plaintext       |  hdr->authtag  |
+        * | bytes 30h - 5Fh  |    or            |                |
+        * |                  |   cipher         |                |
+        * +------------------+------------------+----------------+
+        */
+       sg_init_table(src, 3);
+       sg_set_buf(&src[0], &hdr->algo, AAD_LEN);
+       sg_set_buf(&src[1], src_buf, hdr->msg_sz);
+       sg_set_buf(&src[2], hdr->authtag, crypto->a_len);
+
+       sg_init_table(dst, 3);
+       sg_set_buf(&dst[0], &hdr->algo, AAD_LEN);
+       sg_set_buf(&dst[1], dst_buf, hdr->msg_sz);
+       sg_set_buf(&dst[2], hdr->authtag, crypto->a_len);
+
+       aead_request_set_ad(req, AAD_LEN);
+       aead_request_set_tfm(req, crypto->tfm);
+       aead_request_set_callback(req, 0, crypto_req_done, &wait);
+
+       aead_request_set_crypt(req, src, dst, len, crypto->iv);
+       ret = crypto_wait_req(enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req), &wait);
+
+       aead_request_free(req);
+       return ret;
+}
+
+static int __enc_payload(struct snp_guest_dev *snp_dev, struct snp_guest_msg *msg,
+                        void *plaintext, size_t len)
+{
+       struct snp_guest_crypto *crypto = snp_dev->crypto;
+       struct snp_guest_msg_hdr *hdr = &msg->hdr;
+
+       memset(crypto->iv, 0, crypto->iv_len);
+       memcpy(crypto->iv, &hdr->msg_seqno, sizeof(hdr->msg_seqno));
+
+       return enc_dec_message(crypto, msg, plaintext, msg->payload, len, true);
+}
+
+static int dec_payload(struct snp_guest_dev *snp_dev, struct snp_guest_msg *msg,
+                      void *plaintext, size_t len)
+{
+       struct snp_guest_crypto *crypto = snp_dev->crypto;
+       struct snp_guest_msg_hdr *hdr = &msg->hdr;
+
+       /* Build IV with response buffer sequence number */
+       memset(crypto->iv, 0, crypto->iv_len);
+       memcpy(crypto->iv, &hdr->msg_seqno, sizeof(hdr->msg_seqno));
+
+       return enc_dec_message(crypto, msg, msg->payload, plaintext, len, false);
+}
+
+static int verify_and_dec_payload(struct snp_guest_dev *snp_dev, void *payload, u32 sz)
+{
+       struct snp_guest_crypto *crypto = snp_dev->crypto;
+       struct snp_guest_msg *resp = snp_dev->response;
+       struct snp_guest_msg *req = snp_dev->request;
+       struct snp_guest_msg_hdr *req_hdr = &req->hdr;
+       struct snp_guest_msg_hdr *resp_hdr = &resp->hdr;
+
+       dev_dbg(snp_dev->dev, "response [seqno %lld type %d version %d sz %d]\n",
+               resp_hdr->msg_seqno, resp_hdr->msg_type, resp_hdr->msg_version, resp_hdr->msg_sz);
+
+       /* Verify that the sequence counter is incremented by 1 */
+       if (unlikely(resp_hdr->msg_seqno != (req_hdr->msg_seqno + 1)))
+               return -EBADMSG;
+
+       /* Verify response message type and version number. */
+       if (resp_hdr->msg_type != (req_hdr->msg_type + 1) ||
+           resp_hdr->msg_version != req_hdr->msg_version)
+               return -EBADMSG;
+
+       /*
+        * If the message size is greater than our buffer length then return
+        * an error.
+        */
+       if (unlikely((resp_hdr->msg_sz + crypto->a_len) > sz))
+               return -EBADMSG;
+
+       /* Decrypt the payload */
+       return dec_payload(snp_dev, resp, payload, resp_hdr->msg_sz + crypto->a_len);
+}
+
+static int enc_payload(struct snp_guest_dev *snp_dev, u64 seqno, int version, u8 type,
+                       void *payload, size_t sz)
+{
+       struct snp_guest_msg *req = snp_dev->request;
+       struct snp_guest_msg_hdr *hdr = &req->hdr;
+
+       memset(req, 0, sizeof(*req));
+
+       hdr->algo = SNP_AEAD_AES_256_GCM;
+       hdr->hdr_version = MSG_HDR_VER;
+       hdr->hdr_sz = sizeof(*hdr);
+       hdr->msg_type = type;
+       hdr->msg_version = version;
+       hdr->msg_seqno = seqno;
+       hdr->msg_vmpck = vmpck_id;
+       hdr->msg_sz = sz;
+
+       /* Verify the sequence number is non-zero */
+       if (!hdr->msg_seqno)
+               return -ENOSR;
+
+       dev_dbg(snp_dev->dev, "request [seqno %lld type %d version %d sz %d]\n",
+               hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz);
+
+       return __enc_payload(snp_dev, req, payload, sz);
+}
+
+static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, int msg_ver,
+                               u8 type, void *req_buf, size_t req_sz, void *resp_buf,
+                               u32 resp_sz, __u64 *fw_err)
+{
+       unsigned long err;
+       u64 seqno;
+       int rc;
+
+       /* Get the message sequence number and verify that it is non-zero */
+       seqno = snp_get_msg_seqno(snp_dev);
+       if (!seqno)
+               return -EIO;
+
+       memset(snp_dev->response, 0, sizeof(struct snp_guest_msg));
+
+       /* Encrypt the userspace provided payload */
+       rc = enc_payload(snp_dev, seqno, msg_ver, type, req_buf, req_sz);
+       if (rc)
+               return rc;
+
+       /* Call firmware to process the request */
+       rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err);
+       if (fw_err)
+               *fw_err = err;
+
+       if (rc)
+               return rc;
+
+       /*
+        * verify_and_dec_payload() will fail only if the hypervisor is
+        * actively modifying the message header or corrupting the encrypted
+        * payload. This hints that the hypervisor is acting in bad faith.
+        * Disable the VMPCK so that the key cannot be used for any further
+        * communication; disabling the key ensures that AES-GCM never uses
+        * the same IV while encrypting the request payload.
+        */
+       rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz);
+       if (rc) {
+               dev_alert(snp_dev->dev,
+                         "Detected unexpected decode failure, disabling the vmpck_id %d\n",
+                         vmpck_id);
+               snp_disable_vmpck(snp_dev);
+               return rc;
+       }
+
+       /* Increment to new message sequence after payload decryption was successful. */
+       snp_inc_msg_seqno(snp_dev);
+
+       return 0;
+}
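
The decode-failure handling above exists to prevent AES-GCM IV reuse: the IV
is just the message sequence number zero-padded to the GCM IV size, so
retrying a seqno under the same VMPCK would repeat an IV. A standalone model
of the IV construction (assumes a little-endian host, matching the kernel's
memcpy of the seqno in __enc_payload()):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	uint8_t iv[12] = {0};	/* crypto_aead_ivsize() for gcm(aes) is 12 */
	uint64_t msg_seqno = 5;

	memcpy(iv, &msg_seqno, sizeof(msg_seqno));
	printf("IV for seqno %llu: ", (unsigned long long)msg_seqno);
	for (unsigned int i = 0; i < sizeof(iv); i++)
		printf("%02x", iv[i]);
	printf("\n");
	return 0;
}
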
+
+static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg)
+{
+       struct snp_guest_crypto *crypto = snp_dev->crypto;
+       struct snp_report_resp *resp;
+       struct snp_report_req req;
+       int rc, resp_len;
+
+       lockdep_assert_held(&snp_cmd_mutex);
+
+       if (!arg->req_data || !arg->resp_data)
+               return -EINVAL;
+
+       if (copy_from_user(&req, (void __user *)arg->req_data, sizeof(req)))
+               return -EFAULT;
+
+       /*
+        * The intermediate response buffer is used while decrypting the
+        * response payload. Make sure that it has enough space to cover the
+        * authtag.
+        */
+       resp_len = sizeof(resp->data) + crypto->a_len;
+       resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT);
+       if (!resp)
+               return -ENOMEM;
+
+       rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version,
+                                 SNP_MSG_REPORT_REQ, &req, sizeof(req), resp->data,
+                                 resp_len, &arg->fw_err);
+       if (rc)
+               goto e_free;
+
+       if (copy_to_user((void __user *)arg->resp_data, resp, sizeof(*resp)))
+               rc = -EFAULT;
+
+e_free:
+       kfree(resp);
+       return rc;
+}
+
+static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg)
+{
+       struct snp_guest_crypto *crypto = snp_dev->crypto;
+       struct snp_derived_key_resp resp = {0};
+       struct snp_derived_key_req req;
+       int rc, resp_len;
+       /* Response data is 64 bytes and max authsize for GCM is 16 bytes. */
+       u8 buf[64 + 16];
+
+       lockdep_assert_held(&snp_cmd_mutex);
+
+       if (!arg->req_data || !arg->resp_data)
+               return -EINVAL;
+
+       /*
+        * The intermediate response buffer is used while decrypting the
+        * response payload. Make sure that it has enough space to cover the
+        * authtag.
+        */
+       resp_len = sizeof(resp.data) + crypto->a_len;
+       if (sizeof(buf) < resp_len)
+               return -ENOMEM;
+
+       if (copy_from_user(&req, (void __user *)arg->req_data, sizeof(req)))
+               return -EFAULT;
+
+       rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version,
+                                 SNP_MSG_KEY_REQ, &req, sizeof(req), buf, resp_len,
+                                 &arg->fw_err);
+       if (rc)
+               return rc;
+
+       memcpy(resp.data, buf, sizeof(resp.data));
+       if (copy_to_user((void __user *)arg->resp_data, &resp, sizeof(resp)))
+               rc = -EFAULT;
+
+       /* The response buffer contains sensitive data; explicitly clear it. */
+       memzero_explicit(buf, sizeof(buf));
+       memzero_explicit(&resp, sizeof(resp));
+       return rc;
+}
+
+static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg)
+{
+       struct snp_guest_crypto *crypto = snp_dev->crypto;
+       struct snp_ext_report_req req;
+       struct snp_report_resp *resp;
+       int ret, npages = 0, resp_len;
+
+       lockdep_assert_held(&snp_cmd_mutex);
+
+       if (!arg->req_data || !arg->resp_data)
+               return -EINVAL;
+
+       if (copy_from_user(&req, (void __user *)arg->req_data, sizeof(req)))
+               return -EFAULT;
+
+       /* userspace does not want certificate data */
+       if (!req.certs_len || !req.certs_address)
+               goto cmd;
+
+       if (req.certs_len > SEV_FW_BLOB_MAX_SIZE ||
+           !IS_ALIGNED(req.certs_len, PAGE_SIZE))
+               return -EINVAL;
+
+       if (!access_ok((const void __user *)req.certs_address, req.certs_len))
+               return -EFAULT;
+
+       /*
+        * Initialize the intermediate buffer with all zeros. This buffer
+        * is used in the guest request message to get the certs blob from
+        * the host. If the host does not supply any certs in it, then copy
+        * zeros to indicate that certificate data was not provided.
+        */
+       memset(snp_dev->certs_data, 0, req.certs_len);
+       npages = req.certs_len >> PAGE_SHIFT;
+cmd:
+       /*
+        * The intermediate response buffer is used while decrypting the
+        * response payload. Make sure that it has enough space to cover the
+        * authtag.
+        */
+       resp_len = sizeof(resp->data) + crypto->a_len;
+       resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT);
+       if (!resp)
+               return -ENOMEM;
+
+       snp_dev->input.data_npages = npages;
+       ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg->msg_version,
+                                  SNP_MSG_REPORT_REQ, &req.data,
+                                  sizeof(req.data), resp->data, resp_len, &arg->fw_err);
+
+       /* If the certs length is invalid then copy the returned length */
+       if (arg->fw_err == SNP_GUEST_REQ_INVALID_LEN) {
+               req.certs_len = snp_dev->input.data_npages << PAGE_SHIFT;
+
+               if (copy_to_user((void __user *)arg->req_data, &req, sizeof(req)))
+                       ret = -EFAULT;
+       }
+
+       if (ret)
+               goto e_free;
+
+       if (npages &&
+           copy_to_user((void __user *)req.certs_address, snp_dev->certs_data,
+                        req.certs_len)) {
+               ret = -EFAULT;
+               goto e_free;
+       }
+
+       if (copy_to_user((void __user *)arg->resp_data, resp, sizeof(*resp)))
+               ret = -EFAULT;
+
+e_free:
+       kfree(resp);
+       return ret;
+}
+
+static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
+{
+       struct snp_guest_dev *snp_dev = to_snp_dev(file);
+       void __user *argp = (void __user *)arg;
+       struct snp_guest_request_ioctl input;
+       int ret = -ENOTTY;
+
+       if (copy_from_user(&input, argp, sizeof(input)))
+               return -EFAULT;
+
+       input.fw_err = 0xff;
+
+       /* Message version must be non-zero */
+       if (!input.msg_version)
+               return -EINVAL;
+
+       mutex_lock(&snp_cmd_mutex);
+
+       /* Check if the VMPCK is not empty */
+       if (is_vmpck_empty(snp_dev)) {
+               dev_err_ratelimited(snp_dev->dev, "VMPCK is disabled\n");
+               mutex_unlock(&snp_cmd_mutex);
+               return -ENOTTY;
+       }
+
+       switch (ioctl) {
+       case SNP_GET_REPORT:
+               ret = get_report(snp_dev, &input);
+               break;
+       case SNP_GET_DERIVED_KEY:
+               ret = get_derived_key(snp_dev, &input);
+               break;
+       case SNP_GET_EXT_REPORT:
+               ret = get_ext_report(snp_dev, &input);
+               break;
+       default:
+               break;
+       }
+
+       mutex_unlock(&snp_cmd_mutex);
+
+       if (input.fw_err && copy_to_user(argp, &input, sizeof(input)))
+               return -EFAULT;
+
+       return ret;
+}
+
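+/*
+ * Restore the encryption mask on the buffer before freeing it. If that
+ * fails, intentionally leak the pages rather than return memory that is
+ * still shared with the hypervisor to the page allocator.
+ */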
+static void free_shared_pages(void *buf, size_t sz)
+{
+       unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+       int ret;
+
+       if (!buf)
+               return;
+
+       ret = set_memory_encrypted((unsigned long)buf, npages);
+       if (ret) {
+               WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n");
+               return;
+       }
+
+       __free_pages(virt_to_page(buf), get_order(sz));
+}
+
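+/*
+ * Allocate whole pages and mark them shared (decrypted) so that both the
+ * guest and the hypervisor can access them.
+ */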
+static void *alloc_shared_pages(struct device *dev, size_t sz)
+{
+       unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+       struct page *page;
+       int ret;
+
+       page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz));
+       if (!page)
+               return NULL;
+
+       ret = set_memory_decrypted((unsigned long)page_address(page), npages);
+       if (ret) {
+               dev_err(dev, "failed to mark page shared, ret=%d\n", ret);
+               __free_pages(page, get_order(sz));
+               return NULL;
+       }
+
+       return page_address(page);
+}
+
+static const struct file_operations snp_guest_fops = {
+       .owner  = THIS_MODULE,
+       .unlocked_ioctl = snp_guest_ioctl,
+};
+
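+/*
+ * Look up the VM Platform Communication Key (VMPCK) and its message
+ * sequence counter for the given key ID in the secrets page.
+ */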
+static u8 *get_vmpck(int id, struct snp_secrets_page_layout *layout, u32 **seqno)
+{
+       u8 *key = NULL;
+
+       switch (id) {
+       case 0:
+               *seqno = &layout->os_area.msg_seqno_0;
+               key = layout->vmpck0;
+               break;
+       case 1:
+               *seqno = &layout->os_area.msg_seqno_1;
+               key = layout->vmpck1;
+               break;
+       case 2:
+               *seqno = &layout->os_area.msg_seqno_2;
+               key = layout->vmpck2;
+               break;
+       case 3:
+               *seqno = &layout->os_area.msg_seqno_3;
+               key = layout->vmpck3;
+               break;
+       default:
+               break;
+       }
+
+       return key;
+}
+
+static int __init sev_guest_probe(struct platform_device *pdev)
+{
+       struct snp_secrets_page_layout *layout;
+       struct sev_guest_platform_data *data;
+       struct device *dev = &pdev->dev;
+       struct snp_guest_dev *snp_dev;
+       struct miscdevice *misc;
+       int ret;
+
+       if (!dev->platform_data)
+               return -ENODEV;
+
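+       /* Map the secrets page, which holds the VMPCKs and their sequence numbers. */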
+       data = (struct sev_guest_platform_data *)dev->platform_data;
+       layout = (__force void *)ioremap_encrypted(data->secrets_gpa, PAGE_SIZE);
+       if (!layout)
+               return -ENODEV;
+
+       ret = -ENOMEM;
+       snp_dev = devm_kzalloc(&pdev->dev, sizeof(struct snp_guest_dev), GFP_KERNEL);
+       if (!snp_dev)
+               goto e_unmap;
+
+       ret = -EINVAL;
+       snp_dev->vmpck = get_vmpck(vmpck_id, layout, &snp_dev->os_area_msg_seqno);
+       if (!snp_dev->vmpck) {
+               dev_err(dev, "invalid vmpck id %d\n", vmpck_id);
+               goto e_unmap;
+       }
+
+       /* Verify that the VMPCK is not all zeros. */
+       if (is_vmpck_empty(snp_dev)) {
+               dev_err(dev, "vmpck id %d is null\n", vmpck_id);
+               goto e_unmap;
+       }
+
+       platform_set_drvdata(pdev, snp_dev);
+       snp_dev->dev = dev;
+       snp_dev->layout = layout;
+
+       /* Allocate the shared page used for the request and response message. */
+       snp_dev->request = alloc_shared_pages(dev, sizeof(struct snp_guest_msg));
+       if (!snp_dev->request)
+               goto e_unmap;
+
+       snp_dev->response = alloc_shared_pages(dev, sizeof(struct snp_guest_msg));
+       if (!snp_dev->response)
+               goto e_free_request;
+
+       snp_dev->certs_data = alloc_shared_pages(dev, SEV_FW_BLOB_MAX_SIZE);
+       if (!snp_dev->certs_data)
+               goto e_free_response;
+
+       ret = -EIO;
+       snp_dev->crypto = init_crypto(snp_dev, snp_dev->vmpck, VMPCK_KEY_LEN);
+       if (!snp_dev->crypto)
+               goto e_free_cert_data;
+
+       misc = &snp_dev->misc;
+       misc->minor = MISC_DYNAMIC_MINOR;
+       misc->name = DEVICE_NAME;
+       misc->fops = &snp_guest_fops;
+
+       /* Initialize the input addresses for the guest request */
+       snp_dev->input.req_gpa = __pa(snp_dev->request);
+       snp_dev->input.resp_gpa = __pa(snp_dev->response);
+       snp_dev->input.data_gpa = __pa(snp_dev->certs_data);
+
+       ret = misc_register(misc);
+       if (ret)
+               goto e_free_cert_data;
+
+       dev_info(dev, "Initialized SEV guest driver (using vmpck_id %d)\n", vmpck_id);
+       return 0;
+
+e_free_cert_data:
+       free_shared_pages(snp_dev->certs_data, SEV_FW_BLOB_MAX_SIZE);
+e_free_response:
+       free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg));
+e_free_request:
+       free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg));
+e_unmap:
+       iounmap(layout);
+       return ret;
+}
+
+static int __exit sev_guest_remove(struct platform_device *pdev)
+{
+       struct snp_guest_dev *snp_dev = platform_get_drvdata(pdev);
+
+       free_shared_pages(snp_dev->certs_data, SEV_FW_BLOB_MAX_SIZE);
+       free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg));
+       free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg));
+       deinit_crypto(snp_dev->crypto);
+       misc_deregister(&snp_dev->misc);
+
+       return 0;
+}
+
+/*
+ * This driver is meant to be a common SEV guest interface driver and to
+ * support any SEV guest API. As such, even though it has been introduced
+ * with the SEV-SNP support, it is named "sev-guest".
+ */
+static struct platform_driver sev_guest_driver = {
+       .remove         = __exit_p(sev_guest_remove),
+       .driver         = {
+               .name = "sev-guest",
+       },
+};
+
+module_platform_driver_probe(sev_guest_driver, sev_guest_probe);
+
+MODULE_AUTHOR("Brijesh Singh <brijesh.singh@amd.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0.0");
+MODULE_DESCRIPTION("AMD SEV Guest Driver");
diff --git a/drivers/virt/coco/sev-guest/sev-guest.h b/drivers/virt/coco/sev-guest/sev-guest.h
new file mode 100644 (file)
index 0000000..21bda26
--- /dev/null
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * SEV-SNP API spec is available at https://developer.amd.com/sev
+ */
+
+#ifndef __VIRT_SEVGUEST_H__
+#define __VIRT_SEVGUEST_H__
+
+#include <linux/types.h>
+
+#define MAX_AUTHTAG_LEN                32
+
+/* See SNP spec SNP_GUEST_REQUEST section for the structure */
+enum msg_type {
+       SNP_MSG_TYPE_INVALID = 0,
+       SNP_MSG_CPUID_REQ,
+       SNP_MSG_CPUID_RSP,
+       SNP_MSG_KEY_REQ,
+       SNP_MSG_KEY_RSP,
+       SNP_MSG_REPORT_REQ,
+       SNP_MSG_REPORT_RSP,
+       SNP_MSG_EXPORT_REQ,
+       SNP_MSG_EXPORT_RSP,
+       SNP_MSG_IMPORT_REQ,
+       SNP_MSG_IMPORT_RSP,
+       SNP_MSG_ABSORB_REQ,
+       SNP_MSG_ABSORB_RSP,
+       SNP_MSG_VMRK_REQ,
+       SNP_MSG_VMRK_RSP,
+
+       SNP_MSG_TYPE_MAX
+};
+
+enum aead_algo {
+       SNP_AEAD_INVALID,
+       SNP_AEAD_AES_256_GCM,
+};
+
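+/* Message header used for SNP guest requests; the layout is defined by the SEV-SNP firmware ABI. */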
+struct snp_guest_msg_hdr {
+       u8 authtag[MAX_AUTHTAG_LEN];
+       u64 msg_seqno;
+       u8 rsvd1[8];
+       u8 algo;
+       u8 hdr_version;
+       u16 hdr_sz;
+       u8 msg_type;
+       u8 msg_version;
+       u16 msg_sz;
+       u32 rsvd2;
+       u8 msg_vmpck;
+       u8 rsvd3[35];
+} __packed;
+
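+/* The 96-byte header plus the 4000-byte payload fill exactly one 4KiB page. */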
+struct snp_guest_msg {
+       struct snp_guest_msg_hdr hdr;
+       u8 payload[4000];
+} __packed;
+
+#endif /* __VIRT_SEVGUEST_H__ */
index 75c8d560bbd3633e195321891f21a403197eb1d1..22f15f444f757a4d8294c44da27e1cd8c47dbdd5 100644 (file)
@@ -526,9 +526,8 @@ int virtio_device_restore(struct virtio_device *dev)
                        goto err;
        }
 
-       /* If restore didn't do it, mark device DRIVER_OK ourselves. */
-       if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK))
-               virtio_device_ready(dev);
+       /* Finally, tell the device we're all set */
+       virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
 
        virtio_config_enable(dev);
 
index dfe26fa17e95d783728612cfaa22be67d0c10961..617a7f4f07a807e446adde5b2369088e4b65dba3 100644 (file)
@@ -689,29 +689,34 @@ void xen_free_ballooned_pages(unsigned int nr_pages, struct page **pages)
 }
 EXPORT_SYMBOL(xen_free_ballooned_pages);
 
-#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC)
-static void __init balloon_add_region(unsigned long start_pfn,
-                                     unsigned long pages)
+static void __init balloon_add_regions(void)
 {
+#if defined(CONFIG_XEN_PV)
+       unsigned long start_pfn, pages;
        unsigned long pfn, extra_pfn_end;
+       unsigned int i;
 
-       /*
-        * If the amount of usable memory has been limited (e.g., with
-        * the 'mem' command line parameter), don't add pages beyond
-        * this limit.
-        */
-       extra_pfn_end = min(max_pfn, start_pfn + pages);
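+       /*
+        * Initialize the balloon with pages from the extra memory regions
+        * (see arch/x86/xen/setup.c).
+        */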
+       for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+               pages = xen_extra_mem[i].n_pfns;
+               if (!pages)
+                       continue;
 
-       for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
-               /* totalram_pages and totalhigh_pages do not
-                  include the boot-time balloon extension, so
-                  don't subtract from it. */
-               balloon_append(pfn_to_page(pfn));
-       }
+               start_pfn = xen_extra_mem[i].start_pfn;
 
-       balloon_stats.total_pages += extra_pfn_end - start_pfn;
-}
+               /*
+                * If the amount of usable memory has been limited (e.g., with
+                * the 'mem' command line parameter), don't add pages beyond
+                * this limit.
+                */
+               extra_pfn_end = min(max_pfn, start_pfn + pages);
+
+               for (pfn = start_pfn; pfn < extra_pfn_end; pfn++)
+                       balloon_append(pfn_to_page(pfn));
+
+               balloon_stats.total_pages += extra_pfn_end - start_pfn;
+       }
 #endif
+}
 
 static int __init balloon_init(void)
 {
@@ -745,20 +750,7 @@ static int __init balloon_init(void)
        register_sysctl_table(xen_root);
 #endif
 
-#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC)
-       {
-               int i;
-
-               /*
-                * Initialize the balloon with pages from the extra memory
-                * regions (see arch/x86/xen/setup.c).
-                */
-               for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
-                       if (xen_extra_mem[i].n_pfns)
-                               balloon_add_region(xen_extra_mem[i].start_pfn,
-                                                  xen_extra_mem[i].n_pfns);
-       }
-#endif
+       balloon_add_regions();
 
        task = kthread_run(balloon_thread, NULL, "xen-balloon");
        if (IS_ERR(task)) {
index 4849f94372a45d6d5667e8dd3cef912d6ff44542..55acb32842a3a7296176c85113850124f7c9d97b 100644 (file)
@@ -178,9 +178,9 @@ static void __del_gref(struct gntalloc_gref *gref)
        unsigned long addr;
 
        if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
-               uint8_t *tmp = kmap(gref->page);
+               uint8_t *tmp = kmap_local_page(gref->page);
                tmp[gref->notify.pgoff] = 0;
-               kunmap(gref->page);
+               kunmap_local(tmp);
        }
        if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
                notify_remote_via_evtchn(gref->notify.event);
index a8b41057c382898f04b2a04b207534eb1284ea19..a39f2d36dd9cfc7b4016afedb0c8c9c24f3aa8e4 100644 (file)
@@ -230,39 +230,6 @@ void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages)
 }
 EXPORT_SYMBOL(xen_free_unpopulated_pages);
 
-#ifdef CONFIG_XEN_PV
-static int __init init(void)
-{
-       unsigned int i;
-
-       if (!xen_domain())
-               return -ENODEV;
-
-       if (!xen_pv_domain())
-               return 0;
-
-       /*
-        * Initialize with pages from the extra memory regions (see
-        * arch/x86/xen/setup.c).
-        */
-       for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
-               unsigned int j;
-
-               for (j = 0; j < xen_extra_mem[i].n_pfns; j++) {
-                       struct page *pg =
-                               pfn_to_page(xen_extra_mem[i].start_pfn + j);
-
-                       pg->zone_device_data = page_list;
-                       page_list = pg;
-                       list_count++;
-               }
-       }
-
-       return 0;
-}
-subsys_initcall(init);
-#endif
-
 static int __init unpopulated_init(void)
 {
        int ret;
index 2fe402483ad5bbc59750e2096f7127a3ba2152c0..30b066299d39f70a450539823dbe75602427e81a 100644 (file)
@@ -740,10 +740,22 @@ int afs_getattr(struct user_namespace *mnt_userns, const struct path *path,
 {
        struct inode *inode = d_inode(path->dentry);
        struct afs_vnode *vnode = AFS_FS_I(inode);
-       int seq = 0;
+       struct key *key;
+       int ret, seq = 0;
 
        _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
 
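+       /*
+        * Unless the caller opted out with AT_STATX_DONT_SYNC, revalidate the
+        * vnode with the server when we no longer hold a callback promise.
+        */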
+       if (!(query_flags & AT_STATX_DONT_SYNC) &&
+           !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+               key = afs_request_key(vnode->volume->cell);
+               if (IS_ERR(key))
+                       return PTR_ERR(key);
+               ret = afs_validate(vnode, key);
+               key_put(key);
+               if (ret < 0)
+                       return ret;
+       }
+
        do {
                read_seqbegin_or_lock(&vnode->cb_lock, &seq);
                generic_fillattr(&init_user_ns, inode, stat);
index 6bcf1475511bc80234598d963199394c21a84f79..4763132ca57e7f53d4ca82ab64282fe8a3848f11 100644 (file)
@@ -616,8 +616,7 @@ static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
                _debug("write discard %x @%llx [%llx]", len, start, i_size);
 
                /* The dirty region was entirely beyond the EOF. */
-               fscache_clear_page_bits(afs_vnode_cache(vnode),
-                                       mapping, start, len, caching);
+               fscache_clear_page_bits(mapping, start, len, caching);
                afs_pages_written_back(vnode, start, len);
                ret = 0;
        }
index 6556e13ed95f0dcc6634708c722e5feaa79853d5..63c7ebb0da8987a5ca86db763d9e48a8ea7365e9 100644 (file)
@@ -1117,11 +1117,11 @@ out_free_interp:
                         * independently randomized mmap region (0 load_bias
                         * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
                         */
-                       alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
-                       if (interpreter || alignment > ELF_MIN_ALIGN) {
+                       if (interpreter) {
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
+                               alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
                                if (alignment)
                                        load_bias &= ~(alignment - 1);
                                elf_flags |= MAP_FIXED_NOREPLACE;
index c22d287e020b3d4f2a8afcffb6b9f4b84bae82a9..0dd6de994199947850d6b06bec5cf93bfc0094a5 100644 (file)
@@ -2503,12 +2503,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
                return ERR_PTR(ret);
        }
 
-       /*
-        * New block group is likely to be used soon. Try to activate it now.
-        * Failure is OK for now.
-        */
-       btrfs_zone_activate(cache);
-
        ret = exclude_super_stripes(cache);
        if (ret) {
                /* We may have excluded something, so call this just in case */
@@ -2946,7 +2940,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
        struct btrfs_path *path = NULL;
        LIST_HEAD(dirty);
        struct list_head *io = &cur_trans->io_bgs;
-       int num_started = 0;
        int loops = 0;
 
        spin_lock(&cur_trans->dirty_bgs_lock);
@@ -3012,7 +3005,6 @@ again:
                        cache->io_ctl.inode = NULL;
                        ret = btrfs_write_out_cache(trans, cache, path);
                        if (ret == 0 && cache->io_ctl.inode) {
-                               num_started++;
                                should_put = 0;
 
                                /*
@@ -3113,7 +3105,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
        int should_put;
        struct btrfs_path *path;
        struct list_head *io = &cur_trans->io_bgs;
-       int num_started = 0;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -3171,7 +3162,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
                        cache->io_ctl.inode = NULL;
                        ret = btrfs_write_out_cache(trans, cache, path);
                        if (ret == 0 && cache->io_ctl.inode) {
-                               num_started++;
                                should_put = 0;
                                list_add_tail(&cache->io_list, io);
                        } else {
@@ -3455,7 +3445,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
        return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
 }
 
-static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
+static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
 {
        struct btrfs_block_group *bg;
        int ret;
@@ -3542,7 +3532,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
 out:
        btrfs_trans_release_chunk_metadata(trans);
 
-       return ret;
+       if (ret)
+               return ERR_PTR(ret);
+
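+       /* Hand the caller a reference; it must be dropped with btrfs_put_block_group(). */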
+       btrfs_get_block_group(bg);
+       return bg;
 }
 
 /*
@@ -3657,10 +3651,17 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
 {
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_space_info *space_info;
+       struct btrfs_block_group *ret_bg;
        bool wait_for_alloc = false;
        bool should_alloc = false;
+       bool from_extent_allocation = false;
        int ret = 0;
 
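+       /*
+        * CHUNK_ALLOC_FORCE_FOR_EXTENT forces the allocation and additionally
+        * requests that the new block group's zone be activated below.
+        */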
+       if (force == CHUNK_ALLOC_FORCE_FOR_EXTENT) {
+               from_extent_allocation = true;
+               force = CHUNK_ALLOC_FORCE;
+       }
+
        /* Don't re-enter if we're already allocating a chunk */
        if (trans->allocating_chunk)
                return -ENOSPC;
@@ -3750,9 +3751,22 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
                        force_metadata_allocation(fs_info);
        }
 
-       ret = do_chunk_alloc(trans, flags);
+       ret_bg = do_chunk_alloc(trans, flags);
        trans->allocating_chunk = false;
 
+       if (IS_ERR(ret_bg)) {
+               ret = PTR_ERR(ret_bg);
+       } else if (from_extent_allocation) {
+               /*
+                * New block group is likely to be used soon. Try to activate
+                * it now. Failure is OK for now.
+                */
+               btrfs_zone_activate(ret_bg);
+       }
+
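+       /* Drop the extra reference taken by do_chunk_alloc(). */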
+       if (!ret)
+               btrfs_put_block_group(ret_bg);
+
        spin_lock(&space_info->lock);
        if (ret < 0) {
                if (ret == -ENOSPC)
index 93aabc68bb6a8e9c104d72474b12adaa9d08a546..e8308f2ad07d1988e408f0082fc34291fc9c429c 100644 (file)
@@ -35,11 +35,15 @@ enum btrfs_discard_state {
  * the FS with empty chunks
  *
  * CHUNK_ALLOC_FORCE means it must try to allocate one
+ *
+ * CHUNK_ALLOC_FORCE_FOR_EXTENT is like CHUNK_ALLOC_FORCE, but is called from
+ * find_free_extent() and also activates the zone
  */
 enum btrfs_chunk_alloc_enum {
        CHUNK_ALLOC_NO_FORCE,
        CHUNK_ALLOC_LIMITED,
        CHUNK_ALLOC_FORCE,
+       CHUNK_ALLOC_FORCE_FOR_EXTENT,
 };
 
 struct btrfs_caching_control {
index 47e72d72f7d0a868e316521bb6d99f7ccc66a7d2..32131a5d321b39a6568ba50bff7f8f5ae9276516 100644 (file)
@@ -384,6 +384,17 @@ static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
        return ret;
 }
 
+/*
+ * Check if the inode has flags compatible with compression
+ */
+static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode)
+{
+       if (inode->flags & BTRFS_INODE_NODATACOW ||
+           inode->flags & BTRFS_INODE_NODATASUM)
+               return false;
+       return true;
+}
+
 struct btrfs_dio_private {
        struct inode *inode;
 
index be476f094300ac8e53e3309963cd469664a6b31e..19bf36d8ffea7286dbee1388149d2bbeeb3b7011 100644 (file)
@@ -537,6 +537,9 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
        cb->orig_bio = NULL;
        cb->nr_pages = nr_pages;
 
+       if (blkcg_css)
+               kthread_associate_blkcg(blkcg_css);
+
        while (cur_disk_bytenr < disk_start + compressed_len) {
                u64 offset = cur_disk_bytenr - disk_start;
                unsigned int index = offset >> PAGE_SHIFT;
@@ -555,6 +558,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
                                bio = NULL;
                                goto finish_cb;
                        }
+                       if (blkcg_css)
+                               bio->bi_opf |= REQ_CGROUP_PUNT;
                }
                /*
                 * We should never reach next_stripe_start start as we will
@@ -612,6 +617,9 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
        return 0;
 
 finish_cb:
+       if (blkcg_css)
+               kthread_associate_blkcg(NULL);
+
        if (bio) {
                bio->bi_status = ret;
                bio_endio(bio);
index b7631b88426e37a234ba758a37c6149698d05015..077c95e9baa50648ad027c78e4b9fb77dbe19853 100644 (file)
@@ -1060,6 +1060,7 @@ struct btrfs_fs_info {
         */
        spinlock_t relocation_bg_lock;
        u64 data_reloc_bg;
+       struct mutex zoned_data_reloc_io_lock;
 
        u64 nr_global_roots;
 
index 71fd99b482832c4e635b761400bc8cc537fc3905..f262026219894c13aed8a0fdd9a38f72aa1a93fb 100644 (file)
@@ -734,7 +734,12 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
 
        btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
 
-       /* Commit dev_replace state and reserve 1 item for it. */
+       /*
+        * Commit dev_replace state and reserve 1 item for it.
+        * This is crucial to ensure we won't miss copying extents for new block
+        * groups that are allocated after we started the device replace, and
+        * must be done after setting up the device replace state.
+        */
        trans = btrfs_start_transaction(root, 1);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
index b30309f187cf022c3910b83d92a26b2660875364..84795d831282b3152440354db4a2ddc3e0b594e7 100644 (file)
@@ -1850,9 +1850,10 @@ again:
 
        ret = btrfs_insert_fs_root(fs_info, root);
        if (ret) {
-               btrfs_put_root(root);
-               if (ret == -EEXIST)
+               if (ret == -EEXIST) {
+                       btrfs_put_root(root);
                        goto again;
+               }
                goto fail;
        }
        return root;
@@ -3156,6 +3157,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
        mutex_init(&fs_info->reloc_mutex);
        mutex_init(&fs_info->delalloc_root_mutex);
        mutex_init(&fs_info->zoned_meta_io_lock);
+       mutex_init(&fs_info->zoned_data_reloc_io_lock);
        seqlock_init(&fs_info->profiles_lock);
 
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -3656,6 +3658,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
        if (sectorsize < PAGE_SIZE) {
                struct btrfs_subpage_info *subpage_info;
 
+               /*
+                * V1 space cache has some hardcoded PAGE_SIZE usage, and is
+                * going to be deprecated.
+                *
+                * Force the v2 cache for the subpage case.
+                */
+               btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
+               btrfs_set_and_info(fs_info, FREE_SPACE_TREE,
+                       "forcing free space tree for sector size %u with page size %lu",
+                       sectorsize, PAGE_SIZE);
+
                btrfs_warn(fs_info,
                "read-write for sector size %u with page size %lu is experimental",
                           sectorsize, PAGE_SIZE);
@@ -4225,6 +4238,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
  */
 static void btrfs_end_empty_barrier(struct bio *bio)
 {
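+       /* Pairs with the bio_init() in write_dev_flush(). */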
+       bio_uninit(bio);
        complete(bio->bi_private);
 }
 
@@ -4234,7 +4248,7 @@ static void btrfs_end_empty_barrier(struct bio *bio)
  */
 static void write_dev_flush(struct btrfs_device *device)
 {
-       struct bio *bio = device->flush_bio;
+       struct bio *bio = &device->flush_bio;
 
 #ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        /*
@@ -4247,12 +4261,12 @@ static void write_dev_flush(struct btrfs_device *device)
         * of simplicity, since this is a debug tool and not meant for use in
         * non-debug builds.
         */
-       struct request_queue *q = bdev_get_queue(device->bdev);
-       if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+       if (!bdev_write_cache(device->bdev))
                return;
 #endif
 
-       bio_reset(bio, device->bdev, REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
+       bio_init(bio, device->bdev, NULL, 0,
+                REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
        bio->bi_end_io = btrfs_end_empty_barrier;
        init_completion(&device->flush_wait);
        bio->bi_private = &device->flush_wait;
@@ -4266,7 +4280,7 @@ static void write_dev_flush(struct btrfs_device *device)
  */
 static blk_status_t wait_dev_flush(struct btrfs_device *device)
 {
-       struct bio *bio = device->flush_bio;
+       struct bio *bio = &device->flush_bio;
 
        if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
                return BLK_STS_OK;
index f477035a2ac2358015bcdff41f733906d04fe872..6260784e74b5ae66b7ef6559ac67ba936376d2c6 100644 (file)
@@ -1239,7 +1239,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
 
                if (size) {
                        ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
-                                                  GFP_NOFS, 0);
+                                                  GFP_NOFS);
                        if (!ret)
                                *discarded_bytes += size;
                        else if (ret != -EOPNOTSUPP)
@@ -1256,7 +1256,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
 
        if (bytes_left) {
                ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
-                                          GFP_NOFS, 0);
+                                          GFP_NOFS);
                if (!ret)
                        *discarded_bytes += bytes_left;
        }
@@ -1291,7 +1291,7 @@ static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes)
                ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
                                              &discarded);
                discarded += src_disc;
-       } else if (blk_queue_discard(bdev_get_queue(stripe->dev->bdev))) {
+       } else if (bdev_max_discard_sectors(stripe->dev->bdev)) {
                ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
        } else {
                ret = 0;
@@ -4082,7 +4082,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
                        }
 
                        ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
-                                               CHUNK_ALLOC_FORCE);
+                                               CHUNK_ALLOC_FORCE_FOR_EXTENT);
 
                        /* Do not bail out on ENOSPC since we can do more. */
                        if (ret == -ENOSPC)
@@ -5987,7 +5987,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
        *trimmed = 0;
 
        /* Discard not supported = nothing to do. */
-       if (!blk_queue_discard(bdev_get_queue(device->bdev)))
+       if (!bdev_max_discard_sectors(device->bdev))
                return 0;
 
        /* Not writable = nothing to do. */
index 724e8fe06aa0bc371334d21325dbe1b4b16a3575..33c19f51d79b000e75e216ced93f984556ef730f 100644 (file)
@@ -2658,6 +2658,7 @@ int btrfs_repair_one_sector(struct inode *inode,
 
        repair_bio = btrfs_bio_alloc(1);
        repair_bbio = btrfs_bio(repair_bio);
+       repair_bbio->file_offset = start;
        repair_bio->bi_opf = REQ_OP_READ;
        repair_bio->bi_end_io = failed_bio->bi_end_io;
        repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
@@ -3333,24 +3334,37 @@ static int alloc_new_bio(struct btrfs_inode *inode,
        ret = calc_bio_boundaries(bio_ctrl, inode, file_offset);
        if (ret < 0)
                goto error;
-       if (wbc) {
-               struct block_device *bdev;
 
-               bdev = fs_info->fs_devices->latest_dev->bdev;
-               bio_set_dev(bio, bdev);
-               wbc_init_bio(wbc, bio);
-       }
-       if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
-               struct btrfs_device *device;
+       if (wbc) {
+               /*
+                * For zone append writes, the bio must be set up with the
+                * block_device we are actually going to write to, so that the
+                * hardware limits are respected.  Look it up here:
+                */
+               if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+                       struct btrfs_device *dev;
+
+                       dev = btrfs_zoned_get_device(fs_info, disk_bytenr,
+                                                    fs_info->sectorsize);
+                       if (IS_ERR(dev)) {
+                               ret = PTR_ERR(dev);
+                               goto error;
+                       }
 
-               device = btrfs_zoned_get_device(fs_info, disk_bytenr,
-                                               fs_info->sectorsize);
-               if (IS_ERR(device)) {
-                       ret = PTR_ERR(device);
-                       goto error;
+                       bio_set_dev(bio, dev->bdev);
+               } else {
+                       /*
+                        * Otherwise pick the last added device to support
+                        * cgroup writeback.  For multi-device file systems this
+                        * means blk-cgroup policies have to always be set on the
+                        * last added/replaced device.  This is a bit odd but has
+                        * been like that for a long time.
+                        */
+                       bio_set_dev(bio, fs_info->fs_devices->latest_dev->bdev);
                }
-
-               btrfs_bio(bio)->device = device;
+               wbc_init_bio(wbc, bio);
+       } else {
+               ASSERT(bio_op(bio) != REQ_OP_ZONE_APPEND);
        }
        return 0;
 error:
index 0399cf8e3c32c5c3a5e5141bd07cea93e2abcaa6..151e9da5da2dc2d553fb4f0582ad882133c916e9 100644 (file)
@@ -118,7 +118,7 @@ struct btrfs_bio_ctrl {
  */
 struct extent_changeset {
        /* How many bytes are set/cleared in this operation */
-       unsigned int bytes_changed;
+       u64 bytes_changed;
 
        /* Changed ranges */
        struct ulist range_changed;
index 9f455c96c9744b5f9ed3af1599eb4b375b072907..380054c94e4b6a1cb2502f06998acfc015806db3 100644 (file)
@@ -2957,8 +2957,9 @@ out:
        return ret;
 }
 
-static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
+static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
 {
+       struct inode *inode = file_inode(file);
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_state *cached_state = NULL;
@@ -2990,6 +2991,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                goto out_only_mutex;
        }
 
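+       /* As for a regular write: strip privileges and update the file times. */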
+       ret = file_modified(file);
+       if (ret)
+               goto out_only_mutex;
+
        lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode)));
        lockend = round_down(offset + len,
                             btrfs_inode_sectorsize(BTRFS_I(inode))) - 1;
@@ -3430,7 +3435,7 @@ static long btrfs_fallocate(struct file *file, int mode,
                return -EOPNOTSUPP;
 
        if (mode & FALLOC_FL_PUNCH_HOLE)
-               return btrfs_punch_hole(inode, offset, len);
+               return btrfs_punch_hole(file, offset, len);
 
        /*
         * Only trigger disk allocation, don't trigger qgroup reserve
@@ -3452,6 +3457,10 @@ static long btrfs_fallocate(struct file *file, int mode,
                        goto out;
        }
 
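+       /* Same as for writes: remove privileges and update timestamps before allocating. */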
+       ret = file_modified(file);
+       if (ret)
+               goto out;
+
        /*
         * TODO: Move these two operations after we have checked
         * accurate reserved space, or fallocate can still fail but
index 6bfc4343c98d123a6da6aaa9c06e2ee1d806fbf6..95c499b8424e77b403e8b2e881b1b804b93c4a65 100644 (file)
@@ -480,17 +480,6 @@ static noinline int add_async_extent(struct async_chunk *cow,
        return 0;
 }
 
-/*
- * Check if the inode has flags compatible with compression
- */
-static inline bool inode_can_compress(struct btrfs_inode *inode)
-{
-       if (inode->flags & BTRFS_INODE_NODATACOW ||
-           inode->flags & BTRFS_INODE_NODATASUM)
-               return false;
-       return true;
-}
-
 /*
  * Check if the inode needs to be submitted to compression, based on mount
  * options, defragmentation, properties or heuristics.
@@ -500,7 +489,7 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
 
-       if (!inode_can_compress(inode)) {
+       if (!btrfs_inode_can_compress(inode)) {
                WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
                        KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
                        btrfs_ino(inode));
@@ -1128,7 +1117,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
        int ret = 0;
 
        if (btrfs_is_free_space_inode(inode)) {
-               WARN_ON_ONCE(1);
                ret = -EINVAL;
                goto out_unlock;
        }
@@ -2017,11 +2005,10 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
                 * to use run_delalloc_nocow() here, like for  regular
                 * preallocated inodes.
                 */
-               ASSERT(!zoned ||
-                      (zoned && btrfs_is_data_reloc_root(inode->root)));
+               ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, nr_written);
-       } else if (!inode_can_compress(inode) ||
+       } else if (!btrfs_inode_can_compress(inode) ||
                   !inode_need_compress(inode, start, end)) {
                if (zoned)
                        ret = run_delalloc_zoned(inode, locked_page, start, end,
@@ -4488,6 +4475,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
                           dest->root_key.objectid);
                return -EPERM;
        }
+       if (atomic_read(&dest->nr_swapfiles)) {
+               spin_unlock(&dest->root_item_lock);
+               btrfs_warn(fs_info,
+                          "attempt to delete subvolume %llu with active swapfile",
+                          root->root_key.objectid);
+               return -EPERM;
+       }
        root_flags = btrfs_root_flags(&dest->root_item);
        btrfs_set_root_flags(&dest->root_item,
                             root_flags | BTRFS_ROOT_SUBVOL_DEAD);
@@ -7438,6 +7432,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
        u64 block_start, orig_start, orig_block_len, ram_bytes;
        bool can_nocow = false;
        bool space_reserved = false;
+       u64 prev_len;
        int ret = 0;
 
        /*
@@ -7465,6 +7460,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
                        can_nocow = true;
        }
 
+       prev_len = len;
        if (can_nocow) {
                struct extent_map *em2;
 
@@ -7494,8 +7490,6 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
                        goto out;
                }
        } else {
-               const u64 prev_len = len;
-
                /* Our caller expects us to free the input extent map. */
                free_extent_map(em);
                *map = NULL;
@@ -7526,7 +7520,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
         * We have created our ordered extent, so we can now release our reservation
         * for an outstanding extent.
         */
-       btrfs_delalloc_release_extents(BTRFS_I(inode), len);
+       btrfs_delalloc_release_extents(BTRFS_I(inode), prev_len);
 
        /*
         * Need to update the i_size under the extent lock so buffered
@@ -7805,8 +7799,6 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
        const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
        struct bio_vec bvec;
        struct bvec_iter iter;
-       const u64 orig_file_offset = dip->file_offset;
-       u64 start = orig_file_offset;
        u32 bio_offset = 0;
        blk_status_t err = BLK_STS_OK;
 
@@ -7816,6 +7808,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
                nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
                pgoff = bvec.bv_offset;
                for (i = 0; i < nr_sectors; i++) {
+                       u64 start = bbio->file_offset + bio_offset;
+
                        ASSERT(pgoff < PAGE_SIZE);
                        if (uptodate &&
                            (!csum || !check_data_csum(inode, bbio,
@@ -7828,17 +7822,13 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
                        } else {
                                int ret;
 
-                               ASSERT((start - orig_file_offset) < UINT_MAX);
-                               ret = btrfs_repair_one_sector(inode,
-                                               &bbio->bio,
-                                               start - orig_file_offset,
-                                               bvec.bv_page, pgoff,
+                               ret = btrfs_repair_one_sector(inode, &bbio->bio,
+                                               bio_offset, bvec.bv_page, pgoff,
                                                start, bbio->mirror_num,
                                                submit_dio_repair_bio);
                                if (ret)
                                        err = errno_to_blk_status(ret);
                        }
-                       start += sectorsize;
                        ASSERT(bio_offset + sectorsize > bio_offset);
                        bio_offset += sectorsize;
                        pgoff += sectorsize;
@@ -7865,6 +7855,7 @@ static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
 static void btrfs_end_dio_bio(struct bio *bio)
 {
        struct btrfs_dio_private *dip = bio->bi_private;
+       struct btrfs_bio *bbio = btrfs_bio(bio);
        blk_status_t err = bio->bi_status;
 
        if (err)
@@ -7875,12 +7866,12 @@ static void btrfs_end_dio_bio(struct bio *bio)
                           bio->bi_iter.bi_size, err);
 
        if (bio_op(bio) == REQ_OP_READ)
-               err = btrfs_check_read_dio_bio(dip, btrfs_bio(bio), !err);
+               err = btrfs_check_read_dio_bio(dip, bbio, !err);
 
        if (err)
                dip->dio_bio->bi_status = err;
 
-       btrfs_record_physical_zoned(dip->inode, dip->file_offset, bio);
+       btrfs_record_physical_zoned(dip->inode, bbio->file_offset, bio);
 
        bio_put(bio);
        btrfs_dio_private_put(dip);
@@ -8041,6 +8032,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
                bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len);
                bio->bi_private = dip;
                bio->bi_end_io = btrfs_end_dio_bio;
+               btrfs_bio(bio)->file_offset = file_offset;
 
                if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
                        status = extract_ordered_extent(BTRFS_I(inode), bio,
@@ -11107,8 +11099,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
         * set. We use this counter to prevent snapshots. We must increment it
         * before walking the extents because we don't want a concurrent
         * snapshot to run after we've already checked the extents.
+        *
+        * It is possible that subvolume is marked for deletion but still not
+        * removed yet. To prevent this race, we check the root status before
+        * activating the swapfile.
         */
+       spin_lock(&root->root_item_lock);
+       if (btrfs_root_dead(root)) {
+               spin_unlock(&root->root_item_lock);
+
+               btrfs_exclop_finish(fs_info);
+               btrfs_warn(fs_info,
+               "cannot activate swapfile because subvolume %llu is being deleted",
+                       root->root_key.objectid);
+               return -EPERM;
+       }
        atomic_inc(&root->nr_swapfiles);
+       spin_unlock(&root->root_item_lock);
 
        isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
 
index 238cee5b5254d1c7204e7d7a4286a6aec6ac09c8..4b28aaea27022e89f552d136d31d4d681a26d8ae 100644 (file)
@@ -468,7 +468,6 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
                                        void __user *arg)
 {
        struct btrfs_device *device;
-       struct request_queue *q;
        struct fstrim_range range;
        u64 minlen = ULLONG_MAX;
        u64 num_devices = 0;
@@ -498,14 +497,11 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
        rcu_read_lock();
        list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
                                dev_list) {
-               if (!device->bdev)
+               if (!device->bdev || !bdev_max_discard_sectors(device->bdev))
                        continue;
-               q = bdev_get_queue(device->bdev);
-               if (blk_queue_discard(q)) {
-                       num_devices++;
-                       minlen = min_t(u64, q->limits.discard_granularity,
-                                    minlen);
-               }
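+               /* Track the smallest discard granularity across the discard-capable devices. */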
+               num_devices++;
+               minlen = min_t(u64, bdev_discard_granularity(device->bdev),
+                                   minlen);
        }
        rcu_read_unlock();
 
@@ -1239,7 +1235,7 @@ static u32 get_extent_max_capacity(const struct extent_map *em)
 }
 
 static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
-                                    bool locked)
+                                    u32 extent_thresh, u64 newer_than, bool locked)
 {
        struct extent_map *next;
        bool ret = false;
@@ -1249,11 +1245,12 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
                return false;
 
        /*
-        * We want to check if the next extent can be merged with the current
-        * one, which can be an extent created in a past generation, so we pass
-        * a minimum generation of 0 to defrag_lookup_extent().
+        * Here we need to pass @newer_than when checking the next extent, or
+        * we would hit a case where we mark the current extent for defrag but
+        * the next one is not a target.
+        * That would just cause extra IO without really reducing the fragments.
         */
-       next = defrag_lookup_extent(inode, em->start + em->len, 0, locked);
+       next = defrag_lookup_extent(inode, em->start + em->len, newer_than, locked);
        /* No more em or hole */
        if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
                goto out;
@@ -1265,6 +1262,13 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
         */
        if (next->len >= get_extent_max_capacity(em))
                goto out;
+       /* Skip older extent */
+       if (next->generation < newer_than)
+               goto out;
+       /* Also check extent size */
+       if (next->len >= extent_thresh)
+               goto out;
+
        ret = true;
 out:
        free_extent_map(next);
@@ -1470,7 +1474,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
                        goto next;
 
                next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
-                                                         locked);
+                                               extent_thresh, newer_than, locked);
                if (!next_mergeable) {
                        struct defrag_target_range *last;
 
@@ -5448,8 +5452,6 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_fs_info(fs_info, argp);
        case BTRFS_IOC_DEV_INFO:
                return btrfs_ioctl_dev_info(fs_info, argp);
-       case BTRFS_IOC_BALANCE:
-               return btrfs_ioctl_balance(file, NULL);
        case BTRFS_IOC_TREE_SEARCH:
                return btrfs_ioctl_tree_search(inode, argp);
        case BTRFS_IOC_TREE_SEARCH_V2:
index 1a6d2d5b4b3332d615adbe35eaa6393e66050268..1b31481f9e72c35fda104449ff6108ac5eee0872 100644 (file)
@@ -17,9 +17,11 @@ static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
 struct prop_handler {
        struct hlist_node node;
        const char *xattr_name;
-       int (*validate)(const char *value, size_t len);
+       int (*validate)(const struct btrfs_inode *inode, const char *value,
+                       size_t len);
        int (*apply)(struct inode *inode, const char *value, size_t len);
        const char *(*extract)(struct inode *inode);
+       bool (*ignore)(const struct btrfs_inode *inode);
        int inheritable;
 };
 
@@ -55,7 +57,8 @@ find_prop_handler(const char *name,
        return NULL;
 }
 
-int btrfs_validate_prop(const char *name, const char *value, size_t value_len)
+int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name,
+                       const char *value, size_t value_len)
 {
        const struct prop_handler *handler;
 
@@ -69,7 +72,29 @@ int btrfs_validate_prop(const char *name, const char *value, size_t value_len)
        if (value_len == 0)
                return 0;
 
-       return handler->validate(value, value_len);
+       return handler->validate(inode, value, value_len);
+}
+
+/*
+ * Check if a property should be ignored (not set) for an inode.
+ *
+ * @inode:     The target inode.
+ * @name:      The property's name.
+ *
+ * The caller must be sure the given property name is valid, for example by
+ * having previously called btrfs_validate_prop().
+ *
+ * Returns:    true if the property should be ignored for the given inode
+ *             false if the property must not be ignored for the given inode
+ */
+bool btrfs_ignore_prop(const struct btrfs_inode *inode, const char *name)
+{
+       const struct prop_handler *handler;
+
+       handler = find_prop_handler(name, NULL);
+       ASSERT(handler != NULL);
+
+       return handler->ignore(inode);
 }
 
 int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
@@ -252,8 +277,12 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
        return ret;
 }
 
-static int prop_compression_validate(const char *value, size_t len)
+static int prop_compression_validate(const struct btrfs_inode *inode,
+                                    const char *value, size_t len)
 {
+       if (!btrfs_inode_can_compress(inode))
+               return -EINVAL;
+
        if (!value)
                return 0;
 
@@ -310,6 +339,22 @@ static int prop_compression_apply(struct inode *inode, const char *value,
        return 0;
 }
 
+static bool prop_compression_ignore(const struct btrfs_inode *inode)
+{
+       /*
+        * Compression only has effect for regular files, and for directories
+        * we set it just to propagate it to new files created inside them.
+        * Everything else (symlinks, devices, sockets, fifos) is pointless as
+        * it will do nothing, so don't waste metadata space on a compression
+        * xattr for anything that is neither a file nor a directory.
+        */
+       if (!S_ISREG(inode->vfs_inode.i_mode) &&
+           !S_ISDIR(inode->vfs_inode.i_mode))
+               return true;
+
+       return false;
+}
+
 static const char *prop_compression_extract(struct inode *inode)
 {
        switch (BTRFS_I(inode)->prop_compress) {
@@ -330,6 +375,7 @@ static struct prop_handler prop_handlers[] = {
                .validate = prop_compression_validate,
                .apply = prop_compression_apply,
                .extract = prop_compression_extract,
+               .ignore = prop_compression_ignore,
                .inheritable = 1
        },
 };
@@ -356,6 +402,9 @@ static int inherit_props(struct btrfs_trans_handle *trans,
                if (!h->inheritable)
                        continue;
 
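+               /* Skip properties that are pointless for this type of inode. */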
+               if (h->ignore(BTRFS_I(inode)))
+                       continue;
+
                value = h->extract(parent);
                if (!value)
                        continue;
@@ -364,7 +413,7 @@ static int inherit_props(struct btrfs_trans_handle *trans,
                 * This is not strictly necessary as the property should be
                 * valid, but in case it isn't, don't propagate it further.
                 */
-               ret = h->validate(value, strlen(value));
+               ret = h->validate(BTRFS_I(inode), value, strlen(value));
                if (ret)
                        continue;
 
index 40b2c65b518c6d78d61727bdeaa7e3d819dd32e2..59bea741cfcf433925faa316a4a9095a4cc973a7 100644 (file)
@@ -13,7 +13,9 @@ void __init btrfs_props_init(void);
 int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
                   const char *name, const char *value, size_t value_len,
                   int flags);
-int btrfs_validate_prop(const char *name, const char *value, size_t value_len);
+int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name,
+                       const char *value, size_t value_len);
+bool btrfs_ignore_prop(const struct btrfs_inode *inode, const char *name);
 
 int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path);
 
index 11089568b2879e65f37de80db06c0278c0b3e1d7..8cd713d37ad2f35f1375c955f7426ec79e5f39cb 100644 (file)
@@ -3699,6 +3699,31 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                if (!cache)
                        goto skip;
 
+               ASSERT(cache->start <= chunk_offset);
+               /*
+                * We are using the commit root to search for device extents, so
+                * that means we could have found a device extent item from a
+                * block group that was deleted in the current transaction. The
+                * logical start offset of the deleted block group, stored at
+                * @chunk_offset, might be part of the logical address range of
+                * a new block group (which uses different physical extents).
+                * In this case btrfs_lookup_block_group() has returned the new
+                * block group, and its start address is less than @chunk_offset.
+                *
+                * We skip such new block groups, because it's pointless to
+                * process them, as we won't find their extents because we search
+                * for them using the commit root of the extent tree. For a device
+                * replace it's also fine to skip it, we won't miss copying them
+                * to the target device because we have the write duplication
+                * setup through the regular write path (by btrfs_map_block()),
+                * and we have committed a transaction when we started the device
+                * replace, right after setting up the device replace state.
+                */
+               if (cache->start < chunk_offset) {
+                       btrfs_put_block_group(cache);
+                       goto skip;
+               }
+
                if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) {
                        spin_lock(&cache->lock);
                        if (!cache->to_copy) {
@@ -3822,7 +3847,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                dev_replace->item_needs_writeback = 1;
                up_write(&dev_replace->rwsem);
 
-               ASSERT(cache->start == chunk_offset);
                ret = scrub_chunk(sctx, cache, scrub_dev, found_key.offset,
                                  dev_extent_len);
 
index 17389a42a3ab7214971f6f02c3883c8caef31cb0..ba78ca5aabbb251a3bccb36da094ef3866fa17f4 100644 (file)
@@ -922,6 +922,9 @@ static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj,
                case BTRFS_EXCLOP_BALANCE:
                        str = "balance\n";
                        break;
+               case BTRFS_EXCLOP_BALANCE_PAUSED:
+                       str = "balance paused\n";
+                       break;
                case BTRFS_EXCLOP_DEV_ADD:
                        str = "device add\n";
                        break;
index 571dae8ad65e8cb92e9eebf4f6fa92e746c845c9..e65633686378cc8b3ff13d4f6cde9a15dc3548ca 100644 (file)
@@ -3188,6 +3188,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                        ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
                        if (ret) {
                                mutex_unlock(&fs_info->tree_root->log_mutex);
+                               blk_finish_plug(&plug);
                                goto out;
                        }
                }
@@ -3720,11 +3721,29 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
        key.offset = first_offset;
        key.type = BTRFS_DIR_LOG_INDEX_KEY;
        ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item));
-       if (ret)
+       /*
+        * -EEXIST is fine and can happen sporadically when we are logging a
+        * directory and have concurrent insertions in the subvolume's tree for
+        * items from other inodes that result in pushing off some dir items
+        * from one leaf to another in order to accommodate the new items.
+        * This results in logging the same dir index range key.
+        */
+       if (ret && ret != -EEXIST)
                return ret;
 
        item = btrfs_item_ptr(path->nodes[0], path->slots[0],
                              struct btrfs_dir_log_item);
+       if (ret == -EEXIST) {
+               const u64 curr_end = btrfs_dir_log_end(path->nodes[0], item);
+
+               /*
+                * btrfs_del_dir_entries_in_log() might have been called during
+                * an unlink between the initial insertion of this key and the
+                * current update, or we might be logging a single entry deletion
+                * during a rename, so set the new last_offset to the max value.
+                */
+               last_offset = max(last_offset, curr_end);
+       }
        btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
        btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_release_path(path);
@@ -3848,13 +3867,6 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
                                ret = insert_dir_log_key(trans, log, dst_path,
                                                 ino, *last_old_dentry_offset + 1,
                                                 key.offset - 1);
-                               /*
-                                * -EEXIST should never happen because when we
-                                * log a directory in full mode (LOG_INODE_ALL)
-                                * we drop all BTRFS_DIR_LOG_INDEX_KEY keys from
-                                * the log tree.
-                                */
-                               ASSERT(ret != -EEXIST);
                                if (ret < 0)
                                        return ret;
                        }
@@ -5803,6 +5815,18 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                mutex_lock(&inode->log_mutex);
        }
 
+       /*
+        * For symlinks, we must always log their content, which is stored in an
+        * inline extent, otherwise we could end up with an empty symlink after
+        * log replay, which is invalid on Linux (symlink(2) returns -ENOENT if
+        * one attempts to create an empty symlink).
+        * We don't need to worry about flushing delalloc, because we create
+        * the inline extent when the symlink is created (we never have delalloc
+        * for symlinks).
+        */
+       if (S_ISLNK(inode->vfs_inode.i_mode))
+               inode_only = LOG_INODE_ALL;
+
        /*
         * Before logging the inode item, cache the value returned by
         * inode_logged(), because after that we need to figure out if
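
The symlink rationale above is easy to verify from userspace: Linux rejects an empty symlink target with ENOENT, which is why log replay must never leave a symlink without its inline content. A minimal demo (the paths are arbitrary):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* symlink(2) refuses an empty target with ENOENT. */
	if (symlink("", "/tmp/empty-link") == -1)
		printf("symlink(\"\"): %s\n", strerror(errno));
	return 0;
}
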
@@ -6181,7 +6205,7 @@ again:
                        }
 
                        ctx->log_new_dentries = false;
-                       if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK)
+                       if (type == BTRFS_FT_DIR)
                                log_mode = LOG_INODE_ALL;
                        ret = btrfs_log_inode(trans, BTRFS_I(di_inode),
                                              log_mode, ctx);
@@ -7018,12 +7042,12 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
                /*
                 * Another concurrent task might be logging the old directory,
                 * as that can be triggered when logging another inode that had or
-                * still has a dentry in the old directory. So take the old
-                * directory's log_mutex to prevent getting an -EEXIST when
-                * logging a key to record the deletion, or having that other
-                * task logging the old directory get an -EEXIST if it attempts
-                * to log the same key after we just did it. In both cases that
-                * would result in falling back to a transaction commit.
+                * still has a dentry in the old directory. We lock the old
+                * directory's log_mutex to ensure the deletion of the old
+                * name is persisted, because during directory logging we
+                * delete all BTRFS_DIR_LOG_INDEX_KEY keys and the deletion of
+                * the old name's dir index item is in the delayed items, so
+                * it could be missed by an in-progress directory logging.
                 */
                mutex_lock(&old_dir->log_mutex);
                ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir),
index 1be7cb2f955fcbf2a9fce535b1b4d49e5c4690e9..b6b00338037c49b56d27af50ed7668763b89bbcf 100644 (file)
@@ -405,7 +405,6 @@ void btrfs_free_device(struct btrfs_device *device)
        WARN_ON(!list_empty(&device->post_commit_list));
        rcu_string_free(device->name);
        extent_io_tree_release(&device->alloc_state);
-       bio_put(device->flush_bio);
        btrfs_destroy_dev_zone_info(device);
        kfree(device);
 }
@@ -643,7 +642,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
                        set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
        }
 
-       if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+       if (!bdev_nonrot(bdev))
                fs_devices->rotating = true;
 
        device->bdev = bdev;
@@ -1896,23 +1895,18 @@ static void update_dev_time(const char *device_path)
        path_put(&path);
 }
 
-static int btrfs_rm_dev_item(struct btrfs_device *device)
+static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
+                            struct btrfs_device *device)
 {
        struct btrfs_root *root = device->fs_info->chunk_root;
        int ret;
        struct btrfs_path *path;
        struct btrfs_key key;
-       struct btrfs_trans_handle *trans;
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
-       trans = btrfs_start_transaction(root, 0);
-       if (IS_ERR(trans)) {
-               btrfs_free_path(path);
-               return PTR_ERR(trans);
-       }
        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
        key.type = BTRFS_DEV_ITEM_KEY;
        key.offset = device->devid;
@@ -1923,21 +1917,12 @@ static int btrfs_rm_dev_item(struct btrfs_device *device)
        if (ret) {
                if (ret > 0)
                        ret = -ENOENT;
-               btrfs_abort_transaction(trans, ret);
-               btrfs_end_transaction(trans);
                goto out;
        }
 
        ret = btrfs_del_item(trans, root, path);
-       if (ret) {
-               btrfs_abort_transaction(trans, ret);
-               btrfs_end_transaction(trans);
-       }
-
 out:
        btrfs_free_path(path);
-       if (!ret)
-               ret = btrfs_commit_transaction(trans);
        return ret;
 }
 
@@ -2078,6 +2063,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
                    struct btrfs_dev_lookup_args *args,
                    struct block_device **bdev, fmode_t *mode)
 {
+       struct btrfs_trans_handle *trans;
        struct btrfs_device *device;
        struct btrfs_fs_devices *cur_devices;
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
@@ -2098,7 +2084,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
 
        ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
        if (ret)
-               goto out;
+               return ret;
 
        device = btrfs_find_device(fs_info->fs_devices, args);
        if (!device) {
@@ -2106,27 +2092,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
                        ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
                else
                        ret = -ENOENT;
-               goto out;
+               return ret;
        }
 
        if (btrfs_pinned_by_swapfile(fs_info, device)) {
                btrfs_warn_in_rcu(fs_info,
                  "cannot remove device %s (devid %llu) due to active swapfile",
                                  rcu_str_deref(device->name), device->devid);
-               ret = -ETXTBSY;
-               goto out;
+               return -ETXTBSY;
        }
 
-       if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
-               ret = BTRFS_ERROR_DEV_TGT_REPLACE;
-               goto out;
-       }
+       if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
+               return BTRFS_ERROR_DEV_TGT_REPLACE;
 
        if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
-           fs_info->fs_devices->rw_devices == 1) {
-               ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
-               goto out;
-       }
+           fs_info->fs_devices->rw_devices == 1)
+               return BTRFS_ERROR_DEV_ONLY_WRITABLE;
 
        if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
                mutex_lock(&fs_info->chunk_mutex);
@@ -2139,14 +2120,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
        if (ret)
                goto error_undo;
 
-       /*
-        * TODO: the superblock still includes this device in its num_devices
-        * counter although write_all_supers() is not locked out. This
-        * could give a filesystem state which requires a degraded mount.
-        */
-       ret = btrfs_rm_dev_item(device);
-       if (ret)
+       trans = btrfs_start_transaction(fs_info->chunk_root, 0);
+       if (IS_ERR(trans)) {
+               ret = PTR_ERR(trans);
                goto error_undo;
+       }
+
+       ret = btrfs_rm_dev_item(trans, device);
+       if (ret) {
+               /* Any error in dev item removal is critical */
+               btrfs_crit(fs_info,
+                          "failed to remove device item for devid %llu: %d",
+                          device->devid, ret);
+               btrfs_abort_transaction(trans, ret);
+               btrfs_end_transaction(trans);
+               return ret;
+       }
 
        clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
        btrfs_scrub_cancel_dev(device);
@@ -2229,7 +2218,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
                free_fs_devices(cur_devices);
        }
 
-out:
+       ret = btrfs_commit_transaction(trans);
+
        return ret;
 
 error_undo:
@@ -2240,7 +2230,7 @@ error_undo:
                device->fs_devices->rw_devices++;
                mutex_unlock(&fs_info->chunk_mutex);
        }
-       goto out;
+       return ret;
 }
 
 void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
@@ -2715,7 +2705,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
        atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
 
-       if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+       if (!bdev_nonrot(bdev))
                fs_devices->rotating = true;
 
        orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
@@ -4439,10 +4429,12 @@ static int balance_kthread(void *data)
        struct btrfs_fs_info *fs_info = data;
        int ret = 0;
 
+       sb_start_write(fs_info->sb);
        mutex_lock(&fs_info->balance_mutex);
        if (fs_info->balance_ctl)
                ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
        mutex_unlock(&fs_info->balance_mutex);
+       sb_end_write(fs_info->sb);
 
        return ret;
 }
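
The sb_start_write()/sb_end_write() pair added above gives the balance kthread freeze protection. A hedged, kernel-style sketch of the pattern (not compilable on its own; do_the_work() is a hypothetical placeholder):

/*
 * Any kernel thread that modifies the filesystem on its own behalf must
 * take write access against freezing, or it can race with a superblock
 * that is being frozen.
 */
static int internal_writer(struct btrfs_fs_info *fs_info)
{
	int ret;

	sb_start_write(fs_info->sb);	/* waits while the fs is frozen */
	ret = do_the_work(fs_info);	/* hypothetical worker */
	sb_end_write(fs_info->sb);	/* lets a pending freeze proceed */
	return ret;
}
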
@@ -6956,16 +6948,6 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
        if (!dev)
                return ERR_PTR(-ENOMEM);
 
-       /*
-        * Preallocate a bio that's always going to be used for flushing device
-        * barriers and matches the device lifespan
-        */
-       dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
-       if (!dev->flush_bio) {
-               kfree(dev);
-               return ERR_PTR(-ENOMEM);
-       }
-
        INIT_LIST_HEAD(&dev->dev_list);
        INIT_LIST_HEAD(&dev->dev_alloc_list);
        INIT_LIST_HEAD(&dev->post_commit_list);
index bd297f23d19e7f9fccb9326f64dbb7774ae7f396..b11c563d2025e52d1e1ca2ddffde4df55af08f98 100644 (file)
@@ -121,8 +121,8 @@ struct btrfs_device {
        /* bytes used on the current transaction */
        u64 commit_bytes_used;
 
-       /* for sending down flush barriers */
-       struct bio *flush_bio;
+       /* Bio used for flushing device barriers */
+       struct bio flush_bio;
        struct completion flush_wait;
 
        /* per-device scrub information */
@@ -328,6 +328,9 @@ struct btrfs_fs_devices {
 struct btrfs_bio {
        unsigned int mirror_num;
 
+       /* for direct I/O */
+       u64 file_offset;
+
        /* @device is for stripe IO submission. */
        struct btrfs_device *device;
        u8 *csum;
index 99abf41b89b92f47bd7bd5f52ab9c4e09000e332..85691dc2232fa60d6bd123c7bb7cf0ab8da549c5 100644 (file)
@@ -262,7 +262,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
        inode_inc_iversion(inode);
        inode->i_ctime = current_time(inode);
        ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
-       BUG_ON(ret);
+       if (ret)
+               btrfs_abort_transaction(trans, ret);
 out:
        if (start_trans)
                btrfs_end_transaction(trans);
@@ -403,10 +404,13 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
        struct btrfs_root *root = BTRFS_I(inode)->root;
 
        name = xattr_full_name(handler, name);
-       ret = btrfs_validate_prop(name, value, size);
+       ret = btrfs_validate_prop(BTRFS_I(inode), name, value, size);
        if (ret)
                return ret;
 
+       if (btrfs_ignore_prop(BTRFS_I(inode), name))
+               return 0;
+
        trans = btrfs_start_transaction(root, 2);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
@@ -416,7 +420,8 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
                inode_inc_iversion(inode);
                inode->i_ctime = current_time(inode);
                ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
-               BUG_ON(ret);
+               if (ret)
+                       btrfs_abort_transaction(trans, ret);
        }
 
        btrfs_end_transaction(trans);
index b7b5fac1c779004088ecf29fb4ec69bdd940e213..29b54fd9c128dffdb1bff294d8e41e102df466c4 100644 (file)
@@ -350,7 +350,6 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
        struct btrfs_fs_info *fs_info = device->fs_info;
        struct btrfs_zoned_device_info *zone_info = NULL;
        struct block_device *bdev = device->bdev;
-       struct request_queue *queue = bdev_get_queue(bdev);
        unsigned int max_active_zones;
        unsigned int nactive;
        sector_t nr_sectors;
@@ -410,7 +409,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
        if (!IS_ALIGNED(nr_sectors, zone_sectors))
                zone_info->nr_zones++;
 
-       max_active_zones = queue_max_active_zones(queue);
+       max_active_zones = bdev_max_active_zones(bdev);
        if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) {
                btrfs_err_in_rcu(fs_info,
 "zoned: %s: max active zones %u is too small, need at least %u active zones",
@@ -1801,7 +1800,6 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
 
        map = em->map_lookup;
        /* We only support single profile for now */
-       ASSERT(map->num_stripes == 1);
        device = map->stripes[0].dev;
 
        free_extent_map(em);
@@ -1836,6 +1834,12 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
                goto out_unlock;
        }
 
+       /* No space left */
+       if (block_group->alloc_offset == block_group->zone_capacity) {
+               ret = false;
+               goto out_unlock;
+       }
+
        for (i = 0; i < map->num_stripes; i++) {
                device = map->stripes[i].dev;
                physical = map->stripes[i].physical;
@@ -1843,35 +1847,23 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
                if (device->zone_info->max_active_zones == 0)
                        continue;
 
-               /* No space left */
-               if (block_group->alloc_offset == block_group->zone_capacity) {
-                       ret = false;
-                       goto out_unlock;
-               }
-
                if (!btrfs_dev_set_active_zone(device, physical)) {
                        /* Cannot activate the zone */
                        ret = false;
                        goto out_unlock;
                }
-
-               /* Successfully activated all the zones */
-               if (i == map->num_stripes - 1)
-                       block_group->zone_is_active = 1;
-
-
        }
+
+       /* Successfully activated all the zones */
+       block_group->zone_is_active = 1;
        spin_unlock(&block_group->lock);
 
-       if (block_group->zone_is_active) {
-               /* For the active block group list */
-               btrfs_get_block_group(block_group);
+       /* For the active block group list */
+       btrfs_get_block_group(block_group);
 
-               spin_lock(&fs_info->zone_active_bgs_lock);
-               list_add_tail(&block_group->active_bg_list,
-                             &fs_info->zone_active_bgs);
-               spin_unlock(&fs_info->zone_active_bgs_lock);
-       }
+       spin_lock(&fs_info->zone_active_bgs_lock);
+       list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
+       spin_unlock(&fs_info->zone_active_bgs_lock);
 
        return true;
 
@@ -1976,18 +1968,16 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
 
 bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
 {
+       struct btrfs_fs_info *fs_info = fs_devices->fs_info;
        struct btrfs_device *device;
        bool ret = false;
 
-       if (!btrfs_is_zoned(fs_devices->fs_info))
+       if (!btrfs_is_zoned(fs_info))
                return true;
 
-       /* Non-single profiles are not supported yet */
-       ASSERT((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0);
-
        /* Check if there is a device with active zones left */
-       mutex_lock(&fs_devices->device_list_mutex);
-       list_for_each_entry(device, &fs_devices->devices, dev_list) {
+       mutex_lock(&fs_info->chunk_mutex);
+       list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
                struct btrfs_zoned_device_info *zinfo = device->zone_info;
 
                if (!device->bdev)
@@ -1999,7 +1989,7 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
                        break;
                }
        }
-       mutex_unlock(&fs_devices->device_list_mutex);
+       mutex_unlock(&fs_info->chunk_mutex);
 
        return ret;
 }
index cbf016a7bb5dd393873d555a3d347b9e0fc46d18..6dee76248cb4da37c8a1efe01045e6f9ffeda8fb 100644 (file)
@@ -359,7 +359,7 @@ static inline void btrfs_zoned_data_reloc_lock(struct btrfs_inode *inode)
        struct btrfs_root *root = inode->root;
 
        if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info))
-               btrfs_inode_lock(&inode->vfs_inode, 0);
+               mutex_lock(&root->fs_info->zoned_data_reloc_io_lock);
 }
 
 static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode)
@@ -367,7 +367,7 @@ static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode)
        struct btrfs_root *root = inode->root;
 
        if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info))
-               btrfs_inode_unlock(&inode->vfs_inode, 0);
+               mutex_unlock(&root->fs_info->zoned_data_reloc_io_lock);
 }
 
 #endif
index f256c8aff7bb5fa7db56210c7961ce56aaedd6d9..ca9f3e4ec4b3fb2fcb50e57d613c29333d04aac1 100644 (file)
@@ -57,6 +57,16 @@ static void __cachefiles_unmark_inode_in_use(struct cachefiles_object *object,
        trace_cachefiles_mark_inactive(object, inode);
 }
 
+static void cachefiles_do_unmark_inode_in_use(struct cachefiles_object *object,
+                                             struct dentry *dentry)
+{
+       struct inode *inode = d_backing_inode(dentry);
+
+       inode_lock(inode);
+       __cachefiles_unmark_inode_in_use(object, dentry);
+       inode_unlock(inode);
+}
+
 /*
  * Unmark a backing inode and tell cachefilesd that there's something that can
  * be culled.
@@ -68,9 +78,7 @@ void cachefiles_unmark_inode_in_use(struct cachefiles_object *object,
        struct inode *inode = file_inode(file);
 
        if (inode) {
-               inode_lock(inode);
-               __cachefiles_unmark_inode_in_use(object, file->f_path.dentry);
-               inode_unlock(inode);
+               cachefiles_do_unmark_inode_in_use(object, file->f_path.dentry);
 
                if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) {
                        atomic_long_add(inode->i_blocks, &cache->b_released);
@@ -484,7 +492,7 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)
                                object, d_backing_inode(path.dentry), ret,
                                cachefiles_trace_trunc_error);
                        file = ERR_PTR(ret);
-                       goto out_dput;
+                       goto out_unuse;
                }
        }
 
@@ -494,15 +502,20 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)
                trace_cachefiles_vfs_error(object, d_backing_inode(path.dentry),
                                           PTR_ERR(file),
                                           cachefiles_trace_open_error);
-               goto out_dput;
+               goto out_unuse;
        }
        if (unlikely(!file->f_op->read_iter) ||
            unlikely(!file->f_op->write_iter)) {
                fput(file);
                pr_notice("Cache does not support read_iter and write_iter\n");
                file = ERR_PTR(-EINVAL);
+               goto out_unuse;
        }
 
+       goto out_dput;
+
+out_unuse:
+       cachefiles_do_unmark_inode_in_use(object, path.dentry);
 out_dput:
        dput(path.dentry);
 out:
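
The relabelled exits above form the classic C cleanup ladder: the success path jumps past the undo step, while every failure after the inode was marked in use falls through it before the shared dput(). A standalone sketch of that control flow:

#include <stdio.h>

static int create_tmpfile(int fail_step)
{
	int ret = 0;

	/* step 1: mark the backing inode in use ... */
	if (fail_step == 1) { ret = -1; goto out_unuse; }
	/* step 2: open and validate the file ... */
	if (fail_step == 2) { ret = -1; goto out_unuse; }

	goto out_dput;			/* success: skip the unmark */

out_unuse:
	puts("unmark inode in use");	/* failure paths only */
out_dput:
	puts("dput(dentry)");		/* always runs */
	return ret;
}

int main(void)
{
	create_tmpfile(0);	/* prints only dput(dentry) */
	create_tmpfile(2);	/* prints both lines */
	return 0;
}
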
@@ -590,14 +603,16 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
 check_failed:
        fscache_cookie_lookup_negative(object->cookie);
        cachefiles_unmark_inode_in_use(object, file);
-       if (ret == -ESTALE) {
-               fput(file);
-               dput(dentry);
+       fput(file);
+       dput(dentry);
+       if (ret == -ESTALE)
                return cachefiles_create_file(object);
-       }
+       return false;
+
 error_fput:
        fput(file);
 error:
+       cachefiles_do_unmark_inode_in_use(object, dentry);
        dput(dentry);
        return false;
 }
index 35465109d9c4ea70fdab56993c2339af2532b3d6..00b087c14995a0be7d407efa1f40f620a1acb7f3 100644 (file)
@@ -203,7 +203,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
        if (!buf)
                return false;
        buf->reserved = cpu_to_be32(0);
-       memcpy(buf->data, p, len);
+       memcpy(buf->data, p, volume->vcookie->coherency_len);
 
        ret = cachefiles_inject_write_error();
        if (ret == 0)
index aa25bffd48237afb3a0140f68e9824396be24c7b..b6edcf89a429f6107d4ee015a3c999482c74b0fd 100644 (file)
@@ -85,7 +85,7 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio)
        if (folio_test_dirty(folio)) {
                dout("%p dirty_folio %p idx %lu -- already dirty\n",
                     mapping->host, folio, folio->index);
-               BUG_ON(!folio_get_private(folio));
+               VM_BUG_ON_FOLIO(!folio_test_private(folio), folio);
                return false;
        }
 
@@ -122,7 +122,7 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio)
         * Reference snap context in folio->private.  Also set
         * PagePrivate so that we get invalidate_folio callback.
         */
-       BUG_ON(folio_get_private(folio));
+       VM_BUG_ON_FOLIO(folio_test_private(folio), folio);
        folio_attach_private(folio, snapc);
 
        return ceph_fscache_dirty_folio(mapping, folio);
@@ -150,7 +150,7 @@ static void ceph_invalidate_folio(struct folio *folio, size_t offset,
        }
 
        WARN_ON(!folio_test_locked(folio));
-       if (folio_get_private(folio)) {
+       if (folio_test_private(folio)) {
                dout("%p invalidate_folio idx %lu full dirty page\n",
                     inode, folio->index);
 
@@ -729,8 +729,11 @@ static void writepages_finish(struct ceph_osd_request *req)
 
        /* clean all pages */
        for (i = 0; i < req->r_num_ops; i++) {
-               if (req->r_ops[i].op != CEPH_OSD_OP_WRITE)
+               if (req->r_ops[i].op != CEPH_OSD_OP_WRITE) {
+                       pr_warn("%s incorrect op %d req %p index %d tid %llu\n",
+                               __func__, req->r_ops[i].op, req, i, req->r_tid);
                        break;
+               }
 
                osd_data = osd_req_op_extent_osd_data(req, i);
                BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
index f1ad6884d4dafdfb7403b26646274211e34f91eb..5c14ef04e474247e3d98fbdacfef017c5950d217 100644 (file)
@@ -2274,6 +2274,8 @@ retry:
                        list_for_each_entry(req, &ci->i_unsafe_dirops,
                                            r_unsafe_dir_item) {
                                s = req->r_session;
+                               if (!s)
+                                       continue;
                                if (unlikely(s->s_mds >= max_sessions)) {
                                        spin_unlock(&ci->i_unsafe_lock);
                                        for (i = 0; i < max_sessions; i++) {
@@ -2294,6 +2296,8 @@ retry:
                        list_for_each_entry(req, &ci->i_unsafe_iops,
                                            r_unsafe_target_item) {
                                s = req->r_session;
+                               if (!s)
+                                       continue;
                                if (unlikely(s->s_mds >= max_sessions)) {
                                        spin_unlock(&ci->i_unsafe_lock);
                                        for (i = 0; i < max_sessions; i++) {
@@ -3870,6 +3874,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
        dout("handle_cap_export inode %p ci %p mds%d mseq %d target %d\n",
             inode, ci, mds, mseq, target);
 retry:
+       down_read(&mdsc->snap_rwsem);
        spin_lock(&ci->i_ceph_lock);
        cap = __get_cap_for_mds(ci, mds);
        if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id))
@@ -3933,6 +3938,7 @@ retry:
        }
 
        spin_unlock(&ci->i_ceph_lock);
+       up_read(&mdsc->snap_rwsem);
        mutex_unlock(&session->s_mutex);
 
        /* open target session */
@@ -3958,6 +3964,7 @@ retry:
 
 out_unlock:
        spin_unlock(&ci->i_ceph_lock);
+       up_read(&mdsc->snap_rwsem);
        mutex_unlock(&session->s_mutex);
        if (tsession) {
                mutex_unlock(&tsession->s_mutex);
index 6c9e837aa1d3d183d507d72c21f5a44aa1e54522..8c8226c0feaccce31395d349d2bc266b6497b033 100644 (file)
@@ -629,9 +629,15 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
        iinfo.change_attr = 1;
        ceph_encode_timespec64(&iinfo.btime, &now);
 
-       iinfo.xattr_len = ARRAY_SIZE(xattr_buf);
-       iinfo.xattr_data = xattr_buf;
-       memset(iinfo.xattr_data, 0, iinfo.xattr_len);
+       if (req->r_pagelist) {
+               iinfo.xattr_len = req->r_pagelist->length;
+               iinfo.xattr_data = req->r_pagelist->mapped_tail;
+       } else {
+               /* fake it */
+               iinfo.xattr_len = ARRAY_SIZE(xattr_buf);
+               iinfo.xattr_data = xattr_buf;
+               memset(iinfo.xattr_data, 0, iinfo.xattr_len);
+       }
 
        in.ino = cpu_to_le64(vino.ino);
        in.snapid = cpu_to_le64(CEPH_NOSNAP);
@@ -743,6 +749,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
                err = ceph_security_init_secctx(dentry, mode, &as_ctx);
                if (err < 0)
                        goto out_ctx;
+               /* Async create can't handle more than a page of xattrs */
+               if (as_ctx.pagelist &&
+                   !list_is_singular(&as_ctx.pagelist->head))
+                       try_async = false;
        } else if (!d_in_lookup(dentry)) {
                /* If it's not being looked up, it's negative */
                return -ENOENT;
index fa38c013126d4fcaa89c8284175284b66f3b7fcc..00c3de177dd66f5eb90451558fcfbf9e7a67a240 100644 (file)
@@ -4434,8 +4434,6 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
 
 bool check_session_state(struct ceph_mds_session *s)
 {
-       struct ceph_fs_client *fsc = s->s_mdsc->fsc;
-
        switch (s->s_state) {
        case CEPH_MDS_SESSION_OPEN:
                if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
@@ -4444,10 +4442,6 @@ bool check_session_state(struct ceph_mds_session *s)
                }
                break;
        case CEPH_MDS_SESSION_CLOSING:
-               /* Should never reach this when not force unmounting */
-               WARN_ON_ONCE(s->s_ttl &&
-                            READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);
-               fallthrough;
        case CEPH_MDS_SESSION_NEW:
        case CEPH_MDS_SESSION_RESTARTING:
        case CEPH_MDS_SESSION_CLOSED:
index a47fa44b6d52bd320deba3e4fffa37c303468bcb..2b1a1c029c75ec19376a7250fb806b516e59ad03 100644 (file)
@@ -266,22 +266,24 @@ static void cifs_kill_sb(struct super_block *sb)
         * before we kill the sb.
         */
        if (cifs_sb->root) {
+               for (node = rb_first(root); node; node = rb_next(node)) {
+                       tlink = rb_entry(node, struct tcon_link, tl_rbnode);
+                       tcon = tlink_tcon(tlink);
+                       if (IS_ERR(tcon))
+                               continue;
+                       cfid = &tcon->crfid;
+                       mutex_lock(&cfid->fid_mutex);
+                       if (cfid->dentry) {
+                               dput(cfid->dentry);
+                               cfid->dentry = NULL;
+                       }
+                       mutex_unlock(&cfid->fid_mutex);
+               }
+
+               /* finally release root dentry */
                dput(cifs_sb->root);
                cifs_sb->root = NULL;
        }
-       node = rb_first(root);
-       while (node != NULL) {
-               tlink = rb_entry(node, struct tcon_link, tl_rbnode);
-               tcon = tlink_tcon(tlink);
-               cfid = &tcon->crfid;
-               mutex_lock(&cfid->fid_mutex);
-               if (cfid->dentry) {
-                       dput(cfid->dentry);
-                       cfid->dentry = NULL;
-               }
-               mutex_unlock(&cfid->fid_mutex);
-               node = rb_next(node);
-       }
 
        kill_anon_super(sb);
        cifs_umount(cifs_sb);
@@ -944,7 +946,7 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
        ssize_t rc;
        struct inode *inode = file_inode(iocb->ki_filp);
 
-       if (iocb->ki_filp->f_flags & O_DIRECT)
+       if (iocb->ki_flags & IOCB_DIRECT)
                return cifs_user_readv(iocb, iter);
 
        rc = cifs_revalidate_mapping(inode);
index 15a5c5db038b8230cd36afab81274096f1e21db2..c0542bdcd06bcc80dd61c2034ec36b0d64ea2266 100644 (file)
@@ -153,5 +153,5 @@ extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
 #define SMB3_PRODUCT_BUILD 35
-#define CIFS_VERSION   "2.35"
+#define CIFS_VERSION   "2.36"
 #endif                         /* _CIFSFS_H */
index ee3b7c15e884c25609fb3d964167b6d5da1f1384..42e14f408856d6e6d40f6ecaada3e350f893b4da 100644 (file)
@@ -453,9 +453,7 @@ static int reconnect_target_unlocked(struct TCP_Server_Info *server, struct dfs_
        return rc;
 }
 
-static int
-reconnect_dfs_server(struct TCP_Server_Info *server,
-                    bool mark_smb_session)
+static int reconnect_dfs_server(struct TCP_Server_Info *server)
 {
        int rc = 0;
        const char *refpath = server->current_fullpath + 1;
@@ -479,7 +477,12 @@ reconnect_dfs_server(struct TCP_Server_Info *server,
        if (!cifs_tcp_ses_needs_reconnect(server, num_targets))
                return 0;
 
-       cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session);
+       /*
+        * Unconditionally mark all sessions & tcons for reconnect as we might be connecting to a
+        * different server or share during failover.  This could be improved by
+        * only marking them when we actually connect to a different server or share, though.
+        */
+       cifs_mark_tcp_ses_conns_for_reconnect(server, true);
 
        cifs_abort_connection(server);
 
@@ -531,13 +534,20 @@ int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session)
 {
        /* If the TCP session is not a DFS connection, reconnect to the last target server */
        spin_lock(&cifs_tcp_ses_lock);
-       if (!server->is_dfs_conn || !server->origin_fullpath || !server->leaf_fullpath) {
+       if (!server->is_dfs_conn) {
                spin_unlock(&cifs_tcp_ses_lock);
                return __cifs_reconnect(server, mark_smb_session);
        }
        spin_unlock(&cifs_tcp_ses_lock);
 
-       return reconnect_dfs_server(server, mark_smb_session);
+       mutex_lock(&server->refpath_lock);
+       if (!server->origin_fullpath || !server->leaf_fullpath) {
+               mutex_unlock(&server->refpath_lock);
+               return __cifs_reconnect(server, mark_smb_session);
+       }
+       mutex_unlock(&server->refpath_lock);
+
+       return reconnect_dfs_server(server);
 }
 #else
 int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session)
@@ -1046,7 +1056,7 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
                spin_unlock(&server->req_lock);
                wake_up(&server->request_q);
 
-               trace_smb3_add_credits(server->CurrentMid,
+               trace_smb3_hdr_credits(server->CurrentMid,
                                server->conn_id, server->hostname, scredits,
                                le16_to_cpu(shdr->CreditRequest), in_flight);
                cifs_server_dbg(FYI, "%s: added %u credits total=%d\n",
@@ -3672,9 +3682,11 @@ static void setup_server_referral_paths(struct mount_ctx *mnt_ctx)
 {
        struct TCP_Server_Info *server = mnt_ctx->server;
 
+       mutex_lock(&server->refpath_lock);
        server->origin_fullpath = mnt_ctx->origin_fullpath;
        server->leaf_fullpath = mnt_ctx->leaf_fullpath;
        server->current_fullpath = mnt_ctx->leaf_fullpath;
+       mutex_unlock(&server->refpath_lock);
        mnt_ctx->origin_fullpath = mnt_ctx->leaf_fullpath = NULL;
 }
 
@@ -4465,7 +4477,7 @@ static int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tco
         */
        if (rc && server->current_fullpath != server->origin_fullpath) {
                server->current_fullpath = server->origin_fullpath;
-               cifs_reconnect(tcon->ses->server, true);
+               cifs_signal_cifsd_for_reconnect(server, true);
        }
 
        dfs_cache_free_tgts(tl);
index 30e040da4f096b34480a0e47a85a4f1d2f318dde..956f8e5cf3e7423438b235a99711563c9ccbb205 100644 (file)
@@ -1422,12 +1422,14 @@ static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool
        struct TCP_Server_Info *server = tcon->ses->server;
 
        mutex_lock(&server->refpath_lock);
-       if (strcasecmp(server->leaf_fullpath, server->origin_fullpath))
-               __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, force_refresh);
+       if (server->origin_fullpath) {
+               if (server->leaf_fullpath && strcasecmp(server->leaf_fullpath,
+                                                       server->origin_fullpath))
+                       __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, force_refresh);
+               __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, force_refresh);
+       }
        mutex_unlock(&server->refpath_lock);
 
-       __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, force_refresh);
-
        return 0;
 }
 
@@ -1530,11 +1532,14 @@ static void refresh_mounts(struct cifs_ses **sessions)
                list_del_init(&tcon->ulist);
 
                mutex_lock(&server->refpath_lock);
-               if (strcasecmp(server->leaf_fullpath, server->origin_fullpath))
-                       __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, false);
+               if (server->origin_fullpath) {
+                       if (server->leaf_fullpath && strcasecmp(server->leaf_fullpath,
+                                                               server->origin_fullpath))
+                               __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, false);
+                       __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, false);
+               }
                mutex_unlock(&server->refpath_lock);
 
-               __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, false);
                cifs_put_tcon(tcon);
        }
 }
index 852e54ee82c28279c00e6013b19916f3815a8b70..bbdf3281559c8f53c0e2c9d281f8fd5b4921e11b 100644 (file)
@@ -85,6 +85,9 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len,
        if (rc != 1)
                return -EINVAL;
 
+       if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN)
+               return -EINVAL;
+
        rc = symlink_hash(link_len, link_str, md5_hash);
        if (rc) {
                cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc);
index ebe236b9d9f56e35c1d1b65ad9038421f6b6d894..235aa1b395ebcce0b9ba1bfb42a62909089f3182 100644 (file)
@@ -896,7 +896,7 @@ map_and_check_smb_error(struct mid_q_entry *mid, bool logErr)
                if (class == ERRSRV && code == ERRbaduid) {
                        cifs_dbg(FYI, "Server returned 0x%x, reconnecting session...\n",
                                code);
-                       cifs_reconnect(mid->server, false);
+                       cifs_signal_cifsd_for_reconnect(mid->server, false);
                }
        }
 
index c653beb735b89f9aa0d32fdf13a0e8e771622e65..3fe47a88f47d0d0a373d4cb8ce0c13bb57b6b9d2 100644 (file)
@@ -150,16 +150,18 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
                struct smb2_transform_hdr *thdr =
                        (struct smb2_transform_hdr *)buf;
                struct cifs_ses *ses = NULL;
+               struct cifs_ses *iter;
 
                /* decrypt frame now that it is completely read in */
                spin_lock(&cifs_tcp_ses_lock);
-               list_for_each_entry(ses, &srvr->smb_ses_list, smb_ses_list) {
-                       if (ses->Suid == le64_to_cpu(thdr->SessionId))
+               list_for_each_entry(iter, &srvr->smb_ses_list, smb_ses_list) {
+                       if (iter->Suid == le64_to_cpu(thdr->SessionId)) {
+                               ses = iter;
                                break;
+                       }
                }
                spin_unlock(&cifs_tcp_ses_lock);
-               if (list_entry_is_head(ses, &srvr->smb_ses_list,
-                                      smb_ses_list)) {
+               if (!ses) {
                        cifs_dbg(VFS, "no decryption - session id not found\n");
                        return 1;
                }
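
The lookup fix above avoids a classic list_for_each_entry() pitfall: the loop cursor never becomes NULL (after a full traversal it points into the list head, not at a real entry), so a match has to be recorded in a separate pointer. A standalone analog of the corrected pattern:

#include <stddef.h>
#include <stdio.h>

struct session {
	unsigned long long suid;
	struct session *next;
};

static struct session *find_session(struct session *head, unsigned long long id)
{
	struct session *found = NULL;

	for (struct session *iter = head; iter; iter = iter->next) {
		if (iter->suid == id) {
			found = iter;	/* record the match... */
			break;		/* ...instead of testing the cursor */
		}
	}
	return found;			/* NULL when nothing matched */
}

int main(void)
{
	struct session b = { 2, NULL }, a = { 1, &b };

	printf("%p\n", (void *)find_session(&a, 3));	/* no match: (nil) */
	return 0;
}
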
index db23f5b404bad9706294f6c1279b015250fd7a16..d6aaeff4a30a530864211b79fbda744b87d6b720 100644 (file)
@@ -86,6 +86,9 @@ smb2_add_credits(struct TCP_Server_Info *server,
        if (*val > 65000) {
                *val = 65000; /* Don't get near 64K credits, avoid srv bugs */
                pr_warn_once("server overflowed SMB3 credits\n");
+               trace_smb3_overflow_credits(server->CurrentMid,
+                                           server->conn_id, server->hostname, *val,
+                                           add, server->in_flight);
        }
        server->in_flight--;
        if (server->in_flight == 0 &&
@@ -251,7 +254,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
        in_flight = server->in_flight;
        spin_unlock(&server->req_lock);
 
-       trace_smb3_add_credits(server->CurrentMid,
+       trace_smb3_wait_credits(server->CurrentMid,
                        server->conn_id, server->hostname, scredits, -(credits->value), in_flight);
        cifs_dbg(FYI, "%s: removed %u credits total=%d\n",
                        __func__, credits->value, scredits);
@@ -300,7 +303,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
        spin_unlock(&server->req_lock);
        wake_up(&server->request_q);
 
-       trace_smb3_add_credits(server->CurrentMid,
+       trace_smb3_adj_credits(server->CurrentMid,
                        server->conn_id, server->hostname, scredits,
                        credits->value - new_val, in_flight);
        cifs_dbg(FYI, "%s: adjust added %u credits total=%d\n",
@@ -1855,9 +1858,17 @@ smb2_copychunk_range(const unsigned int xid,
        int chunks_copied = 0;
        bool chunk_sizes_updated = false;
        ssize_t bytes_written, total_bytes_written = 0;
+       struct inode *inode;
 
        pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL);
 
+       /*
+        * We need to flush all unwritten data before we can send the
+        * copychunk ioctl to the server.
+        */
+       inode = d_inode(trgtfile->dentry);
+       filemap_write_and_wait(inode->i_mapping);
+
        if (pcchunk == NULL)
                return -ENOMEM;
 
@@ -2492,7 +2503,7 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
                spin_unlock(&server->req_lock);
                wake_up(&server->request_q);
 
-               trace_smb3_add_credits(server->CurrentMid,
+               trace_smb3_pend_credits(server->CurrentMid,
                                server->conn_id, server->hostname, scredits,
                                le16_to_cpu(shdr->CreditRequest), in_flight);
                cifs_dbg(FYI, "%s: status pending add %u credits total=%d\n",
index 6cecf302dcfdc2ad0fe15c3affc34d125e38bf1d..bc279616c513a86f14c002b9b29c9e34069820b9 100644 (file)
@@ -1006,6 +1006,13 @@ DEFINE_SMB3_CREDIT_EVENT(credit_timeout);
 DEFINE_SMB3_CREDIT_EVENT(insufficient_credits);
 DEFINE_SMB3_CREDIT_EVENT(too_many_credits);
 DEFINE_SMB3_CREDIT_EVENT(add_credits);
+DEFINE_SMB3_CREDIT_EVENT(adj_credits);
+DEFINE_SMB3_CREDIT_EVENT(hdr_credits);
+DEFINE_SMB3_CREDIT_EVENT(nblk_credits);
+DEFINE_SMB3_CREDIT_EVENT(pend_credits);
+DEFINE_SMB3_CREDIT_EVENT(wait_credits);
+DEFINE_SMB3_CREDIT_EVENT(waitff_credits);
+DEFINE_SMB3_CREDIT_EVENT(overflow_credits);
 DEFINE_SMB3_CREDIT_EVENT(set_credits);
 
 #endif /* _CIFS_TRACE_H */
index eeb1a699bd6f2360198f72eb7642887261feded1..c667e6ddfe2f7c52b4673a12295875f4d83ccb25 100644 (file)
@@ -464,13 +464,12 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
                return -EIO;
        }
 
-       tr_hdr = kmalloc(sizeof(*tr_hdr), GFP_NOFS);
+       tr_hdr = kzalloc(sizeof(*tr_hdr), GFP_NOFS);
        if (!tr_hdr)
                return -ENOMEM;
 
        memset(&cur_rqst[0], 0, sizeof(cur_rqst));
        memset(&iov, 0, sizeof(iov));
-       memset(tr_hdr, 0, sizeof(*tr_hdr));
 
        iov.iov_base = tr_hdr;
        iov.iov_len = sizeof(*tr_hdr);
@@ -542,7 +541,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
                in_flight = server->in_flight;
                spin_unlock(&server->req_lock);
 
-               trace_smb3_add_credits(server->CurrentMid,
+               trace_smb3_nblk_credits(server->CurrentMid,
                                server->conn_id, server->hostname, scredits, -1, in_flight);
                cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
                                __func__, 1, scredits);
@@ -648,7 +647,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
                        in_flight = server->in_flight;
                        spin_unlock(&server->req_lock);
 
-                       trace_smb3_add_credits(server->CurrentMid,
+                       trace_smb3_waitff_credits(server->CurrentMid,
                                        server->conn_id, server->hostname, scredits,
                                        -(num_credits), in_flight);
                        cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
index aef06e607b4054073f47b8cc4f8f274f903fd1a2..840752006f601aab0c23924425da77ea0e8ae39f 100644 (file)
@@ -1115,11 +1115,10 @@ static inline int drop_refcount(struct dio *dio)
  * individual fields and will generate much worse code. This is important
  * for the whole file.
  */
-static inline ssize_t
-do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
-                     struct block_device *bdev, struct iov_iter *iter,
-                     get_block_t get_block, dio_iodone_t end_io,
-                     dio_submit_t submit_io, int flags)
+ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
+               struct block_device *bdev, struct iov_iter *iter,
+               get_block_t get_block, dio_iodone_t end_io,
+               dio_submit_t submit_io, int flags)
 {
        unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
        unsigned blkbits = i_blkbits;
@@ -1334,29 +1333,6 @@ fail_dio:
        kmem_cache_free(dio_cache, dio);
        return retval;
 }
-
-ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
-                            struct block_device *bdev, struct iov_iter *iter,
-                            get_block_t get_block,
-                            dio_iodone_t end_io, dio_submit_t submit_io,
-                            int flags)
-{
-       /*
-        * The block device state is needed in the end to finally
-        * submit everything.  Since it's likely to be cache cold
-        * prefetch it here as first thing to hide some of the
-        * latency.
-        *
-        * Attempt to prefetch the pieces we likely need later.
-        */
-       prefetch(&bdev->bd_disk->part_tbl);
-       prefetch(bdev->bd_disk->queue);
-       prefetch((char *)bdev->bd_disk->queue + SMP_CACHE_BYTES);
-
-       return do_blockdev_direct_IO(iocb, inode, bdev, iter, get_block,
-                                    end_io, submit_io, flags);
-}
-
 EXPORT_SYMBOL(__blockdev_direct_IO);
 
 static __init int dio_init(void)
index 0ed880f42525b11b28ed93cd8dd38ceee464e373..e6dea6dfca161398c9e38324fc605e3a757ddd0f 100644 (file)
@@ -1066,12 +1066,9 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
 
        /* wake up the caller thread for sync decompression */
        if (sync) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&io->u.wait.lock, flags);
                if (!atomic_add_return(bios, &io->pending_bios))
-                       wake_up_locked(&io->u.wait);
-               spin_unlock_irqrestore(&io->u.wait.lock, flags);
+                       complete(&io->u.done);
+
                return;
        }
 
@@ -1217,7 +1214,7 @@ jobqueue_init(struct super_block *sb,
        } else {
 fg_out:
                q = fgq;
-               init_waitqueue_head(&fgq->u.wait);
+               init_completion(&fgq->u.done);
                atomic_set(&fgq->pending_bios, 0);
        }
        q->sb = sb;
@@ -1419,8 +1416,7 @@ static void z_erofs_runqueue(struct super_block *sb,
                return;
 
        /* wait until all bios are completed */
-       io_wait_event(io[JQ_SUBMIT].u.wait,
-                     !atomic_read(&io[JQ_SUBMIT].pending_bios));
+       wait_for_completion_io(&io[JQ_SUBMIT].u.done);
 
        /* handle synchronous decompress queue in the caller context */
        z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool);
index e043216b545f19ecb4b2d250b1e1efb2ca720823..800b11c53f5749ebcd3634a736b242b9fe887e9a 100644 (file)
@@ -97,7 +97,7 @@ struct z_erofs_decompressqueue {
        z_erofs_next_pcluster_t head;
 
        union {
-               wait_queue_head_t wait;
+               struct completion done;
                struct work_struct work;
        } u;
 };
index 2f513005923661d4962817b7b6531f1b6548b7da..20d4e47f57ab2e27310f5da64df61eae95d3c8b3 100644 (file)
@@ -351,21 +351,20 @@ out:
 
 static int exfat_ioctl_fitrim(struct inode *inode, unsigned long arg)
 {
-       struct request_queue *q = bdev_get_queue(inode->i_sb->s_bdev);
        struct fstrim_range range;
        int ret = 0;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(inode->i_sb->s_bdev))
                return -EOPNOTSUPP;
 
        if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range)))
                return -EFAULT;
 
        range.minlen = max_t(unsigned int, range.minlen,
-                               q->limits.discard_granularity);
+                               bdev_discard_granularity(inode->i_sb->s_bdev));
 
        ret = exfat_trim_fs(inode, &range);
        if (ret < 0)
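
The fstrim path above is driven by the FITRIM ioctl; with this change it reports EOPNOTSUPP when bdev_max_discard_sectors() is zero and clamps minlen to bdev_discard_granularity(). A minimal userspace invocation (the mount point is an assumption):

#include <errno.h>
#include <fcntl.h>
#include <linux/fs.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	struct fstrim_range range = { .start = 0, .len = ~0ULL, .minlen = 0 };
	int fd = open("/mnt", O_RDONLY);	/* any file or dir on the fs */

	if (fd < 0)
		return 1;
	if (ioctl(fd, FITRIM, &range) == -1)
		fprintf(stderr, "FITRIM: %s\n", strerror(errno));
	else
		printf("trimmed %llu bytes\n", (unsigned long long)range.len);
	close(fd);
	return 0;
}
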
index 8ca21e7917d16a8806c4debb09b1a4e2197a4ac8..be0788ecaf20e894bf00fb7373e3e83f4a09a41e 100644 (file)
@@ -627,13 +627,9 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
        if (opts->allow_utime == (unsigned short)-1)
                opts->allow_utime = ~opts->fs_dmask & 0022;
 
-       if (opts->discard) {
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
-
-               if (!blk_queue_discard(q)) {
-                       exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard");
-                       opts->discard = 0;
-               }
+       if (opts->discard && !bdev_max_discard_sectors(sb->s_bdev)) {
+               exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard");
+               opts->discard = 0;
        }
 
        sb->s_flags |= SB_NODIRATIME;
index 3f87cca49f0ce31bd0dec08893399235b8289585..a743b1e3b89ec2d58e8f427ba345e55801a62a41 100644 (file)
@@ -2273,6 +2273,10 @@ static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
  * Structure of a directory entry
  */
 #define EXT4_NAME_LEN 255
+/*
+ * Base length of the ext4 directory entry, excluding the name
+ */
+#define EXT4_BASE_DIR_LEN (sizeof(struct ext4_dir_entry_2) - EXT4_NAME_LEN)
 
 struct ext4_dir_entry {
        __le32  inode;                  /* Inode number */
@@ -3032,7 +3036,7 @@ extern int ext4_inode_attach_jinode(struct inode *inode);
 extern int ext4_can_truncate(struct inode *inode);
 extern int ext4_truncate(struct inode *);
 extern int ext4_break_layouts(struct inode *);
-extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
+extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
 extern void ext4_set_inode_flags(struct inode *, bool init);
 extern int ext4_alloc_da_blocks(struct inode *inode);
 extern void ext4_set_aops(struct inode *inode);
@@ -3064,6 +3068,7 @@ int ext4_fileattr_set(struct user_namespace *mnt_userns,
                      struct dentry *dentry, struct fileattr *fa);
 int ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa);
 extern void ext4_reset_inode_seed(struct inode *inode);
+int ext4_update_overhead(struct super_block *sb);
 
 /* migrate.c */
 extern int ext4_ext_migrate(struct inode *);
index 0d98cf402282cbb5f361ef92a4a19598be348ade..e473fde6b64b4e80c32ed1fafbf5b6187a5ba3fa 100644 (file)
@@ -4500,9 +4500,9 @@ retry:
        return ret > 0 ? ret2 : ret;
 }
 
-static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
+static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len);
 
-static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
+static int ext4_insert_range(struct file *file, loff_t offset, loff_t len);
 
 static long ext4_zero_range(struct file *file, loff_t offset,
                            loff_t len, int mode)
@@ -4574,6 +4574,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        /* Wait all existing dio workers, newcomers will block on i_rwsem */
        inode_dio_wait(inode);
 
+       ret = file_modified(file);
+       if (ret)
+               goto out_mutex;
+
        /* Preallocate the range including the unaligned edges */
        if (partial_begin || partial_end) {
                ret = ext4_alloc_file_blocks(file,
@@ -4690,7 +4694,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                return -EOPNOTSUPP;
 
        if (mode & FALLOC_FL_PUNCH_HOLE) {
-               ret = ext4_punch_hole(inode, offset, len);
+               ret = ext4_punch_hole(file, offset, len);
                goto exit;
        }
 
@@ -4699,12 +4703,12 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                goto exit;
 
        if (mode & FALLOC_FL_COLLAPSE_RANGE) {
-               ret = ext4_collapse_range(inode, offset, len);
+               ret = ext4_collapse_range(file, offset, len);
                goto exit;
        }
 
        if (mode & FALLOC_FL_INSERT_RANGE) {
-               ret = ext4_insert_range(inode, offset, len);
+               ret = ext4_insert_range(file, offset, len);
                goto exit;
        }
 
@@ -4740,6 +4744,10 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        /* Wait all existing dio workers, newcomers will block on i_rwsem */
        inode_dio_wait(inode);
 
+       ret = file_modified(file);
+       if (ret)
+               goto out;
+
        ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
        if (ret)
                goto out;
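
The file_modified() calls added throughout these fallocate paths strip the setuid/setgid bits and update the timestamps before any blocks change, matching what a plain write(2) does. A rough userspace probe (run as an unprivileged user, since CAP_FSETID preserves the bits; the path is an assumption):

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/tmp/suid-demo";
	struct stat st;
	int fd = open(path, O_CREAT | O_RDWR, 0755);

	if (fd < 0)
		return 1;
	fchmod(fd, 04755);			/* set the setuid bit */
	posix_fallocate(fd, 0, 4096);		/* write-like operation */
	fstat(fd, &st);
	printf("setuid still set: %s\n", (st.st_mode & S_ISUID) ? "yes" : "no");
	close(fd);
	unlink(path);
	return 0;
}
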
@@ -5241,8 +5249,9 @@ out:
  * This implements fallocate's collapse-range functionality for ext4.
  * Returns: 0 on success and non-zero on error.
  */
-static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
 {
+       struct inode *inode = file_inode(file);
        struct super_block *sb = inode->i_sb;
        struct address_space *mapping = inode->i_mapping;
        ext4_lblk_t punch_start, punch_stop;
@@ -5294,6 +5303,10 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        /* Wait for existing dio to complete */
        inode_dio_wait(inode);
 
+       ret = file_modified(file);
+       if (ret)
+               goto out_mutex;
+
        /*
         * Prevent page faults from reinstantiating pages we have released from
         * page cache.
@@ -5387,8 +5400,9 @@ out_mutex:
  * by len bytes.
  * Returns 0 on success, error otherwise.
  */
-static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
 {
+       struct inode *inode = file_inode(file);
        struct super_block *sb = inode->i_sb;
        struct address_space *mapping = inode->i_mapping;
        handle_t *handle;
@@ -5445,6 +5459,10 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
        /* Wait for existing dio to complete */
        inode_dio_wait(inode);
 
+       ret = file_modified(file);
+       if (ret)
+               goto out_mutex;
+
        /*
         * Prevent page faults from reinstantiating pages we have released from
         * page cache.
index 13740f2d0e6109a88bae9f83c0f1ff08bad99bce..646ece9b3455ffc04007f330974e3f2284e01bc1 100644 (file)
@@ -3953,12 +3953,14 @@ int ext4_break_layouts(struct inode *inode)
  * Returns: 0 on success or negative on failure
  */
 
-int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
+int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 {
+       struct inode *inode = file_inode(file);
        struct super_block *sb = inode->i_sb;
        ext4_lblk_t first_block, stop_block;
        struct address_space *mapping = inode->i_mapping;
-       loff_t first_block_offset, last_block_offset;
+       loff_t first_block_offset, last_block_offset, max_length;
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        handle_t *handle;
        unsigned int credits;
        int ret = 0, ret2 = 0;
@@ -4001,6 +4003,14 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
                   offset;
        }
 
+       /*
+        * For punch hole, offset + length must stay within one block before
+        * the last allowed range. Adjust the length if it goes beyond that limit.
+        */
+       max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize;
+       if (offset + length > max_length)
+               length = max_length - offset;
+
        if (offset & (sb->s_blocksize - 1) ||
            (offset + length) & (sb->s_blocksize - 1)) {
                /*
@@ -4016,6 +4026,10 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        /* Wait all existing dio workers, newcomers will block on i_rwsem */
        inode_dio_wait(inode);
 
+       ret = file_modified(file);
+       if (ret)
+               goto out_mutex;
+
        /*
         * Prevent page faults from reinstantiating pages we have released from
         * page cache.
index 992229ca2d83036f9f388cd8b14874949949f086..4d1d2326eee9a381ef095aea94d8f385ad64d262 100644 (file)
@@ -1044,7 +1044,6 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
        __u32 flags = 0;
        unsigned int flush_flags = 0;
        struct super_block *sb = file_inode(filp)->i_sb;
-       struct request_queue *q;
 
        if (copy_from_user(&flags, (__u32 __user *)arg,
                                sizeof(__u32)))
@@ -1065,10 +1064,8 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
        if (flags & ~EXT4_IOC_CHECKPOINT_FLAG_VALID)
                return -EINVAL;
 
-       q = bdev_get_queue(EXT4_SB(sb)->s_journal->j_dev);
-       if (!q)
-               return -ENXIO;
-       if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
+       if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
+           !bdev_max_discard_sectors(EXT4_SB(sb)->s_journal->j_dev))
                return -EOPNOTSUPP;
 
        if (flags & EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN)
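
This hunk is one instance of a tree-wide conversion: the old bdev_get_queue() plus blk_queue_discard() probe (and its -ENXIO check for a NULL queue) collapses into one helper, since a device supports discard exactly when its max_discard_sectors queue limit is non-zero. The equivalence, as a sketch (supports_discard() is an invented name):

    /* pre-5.19: q = bdev_get_queue(bdev); if (!q) ...; blk_queue_discard(q) */
    static bool supports_discard(struct block_device *bdev)
    {
            return bdev_max_discard_sectors(bdev) != 0;
    }
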
@@ -1393,14 +1390,13 @@ resizefs_out:
 
        case FITRIM:
        {
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
                struct fstrim_range range;
                int ret = 0;
 
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
 
-               if (!blk_queue_discard(q))
+               if (!bdev_max_discard_sectors(sb->s_bdev))
                        return -EOPNOTSUPP;
 
                /*
@@ -1652,3 +1648,19 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
 }
 #endif
+
+static void set_overhead(struct ext4_super_block *es, const void *arg)
+{
+       es->s_overhead_clusters = cpu_to_le32(*((unsigned long *) arg));
+}
+
+int ext4_update_overhead(struct super_block *sb)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+       if (sb_rdonly(sb) || sbi->s_overhead == 0 ||
+           sbi->s_overhead == le32_to_cpu(sbi->s_es->s_overhead_clusters))
+               return 0;
+
+       return ext4_update_superblocks_fn(sb, set_overhead, &sbi->s_overhead);
+}
index 252c168454c7fcfd83a89a7207351190395dba86..ea653d19f9ec76a1996770bade735713b5989ecc 100644 (file)
@@ -3498,7 +3498,7 @@ int ext4_mb_init(struct super_block *sb)
                spin_lock_init(&lg->lg_prealloc_lock);
        }
 
-       if (blk_queue_nonrot(bdev_get_queue(sb->s_bdev)))
+       if (bdev_nonrot(sb->s_bdev))
                sbi->s_mb_max_linear_groups = 0;
        else
                sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT;
@@ -3629,7 +3629,7 @@ static inline int ext4_issue_discard(struct super_block *sb,
                return __blkdev_issue_discard(sb->s_bdev,
                        (sector_t)discard_block << (sb->s_blocksize_bits - 9),
                        (sector_t)count << (sb->s_blocksize_bits - 9),
-                       GFP_NOFS, 0, biop);
+                       GFP_NOFS, biop);
        } else
                return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
 }
@@ -6455,7 +6455,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
  */
 int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 {
-       struct request_queue *q = bdev_get_queue(sb->s_bdev);
+       unsigned int discard_granularity = bdev_discard_granularity(sb->s_bdev);
        struct ext4_group_info *grp;
        ext4_group_t group, first_group, last_group;
        ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
@@ -6475,9 +6475,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
            range->len < sb->s_blocksize)
                return -EINVAL;
        /* No point to try to trim less than discard granularity */
-       if (range->minlen < q->limits.discard_granularity) {
+       if (range->minlen < discard_granularity) {
                minlen = EXT4_NUM_B2C(EXT4_SB(sb),
-                       q->limits.discard_granularity >> sb->s_blocksize_bits);
+                               discard_granularity >> sb->s_blocksize_bits);
                if (minlen > EXT4_CLUSTERS_PER_GROUP(sb))
                        goto out;
        }
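
Same conversion here, plus a bytes-to-clusters step. Worked numbers, assuming a 4 KiB block size and a 1 MiB discard granularity:

    /* discard_granularity = 1048576, s_blocksize_bits = 12:
     * 1048576 >> 12 = 256 blocks, so
     * minlen = EXT4_NUM_B2C(sbi, 256) clusters;
     * if that exceeds EXT4_CLUSTERS_PER_GROUP(sb), no group can satisfy
     * the request and ext4_trim_fs() bails out early. */
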
index e37da8d5cd0c1ef779a5bcb92e9c93740801b743..767b4bfe39c38974fb90767ad2bbcf3d0638d345 100644 (file)
@@ -1466,10 +1466,10 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
 
        de = (struct ext4_dir_entry_2 *)search_buf;
        dlimit = search_buf + buf_size;
-       while ((char *) de < dlimit) {
+       while ((char *) de < dlimit - EXT4_BASE_DIR_LEN) {
                /* this code is executed quadratically often */
                /* do minimal checking `by hand' */
-               if ((char *) de + de->name_len <= dlimit &&
+               if (de->name + de->name_len <= dlimit &&
                    ext4_match(dir, fname, de)) {
                        /* found a match - just to be sure, do
                         * a full check */
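
The old bound ignored the fixed-size header of struct ext4_dir_entry_2, so de->name_len itself could be read from past the end of the buffer. A restatement of the invariant the fix enforces; entry_fits() is an invented name, EXT4_BASE_DIR_LEN is the size of the fixed part of the entry:

    static bool entry_fits(const struct ext4_dir_entry_2 *de, const char *dlimit)
    {
            /* the fixed header must lie fully inside the buffer ... */
            if ((const char *)de >= dlimit - EXT4_BASE_DIR_LEN)
                    return false;
            /* ... and so must the (unterminated) name */
            return de->name + de->name_len <= dlimit;
    }
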
index 495ce59fb4ad7781bc44bd8ca0ca5649a63915cd..14695e2b5042ba5e6dbb594ecff5238899c8d955 100644 (file)
@@ -134,8 +134,10 @@ static void ext4_finish_bio(struct bio *bio)
                                continue;
                        }
                        clear_buffer_async_write(bh);
-                       if (bio->bi_status)
+                       if (bio->bi_status) {
+                               set_buffer_write_io_error(bh);
                                buffer_io_error(bh);
+                       }
                } while ((bh = bh->b_this_page) != head);
                spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
                if (!under_io) {
index 81749eaddf4c1212ee3650a3bcf021fbcd532826..6900da973ce2872739571d512b12d21e98d1ebb9 100644 (file)
@@ -1199,20 +1199,25 @@ static void ext4_put_super(struct super_block *sb)
        int aborted = 0;
        int i, err;
 
-       ext4_unregister_li_request(sb);
-       ext4_quota_off_umount(sb);
-
-       flush_work(&sbi->s_error_work);
-       destroy_workqueue(sbi->rsv_conversion_wq);
-       ext4_release_orphan_info(sb);
-
        /*
         * Unregister sysfs before destroying jbd2 journal.
         * Since we could still access attr_journal_task attribute via sysfs
         * path which could have sbi->s_journal->j_task as NULL
+        * Also unregister sysfs before flushing sbi->s_error_work: a user
+        * may read /proc/fs/ext4/xx/mb_groups during umount, and if reading
+        * the metadata fails verification, error work gets queued;
+        * flush_stashed_error_work then calls start_this_handle, which may
+        * trigger a BUG_ON.
         */
        ext4_unregister_sysfs(sb);
 
+       ext4_unregister_li_request(sb);
+       ext4_quota_off_umount(sb);
+
+       flush_work(&sbi->s_error_work);
+       destroy_workqueue(sbi->rsv_conversion_wq);
+       ext4_release_orphan_info(sb);
+
        if (sbi->s_journal) {
                aborted = is_journal_aborted(sbi->s_journal);
                err = jbd2_journal_destroy(sbi->s_journal);
@@ -4172,9 +4177,11 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp,
        ext4_fsblk_t            first_block, last_block, b;
        ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
        int                     s, j, count = 0;
+       int                     has_super = ext4_bg_has_super(sb, grp);
 
        if (!ext4_has_feature_bigalloc(sb))
-               return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
+               return (has_super + ext4_bg_num_gdb(sb, grp) +
+                       (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
                        sbi->s_itb_per_group + 2);
 
        first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
@@ -5282,9 +5289,18 @@ no_journal:
         * Get the # of file system overhead blocks from the
         * superblock if present.
         */
-       if (es->s_overhead_clusters)
-               sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
-       else {
+       sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
+       /* ignore the precalculated value if it is ridiculous */
+       if (sbi->s_overhead > ext4_blocks_count(es))
+               sbi->s_overhead = 0;
+       /*
+        * If the bigalloc feature is not enabled, recalculating the
+        * overhead doesn't take long, so we might as well just redo
+        * it to make sure we are using the correct value.
+        */
+       if (!ext4_has_feature_bigalloc(sb))
+               sbi->s_overhead = 0;
+       if (sbi->s_overhead == 0) {
                err = ext4_calculate_overhead(sb);
                if (err)
                        goto failed_mount_wq;
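
Net effect: the on-disk s_overhead_clusters is trusted only when bigalloc is enabled and the stored value is plausible; otherwise it is recomputed. Condensed into a predicate (trust_stored_overhead() is an invented name):

    static bool trust_stored_overhead(struct super_block *sb)
    {
            struct ext4_sb_info *sbi = EXT4_SB(sb);
            __u32 overhead = le32_to_cpu(sbi->s_es->s_overhead_clusters);

            return ext4_has_feature_bigalloc(sb) && overhead != 0 &&
                   overhead <= ext4_blocks_count(sbi->s_es);
    }
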
@@ -5458,13 +5474,9 @@ no_journal:
                        goto failed_mount9;
        }
 
-       if (test_opt(sb, DISCARD)) {
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
-               if (!blk_queue_discard(q))
-                       ext4_msg(sb, KERN_WARNING,
-                                "mounting with \"discard\" option, but "
-                                "the device does not support discard");
-       }
+       if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
+               ext4_msg(sb, KERN_WARNING,
+                        "mounting with \"discard\" option, but the device does not support discard");
 
        if (es->s_error_count)
                mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
@@ -5602,6 +5614,8 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
                ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
                         "Quota mode: %s.", descr, ext4_quota_mode(sb));
 
+       /* Update the s_overhead_clusters if necessary */
+       ext4_update_overhead(sb);
        return 0;
 
 free_sbi:
index f5366feea82dcfce0e33653e4001109b78755b7e..909085a78f9c3f0535d00e8c0bca06cf844d35fb 100644 (file)
@@ -98,9 +98,9 @@ repeat:
        }
 
        if (unlikely(!PageUptodate(page))) {
-               if (page->index == sbi->metapage_eio_ofs &&
-                       sbi->metapage_eio_cnt++ == MAX_RETRY_META_PAGE_EIO) {
-                       set_ckpt_flags(sbi, CP_ERROR_FLAG);
+               if (page->index == sbi->metapage_eio_ofs) {
+                       if (sbi->metapage_eio_cnt++ == MAX_RETRY_META_PAGE_EIO)
+                               set_ckpt_flags(sbi, CP_ERROR_FLAG);
                } else {
                        sbi->metapage_eio_ofs = page->index;
                        sbi->metapage_eio_cnt = 0;
index 8e0c2e773c8d92f460622673a5731e66554f19dc..9a1a526f20920bb7dd24fca86724c4c4ecb19fb5 100644 (file)
@@ -388,11 +388,23 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
        return 0;
 }
 
-static void __attach_io_flag(struct f2fs_io_info *fio, unsigned int io_flag)
+static unsigned int f2fs_io_flags(struct f2fs_io_info *fio)
 {
        unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
-       unsigned int fua_flag = io_flag & temp_mask;
-       unsigned int meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
+       unsigned int fua_flag, meta_flag, io_flag;
+       unsigned int op_flags = 0;
+
+       if (fio->op != REQ_OP_WRITE)
+               return 0;
+       if (fio->type == DATA)
+               io_flag = fio->sbi->data_io_flag;
+       else if (fio->type == NODE)
+               io_flag = fio->sbi->node_io_flag;
+       else
+               return 0;
+
+       fua_flag = io_flag & temp_mask;
+       meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
 
        /*
         * data/node io flag bits per temp:
@@ -401,9 +413,10 @@ static void __attach_io_flag(struct f2fs_io_info *fio, unsigned int io_flag)
         * Cold | Warm | Hot | Cold | Warm | Hot |
         */
        if ((1 << fio->temp) & meta_flag)
-               fio->op_flags |= REQ_META;
+               op_flags |= REQ_META;
        if ((1 << fio->temp) & fua_flag)
-               fio->op_flags |= REQ_FUA;
+               op_flags |= REQ_FUA;
+       return op_flags;
 }
 
 static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
@@ -413,14 +426,10 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
        sector_t sector;
        struct bio *bio;
 
-       if (fio->type == DATA)
-               __attach_io_flag(fio, sbi->data_io_flag);
-       else if (fio->type == NODE)
-               __attach_io_flag(fio, sbi->node_io_flag);
-
        bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
-       bio = bio_alloc_bioset(bdev, npages, fio->op | fio->op_flags, GFP_NOIO,
-                              &f2fs_bioset);
+       bio = bio_alloc_bioset(bdev, npages,
+                               fio->op | fio->op_flags | f2fs_io_flags(fio),
+                               GFP_NOIO, &f2fs_bioset);
        bio->bi_iter.bi_sector = sector;
        if (is_read_io(fio->op)) {
                bio->bi_end_io = f2fs_read_end_io;
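
f2fs_io_flags() turns the old in-place mutation of fio->op_flags into a pure function folded into bio allocation. Per the bit-layout comment above, the low NR_TEMP_TYPE bits of data_io_flag/node_io_flag select REQ_FUA per temperature and the next NR_TEMP_TYPE bits select REQ_META. A decoding sketch, assuming NR_TEMP_TYPE == 3 with HOT = 0, WARM = 1, COLD = 2:

    /* Example: io_flag = 0b001010 -> WARM writes get REQ_FUA (bit 1 of the
     * low group), HOT writes get REQ_META (bit 0 of the high group). */
    static unsigned int decode_io_flags(unsigned int io_flag, unsigned int temp)
    {
            unsigned int mask = (1U << NR_TEMP_TYPE) - 1;
            unsigned int op_flags = 0;

            if ((1U << temp) & (io_flag & mask))
                    op_flags |= REQ_FUA;
            if ((1U << temp) & ((io_flag >> NR_TEMP_TYPE) & mask))
                    op_flags |= REQ_META;
            return op_flags;
    }
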
index cd1e65bcf0b043d6e892f04522aa97556e8c62f9..2b2b3c87e45e05c5d00cb2d52acf33ad2dc925bd 100644 (file)
@@ -154,7 +154,6 @@ struct f2fs_mount_info {
        int s_jquota_fmt;                       /* Format of quota to use */
 #endif
        /* For which write hints are passed down to block layer */
-       int whint_mode;
        int alloc_mode;                 /* segment allocation policy */
        int fsync_mode;                 /* fsync policy */
        int fs_mode;                    /* fs mode: LFS or ADAPTIVE */
@@ -1333,12 +1332,6 @@ enum {
        FS_MODE_FRAGMENT_BLK,           /* block fragmentation mode */
 };
 
-enum {
-       WHINT_MODE_OFF,         /* not pass down write hints */
-       WHINT_MODE_USER,        /* try to pass down hints given by users */
-       WHINT_MODE_FS,          /* pass down hints with F2FS policy */
-};
-
 enum {
        ALLOC_MODE_DEFAULT,     /* stay default */
        ALLOC_MODE_REUSE,       /* reuse segments as much as possible */
@@ -3657,8 +3650,6 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi);
 int __init f2fs_create_segment_manager_caches(void);
 void f2fs_destroy_segment_manager_caches(void);
 int f2fs_rw_hint_to_seg_type(enum rw_hint hint);
-enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
-                       enum page_type type, enum temp_type temp);
 unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
                        unsigned int segno);
 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
@@ -4381,8 +4372,7 @@ static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi)
 
 static inline bool f2fs_bdev_support_discard(struct block_device *bdev)
 {
-       return blk_queue_discard(bdev_get_queue(bdev)) ||
-              bdev_is_zoned(bdev);
+       return bdev_max_discard_sectors(bdev) || bdev_is_zoned(bdev);
 }
 
 static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi)
index 5b89af0f27f053265672a30e595ad4db26a2905c..35b6c720c2bc155211ea024ee1c05a4175350e1d 100644 (file)
@@ -2285,7 +2285,6 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
        struct super_block *sb = inode->i_sb;
-       struct request_queue *q = bdev_get_queue(sb->s_bdev);
        struct fstrim_range range;
        int ret;
 
@@ -2304,7 +2303,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
                return ret;
 
        range.minlen = max((unsigned int)range.minlen,
-                               q->limits.discard_granularity);
+                          bdev_discard_granularity(sb->s_bdev));
        ret = f2fs_trim_fs(F2FS_SB(sb), &range);
        mnt_drop_write_file(filp);
        if (ret < 0)
@@ -3686,18 +3685,18 @@ out:
 static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
                pgoff_t off, block_t block, block_t len, u32 flags)
 {
-       struct request_queue *q = bdev_get_queue(bdev);
        sector_t sector = SECTOR_FROM_BLOCK(block);
        sector_t nr_sects = SECTOR_FROM_BLOCK(len);
        int ret = 0;
 
-       if (!q)
-               return -ENXIO;
-
-       if (flags & F2FS_TRIM_FILE_DISCARD)
-               ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
-                                               blk_queue_secure_erase(q) ?
-                                               BLKDEV_DISCARD_SECURE : 0);
+       if (flags & F2FS_TRIM_FILE_DISCARD) {
+               if (bdev_max_secure_erase_sectors(bdev))
+                       ret = blkdev_issue_secure_erase(bdev, sector, nr_sects,
+                                       GFP_NOFS);
+               else
+                       ret = blkdev_issue_discard(bdev, sector, nr_sects,
+                                       GFP_NOFS);
+       }
 
        if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
                if (IS_ENCRYPTED(inode))
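
BLKDEV_DISCARD_SECURE is gone in this cycle; secure erase is now a separate operation with its own queue limit. The dispatch above, restated as a standalone sketch (erase_range() is an invented name):

    static int erase_range(struct block_device *bdev, sector_t sector,
                           sector_t nr_sects)
    {
            if (bdev_max_secure_erase_sectors(bdev))
                    return blkdev_issue_secure_erase(bdev, sector, nr_sects,
                                                     GFP_NOFS);
            /* blkdev_issue_discard() lost its flags argument in 5.19 */
            return blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS);
    }
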
index 71f232dcf3c20665a0dea421797f598a2d77a067..83639238a1fe9ed4225cba8642963b70d29b05ab 100644 (file)
@@ -550,7 +550,8 @@ make_now:
        }
        f2fs_set_inode_flags(inode);
 
-       if (file_should_truncate(inode)) {
+       if (file_should_truncate(inode) &&
+                       !is_sbi_flag_set(sbi, SBI_POR_DOING)) {
                ret = f2fs_truncate(inode);
                if (ret)
                        goto bad_inode;
index 22dfeb9915290120e9dbe190a231715d2bc51617..7225ce09f3ab9a4931f9e272a26e30066efefd40 100644 (file)
@@ -1196,9 +1196,8 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
                                                unsigned int *issued)
 {
        struct block_device *bdev = dc->bdev;
-       struct request_queue *q = bdev_get_queue(bdev);
        unsigned int max_discard_blocks =
-                       SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
+                       SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
                                        &(dcc->fstrim_list) : &(dcc->wait_list);
@@ -1245,7 +1244,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
                err = __blkdev_issue_discard(bdev,
                                        SECTOR_FROM_BLOCK(start),
                                        SECTOR_FROM_BLOCK(len),
-                                       GFP_NOFS, 0, &bio);
+                                       GFP_NOFS, &bio);
 submit:
                if (err) {
                        spin_lock_irqsave(&dc->lock, flags);
@@ -1375,9 +1374,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
        struct discard_cmd *dc;
        struct discard_info di = {0};
        struct rb_node **insert_p = NULL, *insert_parent = NULL;
-       struct request_queue *q = bdev_get_queue(bdev);
        unsigned int max_discard_blocks =
-                       SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
+                       SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
        block_t end = lstart + len;
 
        dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
@@ -3243,101 +3241,6 @@ int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
        }
 }
 
-/* This returns write hints for each segment type. This hints will be
- * passed down to block layer. There are mapping tables which depend on
- * the mount option 'whint_mode'.
- *
- * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
- *
- * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
- *
- * User                  F2FS                     Block
- * ----                  ----                     -----
- *                       META                     WRITE_LIFE_NOT_SET
- *                       HOT_NODE                 "
- *                       WARM_NODE                "
- *                       COLD_NODE                "
- * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
- * extension list        "                        "
- *
- * -- buffered io
- * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
- * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
- * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
- * WRITE_LIFE_NONE       "                        "
- * WRITE_LIFE_MEDIUM     "                        "
- * WRITE_LIFE_LONG       "                        "
- *
- * -- direct io
- * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
- * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
- * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
- * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
- * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
- * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
- *
- * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
- *
- * User                  F2FS                     Block
- * ----                  ----                     -----
- *                       META                     WRITE_LIFE_MEDIUM;
- *                       HOT_NODE                 WRITE_LIFE_NOT_SET
- *                       WARM_NODE                "
- *                       COLD_NODE                WRITE_LIFE_NONE
- * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
- * extension list        "                        "
- *
- * -- buffered io
- * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
- * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
- * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
- * WRITE_LIFE_NONE       "                        "
- * WRITE_LIFE_MEDIUM     "                        "
- * WRITE_LIFE_LONG       "                        "
- *
- * -- direct io
- * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
- * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
- * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
- * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
- * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
- * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
- */
-
-enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
-                               enum page_type type, enum temp_type temp)
-{
-       if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
-               if (type == DATA) {
-                       if (temp == WARM)
-                               return WRITE_LIFE_NOT_SET;
-                       else if (temp == HOT)
-                               return WRITE_LIFE_SHORT;
-                       else if (temp == COLD)
-                               return WRITE_LIFE_EXTREME;
-               } else {
-                       return WRITE_LIFE_NOT_SET;
-               }
-       } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
-               if (type == DATA) {
-                       if (temp == WARM)
-                               return WRITE_LIFE_LONG;
-                       else if (temp == HOT)
-                               return WRITE_LIFE_SHORT;
-                       else if (temp == COLD)
-                               return WRITE_LIFE_EXTREME;
-               } else if (type == NODE) {
-                       if (temp == WARM || temp == HOT)
-                               return WRITE_LIFE_NOT_SET;
-                       else if (temp == COLD)
-                               return WRITE_LIFE_NONE;
-               } else if (type == META) {
-                       return WRITE_LIFE_MEDIUM;
-               }
-       }
-       return WRITE_LIFE_NOT_SET;
-}
-
 static int __get_segment_type_2(struct f2fs_io_info *fio)
 {
        if (fio->type == DATA)
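
With whint_mode removed (the block layer stopped propagating per-request write hints this cycle), f2fs no longer maps temperatures back to rw hints. The inbound direction survives: a user-supplied hint still selects a segment temperature. For reference, f2fs_rw_hint_to_seg_type() is roughly:

    int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
    {
            switch (hint) {
            case WRITE_LIFE_SHORT:
                    return CURSEG_HOT_DATA;
            case WRITE_LIFE_EXTREME:
                    return CURSEG_COLD_DATA;
            default:
                    return CURSEG_WARM_DATA;
            }
    }
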
index ea939db18f88da34f01b8a79dafc0c881f202e23..4368f90571bd6194a992ba0bed1b7a109dcee597 100644 (file)
@@ -138,7 +138,6 @@ enum {
        Opt_jqfmt_vfsold,
        Opt_jqfmt_vfsv0,
        Opt_jqfmt_vfsv1,
-       Opt_whint,
        Opt_alloc,
        Opt_fsync,
        Opt_test_dummy_encryption,
@@ -214,7 +213,6 @@ static match_table_t f2fs_tokens = {
        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
        {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
-       {Opt_whint, "whint_mode=%s"},
        {Opt_alloc, "alloc_mode=%s"},
        {Opt_fsync, "fsync_mode=%s"},
        {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
@@ -975,22 +973,6 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                        f2fs_info(sbi, "quota operations not supported");
                        break;
 #endif
-               case Opt_whint:
-                       name = match_strdup(&args[0]);
-                       if (!name)
-                               return -ENOMEM;
-                       if (!strcmp(name, "user-based")) {
-                               F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER;
-                       } else if (!strcmp(name, "off")) {
-                               F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
-                       } else if (!strcmp(name, "fs-based")) {
-                               F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
-                       } else {
-                               kfree(name);
-                               return -EINVAL;
-                       }
-                       kfree(name);
-                       break;
                case Opt_alloc:
                        name = match_strdup(&args[0]);
                        if (!name)
@@ -1328,12 +1310,6 @@ default_check:
                return -EINVAL;
        }
 
-       /* Not pass down write hints if the number of active logs is lesser
-        * than NR_CURSEG_PERSIST_TYPE.
-        */
-       if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_PERSIST_TYPE)
-               F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
-
        if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) {
                f2fs_err(sbi, "Allow to mount readonly mode only");
                return -EROFS;
@@ -1978,10 +1954,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
                seq_puts(seq, ",prjquota");
 #endif
        f2fs_show_quota_options(seq, sbi->sb);
-       if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER)
-               seq_printf(seq, ",whint_mode=%s", "user-based");
-       else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS)
-               seq_printf(seq, ",whint_mode=%s", "fs-based");
 
        fscrypt_show_test_dummy_encryption(seq, ',', sbi->sb);
 
@@ -2033,7 +2005,6 @@ static void default_options(struct f2fs_sb_info *sbi)
                F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
 
        F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
-       F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
        F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
        F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
        F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
@@ -2314,8 +2285,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                need_stop_gc = true;
        }
 
-       if (*flags & SB_RDONLY ||
-               F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) {
+       if (*flags & SB_RDONLY) {
                sync_inodes_sb(sb);
 
                set_sbi_flag(sbi, SBI_IS_DIRTY);
index a5a309fcc7faf6dbdffa07243208e3d53e6c9462..bf91f977debea1968f1b0de7d45d2a3364b71834 100644 (file)
@@ -127,13 +127,12 @@ static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg)
        struct super_block *sb = inode->i_sb;
        struct fstrim_range __user *user_range;
        struct fstrim_range range;
-       struct request_queue *q = bdev_get_queue(sb->s_bdev);
        int err;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(sb->s_bdev))
                return -EOPNOTSUPP;
 
        user_range = (struct fstrim_range __user *)arg;
@@ -141,7 +140,7 @@ static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg)
                return -EFAULT;
 
        range.minlen = max_t(unsigned int, range.minlen,
-                            q->limits.discard_granularity);
+                            bdev_discard_granularity(sb->s_bdev));
 
        err = fat_trim_fs(inode, &range);
        if (err < 0)
index bf6051bdf1d1d98ccc0ce64dd720734080f28a94..3d1afb95a925a609b797666abb55cd21035f27df 100644 (file)
@@ -1872,13 +1872,9 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
                goto out_fail;
        }
 
-       if (sbi->options.discard) {
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
-               if (!blk_queue_discard(q))
-                       fat_msg(sb, KERN_WARNING,
-                                       "mounting with \"discard\" option, but "
-                                       "the device does not support discard");
-       }
+       if (sbi->options.discard && !bdev_max_discard_sectors(sb->s_bdev))
+               fat_msg(sb, KERN_WARNING,
+                       "mounting with \"discard\" option, but the device does not support discard");
 
        fat_set_state(sb, 1, 0);
        return 0;
index 7d2e692b66a94fcfc609747802517e27489b8a0c..ada8fe814db97d118e10506382e38a02a6c9bc2b 100644 (file)
@@ -412,6 +412,7 @@ void __fput_sync(struct file *file)
 }
 
 EXPORT_SYMBOL(fput);
+EXPORT_SYMBOL(__fput_sync);
 
 void __init files_init(void)
 {
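
Exporting __fput_sync() lets modules drop the final reference to a file synchronously instead of via fput()'s deferred task_work. A hedged usage sketch; the caller must be a process context where sleeping and filesystem re-entry are safe:

    static int open_probe_close(const char *path)
    {
            struct file *f = filp_open(path, O_RDONLY, 0);

            if (IS_ERR(f))
                    return PTR_ERR(f);
            __fput_sync(f);         /* released here, not via task_work */
            return 0;
    }
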
index 591fe9cf1659301372abe7efff2c68a5f76d2d0c..a1074a26e784d227922fcf558048790a87380330 100644 (file)
@@ -1712,6 +1712,10 @@ static int writeback_single_inode(struct inode *inode,
         */
        if (!(inode->i_state & I_DIRTY_ALL))
                inode_cgwb_move_to_attached(inode, wb);
+       else if (!(inode->i_state & I_SYNC_QUEUED) &&
+                (inode->i_state & I_DIRTY))
+               redirty_tail_locked(inode, wb);
+
        spin_unlock(&wb->list_lock);
        inode_sync_complete(inode);
 out:
@@ -1775,11 +1779,12 @@ static long writeback_sb_inodes(struct super_block *sb,
        };
        unsigned long start_time = jiffies;
        long write_chunk;
-       long wrote = 0;  /* count both pages and inodes */
+       long total_wrote = 0;  /* count both pages and inodes */
 
        while (!list_empty(&wb->b_io)) {
                struct inode *inode = wb_inode(wb->b_io.prev);
                struct bdi_writeback *tmp_wb;
+               long wrote;
 
                if (inode->i_sb != sb) {
                        if (work->sb) {
@@ -1855,7 +1860,9 @@ static long writeback_sb_inodes(struct super_block *sb,
 
                wbc_detach_inode(&wbc);
                work->nr_pages -= write_chunk - wbc.nr_to_write;
-               wrote += write_chunk - wbc.nr_to_write;
+               wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped;
+               wrote = wrote < 0 ? 0 : wrote;
+               total_wrote += wrote;
 
                if (need_resched()) {
                        /*
@@ -1877,7 +1884,7 @@ static long writeback_sb_inodes(struct super_block *sb,
                tmp_wb = inode_to_wb_and_lock_list(inode);
                spin_lock(&inode->i_lock);
                if (!(inode->i_state & I_DIRTY_ALL))
-                       wrote++;
+                       total_wrote++;
                requeue_inode(inode, tmp_wb, &wbc);
                inode_sync_complete(inode);
                spin_unlock(&inode->i_lock);
@@ -1891,14 +1898,14 @@ static long writeback_sb_inodes(struct super_block *sb,
                 * bail out to wb_writeback() often enough to check
                 * background threshold and other termination conditions.
                 */
-               if (wrote) {
+               if (total_wrote) {
                        if (time_is_before_jiffies(start_time + HZ / 10UL))
                                break;
                        if (work->nr_pages <= 0)
                                break;
                }
        }
-       return wrote;
+       return total_wrote;
 }
 
 static long __writeback_inodes_wb(struct bdi_writeback *wb,
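
The rename exposes the bug being fixed: pages the filesystem refused to write (wbc.pages_skipped) used to count as progress, so the outer loop could spin without writing anything. The arithmetic, with hypothetical numbers:

    /* write_chunk = 16, wbc.nr_to_write = 14 left, wbc.pages_skipped = 4:
     * old: wrote += 16 - 14 = 2              (phantom progress)
     * new: wrote  = 16 - 14 - 4 = -2, clamped to 0 (no progress counted) */
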
index 76316c4a3fb7f08bbbbe4dba8175e6aad6b1b100..b313a978ae0a25cfbf44a866f178b2ecacb96aaa 100644 (file)
@@ -38,6 +38,3 @@ config FSCACHE_DEBUG
          enabled by setting bits in /sys/modules/fscache/parameter/debug.
 
          See Documentation/filesystems/caching/fscache.rst for more information.
-
-config FSCACHE_OLD_API
-       bool
index 2749933852a991c3f3ce58bc03bd7bcba2ac9d29..d645f8b302a27882c86c3c46e134dd5bcbc35cef 100644 (file)
@@ -214,7 +214,7 @@ void fscache_relinquish_cache(struct fscache_cache *cache)
 
        cache->ops = NULL;
        cache->cache_priv = NULL;
-       smp_store_release(&cache->state, FSCACHE_CACHE_IS_NOT_PRESENT);
+       fscache_set_cache_state(cache, FSCACHE_CACHE_IS_NOT_PRESENT);
        fscache_put_cache(cache, where);
 }
 EXPORT_SYMBOL(fscache_relinquish_cache);
index 9bb1ab5fe5ed15078a9d738edec04e852588235f..9d3cf01117093da9aa133bc83066b0338466e74a 100644 (file)
@@ -30,7 +30,7 @@ static DEFINE_SPINLOCK(fscache_cookie_lru_lock);
 DEFINE_TIMER(fscache_cookie_lru_timer, fscache_cookie_lru_timed_out);
 static DECLARE_WORK(fscache_cookie_lru_work, fscache_cookie_lru_worker);
 static const char fscache_cookie_states[FSCACHE_COOKIE_STATE__NR] = "-LCAIFUWRD";
-unsigned int fscache_lru_cookie_timeout = 10 * HZ;
+static unsigned int fscache_lru_cookie_timeout = 10 * HZ;
 
 void fscache_print_cookie(struct fscache_cookie *cookie, char prefix)
 {
@@ -1069,6 +1069,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
 }
 EXPORT_SYMBOL(__fscache_invalidate);
 
+#ifdef CONFIG_PROC_FS
 /*
  * Generate a list of extant cookies in /proc/fs/fscache/cookies
  */
@@ -1145,3 +1146,4 @@ const struct seq_operations fscache_cookies_seq_ops = {
        .stop   = fscache_cookies_seq_stop,
        .show   = fscache_cookies_seq_show,
 };
+#endif
index ed1c9ed737f24ffa53dd456a078a050951cffc67..1336f517e9b1a60a41281f2f9e21d3e9dddcd025 100644 (file)
@@ -56,7 +56,9 @@ static inline bool fscache_set_cache_state_maybe(struct fscache_cache *cache,
  * cookie.c
  */
 extern struct kmem_cache *fscache_cookie_jar;
+#ifdef CONFIG_PROC_FS
 extern const struct seq_operations fscache_cookies_seq_ops;
+#endif
 extern struct timer_list fscache_cookie_lru_timer;
 
 extern void fscache_print_cookie(struct fscache_cookie *cookie, char prefix);
@@ -137,7 +139,9 @@ int fscache_stats_show(struct seq_file *m, void *v);
 /*
  * volume.c
  */
+#ifdef CONFIG_PROC_FS
 extern const struct seq_operations fscache_volumes_seq_ops;
+#endif
 
 struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
                                          enum fscache_volume_trace where);
index c8c7fe9e9a6ec0d9f0df27f4b8f3b84daf4425bf..3af3b08a9bb3f1ba21b249d48586145911bf7863 100644 (file)
@@ -235,8 +235,7 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
 {
        struct fscache_write_request *wreq = priv;
 
-       fscache_clear_page_bits(fscache_cres_cookie(&wreq->cache_resources),
-                               wreq->mapping, wreq->start, wreq->len,
+       fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len,
                                wreq->set_bits);
 
        if (wreq->term_func)
@@ -296,7 +295,7 @@ abandon_end:
 abandon_free:
        kfree(wreq);
 abandon:
-       fscache_clear_page_bits(cookie, mapping, start, len, cond);
+       fscache_clear_page_bits(mapping, start, len, cond);
        if (term_func)
                term_func(term_func_priv, ret, false);
 }
index 39080b2d6cf86d26287b6e91b134997b5a1d5dff..b6697333bb2b9e77345f324fb515dc93419f0854 100644 (file)
@@ -1153,13 +1153,12 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
 
        if (length != written && (iomap->flags & IOMAP_F_NEW)) {
                /* Deallocate blocks that were just allocated. */
-               loff_t blockmask = i_blocksize(inode) - 1;
-               loff_t end = (pos + length) & ~blockmask;
+               loff_t hstart = round_up(pos + written, i_blocksize(inode));
+               loff_t hend = iomap->offset + iomap->length;
 
-               pos = (pos + written + blockmask) & ~blockmask;
-               if (pos < end) {
-                       truncate_pagecache_range(inode, pos, end - 1);
-                       punch_hole(ip, pos, end - pos);
+               if (hstart < hend) {
+                       truncate_pagecache_range(inode, hstart, hend - 1);
+                       punch_hole(ip, hstart, hend - hstart);
                }
        }
 
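
The old arithmetic punched from the end of the request; the new code starts at the first block boundary past the bytes actually written and stops at the end of the mapping iomap returned. Worked numbers (hypothetical):

    /* blocksize = 4096, pos = 1000, written = 0,
     * iomap->offset = 0, iomap->length = 8192:
     *   hstart = round_up(1000 + 0, 4096) = 4096
     *   hend   = 0 + 8192                 = 8192
     * -> truncate_pagecache_range(inode, 4096, 8191);
     *    punch_hole(ip, 4096, 4096); */
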
index 22b41acfbbc39a008dddb61d2b66fee883cb06c2..2556ae1f92ea2d9e27d0fefe9c60c09543c6c563 100644 (file)
@@ -770,30 +770,27 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
        return ret ? ret : ret1;
 }
 
-static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i,
+static inline bool should_fault_in_pages(struct iov_iter *i,
+                                        struct kiocb *iocb,
                                         size_t *prev_count,
                                         size_t *window_size)
 {
        size_t count = iov_iter_count(i);
        size_t size, offs;
 
-       if (likely(!count))
-               return false;
-       if (ret <= 0 && ret != -EFAULT)
+       if (!count)
                return false;
        if (!iter_is_iovec(i))
                return false;
 
        size = PAGE_SIZE;
-       offs = offset_in_page(i->iov[0].iov_base + i->iov_offset);
+       offs = offset_in_page(iocb->ki_pos);
        if (*prev_count != count || !*window_size) {
                size_t nr_dirtied;
 
-               size = ALIGN(offs + count, PAGE_SIZE);
-               size = min_t(size_t, size, SZ_1M);
                nr_dirtied = max(current->nr_dirtied_pause -
                                 current->nr_dirtied, 8);
-               size = min(size, nr_dirtied << PAGE_SHIFT);
+               size = min_t(size_t, SZ_1M, nr_dirtied << PAGE_SHIFT);
        }
 
        *prev_count = count;
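
should_fault_in_pages() no longer inspects the previous return value; each caller below decides when to retry. The shape shared by all the converted loops in this file, condensed into one hedged sketch (dio_read_sketch() is invented; the helpers are the real gfs2/iomap ones):

    static ssize_t dio_read_sketch(struct kiocb *iocb, struct iov_iter *to,
                                   struct gfs2_holder *gh)
    {
            size_t prev_count = 0, window_size = 0, read = 0;
            ssize_t ret;

    retry:
            ret = gfs2_glock_nq(gh);
            if (ret)
                    return ret;
            pagefault_disable();            /* report -EFAULT, don't fault */
            ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
                               IOMAP_DIO_PARTIAL, read);
            pagefault_enable();
            if (ret > 0)
                    read = ret;
            if (ret == -EFAULT &&
                should_fault_in_pages(to, iocb, &prev_count, &window_size)) {
                    gfs2_glock_dq(gh);      /* drop the glock, then fault in */
                    window_size -= fault_in_iov_iter_writeable(to, window_size);
                    if (window_size)
                            goto retry;
            }
            if (gfs2_holder_queued(gh))
                    gfs2_glock_dq(gh);
            return ret < 0 ? ret : read;
    }
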
@@ -807,7 +804,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
        struct file *file = iocb->ki_filp;
        struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
        size_t prev_count = 0, window_size = 0;
-       size_t written = 0;
+       size_t read = 0;
        ssize_t ret;
 
        /*
@@ -835,35 +832,31 @@ retry:
        ret = gfs2_glock_nq(gh);
        if (ret)
                goto out_uninit;
-retry_under_glock:
        pagefault_disable();
        to->nofault = true;
        ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
-                          IOMAP_DIO_PARTIAL, written);
+                          IOMAP_DIO_PARTIAL, read);
        to->nofault = false;
        pagefault_enable();
+       if (ret <= 0 && ret != -EFAULT)
+               goto out_unlock;
        if (ret > 0)
-               written = ret;
-
-       if (should_fault_in_pages(ret, to, &prev_count, &window_size)) {
-               size_t leftover;
+               read = ret;
 
-               gfs2_holder_allow_demote(gh);
-               leftover = fault_in_iov_iter_writeable(to, window_size);
-               gfs2_holder_disallow_demote(gh);
-               if (leftover != window_size) {
-                       if (gfs2_holder_queued(gh))
-                               goto retry_under_glock;
+       if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) {
+               gfs2_glock_dq(gh);
+               window_size -= fault_in_iov_iter_writeable(to, window_size);
+               if (window_size)
                        goto retry;
-               }
        }
+out_unlock:
        if (gfs2_holder_queued(gh))
                gfs2_glock_dq(gh);
 out_uninit:
        gfs2_holder_uninit(gh);
        if (ret < 0)
                return ret;
-       return written;
+       return read;
 }
 
 static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
@@ -873,7 +866,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
        struct inode *inode = file->f_mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        size_t prev_count = 0, window_size = 0;
-       size_t read = 0;
+       size_t written = 0;
        ssize_t ret;
 
        /*
@@ -899,41 +892,37 @@ retry:
        ret = gfs2_glock_nq(gh);
        if (ret)
                goto out_uninit;
-retry_under_glock:
        /* Silently fall back to buffered I/O when writing beyond EOF */
        if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode))
-               goto out;
+               goto out_unlock;
 
        from->nofault = true;
        ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
-                          IOMAP_DIO_PARTIAL, read);
+                          IOMAP_DIO_PARTIAL, written);
        from->nofault = false;
-
-       if (ret == -ENOTBLK)
-               ret = 0;
+       if (ret <= 0) {
+               if (ret == -ENOTBLK)
+                       ret = 0;
+               if (ret != -EFAULT)
+                       goto out_unlock;
+       }
        if (ret > 0)
-               read = ret;
-
-       if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
-               size_t leftover;
+               written = ret;
 
-               gfs2_holder_allow_demote(gh);
-               leftover = fault_in_iov_iter_readable(from, window_size);
-               gfs2_holder_disallow_demote(gh);
-               if (leftover != window_size) {
-                       if (gfs2_holder_queued(gh))
-                               goto retry_under_glock;
+       if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
+               gfs2_glock_dq(gh);
+               window_size -= fault_in_iov_iter_readable(from, window_size);
+               if (window_size)
                        goto retry;
-               }
        }
-out:
+out_unlock:
        if (gfs2_holder_queued(gh))
                gfs2_glock_dq(gh);
 out_uninit:
        gfs2_holder_uninit(gh);
        if (ret < 0)
                return ret;
-       return read;
+       return written;
 }
 
 static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -941,7 +930,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        struct gfs2_inode *ip;
        struct gfs2_holder gh;
        size_t prev_count = 0, window_size = 0;
-       size_t written = 0;
+       size_t read = 0;
        ssize_t ret;
 
        /*
@@ -962,7 +951,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        if (ret >= 0) {
                if (!iov_iter_count(to))
                        return ret;
-               written = ret;
+               read = ret;
        } else if (ret != -EFAULT) {
                if (ret != -EAGAIN)
                        return ret;
@@ -975,32 +964,26 @@ retry:
        ret = gfs2_glock_nq(&gh);
        if (ret)
                goto out_uninit;
-retry_under_glock:
        pagefault_disable();
        ret = generic_file_read_iter(iocb, to);
        pagefault_enable();
+       if (ret <= 0 && ret != -EFAULT)
+               goto out_unlock;
        if (ret > 0)
-               written += ret;
+               read += ret;
 
-       if (should_fault_in_pages(ret, to, &prev_count, &window_size)) {
-               size_t leftover;
-
-               gfs2_holder_allow_demote(&gh);
-               leftover = fault_in_iov_iter_writeable(to, window_size);
-               gfs2_holder_disallow_demote(&gh);
-               if (leftover != window_size) {
-                       if (gfs2_holder_queued(&gh))
-                               goto retry_under_glock;
-                       if (written)
-                               goto out_uninit;
+       if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) {
+               gfs2_glock_dq(&gh);
+               window_size -= fault_in_iov_iter_writeable(to, window_size);
+               if (window_size)
                        goto retry;
-               }
        }
+out_unlock:
        if (gfs2_holder_queued(&gh))
                gfs2_glock_dq(&gh);
 out_uninit:
        gfs2_holder_uninit(&gh);
-       return written ? written : ret;
+       return read ? read : ret;
 }
 
 static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
@@ -1014,7 +997,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
        struct gfs2_holder *statfs_gh = NULL;
        size_t prev_count = 0, window_size = 0;
        size_t orig_count = iov_iter_count(from);
-       size_t read = 0;
+       size_t written = 0;
        ssize_t ret;
 
        /*
@@ -1032,10 +1015,18 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
 
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh);
 retry:
+       if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
+               window_size -= fault_in_iov_iter_readable(from, window_size);
+               if (!window_size) {
+                       ret = -EFAULT;
+                       goto out_uninit;
+               }
+               from->count = min(from->count, window_size);
+       }
        ret = gfs2_glock_nq(gh);
        if (ret)
                goto out_uninit;
-retry_under_glock:
+
        if (inode == sdp->sd_rindex) {
                struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 
@@ -1052,27 +1043,19 @@ retry_under_glock:
        current->backing_dev_info = NULL;
        if (ret > 0) {
                iocb->ki_pos += ret;
-               read += ret;
+               written += ret;
        }
 
        if (inode == sdp->sd_rindex)
                gfs2_glock_dq_uninit(statfs_gh);
 
-       from->count = orig_count - read;
-       if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
-               size_t leftover;
-
-               gfs2_holder_allow_demote(gh);
-               leftover = fault_in_iov_iter_readable(from, window_size);
-               gfs2_holder_disallow_demote(gh);
-               if (leftover != window_size) {
-                       from->count = min(from->count, window_size - leftover);
-                       if (gfs2_holder_queued(gh))
-                               goto retry_under_glock;
-                       if (read && !(iocb->ki_flags & IOCB_DIRECT))
-                               goto out_uninit;
-                       goto retry;
-               }
+       if (ret <= 0 && ret != -EFAULT)
+               goto out_unlock;
+
+       from->count = orig_count - written;
+       if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
+               gfs2_glock_dq(gh);
+               goto retry;
        }
 out_unlock:
        if (gfs2_holder_queued(gh))
@@ -1081,8 +1064,8 @@ out_uninit:
        gfs2_holder_uninit(gh);
        if (statfs_gh)
                kfree(statfs_gh);
-       from->count = orig_count - read;
-       return read ? read : ret;
+       from->count = orig_count - written;
+       return written ? written : ret;
 }
 
 /**
index 801ad9f4f2bef9cc4b0ec335cc794a6c572d733f..6d26bb52548448a67170b3150b32773afd3cb45f 100644 (file)
@@ -1386,7 +1386,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 {
        struct inode *inode = file_inode(filp);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
-       struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
+       struct block_device *bdev = sdp->sd_vfs->s_bdev;
        struct buffer_head *bh;
        struct gfs2_rgrpd *rgd;
        struct gfs2_rgrpd *rgd_end;
@@ -1405,7 +1405,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
        if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
                return -EROFS;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(bdev))
                return -EOPNOTSUPP;
 
        if (copy_from_user(&r, argp, sizeof(r)))
@@ -1418,8 +1418,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
        start = r.start >> bs_shift;
        end = start + (r.len >> bs_shift);
        minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize);
-       minlen = max_t(u64, minlen,
-                      q->limits.discard_granularity) >> bs_shift;
+       minlen = max_t(u64, minlen, bdev_discard_granularity(bdev)) >> bs_shift;
 
        if (end <= start || minlen > sdp->sd_max_rg_data)
                return -EINVAL;
index 99c7477cee5c2b003934a86c4bec9d4a75fa08d9..dd3a088db11d1e9b501f087bb3e87426b666312d 100644 (file)
@@ -206,7 +206,7 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
        info.flags = 0;
        info.length = len;
        info.low_limit = current->mm->mmap_base;
-       info.high_limit = TASK_SIZE;
+       info.high_limit = arch_get_mmap_end(addr);
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        return vm_unmapped_area(&info);
@@ -222,7 +222,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
        info.flags = VM_UNMAPPED_AREA_TOPDOWN;
        info.length = len;
        info.low_limit = max(PAGE_SIZE, mmap_min_addr);
-       info.high_limit = current->mm->mmap_base;
+       info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);
@@ -237,7 +237,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
                VM_BUG_ON(addr != -ENOMEM);
                info.flags = 0;
                info.low_limit = current->mm->mmap_base;
-               info.high_limit = TASK_SIZE;
+               info.high_limit = arch_get_mmap_end(addr);
                addr = vm_unmapped_area(&info);
        }
 
@@ -251,6 +251,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        struct hstate *h = hstate_file(file);
+       const unsigned long mmap_end = arch_get_mmap_end(addr);
 
        if (len & ~huge_page_mask(h))
                return -EINVAL;
@@ -266,7 +267,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
        if (addr) {
                addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
-               if (TASK_SIZE - len >= addr &&
+               if (mmap_end - len >= addr &&
                    (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
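
arch_get_mmap_end()/arch_get_mmap_base() let architectures with selectable VA widths (arm64's 52-bit mode, for instance) bound hugetlb placement correctly instead of hard-coding TASK_SIZE. To the best of my knowledge the generic fallbacks look like this (verify against include/linux/sched/mm.h in your tree):

    #ifndef arch_get_mmap_end
    #define arch_get_mmap_end(addr)         (TASK_SIZE)
    #endif

    #ifndef arch_get_mmap_base
    #define arch_get_mmap_base(addr, base)  (base)
    #endif
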
index 08503dc68d2b1ef7f2cc0f6b0986654036dcdc1a..9a6c233ee7f12cc312b4fffb5babeb722401452b 100644 (file)
@@ -191,3 +191,32 @@ long splice_file_to_pipe(struct file *in,
                         struct pipe_inode_info *opipe,
                         loff_t *offset,
                         size_t len, unsigned int flags);
+
+/*
+ * fs/xattr.c:
+ */
+struct xattr_name {
+       char name[XATTR_NAME_MAX + 1];
+};
+
+struct xattr_ctx {
+       /* Value of attribute */
+       union {
+               const void __user *cvalue;
+               void __user *value;
+       };
+       void *kvalue;
+       size_t size;
+       /* Attribute name */
+       struct xattr_name *kname;
+       unsigned int flags;
+};
+
+
+ssize_t do_getxattr(struct user_namespace *mnt_userns,
+                   struct dentry *d,
+                   struct xattr_ctx *ctx);
+
+int setxattr_copy(const char __user *name, struct xattr_ctx *ctx);
+int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+               struct xattr_ctx *ctx);
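
These declarations split the xattr syscall paths so io_uring can reuse them: setxattr_copy() performs the copy-in and validation half, do_setxattr() applies the result. A hedged sketch of a caller (setxattr_sketch() and its locals are invented):

    static long setxattr_sketch(struct user_namespace *mnt_userns,
                                struct dentry *d, const char __user *name,
                                const void __user *value, size_t size,
                                int flags)
    {
            struct xattr_name kname;
            struct xattr_ctx ctx = {
                    .cvalue = value,
                    .kvalue = NULL,
                    .size   = size,
                    .kname  = &kname,
                    .flags  = flags,
            };
            int error;

            error = setxattr_copy(name, &ctx);
            if (error)
                    return error;
            error = do_setxattr(mnt_userns, d, &ctx);
            kvfree(ctx.kvalue);
            return error;
    }
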
index 32aeb2c581c580fa26cf5264b332beb487b0d47f..824623bcf1a53f78e67370a947e063c4245c8c13 100644 (file)
@@ -871,7 +871,7 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
 
 static bool io_wq_worker_wake(struct io_worker *worker, void *data)
 {
-       set_notify_signal(worker->task);
+       __set_notify_signal(worker->task);
        wake_up_process(worker->task);
        return false;
 }
@@ -991,7 +991,7 @@ static bool __io_wq_worker_cancel(struct io_worker *worker,
 {
        if (work && match->fn(work, match->data)) {
                work->flags |= IO_WQ_WORK_CANCEL;
-               set_notify_signal(worker->task);
+               __set_notify_signal(worker->task);
                return true;
        }
 
index dbecd27656c7ccd79f1d738f40a1c9feebd147ad..ba6eee76d028f64ff932c94b1de3976994d14227 100644 (file)
@@ -155,6 +155,7 @@ struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack)
 struct io_wq_work {
        struct io_wq_work_node list;
        unsigned flags;
+       int cancel_seq;
 };
 
 static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
index a8413f0064170c5a6197ac6c5fd59e6cefd45cbf..9f1c682d7caf22b6181a6dfeda59271a7c3ca26c 100644 (file)
@@ -63,7 +63,6 @@
 #include <net/sock.h>
 #include <net/af_unix.h>
 #include <net/scm.h>
-#include <net/busy_poll.h>
 #include <linux/anon_inodes.h>
 #include <linux/sched/mm.h>
 #include <linux/uaccess.h>
@@ -81,6 +80,7 @@
 #include <linux/io_uring.h>
 #include <linux/audit.h>
 #include <linux/security.h>
+#include <linux/xattr.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -95,7 +95,7 @@
 #define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
 
 /* only define max */
-#define IORING_MAX_FIXED_FILES (1U << 15)
+#define IORING_MAX_FIXED_FILES (1U << 20)
 #define IORING_MAX_RESTRICTIONS        (IORING_RESTRICTION_LAST + \
                                 IORING_REGISTER_LAST + IORING_OP_LAST)
 
                        IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS)
 
 #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
-                               REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \
-                               REQ_F_ASYNC_DATA)
+                               REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA)
+
+#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
+                                IO_REQ_CLEAN_FLAGS)
+
+#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
 
 #define IO_TCTX_REFS_CACHE_NR  (1U << 10)
 
@@ -168,7 +172,7 @@ struct io_rings {
         * The application needs a full memory barrier before checking
         * for IORING_SQ_NEED_WAKEUP after updating the sq tail.
         */
-       u32                     sq_flags;
+       atomic_t                sq_flags;
        /*
         * Runtime CQ flags
         *
@@ -200,13 +204,6 @@ struct io_rings {
        struct io_uring_cqe     cqes[] ____cacheline_aligned_in_smp;
 };
 
-enum io_uring_cmd_flags {
-       IO_URING_F_COMPLETE_DEFER       = 1,
-       IO_URING_F_UNLOCKED             = 2,
-       /* int's last bit, sign checks are usually faster than a bit test */
-       IO_URING_F_NONBLOCK             = INT_MIN,
-};
-
 struct io_mapped_ubuf {
        u64             ubuf;
        u64             ubuf_end;
@@ -218,10 +215,27 @@ struct io_mapped_ubuf {
 struct io_ring_ctx;
 
 struct io_overflow_cqe {
-       struct io_uring_cqe cqe;
        struct list_head list;
+       struct io_uring_cqe cqe;
 };
 
+/*
 * FFS_SCM is only available on 64-bit archs; for 32-bit we just define it as 0
 * and define IO_URING_SCM_ALL. In that case, we use SCM for all files, as we
+ * can't safely always dereference the file when the task has exited and ring
+ * cleanup is done. If a file is tracked and part of SCM, then unix gc on
+ * process exit may reap it before __io_sqe_files_unregister() is run.
+ */
+#define FFS_NOWAIT             0x1UL
+#define FFS_ISREG              0x2UL
+#if defined(CONFIG_64BIT)
+#define FFS_SCM                        0x4UL
+#else
+#define IO_URING_SCM_ALL
+#define FFS_SCM                        0x0UL
+#endif
+#define FFS_MASK               ~(FFS_NOWAIT|FFS_ISREG|FFS_SCM)
+
 struct io_fixed_file {
        /* file * with additional FFS_* flags */
        unsigned long file_ptr;
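
The FFS_* bits ride in the low bits of file_ptr: struct file allocations are word-aligned, so the bottom bits are free for per-file flags, and FFS_MASK strips them before the pointer is dereferenced. A sketch (helper names invented):

    static inline struct file *ffs_file(struct io_fixed_file *f)
    {
            return (struct file *)(f->file_ptr & FFS_MASK);
    }

    static inline bool ffs_isreg(struct io_fixed_file *f)
    {
            return f->file_ptr & FFS_ISREG;
    }
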
@@ -239,6 +253,8 @@ struct io_rsrc_put {
 
 struct io_file_table {
        struct io_fixed_file *files;
+       unsigned long *bitmap;
+       unsigned int alloc_hint;
 };
 
 struct io_rsrc_node {
@@ -263,10 +279,26 @@ struct io_rsrc_data {
        bool                            quiesce;
 };
 
+#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf))
 struct io_buffer_list {
-       struct list_head list;
-       struct list_head buf_list;
+       /*
+        * If ->buf_nr_pages is set, then buf_pages/buf_ring are used. If not,
+        * then these are classic provided buffers and ->buf_list is used.
+        */
+       union {
+               struct list_head buf_list;
+               struct {
+                       struct page **buf_pages;
+                       struct io_uring_buf_ring *buf_ring;
+               };
+       };
        __u16 bgid;
+
+       /* below is for ring provided buffers */
+       __u16 buf_nr_pages;
+       __u16 nr_entries;
+       __u32 head;
+       __u32 mask;
 };
 
 struct io_buffer {
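
For ring-mapped provided buffers, nr_entries is a power of two, so mask is nr_entries - 1 and head indexes the shared ring directly. A hedged sketch of consuming the next buffer (invented helper; the real code also handles the classic ->buf_list case):

    static struct io_uring_buf *next_ring_buf(struct io_buffer_list *bl)
    {
            struct io_uring_buf *buf = &bl->buf_ring->bufs[bl->head & bl->mask];

            bl->head++;             /* kernel side consumes by advancing head */
            return buf;
    }
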
@@ -339,7 +371,7 @@ struct io_ev_fd {
        struct rcu_head         rcu;
 };
 
-#define IO_BUFFERS_HASH_BITS   5
+#define BGID_ARRAY     64
 
 struct io_ring_ctx {
        /* const or read-mostly hot data */
@@ -348,6 +380,7 @@ struct io_ring_ctx {
 
                struct io_rings         *rings;
                unsigned int            flags;
+               enum task_work_notify_mode      notify_method;
                unsigned int            compat: 1;
                unsigned int            drain_next: 1;
                unsigned int            restricted: 1;
@@ -355,6 +388,7 @@ struct io_ring_ctx {
                unsigned int            drain_active: 1;
                unsigned int            drain_disabled: 1;
                unsigned int            has_evfd: 1;
+               unsigned int            syscall_iopoll: 1;
        } ____cacheline_aligned_in_smp;
 
        /* submission data */
@@ -384,17 +418,21 @@ struct io_ring_ctx {
                 */
                struct io_rsrc_node     *rsrc_node;
                int                     rsrc_cached_refs;
+               atomic_t                cancel_seq;
                struct io_file_table    file_table;
                unsigned                nr_user_files;
                unsigned                nr_user_bufs;
                struct io_mapped_ubuf   **user_bufs;
 
                struct io_submit_state  submit_state;
+
+               struct io_buffer_list   *io_bl;
+               struct xarray           io_bl_xa;
+               struct list_head        io_buffers_cache;
+
                struct list_head        timeout_list;
                struct list_head        ltimeout_list;
                struct list_head        cq_overflow_list;
-               struct list_head        *io_buffers;
-               struct list_head        io_buffers_cache;
                struct list_head        apoll_cache;
                struct xarray           personalities;
                u32                     pers_next;
@@ -411,14 +449,16 @@ struct io_ring_ctx {
        struct wait_queue_head  sqo_sq_wait;
        struct list_head        sqd_list;
 
-       unsigned long           check_cq_overflow;
-#ifdef CONFIG_NET_RX_BUSY_POLL
-       /* used to track busy poll napi_id */
-       struct list_head        napi_list;
-       spinlock_t              napi_lock;      /* napi_list lock */
-#endif
+       unsigned long           check_cq;
 
        struct {
+               /*
+                * We cache a range of free CQEs we can use; once exhausted, it
+                * should go through a slower range setup, see __io_get_cqe()
+                */
+               struct io_uring_cqe     *cqe_cached;
+               struct io_uring_cqe     *cqe_sentinel;
+
                unsigned                cached_cq_tail;
                unsigned                cq_entries;
                struct io_ev_fd __rcu   *io_ev_fd;
@@ -500,12 +540,11 @@ struct io_uring_task {
        const struct io_ring_ctx *last;
        struct io_wq            *io_wq;
        struct percpu_counter   inflight;
-       atomic_t                inflight_tracked;
        atomic_t                in_idle;
 
        spinlock_t              task_lock;
        struct io_wq_work_list  task_list;
-       struct io_wq_work_list  prior_task_list;
+       struct io_wq_work_list  prio_task_list;
        struct callback_head    task_work;
        struct file             **registered_rings;
        bool                    task_running;
@@ -554,6 +593,16 @@ struct io_accept {
        unsigned long                   nofile;
 };
 
+struct io_socket {
+       struct file                     *file;
+       int                             domain;
+       int                             type;
+       int                             protocol;
+       int                             flags;
+       u32                             file_slot;
+       unsigned long                   nofile;
+};
+
 struct io_sync {
        struct file                     *file;
        loff_t                          len;
@@ -565,6 +614,8 @@ struct io_sync {
 struct io_cancel {
        struct file                     *file;
        u64                             addr;
+       u32                             flags;
+       s32                             fd;
 };
 
 struct io_timeout {
@@ -592,7 +643,8 @@ struct io_rw {
        /* NOTE: kiocb has the file as the first member, so don't do it here */
        struct kiocb                    kiocb;
        u64                             addr;
-       u64                             len;
+       u32                             len;
+       rwf_t                           flags;
 };
 
 struct io_connect {
@@ -609,9 +661,9 @@ struct io_sr_msg {
                void __user                     *buf;
        };
        int                             msg_flags;
-       int                             bgid;
        size_t                          len;
        size_t                          done_io;
+       unsigned int                    flags;
 };
 
 struct io_open {
@@ -654,10 +706,10 @@ struct io_epoll {
 
 struct io_splice {
        struct file                     *file_out;
-       struct file                     *file_in;
        loff_t                          off_out;
        loff_t                          off_in;
        u64                             len;
+       int                             splice_fd_in;
        unsigned int                    flags;
 };
 
@@ -729,6 +781,12 @@ struct io_msg {
        u32 len;
 };
 
+struct io_nop {
+       struct file                     *file;
+       u64                             extra1;
+       u64                             extra2;
+};
+
 struct io_async_connect {
        struct sockaddr_storage         address;
 };
@@ -755,6 +813,12 @@ struct io_async_rw {
        struct wait_page_queue          wpq;
 };
 
+struct io_xattr {
+       struct file                     *file;
+       struct xattr_ctx                ctx;
+       struct filename                 *filename;
+};
+
 enum {
        REQ_F_FIXED_FILE_BIT    = IOSQE_FIXED_FILE_BIT,
        REQ_F_IO_DRAIN_BIT      = IOSQE_IO_DRAIN_BIT,
@@ -773,6 +837,7 @@ enum {
        REQ_F_NEED_CLEANUP_BIT,
        REQ_F_POLLED_BIT,
        REQ_F_BUFFER_SELECTED_BIT,
+       REQ_F_BUFFER_RING_BIT,
        REQ_F_COMPLETE_INLINE_BIT,
        REQ_F_REISSUE_BIT,
        REQ_F_CREDS_BIT,
@@ -783,6 +848,7 @@ enum {
        REQ_F_SINGLE_POLL_BIT,
        REQ_F_DOUBLE_POLL_BIT,
        REQ_F_PARTIAL_IO_BIT,
+       REQ_F_APOLL_MULTISHOT_BIT,
        /* keep async read/write and isreg together and in order */
        REQ_F_SUPPORT_NOWAIT_BIT,
        REQ_F_ISREG_BIT,
@@ -823,6 +889,8 @@ enum {
        REQ_F_POLLED            = BIT(REQ_F_POLLED_BIT),
        /* buffer already selected */
        REQ_F_BUFFER_SELECTED   = BIT(REQ_F_BUFFER_SELECTED_BIT),
+       /* buffer selected from ring, needs commit */
+       REQ_F_BUFFER_RING       = BIT(REQ_F_BUFFER_RING_BIT),
        /* completion is deferred through io_comp_state */
        REQ_F_COMPLETE_INLINE   = BIT(REQ_F_COMPLETE_INLINE_BIT),
        /* caller should reissue async */
@@ -847,6 +915,8 @@ enum {
        REQ_F_DOUBLE_POLL       = BIT(REQ_F_DOUBLE_POLL_BIT),
        /* request has already done partial IO */
        REQ_F_PARTIAL_IO        = BIT(REQ_F_PARTIAL_IO_BIT),
+       /* fast poll multishot mode */
+       REQ_F_APOLL_MULTISHOT   = BIT(REQ_F_APOLL_MULTISHOT_BIT),
 };
 
 struct async_poll {
@@ -869,6 +939,21 @@ enum {
        IORING_RSRC_BUFFER              = 1,
 };
 
+struct io_cqe {
+       __u64   user_data;
+       __s32   res;
+       /* fd initially, then cflags for completion */
+       union {
+               __u32   flags;
+               int     fd;
+       };
+};
+
+enum {
+       IO_CHECK_CQ_OVERFLOW_BIT,
+       IO_CHECK_CQ_DROPPED_BIT,
+};
+
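The old single-purpose check_cq_overflow word becomes a small bit set; the
kernel manipulates it with atomic bitops (set_bit()/test_bit()), which this
sketch only approximates with plain operations:

    #include <stdio.h>

    enum { CQ_OVERFLOW_BIT, CQ_DROPPED_BIT };       /* mirrors the enum above */

    int main(void)
    {
            unsigned long check_cq = 0;

            check_cq |= 1UL << CQ_OVERFLOW_BIT;     /* set_bit() in the kernel */
            if (check_cq & (1UL << CQ_OVERFLOW_BIT))
                    puts("overflow flush needed");
            if (!(check_cq & (1UL << CQ_DROPPED_BIT)))
                    puts("no CQEs dropped yet");
            return 0;
    }
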
 /*
  * NOTE! Each of the iocb union members has the file pointer
  * as the first entry in their struct definition. So you can
@@ -904,38 +989,65 @@ struct io_kiocb {
                struct io_symlink       symlink;
                struct io_hardlink      hardlink;
                struct io_msg           msg;
+               struct io_xattr         xattr;
+               struct io_socket        sock;
+               struct io_nop           nop;
+               struct io_uring_cmd     uring_cmd;
        };
 
        u8                              opcode;
        /* polled IO has completed */
        u8                              iopoll_completed;
+       /*
+        * Can be either a fixed buffer index, or used with provided buffers.
+        * For the latter, before issue it points to the buffer group ID,
+        * and after selection it points to the buffer ID itself.
+        */
        u16                             buf_index;
        unsigned int                    flags;
 
-       u64                             user_data;
-       u32                             result;
-       u32                             cflags;
+       struct io_cqe                   cqe;
 
        struct io_ring_ctx              *ctx;
        struct task_struct              *task;
 
-       struct percpu_ref               *fixed_rsrc_refs;
-       /* store used ubuf, so we can prevent reloading */
-       struct io_mapped_ubuf           *imu;
+       struct io_rsrc_node             *rsrc_node;
+
+       union {
+               /* store used ubuf, so we can prevent reloading */
+               struct io_mapped_ubuf   *imu;
+
+               /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
+               struct io_buffer        *kbuf;
 
-       /* used by request caches, completion batching and iopoll */
-       struct io_wq_work_node          comp_list;
+               /*
+                * stores the buffer list for ring provided buffers, valid
+                * IFF REQ_F_BUFFER_RING is set.
+                */
+               struct io_buffer_list   *buf_list;
+       };
+
+       union {
+               /* used by request caches, completion batching and iopoll */
+               struct io_wq_work_node  comp_list;
+               /* cache ->apoll->events */
+               __poll_t apoll_events;
+       };
        atomic_t                        refs;
        atomic_t                        poll_refs;
        struct io_task_work             io_task_work;
        /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
-       struct hlist_node               hash_node;
+       union {
+               struct hlist_node       hash_node;
+               struct {
+                       u64             extra1;
+                       u64             extra2;
+               };
+       };
        /* internal polling, see IORING_FEAT_FAST_POLL */
        struct async_poll               *apoll;
        /* opcode allocated if it needs to store data for async defer */
        void                            *async_data;
-       /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
-       struct io_buffer                *kbuf;
        /* linked requests, IFF REQ_F_HARDLINK or REQ_F_LINK are set */
        struct io_kiocb                 *link;
        /* custom credentials, valid IFF REQ_F_CREDS is set */
@@ -955,6 +1067,24 @@ struct io_defer_entry {
        u32                     seq;
 };
 
+struct io_cancel_data {
+       struct io_ring_ctx *ctx;
+       union {
+               u64 data;
+               struct file *file;
+       };
+       u32 flags;
+       int seq;
+};
+
+/*
+ * The URING_CMD payload starts at 'cmd' in the first sqe, and continues into
+ * the following sqe if SQE128 is used.
+ */
+#define uring_cmd_pdu_size(is_sqe128)                          \
+       ((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) -    \
+               offsetof(struct io_uring_sqe, cmd))
+
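A quick stand-alone check of that arithmetic, using an assumed 64-byte sqe
layout with the command payload at offset 48 (the real offsets live in the
uapi header): a single sqe leaves 16 payload bytes, SQE128 leaves 80.

    #include <stddef.h>
    #include <stdio.h>

    struct sqe { char hdr[48]; char cmd[16]; };     /* stand-in layout */

    #define pdu_size(is_sqe128) \
            ((1 + !!(is_sqe128)) * sizeof(struct sqe) - offsetof(struct sqe, cmd))

    int main(void)
    {
            printf("%zu %zu\n", pdu_size(0), pdu_size(1));  /* 16 80 */
            return 0;
    }
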
 struct io_op_def {
        /* needs req->file assigned */
        unsigned                needs_file : 1;
@@ -976,12 +1106,20 @@ struct io_op_def {
        unsigned                not_supported : 1;
        /* skip auditing */
        unsigned                audit_skip : 1;
+       /* supports ioprio */
+       unsigned                ioprio : 1;
+       /* supports iopoll */
+       unsigned                iopoll : 1;
        /* size of async data needed, if any */
        unsigned short          async_size;
 };
 
 static const struct io_op_def io_op_defs[] = {
-       [IORING_OP_NOP] = {},
+       [IORING_OP_NOP] = {
+               .audit_skip             = 1,
+               .iopoll                 = 1,
+               .buffer_select          = 1,
+       },
        [IORING_OP_READV] = {
                .needs_file             = 1,
                .unbound_nonreg_file    = 1,
@@ -990,6 +1128,8 @@ static const struct io_op_def io_op_defs[] = {
                .needs_async_setup      = 1,
                .plug                   = 1,
                .audit_skip             = 1,
+               .ioprio                 = 1,
+               .iopoll                 = 1,
                .async_size             = sizeof(struct io_async_rw),
        },
        [IORING_OP_WRITEV] = {
@@ -1000,6 +1140,8 @@ static const struct io_op_def io_op_defs[] = {
                .needs_async_setup      = 1,
                .plug                   = 1,
                .audit_skip             = 1,
+               .ioprio                 = 1,
+               .iopoll                 = 1,
                .async_size             = sizeof(struct io_async_rw),
        },
        [IORING_OP_FSYNC] = {
@@ -1012,6 +1154,8 @@ static const struct io_op_def io_op_defs[] = {
                .pollin                 = 1,
                .plug                   = 1,
                .audit_skip             = 1,
+               .ioprio                 = 1,
+               .iopoll                 = 1,
                .async_size             = sizeof(struct io_async_rw),
        },
        [IORING_OP_WRITE_FIXED] = {
@@ -1021,6 +1165,8 @@ static const struct io_op_def io_op_defs[] = {
                .pollout                = 1,
                .plug                   = 1,
                .audit_skip             = 1,
+               .ioprio                 = 1,
+               .iopoll                 = 1,
                .async_size             = sizeof(struct io_async_rw),
        },
        [IORING_OP_POLL_ADD] = {
@@ -1063,6 +1209,7 @@ static const struct io_op_def io_op_defs[] = {
                .unbound_nonreg_file    = 1,
                .pollin                 = 1,
                .poll_exclusive         = 1,
+               .ioprio                 = 1,    /* used for flags */
        },
        [IORING_OP_ASYNC_CANCEL] = {
                .audit_skip             = 1,
@@ -1085,6 +1232,7 @@ static const struct io_op_def io_op_defs[] = {
        [IORING_OP_CLOSE] = {},
        [IORING_OP_FILES_UPDATE] = {
                .audit_skip             = 1,
+               .iopoll                 = 1,
        },
        [IORING_OP_STATX] = {
                .audit_skip             = 1,
@@ -1096,6 +1244,8 @@ static const struct io_op_def io_op_defs[] = {
                .buffer_select          = 1,
                .plug                   = 1,
                .audit_skip             = 1,
+               .ioprio                 = 1,
+               .iopoll                 = 1,
                .async_size             = sizeof(struct io_async_rw),
        },
        [IORING_OP_WRITE] = {
@@ -1105,6 +1255,8 @@ static const struct io_op_def io_op_defs[] = {
                .pollout                = 1,
                .plug                   = 1,
                .audit_skip             = 1,
+               .ioprio                 = 1,
+               .iopoll                 = 1,
                .async_size             = sizeof(struct io_async_rw),
        },
        [IORING_OP_FADVISE] = {
@@ -1139,9 +1291,11 @@ static const struct io_op_def io_op_defs[] = {
        },
        [IORING_OP_PROVIDE_BUFFERS] = {
                .audit_skip             = 1,
+               .iopoll                 = 1,
        },
        [IORING_OP_REMOVE_BUFFERS] = {
                .audit_skip             = 1,
+               .iopoll                 = 1,
        },
        [IORING_OP_TEE] = {
                .needs_file             = 1,
@@ -1159,11 +1313,30 @@ static const struct io_op_def io_op_defs[] = {
        [IORING_OP_LINKAT] = {},
        [IORING_OP_MSG_RING] = {
                .needs_file             = 1,
+               .iopoll                 = 1,
+       },
+       [IORING_OP_FSETXATTR] = {
+               .needs_file             = 1,
+       },
+       [IORING_OP_SETXATTR] = {},
+       [IORING_OP_FGETXATTR] = {
+               .needs_file             = 1,
+       },
+       [IORING_OP_GETXATTR] = {},
+       [IORING_OP_SOCKET] = {
+               .audit_skip             = 1,
+       },
+       [IORING_OP_URING_CMD] = {
+               .needs_file             = 1,
+               .plug                   = 1,
+               .needs_async_setup      = 1,
+               .async_size             = uring_cmd_pdu_size(1),
        },
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
 #define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL)
+#define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK)
 
 static bool io_disarm_next(struct io_kiocb *req);
 static void io_uring_del_tctx_node(unsigned long index);
@@ -1172,19 +1345,19 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
                                         bool cancel_all);
 static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
 
-static void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags);
-
-static void io_put_req(struct io_kiocb *req);
-static void io_put_req_deferred(struct io_kiocb *req);
+static void __io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags);
 static void io_dismantle_req(struct io_kiocb *req);
 static void io_queue_linked_timeout(struct io_kiocb *req);
 static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
                                     struct io_uring_rsrc_update2 *up,
                                     unsigned nr_args);
 static void io_clean_op(struct io_kiocb *req);
-static struct file *io_file_get(struct io_ring_ctx *ctx,
-                               struct io_kiocb *req, int fd, bool fixed);
-static void __io_queue_sqe(struct io_kiocb *req);
+static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
+                                            unsigned issue_flags);
+static struct file *io_file_get_normal(struct io_kiocb *req, int fd);
+static void io_drop_inflight_file(struct io_kiocb *req);
+static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags);
+static void io_queue_sqe(struct io_kiocb *req);
 static void io_rsrc_put_work(struct work_struct *work);
 
 static void io_req_task_queue(struct io_kiocb *req);
@@ -1197,11 +1370,115 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
 
 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
 static void io_eventfd_signal(struct io_ring_ctx *ctx);
+static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags);
 
 static struct kmem_cache *req_cachep;
 
 static const struct file_operations io_uring_fops;
 
+const char *io_uring_get_opcode(u8 opcode)
+{
+       switch ((enum io_uring_op)opcode) {
+       case IORING_OP_NOP:
+               return "NOP";
+       case IORING_OP_READV:
+               return "READV";
+       case IORING_OP_WRITEV:
+               return "WRITEV";
+       case IORING_OP_FSYNC:
+               return "FSYNC";
+       case IORING_OP_READ_FIXED:
+               return "READ_FIXED";
+       case IORING_OP_WRITE_FIXED:
+               return "WRITE_FIXED";
+       case IORING_OP_POLL_ADD:
+               return "POLL_ADD";
+       case IORING_OP_POLL_REMOVE:
+               return "POLL_REMOVE";
+       case IORING_OP_SYNC_FILE_RANGE:
+               return "SYNC_FILE_RANGE";
+       case IORING_OP_SENDMSG:
+               return "SENDMSG";
+       case IORING_OP_RECVMSG:
+               return "RECVMSG";
+       case IORING_OP_TIMEOUT:
+               return "TIMEOUT";
+       case IORING_OP_TIMEOUT_REMOVE:
+               return "TIMEOUT_REMOVE";
+       case IORING_OP_ACCEPT:
+               return "ACCEPT";
+       case IORING_OP_ASYNC_CANCEL:
+               return "ASYNC_CANCEL";
+       case IORING_OP_LINK_TIMEOUT:
+               return "LINK_TIMEOUT";
+       case IORING_OP_CONNECT:
+               return "CONNECT";
+       case IORING_OP_FALLOCATE:
+               return "FALLOCATE";
+       case IORING_OP_OPENAT:
+               return "OPENAT";
+       case IORING_OP_CLOSE:
+               return "CLOSE";
+       case IORING_OP_FILES_UPDATE:
+               return "FILES_UPDATE";
+       case IORING_OP_STATX:
+               return "STATX";
+       case IORING_OP_READ:
+               return "READ";
+       case IORING_OP_WRITE:
+               return "WRITE";
+       case IORING_OP_FADVISE:
+               return "FADVISE";
+       case IORING_OP_MADVISE:
+               return "MADVISE";
+       case IORING_OP_SEND:
+               return "SEND";
+       case IORING_OP_RECV:
+               return "RECV";
+       case IORING_OP_OPENAT2:
+               return "OPENAT2";
+       case IORING_OP_EPOLL_CTL:
+               return "EPOLL_CTL";
+       case IORING_OP_SPLICE:
+               return "SPLICE";
+       case IORING_OP_PROVIDE_BUFFERS:
+               return "PROVIDE_BUFFERS";
+       case IORING_OP_REMOVE_BUFFERS:
+               return "REMOVE_BUFFERS";
+       case IORING_OP_TEE:
+               return "TEE";
+       case IORING_OP_SHUTDOWN:
+               return "SHUTDOWN";
+       case IORING_OP_RENAMEAT:
+               return "RENAMEAT";
+       case IORING_OP_UNLINKAT:
+               return "UNLINKAT";
+       case IORING_OP_MKDIRAT:
+               return "MKDIRAT";
+       case IORING_OP_SYMLINKAT:
+               return "SYMLINKAT";
+       case IORING_OP_LINKAT:
+               return "LINKAT";
+       case IORING_OP_MSG_RING:
+               return "MSG_RING";
+       case IORING_OP_FSETXATTR:
+               return "FSETXATTR";
+       case IORING_OP_SETXATTR:
+               return "SETXATTR";
+       case IORING_OP_FGETXATTR:
+               return "FGETXATTR";
+       case IORING_OP_GETXATTR:
+               return "GETXATTR";
+       case IORING_OP_SOCKET:
+               return "SOCKET";
+       case IORING_OP_URING_CMD:
+               return "URING_CMD";
+       case IORING_OP_LAST:
+               return "INVALID";
+       }
+       return "INVALID";
+}
+
 struct sock *io_uring_get_socket(struct file *file)
 {
 #if defined(CONFIG_UNIX)
@@ -1215,6 +1492,42 @@ struct sock *io_uring_get_socket(struct file *file)
 }
 EXPORT_SYMBOL(io_uring_get_socket);
 
+#if defined(CONFIG_UNIX)
+static inline bool io_file_need_scm(struct file *filp)
+{
+#if defined(IO_URING_SCM_ALL)
+       return true;
+#else
+       return !!unix_get_socket(filp);
+#endif
+}
+#else
+static inline bool io_file_need_scm(struct file *filp)
+{
+       return false;
+}
+#endif
+
+static void io_ring_submit_unlock(struct io_ring_ctx *ctx, unsigned issue_flags)
+{
+       lockdep_assert_held(&ctx->uring_lock);
+       if (issue_flags & IO_URING_F_UNLOCKED)
+               mutex_unlock(&ctx->uring_lock);
+}
+
+static void io_ring_submit_lock(struct io_ring_ctx *ctx, unsigned issue_flags)
+{
+       /*
+        * "Normal" inline submissions always hold the uring_lock, since we
+        * grab it from the system call. Same is true for the SQPOLL offload.
+        * The only exception is when we've detached the request and issue it
+        * from an async worker thread; grab the lock for that case.
+        */
+       if (issue_flags & IO_URING_F_UNLOCKED)
+               mutex_lock(&ctx->uring_lock);
+       lockdep_assert_held(&ctx->uring_lock);
+}
+
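The pair above centralizes a pattern the call sites used to open-code: take
uring_lock only when the caller doesn't already hold it. A userspace analog
with a pthread mutex (the flag name is a stand-in for IO_URING_F_UNLOCKED):

    #include <pthread.h>
    #include <stdio.h>

    #define F_UNLOCKED 0x1

    static pthread_mutex_t uring_lock = PTHREAD_MUTEX_INITIALIZER;

    static void submit_lock(unsigned int flags)
    {
            if (flags & F_UNLOCKED)         /* async worker: not yet holding it */
                    pthread_mutex_lock(&uring_lock);
    }

    static void submit_unlock(unsigned int flags)
    {
            if (flags & F_UNLOCKED)
                    pthread_mutex_unlock(&uring_lock);
    }

    int main(void)
    {
            submit_lock(F_UNLOCKED);
            puts("inside the critical section");
            submit_unlock(F_UNLOCKED);
            return 0;
    }
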
 static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
 {
        if (!*locked) {
@@ -1276,31 +1589,36 @@ static inline void io_req_set_refcount(struct io_kiocb *req)
 
 #define IO_RSRC_REF_BATCH      100
 
+static void io_rsrc_put_node(struct io_rsrc_node *node, int nr)
+{
+       percpu_ref_put_many(&node->refs, nr);
+}
+
 static inline void io_req_put_rsrc_locked(struct io_kiocb *req,
                                          struct io_ring_ctx *ctx)
        __must_hold(&ctx->uring_lock)
 {
-       struct percpu_ref *ref = req->fixed_rsrc_refs;
+       struct io_rsrc_node *node = req->rsrc_node;
 
-       if (ref) {
-               if (ref == &ctx->rsrc_node->refs)
+       if (node) {
+               if (node == ctx->rsrc_node)
                        ctx->rsrc_cached_refs++;
                else
-                       percpu_ref_put(ref);
+                       io_rsrc_put_node(node, 1);
        }
 }
 
-static inline void io_req_put_rsrc(struct io_kiocb *req, struct io_ring_ctx *ctx)
+static inline void io_req_put_rsrc(struct io_kiocb *req)
 {
-       if (req->fixed_rsrc_refs)
-               percpu_ref_put(req->fixed_rsrc_refs);
+       if (req->rsrc_node)
+               io_rsrc_put_node(req->rsrc_node, 1);
 }
 
 static __cold void io_rsrc_refs_drop(struct io_ring_ctx *ctx)
        __must_hold(&ctx->uring_lock)
 {
        if (ctx->rsrc_cached_refs) {
-               percpu_ref_put_many(&ctx->rsrc_node->refs, ctx->rsrc_cached_refs);
+               io_rsrc_put_node(ctx->rsrc_node, ctx->rsrc_cached_refs);
                ctx->rsrc_cached_refs = 0;
        }
 }
@@ -1313,33 +1631,42 @@ static void io_rsrc_refs_refill(struct io_ring_ctx *ctx)
 }
 
 static inline void io_req_set_rsrc_node(struct io_kiocb *req,
-                                       struct io_ring_ctx *ctx)
+                                       struct io_ring_ctx *ctx,
+                                       unsigned int issue_flags)
 {
-       if (!req->fixed_rsrc_refs) {
-               req->fixed_rsrc_refs = &ctx->rsrc_node->refs;
-               ctx->rsrc_cached_refs--;
-               if (unlikely(ctx->rsrc_cached_refs < 0))
-                       io_rsrc_refs_refill(ctx);
+       if (!req->rsrc_node) {
+               req->rsrc_node = ctx->rsrc_node;
+
+               if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+                       lockdep_assert_held(&ctx->uring_lock);
+                       ctx->rsrc_cached_refs--;
+                       if (unlikely(ctx->rsrc_cached_refs < 0))
+                               io_rsrc_refs_refill(ctx);
+               } else {
+                       percpu_ref_get(&req->rsrc_node->refs);
+               }
        }
 }
 
 static unsigned int __io_put_kbuf(struct io_kiocb *req, struct list_head *list)
 {
-       struct io_buffer *kbuf = req->kbuf;
-       unsigned int cflags;
+       if (req->flags & REQ_F_BUFFER_RING) {
+               if (req->buf_list)
+                       req->buf_list->head++;
+               req->flags &= ~REQ_F_BUFFER_RING;
+       } else {
+               list_add(&req->kbuf->list, list);
+               req->flags &= ~REQ_F_BUFFER_SELECTED;
+       }
 
-       cflags = IORING_CQE_F_BUFFER | (kbuf->bid << IORING_CQE_BUFFER_SHIFT);
-       req->flags &= ~REQ_F_BUFFER_SELECTED;
-       list_add(&kbuf->list, list);
-       req->kbuf = NULL;
-       return cflags;
+       return IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
 }
 
 static inline unsigned int io_put_kbuf_comp(struct io_kiocb *req)
 {
        lockdep_assert_held(&req->ctx->completion_lock);
 
-       if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+       if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
                return 0;
        return __io_put_kbuf(req, &req->ctx->io_buffers_comp);
 }
@@ -1349,7 +1676,7 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
 {
        unsigned int cflags;
 
-       if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+       if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
                return 0;
 
        /*
@@ -1364,7 +1691,10 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
         * We migrate buffers from the comp_list to the issue cache list
         * when we need one.
         */
-       if (issue_flags & IO_URING_F_UNLOCKED) {
+       if (req->flags & REQ_F_BUFFER_RING) {
+               /* no buffers to recycle for this case */
+               cflags = __io_put_kbuf(req, NULL);
+       } else if (issue_flags & IO_URING_F_UNLOCKED) {
                struct io_ring_ctx *ctx = req->ctx;
 
                spin_lock(&ctx->completion_lock);
@@ -1382,15 +1712,10 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
 static struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
                                                 unsigned int bgid)
 {
-       struct list_head *hash_list;
-       struct io_buffer_list *bl;
-
-       hash_list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
-       list_for_each_entry(bl, hash_list, list)
-               if (bl->bgid == bgid || bgid == -1U)
-                       return bl;
+       if (ctx->io_bl && bgid < BGID_ARRAY)
+               return &ctx->io_bl[bgid];
 
-       return NULL;
+       return xa_load(&ctx->io_bl_xa, bgid);
 }
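
Two tiers: group IDs below BGID_ARRAY index a flat array in one step, anything
larger takes the slower keyed lookup. A self-contained analog in which a
linear scan stands in for the xarray:

    #include <stdio.h>

    #define BGID_ARRAY 64

    struct blist { unsigned int bgid; int used; };

    static struct blist fast[BGID_ARRAY];
    static struct blist slow[2] = { { 100, 1 }, { 4096, 1 } };

    static struct blist *get_list(unsigned int bgid)
    {
            if (bgid < BGID_ARRAY)          /* O(1) fast path */
                    return &fast[bgid];
            for (int i = 0; i < 2; i++)     /* slow path stand-in */
                    if (slow[i].used && slow[i].bgid == bgid)
                            return &slow[i];
            return NULL;
    }

    int main(void)
    {
            printf("%d %d\n", get_list(3) != NULL, get_list(4096) != NULL);
            return 0;
    }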
 
 static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
@@ -1399,54 +1724,42 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
        struct io_buffer_list *bl;
        struct io_buffer *buf;
 
-       if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+       if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
                return;
        /* don't recycle if we already did IO to this buffer */
        if (req->flags & REQ_F_PARTIAL_IO)
                return;
+       /*
+        * We don't need to recycle for REQ_F_BUFFER_RING; we can just clear
+        * the flag and hence ensure that bl->head doesn't get incremented.
+        * If the tail has already been incremented, hang on to it.
+        */
+       if (req->flags & REQ_F_BUFFER_RING) {
+               if (req->buf_list) {
+                       req->buf_index = req->buf_list->bgid;
+                       req->flags &= ~REQ_F_BUFFER_RING;
+               }
+               return;
+       }
 
-       if (issue_flags & IO_URING_F_UNLOCKED)
-               mutex_lock(&ctx->uring_lock);
-
-       lockdep_assert_held(&ctx->uring_lock);
+       io_ring_submit_lock(ctx, issue_flags);
 
        buf = req->kbuf;
        bl = io_buffer_get_list(ctx, buf->bgid);
        list_add(&buf->list, &bl->buf_list);
        req->flags &= ~REQ_F_BUFFER_SELECTED;
-       req->kbuf = NULL;
+       req->buf_index = buf->bgid;
 
-       if (issue_flags & IO_URING_F_UNLOCKED)
-               mutex_unlock(&ctx->uring_lock);
+       io_ring_submit_unlock(ctx, issue_flags);
 }
 
 static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
                          bool cancel_all)
        __must_hold(&req->ctx->timeout_lock)
 {
-       struct io_kiocb *req;
-
        if (task && head->task != task)
                return false;
-       if (cancel_all)
-               return true;
-
-       io_for_each_link(req, head) {
-               if (req->flags & REQ_F_INFLIGHT)
-                       return true;
-       }
-       return false;
-}
-
-static bool io_match_linked(struct io_kiocb *head)
-{
-       struct io_kiocb *req;
-
-       io_for_each_link(req, head) {
-               if (req->flags & REQ_F_INFLIGHT)
-                       return true;
-       }
-       return false;
+       return cancel_all;
 }
 
 /*
@@ -1456,24 +1769,9 @@ static bool io_match_linked(struct io_kiocb *head)
 static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
                               bool cancel_all)
 {
-       bool matched;
-
        if (task && head->task != task)
                return false;
-       if (cancel_all)
-               return true;
-
-       if (head->flags & REQ_F_LINK_TIMEOUT) {
-               struct io_ring_ctx *ctx = head->ctx;
-
-               /* protect against races with linked timeouts */
-               spin_lock_irq(&ctx->timeout_lock);
-               matched = io_match_linked(head);
-               spin_unlock_irq(&ctx->timeout_lock);
-       } else {
-               matched = io_match_linked(head);
-       }
-       return matched;
+       return cancel_all;
 }
 
 static inline bool req_has_async_data(struct io_kiocb *req)
@@ -1493,7 +1791,12 @@ static inline void req_set_fail(struct io_kiocb *req)
 static inline void req_fail_link_node(struct io_kiocb *req, int res)
 {
        req_set_fail(req);
-       req->result = res;
+       req->cqe.res = res;
+}
+
+static inline void io_req_add_to_cache(struct io_kiocb *req, struct io_ring_ctx *ctx)
+{
+       wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list);
 }
 
 static __cold void io_ring_ctx_ref_free(struct percpu_ref *ref)
@@ -1530,12 +1833,14 @@ static __cold void io_fallback_req_func(struct work_struct *work)
 static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 {
        struct io_ring_ctx *ctx;
-       int i, hash_bits;
+       int hash_bits;
 
        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return NULL;
 
+       xa_init(&ctx->io_bl_xa);
+
        /*
         * Use 5 bits less than the max cq entries, that should give us around
         * 32 entries per hash list if totally full and uniformly spread.
@@ -1557,13 +1862,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
        /* set invalid range, so io_import_fixed() fails meeting it */
        ctx->dummy_ubuf->ubuf = -1UL;
 
-       ctx->io_buffers = kcalloc(1U << IO_BUFFERS_HASH_BITS,
-                                       sizeof(struct list_head), GFP_KERNEL);
-       if (!ctx->io_buffers)
-               goto err;
-       for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++)
-               INIT_LIST_HEAD(&ctx->io_buffers[i]);
-
        if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
                            PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
                goto err;
@@ -1595,15 +1893,12 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
        INIT_WQ_LIST(&ctx->locked_free_list);
        INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
        INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
-#ifdef CONFIG_NET_RX_BUSY_POLL
-       INIT_LIST_HEAD(&ctx->napi_list);
-       spin_lock_init(&ctx->napi_lock);
-#endif
        return ctx;
 err:
        kfree(ctx->dummy_ubuf);
        kfree(ctx->cancel_hash);
-       kfree(ctx->io_buffers);
+       kfree(ctx->io_bl);
+       xa_destroy(&ctx->io_bl_xa);
        kfree(ctx);
        return NULL;
 }
@@ -1627,23 +1922,11 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
        return false;
 }
 
-#define FFS_NOWAIT             0x1UL
-#define FFS_ISREG              0x2UL
-#define FFS_MASK               ~(FFS_NOWAIT|FFS_ISREG)
-
 static inline bool io_req_ffs_set(struct io_kiocb *req)
 {
        return req->flags & REQ_F_FIXED_FILE;
 }
 
-static inline void io_req_track_inflight(struct io_kiocb *req)
-{
-       if (!(req->flags & REQ_F_INFLIGHT)) {
-               req->flags |= REQ_F_INFLIGHT;
-               atomic_inc(&current->io_uring->inflight_tracked);
-       }
-}
-
 static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
 {
        if (WARN_ON_ONCE(!req->link))
@@ -1665,6 +1948,17 @@ static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
        return __io_prep_linked_timeout(req);
 }
 
+static noinline void __io_arm_ltimeout(struct io_kiocb *req)
+{
+       io_queue_linked_timeout(__io_prep_linked_timeout(req));
+}
+
+static inline void io_arm_ltimeout(struct io_kiocb *req)
+{
+       if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT))
+               __io_arm_ltimeout(req);
+}
+
 static void io_prep_async_work(struct io_kiocb *req)
 {
        const struct io_op_def *def = &io_op_defs[req->opcode];
@@ -1677,6 +1971,7 @@ static void io_prep_async_work(struct io_kiocb *req)
 
        req->work.list.next = NULL;
        req->work.flags = 0;
+       req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
        if (req->flags & REQ_F_FORCE_ASYNC)
                req->work.flags |= IO_WQ_WORK_CONCURRENT;
 
@@ -1687,14 +1982,6 @@ static void io_prep_async_work(struct io_kiocb *req)
                if (def->unbound_nonreg_file)
                        req->work.flags |= IO_WQ_WORK_UNBOUND;
        }
-
-       switch (req->opcode) {
-       case IORING_OP_SPLICE:
-       case IORING_OP_TEE:
-               if (!S_ISREG(file_inode(req->splice.file_in)->i_mode))
-                       req->work.flags |= IO_WQ_WORK_UNBOUND;
-               break;
-       }
 }
 
 static void io_prep_async_link(struct io_kiocb *req)
@@ -1716,17 +2003,15 @@ static void io_prep_async_link(struct io_kiocb *req)
 
 static inline void io_req_add_compl_list(struct io_kiocb *req)
 {
-       struct io_ring_ctx *ctx = req->ctx;
-       struct io_submit_state *state = &ctx->submit_state;
+       struct io_submit_state *state = &req->ctx->submit_state;
 
        if (!(req->flags & REQ_F_CQE_SKIP))
-               ctx->submit_state.flush_cqes = true;
+               state->flush_cqes = true;
        wq_list_add_tail(&req->comp_list, &state->compl_reqs);
 }
 
-static void io_queue_async_work(struct io_kiocb *req, bool *dont_use)
+static void io_queue_iowq(struct io_kiocb *req, bool *dont_use)
 {
-       struct io_ring_ctx *ctx = req->ctx;
        struct io_kiocb *link = io_prep_linked_timeout(req);
        struct io_uring_task *tctx = req->task->io_uring;
 
@@ -1746,8 +2031,9 @@ static void io_queue_async_work(struct io_kiocb *req, bool *dont_use)
        if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
                req->work.flags |= IO_WQ_WORK_CANCEL;
 
-       trace_io_uring_queue_async_work(ctx, req, req->user_data, req->opcode, req->flags,
-                                       &req->work, io_wq_is_hashed(&req->work));
+       trace_io_uring_queue_async_work(req->ctx, req, req->cqe.user_data,
+                                       req->opcode, req->flags, &req->work,
+                                       io_wq_is_hashed(&req->work));
        io_wq_enqueue(tctx->io_wq, &req->work);
        if (link)
                io_queue_linked_timeout(link);
@@ -1765,8 +2051,7 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
                atomic_set(&req->ctx->cq_timeouts,
                        atomic_read(&req->ctx->cq_timeouts) + 1);
                list_del_init(&req->timeout.list);
-               io_fill_cqe_req(req, status, 0);
-               io_put_req_deferred(req);
+               io_req_tw_post_queue(req, status, 0);
        }
 }
 
@@ -1788,12 +2073,11 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
        __must_hold(&ctx->completion_lock)
 {
        u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+       struct io_kiocb *req, *tmp;
 
        spin_lock_irq(&ctx->timeout_lock);
-       while (!list_empty(&ctx->timeout_list)) {
+       list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
                u32 events_needed, events_got;
-               struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
-                                               struct io_kiocb, timeout.list);
 
                if (io_is_timeout_noseq(req))
                        break;
@@ -1810,7 +2094,6 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
                if (events_got < events_needed)
                        break;
 
-               list_del_init(&req->timeout.list);
                io_kill_timeout(req, 0);
        }
        ctx->cq_last_tm_flush = seq;
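
The switch to list_for_each_entry_safe() matters because io_kill_timeout()
unlinks the current entry mid-walk; the iterator has to stash the next node
before the current one goes away. The same discipline on a plain singly
linked list:

    #include <stdio.h>
    #include <stdlib.h>

    struct node { int seq; struct node *next; };

    int main(void)
    {
            struct node *head = NULL, *n, *tmp;

            for (int i = 3; i >= 1; i--) {          /* build 1 -> 2 -> 3 */
                    n = malloc(sizeof(*n));
                    n->seq = i;
                    n->next = head;
                    head = n;
            }
            for (n = head; n; n = tmp) {            /* "safe" traversal */
                    tmp = n->next;                  /* saved before free */
                    printf("killing timeout seq %d\n", n->seq);
                    free(n);
            }
            return 0;
    }
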
@@ -1850,29 +2133,61 @@ static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
        return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
 }
 
-static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
+/*
+ * writes to the cq entry need to come after reading head; the
+ * control dependency is enough as we're using WRITE_ONCE to
+ * fill the cq entry
+ */
+static noinline struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx)
 {
        struct io_rings *rings = ctx->rings;
-       unsigned tail, mask = ctx->cq_entries - 1;
-
-       /*
-        * writes to the cq entry need to come after reading head; the
-        * control dependency is enough as we're using WRITE_ONCE to
-        * fill the cq entry
-        */
-       if (__io_cqring_events(ctx) == ctx->cq_entries)
+       unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1);
+       unsigned int shift = 0;
+       unsigned int free, queued, len;
+
+       if (ctx->flags & IORING_SETUP_CQE32)
+               shift = 1;
+
+       /* userspace may cheat by modifying the tail; be safe and do min */
+       queued = min(__io_cqring_events(ctx), ctx->cq_entries);
+       free = ctx->cq_entries - queued;
+       /* we need a contiguous range; limit it based on the current array offset */
+       len = min(free, ctx->cq_entries - off);
+       if (!len)
                return NULL;
 
-       tail = ctx->cached_cq_tail++;
-       return &rings->cqes[tail & mask];
+       ctx->cached_cq_tail++;
+       ctx->cqe_cached = &rings->cqes[off];
+       ctx->cqe_sentinel = ctx->cqe_cached + len;
+       ctx->cqe_cached++;
+       return &rings->cqes[off << shift];
 }
 
-static void io_eventfd_signal(struct io_ring_ctx *ctx)
+static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
 {
-       struct io_ev_fd *ev_fd;
+       if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) {
+               struct io_uring_cqe *cqe = ctx->cqe_cached;
 
-       rcu_read_lock();
-       /*
+               if (ctx->flags & IORING_SETUP_CQE32) {
+                       unsigned int off = ctx->cqe_cached - ctx->rings->cqes;
+
+                       cqe += off;
+               }
+
+               ctx->cached_cq_tail++;
+               ctx->cqe_cached++;
+               return cqe;
+       }
+
+       return __io_get_cqe(ctx);
+}
+
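The fast/slow split behaves like a bump allocator over the free span of the
CQ array: io_get_cqe() hands out entries from [cqe_cached, cqe_sentinel) and
only falls back to __io_get_cqe() to recompute the contiguous free range. A
compact userspace model of the idea (sizes and names are illustrative):

    #include <stdio.h>

    static int ring[16];
    static unsigned int head, tail;         /* free-running indexes */
    static int *cached = ring, *sentinel = ring;

    static int *get_slow(void)
    {
            unsigned int off = tail & 15;
            unsigned int free_entries = 16 - (tail - head);
            unsigned int len = free_entries < 16 - off ? free_entries : 16 - off;

            if (!len)                       /* ring full */
                    return NULL;
            cached = &ring[off];
            sentinel = cached + len;        /* contiguous free span */
            tail++;
            return cached++;
    }

    static int *get(void)
    {
            if (cached < sentinel) {        /* fast path: bump the cache */
                    tail++;
                    return cached++;
            }
            return get_slow();
    }

    int main(void)
    {
            for (int i = 0; i < 5; i++)
                    *get() = i;
            printf("queued %u CQEs\n", tail - head);
            return 0;
    }
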
+static void io_eventfd_signal(struct io_ring_ctx *ctx)
+{
+       struct io_ev_fd *ev_fd;
+
+       rcu_read_lock();
+       /*
         * rcu_dereference ctx->io_ev_fd once and use it for both for checking
         * and eventfd_signal
         */
@@ -1935,10 +2250,14 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 {
        bool all_flushed, posted;
+       size_t cqe_size = sizeof(struct io_uring_cqe);
 
        if (!force && __io_cqring_events(ctx) == ctx->cq_entries)
                return false;
 
+       if (ctx->flags & IORING_SETUP_CQE32)
+               cqe_size <<= 1;
+
        posted = false;
        spin_lock(&ctx->completion_lock);
        while (!list_empty(&ctx->cq_overflow_list)) {
@@ -1950,7 +2269,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
                ocqe = list_first_entry(&ctx->cq_overflow_list,
                                        struct io_overflow_cqe, list);
                if (cqe)
-                       memcpy(cqe, &ocqe->cqe, sizeof(*cqe));
+                       memcpy(cqe, &ocqe->cqe, cqe_size);
                else
                        io_account_cq_overflow(ctx);
 
@@ -1961,13 +2280,11 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 
        all_flushed = list_empty(&ctx->cq_overflow_list);
        if (all_flushed) {
-               clear_bit(0, &ctx->check_cq_overflow);
-               WRITE_ONCE(ctx->rings->sq_flags,
-                          ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW);
+               clear_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq);
+               atomic_andnot(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags);
        }
 
-       if (posted)
-               io_commit_cqring(ctx);
+       io_commit_cqring(ctx);
        spin_unlock(&ctx->completion_lock);
        if (posted)
                io_cqring_ev_posted(ctx);
@@ -1978,7 +2295,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx)
 {
        bool ret = true;
 
-       if (test_bit(0, &ctx->check_cq_overflow)) {
+       if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) {
                /* iopoll syncs against uring_lock, not completion_lock */
                if (ctx->flags & IORING_SETUP_IOPOLL)
                        mutex_lock(&ctx->uring_lock);
@@ -1990,19 +2307,23 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx)
        return ret;
 }
 
-/* must to be called somewhat shortly after putting a request */
-static inline void io_put_task(struct task_struct *task, int nr)
+static void __io_put_task(struct task_struct *task, int nr)
 {
        struct io_uring_task *tctx = task->io_uring;
 
-       if (likely(task == current)) {
-               tctx->cached_refs += nr;
-       } else {
-               percpu_counter_sub(&tctx->inflight, nr);
-               if (unlikely(atomic_read(&tctx->in_idle)))
-                       wake_up(&tctx->wait);
-               put_task_struct_many(task, nr);
-       }
+       percpu_counter_sub(&tctx->inflight, nr);
+       if (unlikely(atomic_read(&tctx->in_idle)))
+               wake_up(&tctx->wait);
+       put_task_struct_many(task, nr);
+}
+
+/* must be called shortly after putting a request */
+static inline void io_put_task(struct task_struct *task, int nr)
+{
+       if (likely(task == current))
+               task->io_uring->cached_refs += nr;
+       else
+               __io_put_task(task, nr);
 }
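
The rewrite is the stock inline-fast-path/out-of-line-slow-path split: the
overwhelmingly common current-task case stays small enough to inline, while
cross-task accounting moves out of line. The shape of it, in miniature:

    #include <stdio.h>

    static int cached_refs;

    static void put_slow(int nr)            /* cold: cross-task accounting */
    {
            printf("slow path dropped %d refs\n", nr);
    }

    static inline void put_task(int same_task, int nr)
    {
            if (same_task)                  /* hot path: batch locally */
                    cached_refs += nr;
            else
                    put_slow(nr);
    }

    int main(void)
    {
            put_task(1, 3);
            put_task(0, 2);
            printf("cached %d refs\n", cached_refs);
            return 0;
    }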
 
 static void io_task_refs_refill(struct io_uring_task *tctx)
@@ -2036,11 +2357,18 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
 }
 
 static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
-                                    s32 res, u32 cflags)
+                                    s32 res, u32 cflags, u64 extra1,
+                                    u64 extra2)
 {
        struct io_overflow_cqe *ocqe;
+       size_t ocq_size = sizeof(struct io_overflow_cqe);
+       bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
+
+       if (is_cqe32)
+               ocq_size += sizeof(struct io_uring_cqe);
 
-       ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
+       ocqe = kmalloc(ocq_size, GFP_ATOMIC | __GFP_ACCOUNT);
+       trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
        if (!ocqe) {
                /*
                 * If we're in ring overflow flush mode, or in task cancel mode,
@@ -2048,17 +2376,21 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
                 * on the floor.
                 */
                io_account_cq_overflow(ctx);
+               set_bit(IO_CHECK_CQ_DROPPED_BIT, &ctx->check_cq);
                return false;
        }
        if (list_empty(&ctx->cq_overflow_list)) {
-               set_bit(0, &ctx->check_cq_overflow);
-               WRITE_ONCE(ctx->rings->sq_flags,
-                          ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW);
+               set_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq);
+               atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags);
 
        }
        ocqe->cqe.user_data = user_data;
        ocqe->cqe.res = res;
        ocqe->cqe.flags = cflags;
+       if (is_cqe32) {
+               ocqe->cqe.big_cqe[0] = extra1;
+               ocqe->cqe.big_cqe[1] = extra2;
+       }
        list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
        return true;
 }
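
The overflow entry grows by exactly one extra struct io_uring_cqe when the
ring was set up with CQE32, so the big-cqe payload survives the trip through
the overflow list. The sizing in isolation (struct layouts simplified):

    #include <stdio.h>
    #include <stdlib.h>

    struct cqe { unsigned long long user_data; int res; unsigned int flags; };
    struct ocqe { struct ocqe *next; struct cqe cqe; /* big cqe may follow */ };

    int main(void)
    {
            int is_cqe32 = 1;
            size_t sz = sizeof(struct ocqe) + (is_cqe32 ? sizeof(struct cqe) : 0);
            struct ocqe *o = calloc(1, sz);

            printf("overflow entry: %zu bytes\n", sz);
            free(o);
            return 0;
    }
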
@@ -2080,42 +2412,114 @@ static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
                WRITE_ONCE(cqe->flags, cflags);
                return true;
        }
-       return io_cqring_event_overflow(ctx, user_data, res, cflags);
+       return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
+}
+
+static inline bool __io_fill_cqe_req_filled(struct io_ring_ctx *ctx,
+                                           struct io_kiocb *req)
+{
+       struct io_uring_cqe *cqe;
+
+       trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+                               req->cqe.res, req->cqe.flags, 0, 0);
+
+       /*
+        * If we can't get a cq entry, userspace overflowed the
+        * submission (by quite a lot). Increment the overflow count in
+        * the ring.
+        */
+       cqe = io_get_cqe(ctx);
+       if (likely(cqe)) {
+               memcpy(cqe, &req->cqe, sizeof(*cqe));
+               return true;
+       }
+       return io_cqring_event_overflow(ctx, req->cqe.user_data,
+                                       req->cqe.res, req->cqe.flags, 0, 0);
+}
+
+static inline bool __io_fill_cqe32_req_filled(struct io_ring_ctx *ctx,
+                                             struct io_kiocb *req)
+{
+       struct io_uring_cqe *cqe;
+       u64 extra1 = req->extra1;
+       u64 extra2 = req->extra2;
+
+       trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+                               req->cqe.res, req->cqe.flags, extra1, extra2);
+
+       /*
+        * If we can't get a cq entry, userspace overflowed the
+        * submission (by quite a lot). Increment the overflow count in
+        * the ring.
+        */
+       cqe = io_get_cqe(ctx);
+       if (likely(cqe)) {
+               memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
+               cqe->big_cqe[0] = extra1;
+               cqe->big_cqe[1] = extra2;
+               return true;
+       }
+
+       return io_cqring_event_overflow(ctx, req->cqe.user_data, req->cqe.res,
+                                       req->cqe.flags, extra1, extra2);
 }
 
 static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
 {
-       trace_io_uring_complete(req->ctx, req, req->user_data, res, cflags);
-       return __io_fill_cqe(req->ctx, req->user_data, res, cflags);
+       trace_io_uring_complete(req->ctx, req, req->cqe.user_data, res, cflags, 0, 0);
+       return __io_fill_cqe(req->ctx, req->cqe.user_data, res, cflags);
 }
 
-static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
+static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags,
+                               u64 extra1, u64 extra2)
 {
-       if (!(req->flags & REQ_F_CQE_SKIP))
-               __io_fill_cqe_req(req, res, cflags);
+       struct io_ring_ctx *ctx = req->ctx;
+       struct io_uring_cqe *cqe;
+
+       if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_CQE32)))
+               return;
+       if (req->flags & REQ_F_CQE_SKIP)
+               return;
+
+       trace_io_uring_complete(ctx, req, req->cqe.user_data, res, cflags,
+                               extra1, extra2);
+
+       /*
+        * If we can't get a cq entry, userspace overflowed the
+        * submission (by quite a lot). Increment the overflow count in
+        * the ring.
+        */
+       cqe = io_get_cqe(ctx);
+       if (likely(cqe)) {
+               WRITE_ONCE(cqe->user_data, req->cqe.user_data);
+               WRITE_ONCE(cqe->res, res);
+               WRITE_ONCE(cqe->flags, cflags);
+               WRITE_ONCE(cqe->big_cqe[0], extra1);
+               WRITE_ONCE(cqe->big_cqe[1], extra2);
+               return;
+       }
+
+       io_cqring_event_overflow(ctx, req->cqe.user_data, res, cflags, extra1, extra2);
 }
 
 static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
                                     s32 res, u32 cflags)
 {
        ctx->cq_extra++;
-       trace_io_uring_complete(ctx, NULL, user_data, res, cflags);
+       trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
        return __io_fill_cqe(ctx, user_data, res, cflags);
 }
 
-static void __io_req_complete_post(struct io_kiocb *req, s32 res,
-                                  u32 cflags)
+static void __io_req_complete_put(struct io_kiocb *req)
 {
-       struct io_ring_ctx *ctx = req->ctx;
-
-       if (!(req->flags & REQ_F_CQE_SKIP))
-               __io_fill_cqe_req(req, res, cflags);
        /*
         * If we're the last reference to this request, add to our locked
         * free_list cache.
         */
        if (req_ref_put_and_test(req)) {
-               if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
+               struct io_ring_ctx *ctx = req->ctx;
+
+               if (req->flags & IO_REQ_LINK_FLAGS) {
                        if (req->flags & IO_DISARM_MASK)
                                io_disarm_next(req);
                        if (req->link) {
@@ -2123,7 +2527,7 @@ static void __io_req_complete_post(struct io_kiocb *req, s32 res,
                                req->link = NULL;
                        }
                }
-               io_req_put_rsrc(req, ctx);
+               io_req_put_rsrc(req);
                /*
                 * Selected buffer deallocation in io_clean_op() assumes that
                 * we don't hold ->completion_lock. Clean them here to avoid
@@ -2137,8 +2541,23 @@ static void __io_req_complete_post(struct io_kiocb *req, s32 res,
        }
 }
 
-static void io_req_complete_post(struct io_kiocb *req, s32 res,
-                                u32 cflags)
+static void __io_req_complete_post(struct io_kiocb *req, s32 res,
+                                  u32 cflags)
+{
+       if (!(req->flags & REQ_F_CQE_SKIP))
+               __io_fill_cqe_req(req, res, cflags);
+       __io_req_complete_put(req);
+}
+
+static void __io_req_complete_post32(struct io_kiocb *req, s32 res,
+                                  u32 cflags, u64 extra1, u64 extra2)
+{
+       if (!(req->flags & REQ_F_CQE_SKIP))
+               __io_fill_cqe32_req(req, res, cflags, extra1, extra2);
+       __io_req_complete_put(req);
+}
+
+static void io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags)
 {
        struct io_ring_ctx *ctx = req->ctx;
 
@@ -2149,11 +2568,23 @@ static void io_req_complete_post(struct io_kiocb *req, s32 res,
        io_cqring_ev_posted(ctx);
 }
 
+static void io_req_complete_post32(struct io_kiocb *req, s32 res,
+                                  u32 cflags, u64 extra1, u64 extra2)
+{
+       struct io_ring_ctx *ctx = req->ctx;
+
+       spin_lock(&ctx->completion_lock);
+       __io_req_complete_post32(req, res, cflags, extra1, extra2);
+       io_commit_cqring(ctx);
+       spin_unlock(&ctx->completion_lock);
+       io_cqring_ev_posted(ctx);
+}
+
 static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
                                         u32 cflags)
 {
-       req->result = res;
-       req->cflags = cflags;
+       req->cqe.res = res;
+       req->cqe.flags = cflags;
        req->flags |= REQ_F_COMPLETE_INLINE;
 }
 
@@ -2166,8 +2597,23 @@ static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags,
                io_req_complete_post(req, res, cflags);
 }
 
+static inline void __io_req_complete32(struct io_kiocb *req,
+                                      unsigned int issue_flags, s32 res,
+                                      u32 cflags, u64 extra1, u64 extra2)
+{
+       if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
+               io_req_complete_state(req, res, cflags);
+               req->extra1 = extra1;
+               req->extra2 = extra2;
+       } else {
+               io_req_complete_post32(req, res, cflags, extra1, extra2);
+       }
+}
+
 static inline void io_req_complete(struct io_kiocb *req, s32 res)
 {
+       if (res < 0)
+               req_set_fail(req);
        __io_req_complete(req, 0, res, 0);
 }
 
@@ -2177,17 +2623,6 @@ static void io_req_complete_failed(struct io_kiocb *req, s32 res)
        io_req_complete_post(req, res, io_put_kbuf(req, IO_URING_F_UNLOCKED));
 }
 
-static void io_req_complete_fail_submit(struct io_kiocb *req)
-{
-       /*
-        * We don't submit, fail them all, for that replace hardlinks with
-        * normal links. Extra REQ_F_LINK is tolerated.
-        */
-       req->flags &= ~REQ_F_HARDLINK;
-       req->flags |= REQ_F_LINK;
-       io_req_complete_failed(req, req->result);
-}
-
 /*
  * Don't initialise the fields below on every allocation, but do that in
  * advance and keep them valid across allocations.
@@ -2198,7 +2633,7 @@ static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
        req->link = NULL;
        req->async_data = NULL;
        /* not necessary, but safer to zero */
-       req->result = 0;
+       req->cqe.res = 0;
 }
 
 static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
@@ -2210,19 +2645,9 @@ static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
        spin_unlock(&ctx->completion_lock);
 }
 
-/* Returns true IFF there are requests in the cache */
-static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
+static inline bool io_req_cache_empty(struct io_ring_ctx *ctx)
 {
-       struct io_submit_state *state = &ctx->submit_state;
-
-       /*
-        * If we have more than a batch's worth of requests in our IRQ side
-        * locked cache, grab the lock and move them over to our submission
-        * side cache.
-        */
-       if (READ_ONCE(ctx->locked_free_nr) > IO_COMPL_BATCH)
-               io_flush_cached_locked_reqs(ctx, state);
-       return !!state->free_list.next;
+       return !ctx->submit_state.free_list.next;
 }
 
 /*
@@ -2234,14 +2659,20 @@ static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
 static __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
        __must_hold(&ctx->uring_lock)
 {
-       struct io_submit_state *state = &ctx->submit_state;
        gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
        void *reqs[IO_REQ_ALLOC_BATCH];
-       struct io_kiocb *req;
        int ret, i;
 
-       if (likely(state->free_list.next || io_flush_cached_reqs(ctx)))
-               return true;
+       /*
+        * If we have more than a batch's worth of requests in our IRQ side
+        * locked cache, grab the lock and move them over to our submission
+        * side cache.
+        */
+       if (data_race(ctx->locked_free_nr) > IO_COMPL_BATCH) {
+               io_flush_cached_locked_reqs(ctx, &ctx->submit_state);
+               if (!io_req_cache_empty(ctx))
+                       return true;
+       }
 
        ret = kmem_cache_alloc_bulk(req_cachep, gfp, ARRAY_SIZE(reqs), reqs);
 
@@ -2258,17 +2689,17 @@ static __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
 
        percpu_ref_get_many(&ctx->refs, ret);
        for (i = 0; i < ret; i++) {
-               req = reqs[i];
+               struct io_kiocb *req = reqs[i];
 
                io_preinit_req(req, ctx);
-               wq_stack_add_head(&req->comp_list, &state->free_list);
+               io_req_add_to_cache(req, ctx);
        }
        return true;
 }
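
The refill path now folds the old io_flush_cached_reqs() logic in directly: the IRQ-side locked cache is spliced over, under the lock, only once more than a batch's worth of requests has accumulated, and data_race() annotates the deliberately lockless read of locked_free_nr. A generic, hedged sketch of that batched producer/consumer hand-off in plain C (all names here are illustrative, not kernel API):

	#include <pthread.h>
	#include <stddef.h>

	#define BATCH 32

	struct node { struct node *next; };

	struct req_cache {
		pthread_mutex_t lock;		/* protects locked_list/locked_nr */
		struct node *locked_list;	/* filled from the "locked" side */
		int locked_nr;
		struct node *free_list;		/* consumer-private, no locking */
	};

	/* Splice the shared list over only when a full batch has built up,
	 * mirroring the data_race(ctx->locked_free_nr) > IO_COMPL_BATCH test. */
	static struct node *cache_get(struct req_cache *c)
	{
		struct node *n;

		if (!c->free_list && c->locked_nr > BATCH) {	/* racy read on purpose */
			pthread_mutex_lock(&c->lock);
			c->free_list = c->locked_list;
			c->locked_list = NULL;
			c->locked_nr = 0;
			pthread_mutex_unlock(&c->lock);
		}
		if (!c->free_list)
			return NULL;		/* caller falls back to the allocator */
		n = c->free_list;
		c->free_list = n->next;
		return n;
	}
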
 
 static inline bool io_alloc_req_refill(struct io_ring_ctx *ctx)
 {
-       if (unlikely(!ctx->submit_state.free_list.next))
+       if (unlikely(io_req_cache_empty(ctx)))
                return __io_alloc_req_refill(ctx);
        return true;
 }
@@ -2297,11 +2728,11 @@ static inline void io_dismantle_req(struct io_kiocb *req)
                io_put_file(req->file);
 }
 
-static __cold void __io_free_req(struct io_kiocb *req)
+static __cold void io_free_req(struct io_kiocb *req)
 {
        struct io_ring_ctx *ctx = req->ctx;
 
-       io_req_put_rsrc(req, ctx);
+       io_req_put_rsrc(req);
        io_dismantle_req(req);
        io_put_task(req->task, 1);
 
@@ -2319,7 +2750,7 @@ static inline void io_remove_next_linked(struct io_kiocb *req)
        nxt->link = NULL;
 }
 
-static bool io_kill_linked_timeout(struct io_kiocb *req)
+static struct io_kiocb *io_disarm_linked_timeout(struct io_kiocb *req)
        __must_hold(&req->ctx->completion_lock)
        __must_hold(&req->ctx->timeout_lock)
 {
@@ -2332,13 +2763,10 @@ static bool io_kill_linked_timeout(struct io_kiocb *req)
                link->timeout.head = NULL;
                if (hrtimer_try_to_cancel(&io->timer) != -1) {
                        list_del(&link->timeout.list);
-                       /* leave REQ_F_CQE_SKIP to io_fill_cqe_req */
-                       io_fill_cqe_req(link, -ECANCELED, 0);
-                       io_put_req_deferred(link);
-                       return true;
+                       return link;
                }
        }
-       return false;
+       return NULL;
 }
 
 static void io_fail_links(struct io_kiocb *req)
@@ -2352,19 +2780,19 @@ static void io_fail_links(struct io_kiocb *req)
                long res = -ECANCELED;
 
                if (link->flags & REQ_F_FAIL)
-                       res = link->result;
+                       res = link->cqe.res;
 
                nxt = link->link;
                link->link = NULL;
 
-               trace_io_uring_fail_link(req->ctx, req, req->user_data,
+               trace_io_uring_fail_link(req->ctx, req, req->cqe.user_data,
                                        req->opcode, link);
 
-               if (!ignore_cqes) {
+               if (ignore_cqes)
+                       link->flags |= REQ_F_CQE_SKIP;
+               else
                        link->flags &= ~REQ_F_CQE_SKIP;
-                       io_fill_cqe_req(link, res, 0);
-               }
-               io_put_req_deferred(link);
+               __io_req_complete_post(link, res, 0);
                link = nxt;
        }
 }
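
Failing a link chain now posts the cancellation CQEs straight through __io_req_complete_post() instead of the old fill-then-put pair; the userspace contract is unchanged: once one member of an IOSQE_IO_LINK chain fails, every following member completes with -ECANCELED. A minimal liburing sketch of that behavior (liburing >= 2.0 assumed):

	#include <liburing.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		struct io_uring ring;
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;
		char buf[16];
		int i;

		if (io_uring_queue_init(4, &ring, 0))
			return 1;

		/* first link: read from an invalid fd, guaranteed to fail */
		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_read(sqe, -1, buf, sizeof(buf), 0);
		sqe->flags |= IOSQE_IO_LINK;

		/* second link: never issued, completed with -ECANCELED instead */
		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_nop(sqe);

		io_uring_submit(&ring);
		for (i = 0; i < 2; i++) {
			if (io_uring_wait_cqe(&ring, &cqe))
				break;
			printf("cqe %d: res=%d (%s)\n", i, cqe->res,
			       strerror(-cqe->res));
			io_uring_cqe_seen(&ring, cqe);
		}
		io_uring_queue_exit(&ring);
		return 0;
	}
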
@@ -2372,25 +2800,27 @@ static void io_fail_links(struct io_kiocb *req)
 static bool io_disarm_next(struct io_kiocb *req)
        __must_hold(&req->ctx->completion_lock)
 {
+       struct io_kiocb *link = NULL;
        bool posted = false;
 
        if (req->flags & REQ_F_ARM_LTIMEOUT) {
-               struct io_kiocb *link = req->link;
-
+               link = req->link;
                req->flags &= ~REQ_F_ARM_LTIMEOUT;
                if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
                        io_remove_next_linked(req);
-                       /* leave REQ_F_CQE_SKIP to io_fill_cqe_req */
-                       io_fill_cqe_req(link, -ECANCELED, 0);
-                       io_put_req_deferred(link);
+                       io_req_tw_post_queue(link, -ECANCELED, 0);
                        posted = true;
                }
        } else if (req->flags & REQ_F_LINK_TIMEOUT) {
                struct io_ring_ctx *ctx = req->ctx;
 
                spin_lock_irq(&ctx->timeout_lock);
-               posted = io_kill_linked_timeout(req);
+               link = io_disarm_linked_timeout(req);
                spin_unlock_irq(&ctx->timeout_lock);
+               if (link) {
+                       posted = true;
+                       io_req_tw_post_queue(link, -ECANCELED, 0);
+               }
        }
        if (unlikely((req->flags & REQ_F_FAIL) &&
                     !(req->flags & REQ_F_HARDLINK))) {
@@ -2407,8 +2837,7 @@ static void __io_req_find_next_prep(struct io_kiocb *req)
 
        spin_lock(&ctx->completion_lock);
        posted = io_disarm_next(req);
-       if (posted)
-               io_commit_cqring(ctx);
+       io_commit_cqring(ctx);
        spin_unlock(&ctx->completion_lock);
        if (posted)
                io_cqring_ev_posted(ctx);
@@ -2418,8 +2847,6 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
 {
        struct io_kiocb *nxt;
 
-       if (likely(!(req->flags & (REQ_F_LINK|REQ_F_HARDLINK))))
-               return NULL;
        /*
         * If LINK is set, we have dependent requests in this chain. If we
         * didn't fail this request, queue the first one up, moving any other
@@ -2437,6 +2864,8 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
 {
        if (!ctx)
                return;
+       if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
+               atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
        if (*locked) {
                io_submit_flush_completions(ctx);
                mutex_unlock(&ctx->uring_lock);
@@ -2480,7 +2909,7 @@ static void handle_prev_tw_list(struct io_wq_work_node *node,
                if (likely(*uring_locked))
                        req->io_task_work.func(req, uring_locked);
                else
-                       __io_req_complete_post(req, req->result,
+                       __io_req_complete_post(req, req->cqe.res,
                                                io_put_kbuf_comp(req));
                node = next;
        } while (node);
@@ -2521,15 +2950,11 @@ static void tctx_task_work(struct callback_head *cb)
        while (1) {
                struct io_wq_work_node *node1, *node2;
 
-               if (!tctx->task_list.first &&
-                   !tctx->prior_task_list.first && uring_locked)
-                       io_submit_flush_completions(ctx);
-
                spin_lock_irq(&tctx->task_lock);
-               node1 = tctx->prior_task_list.first;
+               node1 = tctx->prio_task_list.first;
                node2 = tctx->task_list.first;
                INIT_WQ_LIST(&tctx->task_list);
-               INIT_WQ_LIST(&tctx->prior_task_list);
+               INIT_WQ_LIST(&tctx->prio_task_list);
                if (!node2 && !node1)
                        tctx->task_running = false;
                spin_unlock_irq(&tctx->task_lock);
@@ -2538,10 +2963,13 @@ static void tctx_task_work(struct callback_head *cb)
 
                if (node1)
                        handle_prev_tw_list(node1, &ctx, &uring_locked);
-
                if (node2)
                        handle_tw_list(node2, &ctx, &uring_locked);
                cond_resched();
+
+               if (data_race(!tctx->task_list.first) &&
+                   data_race(!tctx->prio_task_list.first) && uring_locked)
+                       io_submit_flush_completions(ctx);
        }
 
        ctx_flush_and_put(ctx, &uring_locked);
@@ -2551,22 +2979,19 @@ static void tctx_task_work(struct callback_head *cb)
                io_uring_drop_tctx_refs(current);
 }
 
-static void io_req_task_work_add(struct io_kiocb *req, bool priority)
+static void __io_req_task_work_add(struct io_kiocb *req,
+                                  struct io_uring_task *tctx,
+                                  struct io_wq_work_list *list)
 {
-       struct task_struct *tsk = req->task;
-       struct io_uring_task *tctx = tsk->io_uring;
-       enum task_work_notify_mode notify;
+       struct io_ring_ctx *ctx = req->ctx;
        struct io_wq_work_node *node;
        unsigned long flags;
        bool running;
 
-       WARN_ON_ONCE(!tctx);
+       io_drop_inflight_file(req);
 
        spin_lock_irqsave(&tctx->task_lock, flags);
-       if (priority)
-               wq_list_add_tail(&req->io_task_work.node, &tctx->prior_task_list);
-       else
-               wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
+       wq_list_add_tail(&req->io_task_work.node, list);
        running = tctx->task_running;
        if (!running)
                tctx->task_running = true;
@@ -2576,22 +3001,15 @@ static void io_req_task_work_add(struct io_kiocb *req, bool priority)
        if (running)
                return;
 
-       /*
-        * SQPOLL kernel thread doesn't need notification, just a wakeup. For
-        * all other cases, use TWA_SIGNAL unconditionally to ensure we're
-        * processing task_work. There's no reliable way to tell if TWA_RESUME
-        * will do the job.
-        */
-       notify = (req->ctx->flags & IORING_SETUP_SQPOLL) ? TWA_NONE : TWA_SIGNAL;
-       if (likely(!task_work_add(tsk, &tctx->task_work, notify))) {
-               if (notify == TWA_NONE)
-                       wake_up_process(tsk);
+       if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
+               atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
+
+       if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
                return;
-       }
 
        spin_lock_irqsave(&tctx->task_lock, flags);
        tctx->task_running = false;
-       node = wq_list_merge(&tctx->prior_task_list, &tctx->task_list);
+       node = wq_list_merge(&tctx->prio_task_list, &tctx->task_list);
        spin_unlock_irqrestore(&tctx->task_lock, flags);
 
        while (node) {
@@ -2603,47 +3021,73 @@ static void io_req_task_work_add(struct io_kiocb *req, bool priority)
        }
 }
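
Two things changed in this path: the notification mode is now a per-ring ctx->notify_method chosen at setup time rather than the old per-call TWA_NONE/TWA_SIGNAL decision, and rings created with IORING_SETUP_TASKRUN_FLAG get IORING_SQ_TASKRUN raised in the shared SQ flags whenever task work is queued (cleared again in ctx_flush_and_put() above). A hedged sketch of how a CQ-polling application might test that flag through liburing's public ring structures; treat the exact accessor as an assumption:

	#include <liburing.h>

	/*
	 * With IORING_SETUP_TASKRUN_FLAG, a set IORING_SQ_TASKRUN bit tells a
	 * CQ-polling application that deferred task work is pending, so it
	 * must call io_uring_enter() (e.g. via io_uring_wait_cqe()) before
	 * new completions can appear.
	 */
	static inline int ring_has_pending_task_work(struct io_uring *ring)
	{
		return IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_TASKRUN;
	}
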
 
-static void io_req_task_cancel(struct io_kiocb *req, bool *locked)
+static void io_req_task_work_add(struct io_kiocb *req)
 {
-       struct io_ring_ctx *ctx = req->ctx;
+       struct io_uring_task *tctx = req->task->io_uring;
+
+       __io_req_task_work_add(req, tctx, &tctx->task_list);
+}
+
+static void io_req_task_prio_work_add(struct io_kiocb *req)
+{
+       struct io_uring_task *tctx = req->task->io_uring;
+
+       if (req->ctx->flags & IORING_SETUP_SQPOLL)
+               __io_req_task_work_add(req, tctx, &tctx->prio_task_list);
+       else
+               __io_req_task_work_add(req, tctx, &tctx->task_list);
+}
+
+static void io_req_tw_post(struct io_kiocb *req, bool *locked)
+{
+       io_req_complete_post(req, req->cqe.res, req->cqe.flags);
+}
+
+static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags)
+{
+       req->cqe.res = res;
+       req->cqe.flags = cflags;
+       req->io_task_work.func = io_req_tw_post;
+       io_req_task_work_add(req);
+}
 
+static void io_req_task_cancel(struct io_kiocb *req, bool *locked)
+{
        /* not needed for normal modes, but SQPOLL depends on it */
-       io_tw_lock(ctx, locked);
-       io_req_complete_failed(req, req->result);
+       io_tw_lock(req->ctx, locked);
+       io_req_complete_failed(req, req->cqe.res);
 }
 
 static void io_req_task_submit(struct io_kiocb *req, bool *locked)
 {
-       struct io_ring_ctx *ctx = req->ctx;
-
-       io_tw_lock(ctx, locked);
+       io_tw_lock(req->ctx, locked);
        /* req->task == current here, checking PF_EXITING is safe */
        if (likely(!(req->task->flags & PF_EXITING)))
-               __io_queue_sqe(req);
+               io_queue_sqe(req);
        else
                io_req_complete_failed(req, -EFAULT);
 }
 
 static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
 {
-       req->result = ret;
+       req->cqe.res = ret;
        req->io_task_work.func = io_req_task_cancel;
-       io_req_task_work_add(req, false);
+       io_req_task_work_add(req);
 }
 
 static void io_req_task_queue(struct io_kiocb *req)
 {
        req->io_task_work.func = io_req_task_submit;
-       io_req_task_work_add(req, false);
+       io_req_task_work_add(req);
 }
 
 static void io_req_task_queue_reissue(struct io_kiocb *req)
 {
-       req->io_task_work.func = io_queue_async_work;
-       io_req_task_work_add(req, false);
+       req->io_task_work.func = io_queue_iowq;
+       io_req_task_work_add(req);
 }
 
-static inline void io_queue_next(struct io_kiocb *req)
+static void io_queue_next(struct io_kiocb *req)
 {
        struct io_kiocb *nxt = io_req_find_next(req);
 
@@ -2651,17 +3095,6 @@ static inline void io_queue_next(struct io_kiocb *req)
                io_req_task_queue(nxt);
 }
 
-static void io_free_req(struct io_kiocb *req)
-{
-       io_queue_next(req);
-       __io_free_req(req);
-}
-
-static void io_free_req_work(struct io_kiocb *req, bool *locked)
-{
-       io_free_req(req);
-}
-
 static void io_free_batch_list(struct io_ring_ctx *ctx,
                                struct io_wq_work_node *node)
        __must_hold(&ctx->uring_lock)
@@ -2673,15 +3106,30 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
                struct io_kiocb *req = container_of(node, struct io_kiocb,
                                                    comp_list);
 
-               if (unlikely(req->flags & REQ_F_REFCOUNT)) {
-                       node = req->comp_list.next;
-                       if (!req_ref_put_and_test(req))
-                               continue;
+               if (unlikely(req->flags & IO_REQ_CLEAN_SLOW_FLAGS)) {
+                       if (req->flags & REQ_F_REFCOUNT) {
+                               node = req->comp_list.next;
+                               if (!req_ref_put_and_test(req))
+                                       continue;
+                       }
+                       if ((req->flags & REQ_F_POLLED) && req->apoll) {
+                               struct async_poll *apoll = req->apoll;
+
+                               if (apoll->double_poll)
+                                       kfree(apoll->double_poll);
+                               list_add(&apoll->poll.wait.entry,
+                                               &ctx->apoll_cache);
+                               req->flags &= ~REQ_F_POLLED;
+                       }
+                       if (req->flags & IO_REQ_LINK_FLAGS)
+                               io_queue_next(req);
+                       if (unlikely(req->flags & IO_REQ_CLEAN_FLAGS))
+                               io_clean_op(req);
                }
+               if (!(req->flags & REQ_F_FIXED_FILE))
+                       io_put_file(req->file);
 
                io_req_put_rsrc_locked(req, ctx);
-               io_queue_next(req);
-               io_dismantle_req(req);
 
                if (req->task != task) {
                        if (task)
@@ -2691,7 +3139,7 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
                }
                task_refs++;
                node = req->comp_list.next;
-               wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list);
+               io_req_add_to_cache(req, ctx);
        } while (node);
 
        if (task)
@@ -2710,16 +3158,11 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
                        struct io_kiocb *req = container_of(node, struct io_kiocb,
                                                    comp_list);
 
-                       if (!(req->flags & REQ_F_CQE_SKIP))
-                               __io_fill_cqe_req(req, req->result, req->cflags);
-                       if ((req->flags & REQ_F_POLLED) && req->apoll) {
-                               struct async_poll *apoll = req->apoll;
-
-                               if (apoll->double_poll)
-                                       kfree(apoll->double_poll);
-                               list_add(&apoll->poll.wait.entry,
-                                               &ctx->apoll_cache);
-                               req->flags &= ~REQ_F_POLLED;
+                       if (!(req->flags & REQ_F_CQE_SKIP)) {
+                               if (!(ctx->flags & IORING_SETUP_CQE32))
+                                       __io_fill_cqe_req_filled(ctx, req);
+                               else
+                                       __io_fill_cqe32_req_filled(ctx, req);
                        }
                }
 
@@ -2742,23 +3185,18 @@ static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req)
        struct io_kiocb *nxt = NULL;
 
        if (req_ref_put_and_test(req)) {
-               nxt = io_req_find_next(req);
-               __io_free_req(req);
+               if (unlikely(req->flags & IO_REQ_LINK_FLAGS))
+                       nxt = io_req_find_next(req);
+               io_free_req(req);
        }
        return nxt;
 }
 
 static inline void io_put_req(struct io_kiocb *req)
-{
-       if (req_ref_put_and_test(req))
-               io_free_req(req);
-}
-
-static inline void io_put_req_deferred(struct io_kiocb *req)
 {
        if (req_ref_put_and_test(req)) {
-               req->io_task_work.func = io_free_req_work;
-               io_req_task_work_add(req, false);
+               io_queue_next(req);
+               io_free_req(req);
        }
 }
 
@@ -2841,11 +3279,10 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
                /* order with io_complete_rw_iopoll(), e.g. ->result updates */
                if (!smp_load_acquire(&req->iopoll_completed))
                        break;
+               nr_events++;
                if (unlikely(req->flags & REQ_F_CQE_SKIP))
                        continue;
-
-               __io_fill_cqe_req(req, req->result, io_put_kbuf(req, 0));
-               nr_events++;
+               __io_fill_cqe_req(req, req->cqe.res, io_put_kbuf(req, 0));
        }
 
        if (unlikely(!nr_events))
@@ -2891,22 +3328,26 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 {
        unsigned int nr_events = 0;
        int ret = 0;
+       unsigned long check_cq;
 
-       /*
-        * We disallow the app entering submit/complete with polling, but we
-        * still need to lock the ring to prevent racing with polled issue
-        * that got punted to a workqueue.
-        */
-       mutex_lock(&ctx->uring_lock);
        /*
         * Don't enter poll loop if we already have events pending.
         * If we do, we can potentially be spinning for commands that
         * already triggered a CQE (eg in error).
         */
-       if (test_bit(0, &ctx->check_cq_overflow))
+       check_cq = READ_ONCE(ctx->check_cq);
+       if (check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT))
                __io_cqring_overflow_flush(ctx, false);
        if (io_cqring_events(ctx))
-               goto out;
+               return 0;
+
+       /*
+        * Similarly do not spin if we have not informed the user of any
+        * dropped CQE.
+        */
+       if (unlikely(check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT)))
+               return -EBADR;
+
        do {
                /*
                 * If a submit got punted to a workqueue, we can have the
@@ -2936,8 +3377,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
                nr_events += ret;
                ret = 0;
        } while (nr_events < min && !need_resched());
-out:
-       mutex_unlock(&ctx->uring_lock);
+
        return ret;
 }
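
io_iopoll_check() now also refuses to spin once a completion has been dropped, failing the wait with -EBADR instead of silently losing CQEs. A hedged userspace sketch of handling it (the wrapper is illustrative, not a liburing API):

	#include <liburing.h>
	#include <stdio.h>

	/* -EBADR from a wait now means at least one completion was dropped
	 * (IO_CHECK_CQ_DROPPED_BIT above); the ring stays usable, but the
	 * application must assume some CQEs never arrived. */
	static int wait_cqe_checked(struct io_uring *ring, struct io_uring_cqe **cqe)
	{
		int ret = io_uring_wait_cqe(ring, cqe);

		if (ret == -EBADR)
			fprintf(stderr, "io_uring: dropped CQE(s), resync state\n");
		return ret;
	}
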
 
@@ -3010,21 +3450,21 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
        } else {
                fsnotify_access(req->file);
        }
-       if (unlikely(res != req->result)) {
+       if (unlikely(res != req->cqe.res)) {
                if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
                    io_rw_should_reissue(req)) {
                        req->flags |= REQ_F_REISSUE;
                        return true;
                }
                req_set_fail(req);
-               req->result = res;
+               req->cqe.res = res;
        }
        return false;
 }
 
 static inline void io_req_task_complete(struct io_kiocb *req, bool *locked)
 {
-       int res = req->result;
+       int res = req->cqe.res;
 
        if (*locked) {
                io_req_complete_state(req, res, io_put_kbuf(req, 0));
@@ -3040,7 +3480,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res,
 {
        if (__io_complete_rw_common(req, res))
                return;
-       __io_req_complete(req, issue_flags, req->result,
+       __io_req_complete(req, issue_flags, req->cqe.res,
                                io_put_kbuf(req, issue_flags));
 }
 
@@ -3050,9 +3490,9 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
 
        if (__io_complete_rw_common(req, res))
                return;
-       req->result = res;
+       req->cqe.res = res;
        req->io_task_work.func = io_req_task_complete;
-       io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
+       io_req_task_prio_work_add(req);
 }
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
@@ -3061,12 +3501,12 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
 
        if (kiocb->ki_flags & IOCB_WRITE)
                kiocb_end_write(req);
-       if (unlikely(res != req->result)) {
+       if (unlikely(res != req->cqe.res)) {
                if (res == -EAGAIN && io_rw_should_reissue(req)) {
                        req->flags |= REQ_F_REISSUE;
                        return;
                }
-               req->result = res;
+               req->cqe.res = res;
        }
 
        /* order with io_iopoll_complete() checking ->iopoll_completed */
@@ -3176,6 +3616,8 @@ static unsigned int io_file_get_flags(struct file *file)
                res |= FFS_ISREG;
        if (__io_file_supports_nowait(file, mode))
                res |= FFS_NOWAIT;
+       if (io_file_need_scm(file))
+               res |= FFS_SCM;
        return res;
 }
 
@@ -3186,48 +3628,17 @@ static inline bool io_file_supports_nowait(struct io_kiocb *req)
 
 static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-       struct io_ring_ctx *ctx = req->ctx;
        struct kiocb *kiocb = &req->rw.kiocb;
-       struct file *file = req->file;
        unsigned ioprio;
        int ret;
 
-       if (!io_req_ffs_set(req))
-               req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;
-
        kiocb->ki_pos = READ_ONCE(sqe->off);
-       kiocb->ki_flags = iocb_flags(file);
-       ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
-       if (unlikely(ret))
-               return ret;
 
-       /*
-        * If the file is marked O_NONBLOCK, still allow retry for it if it
-        * supports async. Otherwise it's impossible to use O_NONBLOCK files
-        * reliably. If not, or it IOCB_NOWAIT is set, don't retry.
-        */
-       if ((kiocb->ki_flags & IOCB_NOWAIT) ||
-           ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req)))
-               req->flags |= REQ_F_NOWAIT;
-
-       if (ctx->flags & IORING_SETUP_IOPOLL) {
-               if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll)
-                       return -EOPNOTSUPP;
-
-               kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
-               kiocb->ki_complete = io_complete_rw_iopoll;
-               req->iopoll_completed = 0;
-       } else {
-               if (kiocb->ki_flags & IOCB_HIPRI)
-                       return -EINVAL;
-               kiocb->ki_complete = io_complete_rw;
-       }
-
-       ioprio = READ_ONCE(sqe->ioprio);
-       if (ioprio) {
-               ret = ioprio_check_cap(ioprio);
-               if (ret)
-                       return ret;
+       ioprio = READ_ONCE(sqe->ioprio);
+       if (ioprio) {
+               ret = ioprio_check_cap(ioprio);
+               if (ret)
+                       return ret;
 
                kiocb->ki_ioprio = ioprio;
        } else {
@@ -3237,6 +3648,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        req->imu = NULL;
        req->rw.addr = READ_ONCE(sqe->addr);
        req->rw.len = READ_ONCE(sqe->len);
+       req->rw.flags = READ_ONCE(sqe->rw_flags);
+       /* used for fixed read/write too - just read unconditionally */
        req->buf_index = READ_ONCE(sqe->buf_index);
        return 0;
 }
@@ -3265,19 +3678,18 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
 static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
 {
        struct kiocb *kiocb = &req->rw.kiocb;
-       bool is_stream = req->file->f_mode & FMODE_STREAM;
 
-       if (kiocb->ki_pos == -1) {
-               if (!is_stream) {
-                       req->flags |= REQ_F_CUR_POS;
-                       kiocb->ki_pos = req->file->f_pos;
-                       return &kiocb->ki_pos;
-               } else {
-                       kiocb->ki_pos = 0;
-                       return NULL;
-               }
+       if (kiocb->ki_pos != -1)
+               return &kiocb->ki_pos;
+
+       if (!(req->file->f_mode & FMODE_STREAM)) {
+               req->flags |= REQ_F_CUR_POS;
+               kiocb->ki_pos = req->file->f_pos;
+               return &kiocb->ki_pos;
        }
-       return is_stream ? NULL : &kiocb->ki_pos;
+
+       kiocb->ki_pos = 0;
+       return NULL;
 }
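
The restructured io_kiocb_update_pos() keeps the established semantics: an offset of -1 means "use, then advance, the file position" for regular files (REQ_F_CUR_POS), and "no offset at all" for FMODE_STREAM files. A small liburing sketch of the -1 case:

	#include <liburing.h>
	#include <fcntl.h>
	#include <stdio.h>

	int main(void)
	{
		struct io_uring ring;
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;
		char buf[64];
		int fd;

		if (io_uring_queue_init(4, &ring, 0))
			return 1;
		fd = open("/etc/hostname", O_RDONLY);
		if (fd < 0)
			return 1;

		/* offset -1: read at the current file position and advance it,
		 * like read(2); a second -1 read continues where this one ended */
		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_read(sqe, fd, buf, sizeof(buf), -1);
		io_uring_submit(&ring);

		if (!io_uring_wait_cqe(&ring, &cqe)) {
			printf("read %d bytes\n", cqe->res);
			io_uring_cqe_seen(&ring, cqe);
		}
		io_uring_queue_exit(&ring);
		return 0;
	}
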
 
 static void kiocb_done(struct io_kiocb *req, ssize_t ret,
@@ -3367,7 +3779,8 @@ static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter
        return 0;
 }
 
-static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
+static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter,
+                          unsigned int issue_flags)
 {
        struct io_mapped_ubuf *imu = req->imu;
        u16 index, buf_index = req->buf_index;
@@ -3377,7 +3790,7 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
 
                if (unlikely(buf_index >= ctx->nr_user_bufs))
                        return -EFAULT;
-               io_req_set_rsrc_node(req, ctx);
+               io_req_set_rsrc_node(req, ctx, issue_flags);
                index = array_index_nospec(buf_index, ctx->nr_user_bufs);
                imu = READ_ONCE(ctx->user_bufs[index]);
                req->imu = imu;
@@ -3385,77 +3798,96 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
        return __io_import_fixed(req, rw, iter, imu);
 }
 
-static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock)
-{
-       if (needs_lock)
-               mutex_unlock(&ctx->uring_lock);
-}
-
-static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
-{
-       /*
-        * "Normal" inline submissions always hold the uring_lock, since we
-        * grab it from the system call. Same is true for the SQPOLL offload.
-        * The only exception is when we've detached the request and issue it
-        * from an async worker thread, grab the lock for that case.
-        */
-       if (needs_lock)
-               mutex_lock(&ctx->uring_lock);
-}
-
-static void io_buffer_add_list(struct io_ring_ctx *ctx,
-                              struct io_buffer_list *bl, unsigned int bgid)
+static int io_buffer_add_list(struct io_ring_ctx *ctx,
+                             struct io_buffer_list *bl, unsigned int bgid)
 {
-       struct list_head *list;
-
-       list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
-       INIT_LIST_HEAD(&bl->buf_list);
        bl->bgid = bgid;
-       list_add(&bl->list, list);
+       if (bgid < BGID_ARRAY)
+               return 0;
+
+       return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
 }
 
-static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
-                                         int bgid, unsigned int issue_flags)
+static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
+                                             struct io_buffer_list *bl)
 {
-       struct io_buffer *kbuf = req->kbuf;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
-       struct io_ring_ctx *ctx = req->ctx;
-       struct io_buffer_list *bl;
-
-       if (req->flags & REQ_F_BUFFER_SELECTED)
-               return kbuf;
+       if (!list_empty(&bl->buf_list)) {
+               struct io_buffer *kbuf;
 
-       io_ring_submit_lock(ctx, needs_lock);
-
-       lockdep_assert_held(&ctx->uring_lock);
-
-       bl = io_buffer_get_list(ctx, bgid);
-       if (bl && !list_empty(&bl->buf_list)) {
                kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
                list_del(&kbuf->list);
                if (*len > kbuf->len)
                        *len = kbuf->len;
                req->flags |= REQ_F_BUFFER_SELECTED;
                req->kbuf = kbuf;
+               req->buf_index = kbuf->bid;
+               return u64_to_user_ptr(kbuf->addr);
+       }
+       return NULL;
+}
+
+static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
+                                         struct io_buffer_list *bl,
+                                         unsigned int issue_flags)
+{
+       struct io_uring_buf_ring *br = bl->buf_ring;
+       struct io_uring_buf *buf;
+       __u32 head = bl->head;
+
+       if (unlikely(smp_load_acquire(&br->tail) == head)) {
+               io_ring_submit_unlock(req->ctx, issue_flags);
+               return NULL;
+       }
+
+       head &= bl->mask;
+       if (head < IO_BUFFER_LIST_BUF_PER_PAGE) {
+               buf = &br->bufs[head];
        } else {
-               kbuf = ERR_PTR(-ENOBUFS);
+               int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
+               int index = head / IO_BUFFER_LIST_BUF_PER_PAGE - 1;
+               buf = page_address(bl->buf_pages[index]);
+               buf += off;
        }
+       if (*len > buf->len)
+               *len = buf->len;
+       req->flags |= REQ_F_BUFFER_RING;
+       req->buf_list = bl;
+       req->buf_index = buf->bid;
 
-       io_ring_submit_unlock(req->ctx, needs_lock);
-       return kbuf;
+       if (issue_flags & IO_URING_F_UNLOCKED) {
+               /*
+                * If we came in unlocked, we have no choice but to consume the
+                * buffer here. This does mean it'll be pinned until the IO
+                * completes. But coming in unlocked means we're in io-wq
+                * context, hence there should be no further retry. For the
+                * locked case, the caller must ensure to call the commit when
+                * the transfer completes (or if we get -EAGAIN and must poll
+                * or retry).
+                */
+               req->buf_list = NULL;
+               bl->head++;
+       }
+       return u64_to_user_ptr(buf->addr);
 }
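
io_ring_buffer_select() is the kernel half of ring-mapped provided buffers: userspace publishes io_uring_buf entries and bumps the tail, the kernel consumes from head. A hedged setup sketch using the liburing >= 2.2 helpers (treat the exact helper signatures as assumptions if building against anything older):

	#include <liburing.h>
	#include <errno.h>
	#include <stdlib.h>

	#define ENTRIES	8	/* must be a power of two */
	#define BUF_SZ	4096
	#define BGID	0	/* buffer group id, matches sqe->buf_group */

	static char bufs[ENTRIES][BUF_SZ];

	int setup_buf_ring(struct io_uring *ring, struct io_uring_buf_ring **out)
	{
		struct io_uring_buf_reg reg = { };
		struct io_uring_buf_ring *br;
		int i, ret;

		/* the ring of descriptors itself must be page aligned */
		if (posix_memalign((void **)&br, 4096,
				   ENTRIES * sizeof(struct io_uring_buf)))
			return -ENOMEM;

		reg.ring_addr = (unsigned long)br;
		reg.ring_entries = ENTRIES;
		reg.bgid = BGID;
		ret = io_uring_register_buf_ring(ring, &reg, 0);
		if (ret)
			return ret;

		io_uring_buf_ring_init(br);
		for (i = 0; i < ENTRIES; i++)
			io_uring_buf_ring_add(br, bufs[i], BUF_SZ, i,
					      io_uring_buf_ring_mask(ENTRIES), i);
		io_uring_buf_ring_advance(br, ENTRIES);	/* one tail store */
		*out = br;
		return 0;
	}

A request submitted with sqe->flags |= IOSQE_BUFFER_SELECT and sqe->buf_group = BGID then draws from this ring, and the picked buffer id comes back in cqe->flags >> IORING_CQE_BUFFER_SHIFT.
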
 
-static void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len,
-                                       unsigned int issue_flags)
+static void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+                                    unsigned int issue_flags)
 {
-       struct io_buffer *kbuf;
-       u16 bgid;
+       struct io_ring_ctx *ctx = req->ctx;
+       struct io_buffer_list *bl;
+       void __user *ret = NULL;
 
-       bgid = req->buf_index;
-       kbuf = io_buffer_select(req, len, bgid, issue_flags);
-       if (IS_ERR(kbuf))
-               return kbuf;
-       return u64_to_user_ptr(kbuf->addr);
+       io_ring_submit_lock(req->ctx, issue_flags);
+
+       bl = io_buffer_get_list(ctx, req->buf_index);
+       if (likely(bl)) {
+               if (bl->buf_nr_pages)
+                       ret = io_ring_buffer_select(req, len, bl, issue_flags);
+               else
+                       ret = io_provided_buffer_select(req, len, bl);
+       }
+       io_ring_submit_unlock(req->ctx, issue_flags);
+       return ret;
 }
 
 #ifdef CONFIG_COMPAT
@@ -3465,7 +3897,7 @@ static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov,
        struct compat_iovec __user *uiov;
        compat_ssize_t clen;
        void __user *buf;
-       ssize_t len;
+       size_t len;
 
        uiov = u64_to_user_ptr(req->rw.addr);
        if (!access_ok(uiov, sizeof(*uiov)))
@@ -3476,11 +3908,12 @@ static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov,
                return -EINVAL;
 
        len = clen;
-       buf = io_rw_buffer_select(req, &len, issue_flags);
-       if (IS_ERR(buf))
-               return PTR_ERR(buf);
+       buf = io_buffer_select(req, &len, issue_flags);
+       if (!buf)
+               return -ENOBUFS;
+       req->rw.addr = (unsigned long) buf;
        iov[0].iov_base = buf;
-       iov[0].iov_len = (compat_size_t) len;
+       req->rw.len = iov[0].iov_len = (compat_size_t) len;
        return 0;
 }
 #endif
@@ -3498,22 +3931,21 @@ static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
        len = iov[0].iov_len;
        if (len < 0)
                return -EINVAL;
-       buf = io_rw_buffer_select(req, &len, issue_flags);
-       if (IS_ERR(buf))
-               return PTR_ERR(buf);
+       buf = io_buffer_select(req, &len, issue_flags);
+       if (!buf)
+               return -ENOBUFS;
+       req->rw.addr = (unsigned long) buf;
        iov[0].iov_base = buf;
-       iov[0].iov_len = len;
+       req->rw.len = iov[0].iov_len = len;
        return 0;
 }
 
 static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
                                    unsigned int issue_flags)
 {
-       if (req->flags & REQ_F_BUFFER_SELECTED) {
-               struct io_buffer *kbuf = req->kbuf;
-
-               iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
-               iov[0].iov_len = kbuf->len;
+       if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
+               iov[0].iov_base = u64_to_user_ptr(req->rw.addr);
+               iov[0].iov_len = req->rw.len;
                return 0;
        }
        if (req->rw.len != 1)
@@ -3527,6 +3959,13 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
        return __io_iov_buffer_select(req, iov, issue_flags);
 }
 
+static inline bool io_do_buffer_select(struct io_kiocb *req)
+{
+       if (!(req->flags & REQ_F_BUFFER_SELECT))
+               return false;
+       return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING));
+}
+
 static struct iovec *__io_import_iovec(int rw, struct io_kiocb *req,
                                       struct io_rw_state *s,
                                       unsigned int issue_flags)
@@ -3539,24 +3978,21 @@ static struct iovec *__io_import_iovec(int rw, struct io_kiocb *req,
        ssize_t ret;
 
        if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
-               ret = io_import_fixed(req, rw, iter);
+               ret = io_import_fixed(req, rw, iter, issue_flags);
                if (ret)
                        return ERR_PTR(ret);
                return NULL;
        }
 
-       /* buffer index only valid with fixed read/write, or buffer select  */
-       if (unlikely(req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT)))
-               return ERR_PTR(-EINVAL);
-
        buf = u64_to_user_ptr(req->rw.addr);
        sqe_len = req->rw.len;
 
        if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
-               if (req->flags & REQ_F_BUFFER_SELECT) {
-                       buf = io_rw_buffer_select(req, &sqe_len, issue_flags);
-                       if (IS_ERR(buf))
-                               return ERR_CAST(buf);
+               if (io_do_buffer_select(req)) {
+                       buf = io_buffer_select(req, &sqe_len, issue_flags);
+                       if (!buf)
+                               return ERR_PTR(-ENOBUFS);
+                       req->rw.addr = (unsigned long) buf;
                        req->rw.len = sqe_len;
                }
 
@@ -3740,13 +4176,6 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
        return 0;
 }
 
-static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-       if (unlikely(!(req->file->f_mode & FMODE_READ)))
-               return -EBADF;
-       return io_prep_rw(req, sqe);
-}
-
 /*
  * This is our waitqueue callback handler, registered through __folio_lock_async()
  * when we initially tried to do the IO with the iocb armed our waitqueue.
@@ -3834,6 +4263,50 @@ static bool need_read_all(struct io_kiocb *req)
                S_ISBLK(file_inode(req->file)->i_mode);
 }
 
+static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
+{
+       struct kiocb *kiocb = &req->rw.kiocb;
+       struct io_ring_ctx *ctx = req->ctx;
+       struct file *file = req->file;
+       int ret;
+
+       if (unlikely(!file || !(file->f_mode & mode)))
+               return -EBADF;
+
+       if (!io_req_ffs_set(req))
+               req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;
+
+       kiocb->ki_flags = iocb_flags(file);
+       ret = kiocb_set_rw_flags(kiocb, req->rw.flags);
+       if (unlikely(ret))
+               return ret;
+
+       /*
+        * If the file is marked O_NONBLOCK, still allow retry for it if it
+        * supports async. Otherwise it's impossible to use O_NONBLOCK files
+        * reliably. If not, or if IOCB_NOWAIT is set, don't retry.
+        */
+       if ((kiocb->ki_flags & IOCB_NOWAIT) ||
+           ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req)))
+               req->flags |= REQ_F_NOWAIT;
+
+       if (ctx->flags & IORING_SETUP_IOPOLL) {
+               if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll)
+                       return -EOPNOTSUPP;
+
+               kiocb->private = NULL;
+               kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
+               kiocb->ki_complete = io_complete_rw_iopoll;
+               req->iopoll_completed = 0;
+       } else {
+               if (kiocb->ki_flags & IOCB_HIPRI)
+                       return -EINVAL;
+               kiocb->ki_complete = io_complete_rw;
+       }
+
+       return 0;
+}
+
 static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_rw_state __s, *s = &__s;
@@ -3869,7 +4342,12 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
                iov_iter_restore(&s->iter, &s->iter_state);
                iovec = NULL;
        }
-       req->result = iov_iter_count(&s->iter);
+       ret = io_rw_init_file(req, FMODE_READ);
+       if (unlikely(ret)) {
+               kfree(iovec);
+               return ret;
+       }
+       req->cqe.res = iov_iter_count(&s->iter);
 
        if (force_nonblock) {
                /* If the file doesn't support async, just async punt */
@@ -3885,7 +4363,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 
        ppos = io_kiocb_update_pos(req);
 
-       ret = rw_verify_area(READ, req->file, ppos, req->result);
+       ret = rw_verify_area(READ, req->file, ppos, req->cqe.res);
        if (unlikely(ret)) {
                kfree(iovec);
                return ret;
@@ -3907,7 +4385,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
                ret = 0;
        } else if (ret == -EIOCBQUEUED) {
                goto out_free;
-       } else if (ret == req->result || ret <= 0 || !force_nonblock ||
+       } else if (ret == req->cqe.res || ret <= 0 || !force_nonblock ||
                   (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
                /* read all, failed, already did sync or don't want to retry */
                goto done;
@@ -3972,13 +4450,6 @@ out_free:
        return 0;
 }
 
-static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-       if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
-               return -EBADF;
-       return io_prep_rw(req, sqe);
-}
-
 static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_rw_state __s, *s = &__s;
@@ -3999,7 +4470,12 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
                iov_iter_restore(&s->iter, &s->iter_state);
                iovec = NULL;
        }
-       req->result = iov_iter_count(&s->iter);
+       ret = io_rw_init_file(req, FMODE_WRITE);
+       if (unlikely(ret)) {
+               kfree(iovec);
+               return ret;
+       }
+       req->cqe.res = iov_iter_count(&s->iter);
 
        if (force_nonblock) {
                /* If the file doesn't support async, just async punt */
@@ -4019,7 +4495,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 
        ppos = io_kiocb_update_pos(req);
 
-       ret = rw_verify_area(WRITE, req->file, ppos, req->result);
+       ret = rw_verify_area(WRITE, req->file, ppos, req->cqe.res);
        if (unlikely(ret))
                goto out_free;
 
@@ -4083,9 +4559,7 @@ static int io_renameat_prep(struct io_kiocb *req,
        struct io_rename *ren = &req->rename;
        const char __user *oldf, *newf;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+       if (sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
@@ -4122,22 +4596,257 @@ static int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
                                ren->newpath, ren->flags);
 
        req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
 
+static inline void __io_xattr_finish(struct io_kiocb *req)
+{
+       struct io_xattr *ix = &req->xattr;
+
+       if (ix->filename)
+               putname(ix->filename);
+
+       kfree(ix->ctx.kname);
+       kvfree(ix->ctx.kvalue);
+}
+
+static void io_xattr_finish(struct io_kiocb *req, int ret)
+{
+       req->flags &= ~REQ_F_NEED_CLEANUP;
+
+       __io_xattr_finish(req);
+       io_req_complete(req, ret);
+}
+
+static int __io_getxattr_prep(struct io_kiocb *req,
+                             const struct io_uring_sqe *sqe)
+{
+       struct io_xattr *ix = &req->xattr;
+       const char __user *name;
+       int ret;
+
+       if (unlikely(req->flags & REQ_F_FIXED_FILE))
+               return -EBADF;
+
+       ix->filename = NULL;
+       ix->ctx.kvalue = NULL;
+       name = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+       ix->ctx.size = READ_ONCE(sqe->len);
+       ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
+
+       if (ix->ctx.flags)
+               return -EINVAL;
+
+       ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL);
+       if (!ix->ctx.kname)
+               return -ENOMEM;
+
+       ret = strncpy_from_user(ix->ctx.kname->name, name,
+                               sizeof(ix->ctx.kname->name));
+       if (!ret || ret == sizeof(ix->ctx.kname->name))
+               ret = -ERANGE;
+       if (ret < 0) {
+               kfree(ix->ctx.kname);
+               return ret;
+       }
+
+       req->flags |= REQ_F_NEED_CLEANUP;
+       return 0;
+}
+
+static int io_fgetxattr_prep(struct io_kiocb *req,
+                            const struct io_uring_sqe *sqe)
+{
+       return __io_getxattr_prep(req, sqe);
+}
+
+static int io_getxattr_prep(struct io_kiocb *req,
+                           const struct io_uring_sqe *sqe)
+{
+       struct io_xattr *ix = &req->xattr;
+       const char __user *path;
+       int ret;
+
+       ret = __io_getxattr_prep(req, sqe);
+       if (ret)
+               return ret;
+
+       path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
+
+       ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
+       if (IS_ERR(ix->filename)) {
+               ret = PTR_ERR(ix->filename);
+               ix->filename = NULL;
+       }
+
+       return ret;
+}
+
+static int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_xattr *ix = &req->xattr;
+       int ret;
+
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               return -EAGAIN;
+
+       ret = do_getxattr(mnt_user_ns(req->file->f_path.mnt),
+                       req->file->f_path.dentry,
+                       &ix->ctx);
+
+       io_xattr_finish(req, ret);
+       return 0;
+}
+
+static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_xattr *ix = &req->xattr;
+       unsigned int lookup_flags = LOOKUP_FOLLOW;
+       struct path path;
+       int ret;
+
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               return -EAGAIN;
+
+retry:
+       ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL);
+       if (!ret) {
+               ret = do_getxattr(mnt_user_ns(path.mnt),
+                               path.dentry,
+                               &ix->ctx);
+
+               path_put(&path);
+               if (retry_estale(ret, lookup_flags)) {
+                       lookup_flags |= LOOKUP_REVAL;
+                       goto retry;
+               }
+       }
+
+       io_xattr_finish(req, ret);
+       return 0;
+}
+
+static int __io_setxattr_prep(struct io_kiocb *req,
+                       const struct io_uring_sqe *sqe)
+{
+       struct io_xattr *ix = &req->xattr;
+       const char __user *name;
+       int ret;
+
+       if (unlikely(req->flags & REQ_F_FIXED_FILE))
+               return -EBADF;
+
+       ix->filename = NULL;
+       name = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+       ix->ctx.kvalue = NULL;
+       ix->ctx.size = READ_ONCE(sqe->len);
+       ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
+
+       ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL);
+       if (!ix->ctx.kname)
+               return -ENOMEM;
+
+       ret = setxattr_copy(name, &ix->ctx);
+       if (ret) {
+               kfree(ix->ctx.kname);
+               return ret;
+       }
+
+       req->flags |= REQ_F_NEED_CLEANUP;
+       return 0;
+}
+
+static int io_setxattr_prep(struct io_kiocb *req,
+                       const struct io_uring_sqe *sqe)
+{
+       struct io_xattr *ix = &req->xattr;
+       const char __user *path;
+       int ret;
+
+       ret = __io_setxattr_prep(req, sqe);
+       if (ret)
+               return ret;
+
+       path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
+
+       ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
+       if (IS_ERR(ix->filename)) {
+               ret = PTR_ERR(ix->filename);
+               ix->filename = NULL;
+       }
+
+       return ret;
+}
+
+static int io_fsetxattr_prep(struct io_kiocb *req,
+                       const struct io_uring_sqe *sqe)
+{
+       return __io_setxattr_prep(req, sqe);
+}
+
+static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags,
+                       struct path *path)
+{
+       struct io_xattr *ix = &req->xattr;
+       int ret;
+
+       ret = mnt_want_write(path->mnt);
+       if (!ret) {
+               ret = do_setxattr(mnt_user_ns(path->mnt), path->dentry, &ix->ctx);
+               mnt_drop_write(path->mnt);
+       }
+
+       return ret;
+}
+
+static int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+       int ret;
+
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               return -EAGAIN;
+
+       ret = __io_setxattr(req, issue_flags, &req->file->f_path);
+       io_xattr_finish(req, ret);
+
+       return 0;
+}
+
+static int io_setxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_xattr *ix = &req->xattr;
+       unsigned int lookup_flags = LOOKUP_FOLLOW;
+       struct path path;
+       int ret;
+
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               return -EAGAIN;
+
+retry:
+       ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL);
+       if (!ret) {
+               ret = __io_setxattr(req, issue_flags, &path);
+               path_put(&path);
+               if (retry_estale(ret, lookup_flags)) {
+                       lookup_flags |= LOOKUP_REVAL;
+                       goto retry;
+               }
+       }
+
+       io_xattr_finish(req, ret);
+       return 0;
+}
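
The new xattr opcodes read their arguments from fixed SQE slots, as the prep helpers above show: addr is the attribute name, addr2 the value buffer, addr3 the path (for the non-f variants), len the value size and xattr_flags the setxattr flags. A hedged sketch issuing IORING_OP_GETXATTR by filling those fields through liburing's generic initializer (liburing >= 2.2 also ships dedicated io_uring_prep_getxattr()-style helpers; the path and name here are placeholders):

	#include <liburing.h>
	#include <stdio.h>

	int main(void)
	{
		struct io_uring ring;
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;
		char value[256];

		if (io_uring_queue_init(4, &ring, 0))
			return 1;

		sqe = io_uring_get_sqe(&ring);
		/* addr = name, off/addr2 = value buffer, len = buffer size */
		io_uring_prep_rw(IORING_OP_GETXATTR, sqe, 0, "user.comment",
				 sizeof(value), (unsigned long)value);
		sqe->addr3 = (unsigned long)"/tmp/somefile";	/* path */

		io_uring_submit(&ring);
		if (!io_uring_wait_cqe(&ring, &cqe)) {
			/* res is the value length, or -errno */
			printf("getxattr: res=%d\n", cqe->res);
			io_uring_cqe_seen(&ring, cqe);
		}
		io_uring_queue_exit(&ring);
		return 0;
	}
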
+
 static int io_unlinkat_prep(struct io_kiocb *req,
                            const struct io_uring_sqe *sqe)
 {
        struct io_unlink *un = &req->unlink;
        const char __user *fname;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
-           sqe->splice_fd_in)
+       if (sqe->off || sqe->len || sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
@@ -4171,8 +4880,6 @@ static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
                ret = do_unlinkat(un->dfd, un->filename);
 
        req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
@@ -4183,10 +4890,7 @@ static int io_mkdirat_prep(struct io_kiocb *req,
        struct io_mkdir *mkd = &req->mkdir;
        const char __user *fname;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index ||
-           sqe->splice_fd_in)
+       if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
@@ -4214,8 +4918,6 @@ static int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags)
        ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode);
 
        req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
@@ -4226,10 +4928,7 @@ static int io_symlinkat_prep(struct io_kiocb *req,
        struct io_symlink *sl = &req->symlink;
        const char __user *oldpath, *newpath;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->len || sqe->rw_flags || sqe->buf_index ||
-           sqe->splice_fd_in)
+       if (sqe->len || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
@@ -4263,8 +4962,6 @@ static int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags)
        ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath);
 
        req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
@@ -4275,9 +4972,7 @@ static int io_linkat_prep(struct io_kiocb *req,
        struct io_hardlink *lnk = &req->hardlink;
        const char __user *oldf, *newf;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+       if (sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
@@ -4314,9 +5009,97 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
                                lnk->newpath, lnk->flags);
 
        req->flags &= ~REQ_F_NEED_CLEANUP;
+       io_req_complete(req, ret);
+       return 0;
+}
+
+static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
+{
+       req->uring_cmd.task_work_cb(&req->uring_cmd);
+}
+
+void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+                       void (*task_work_cb)(struct io_uring_cmd *))
+{
+       struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
+
+       req->uring_cmd.task_work_cb = task_work_cb;
+       req->io_task_work.func = io_uring_cmd_work;
+       io_req_task_prio_work_add(req);
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
+
+/*
+ * Called by consumers of io_uring_cmd, if they originally returned
+ * -EIOCBQUEUED upon receiving the command.
+ */
+void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
+{
+       struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
+
        if (ret < 0)
                req_set_fail(req);
-       io_req_complete(req, ret);
+       if (req->ctx->flags & IORING_SETUP_CQE32)
+               __io_req_complete32(req, 0, ret, 0, res2, 0);
+       else
+               io_req_complete(req, ret);
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_done);
+
+static int io_uring_cmd_prep_async(struct io_kiocb *req)
+{
+       size_t cmd_size;
+
+       cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
+
+       memcpy(req->async_data, req->uring_cmd.cmd, cmd_size);
+       return 0;
+}
+
+static int io_uring_cmd_prep(struct io_kiocb *req,
+                            const struct io_uring_sqe *sqe)
+{
+       struct io_uring_cmd *ioucmd = &req->uring_cmd;
+
+       if (sqe->rw_flags)
+               return -EINVAL;
+       ioucmd->cmd = sqe->cmd;
+       ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
+       return 0;
+}
+
+static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_uring_cmd *ioucmd = &req->uring_cmd;
+       struct io_ring_ctx *ctx = req->ctx;
+       struct file *file = req->file;
+       int ret;
+
+       if (!req->file->f_op->uring_cmd)
+               return -EOPNOTSUPP;
+
+       if (ctx->flags & IORING_SETUP_SQE128)
+               issue_flags |= IO_URING_F_SQE128;
+       if (ctx->flags & IORING_SETUP_CQE32)
+               issue_flags |= IO_URING_F_CQE32;
+       if (ctx->flags & IORING_SETUP_IOPOLL)
+               issue_flags |= IO_URING_F_IOPOLL;
+
+       if (req_has_async_data(req))
+               ioucmd->cmd = req->async_data;
+
+       ret = file->f_op->uring_cmd(ioucmd, issue_flags);
+       if (ret == -EAGAIN) {
+               if (!req_has_async_data(req)) {
+                       if (io_alloc_async_data(req))
+                               return -ENOMEM;
+                       io_uring_cmd_prep_async(req);
+               }
+               return -EAGAIN;
+       }
+
+       if (ret != -EIOCBQUEUED)
+               io_uring_cmd_done(ioucmd, ret, 0);
        return 0;
 }
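
For providers, the contract spelled out above is: ->uring_cmd() may complete inline by returning the result, return -EAGAIN to be retried from io-wq (with the SQE payload preserved via io_uring_cmd_prep_async()), or return -EIOCBQUEUED and call io_uring_cmd_done() once the operation finishes. A hedged kernel-side sketch; the my_dev_* names and opcodes are hypothetical:

	#include <linux/fs.h>
	#include <linux/module.h>
	#include <linux/io_uring.h>

	#define MY_DEV_CMD_QUERY	0x01	/* hypothetical command opcodes */
	#define MY_DEV_CMD_DMA		0x02

	static void my_dev_start_dma(struct io_uring_cmd *ioucmd);	/* hypothetical */

	static int my_dev_uring_cmd(struct io_uring_cmd *ioucmd,
				    unsigned int issue_flags)
	{
		switch (ioucmd->cmd_op) {
		case MY_DEV_CMD_QUERY:
			/* fast path: complete inline, return value becomes cqe->res */
			return 0;
		case MY_DEV_CMD_DMA:
			/* kick the hardware; its completion handler later calls
			 * io_uring_cmd_done(ioucmd, ret, 0) */
			my_dev_start_dma(ioucmd);
			return -EIOCBQUEUED;
		default:
			return -ENOTTY;
		}
	}

	static const struct file_operations my_dev_fops = {
		.owner		= THIS_MODULE,
		.uring_cmd	= my_dev_uring_cmd,
	};
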
 
@@ -4324,9 +5107,7 @@ static int io_shutdown_prep(struct io_kiocb *req,
                            const struct io_uring_sqe *sqe)
 {
 #if defined(CONFIG_NET)
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
+       if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
                     sqe->buf_index || sqe->splice_fd_in))
                return -EINVAL;
 
@@ -4351,8 +5132,6 @@ static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
                return -ENOTSOCK;
 
        ret = __sys_shutdown_sock(sock, req->shutdown.how);
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 #else
@@ -4366,21 +5145,11 @@ static int __io_splice_prep(struct io_kiocb *req,
        struct io_splice *sp = &req->splice;
        unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-
-       sp->file_in = NULL;
        sp->len = READ_ONCE(sqe->len);
        sp->flags = READ_ONCE(sqe->splice_flags);
-
        if (unlikely(sp->flags & ~valid_flags))
                return -EINVAL;
-
-       sp->file_in = io_file_get(req->ctx, req, READ_ONCE(sqe->splice_fd_in),
-                                 (sp->flags & SPLICE_F_FD_IN_FIXED));
-       if (!sp->file_in)
-               return -EBADF;
-       req->flags |= REQ_F_NEED_CLEANUP;
+       sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in);
        return 0;
 }
 
@@ -4395,23 +5164,32 @@ static int io_tee_prep(struct io_kiocb *req,
 static int io_tee(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_splice *sp = &req->splice;
-       struct file *in = sp->file_in;
        struct file *out = sp->file_out;
        unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
+       struct file *in;
        long ret = 0;
 
        if (issue_flags & IO_URING_F_NONBLOCK)
                return -EAGAIN;
+
+       if (sp->flags & SPLICE_F_FD_IN_FIXED)
+               in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags);
+       else
+               in = io_file_get_normal(req, sp->splice_fd_in);
+       if (!in) {
+               ret = -EBADF;
+               goto done;
+       }
+
        if (sp->len)
                ret = do_tee(in, out, sp->len, flags);
 
        if (!(sp->flags & SPLICE_F_FD_IN_FIXED))
                io_put_file(in);
-       req->flags &= ~REQ_F_NEED_CLEANUP;
-
+done:
        if (ret != sp->len)
                req_set_fail(req);
-       io_req_complete(req, ret);
+       __io_req_complete(req, 0, ret, 0);
        return 0;
 }
 
@@ -4427,15 +5205,24 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_splice *sp = &req->splice;
-       struct file *in = sp->file_in;
        struct file *out = sp->file_out;
        unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
        loff_t *poff_in, *poff_out;
+       struct file *in;
        long ret = 0;
 
        if (issue_flags & IO_URING_F_NONBLOCK)
                return -EAGAIN;
 
+       if (sp->flags & SPLICE_F_FD_IN_FIXED)
+               in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags);
+       else
+               in = io_file_get_normal(req, sp->splice_fd_in);
+       if (!in) {
+               ret = -EBADF;
+               goto done;
+       }
+
        poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
        poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
 
@@ -4444,11 +5231,23 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
 
        if (!(sp->flags & SPLICE_F_FD_IN_FIXED))
                io_put_file(in);
-       req->flags &= ~REQ_F_NEED_CLEANUP;
-
+done:
        if (ret != sp->len)
                req_set_fail(req);
-       io_req_complete(req, ret);
+       __io_req_complete(req, 0, ret, 0);
+       return 0;
+}
+
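
On the userspace side, SPLICE_F_FD_IN_FIXED now simply marks splice_fd_in
as an index into the registered file table, resolved at issue time by
io_file_get_fixed() rather than at prep time. A hedged liburing-style
sketch (ring setup and file registration elided; fd values are
placeholders):

    /* splice 4 KiB from registered-file slot 3 into a pipe */
    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

    io_uring_prep_splice(sqe, 3 /* fixed-file index */, 0,
                         pipe_wr /* regular fd */, -1, 4096,
                         SPLICE_F_FD_IN_FIXED);
    io_uring_submit(&ring);
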
+static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       /*
+        * If the ring is set up with CQE32, relay back addr/addr2
+        */
+       if (req->ctx->flags & IORING_SETUP_CQE32) {
+               req->nop.extra1 = READ_ONCE(sqe->addr);
+               req->nop.extra2 = READ_ONCE(sqe->addr2);
+       }
+
        return 0;
 }
 
@@ -4457,20 +5256,31 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
  */
 static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
 {
-       struct io_ring_ctx *ctx = req->ctx;
+       unsigned int cflags;
+       void __user *buf;
 
-       if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
+       if (req->flags & REQ_F_BUFFER_SELECT) {
+               size_t len = 1;
+
+               buf = io_buffer_select(req, &len, issue_flags);
+               if (!buf)
+                       return -ENOBUFS;
+       }
 
-       __io_req_complete(req, issue_flags, 0, 0);
+       cflags = io_put_kbuf(req, issue_flags);
+       if (!(req->ctx->flags & IORING_SETUP_CQE32))
+               __io_req_complete(req, issue_flags, 0, cflags);
+       else
+               __io_req_complete32(req, issue_flags, 0, cflags,
+                                   req->nop.extra1, req->nop.extra2);
        return 0;
 }
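
A userspace sketch of the relay (assuming a CQE32-aware liburing; the
big_cqe[] layout follows the 5.19 uapi): on a ring created with
IORING_SETUP_CQE32, NOP echoes sqe->addr and sqe->addr2 into the two
extra completion words, which is convenient for exercising 32-byte CQE
plumbing.

    struct io_uring ring;
    struct io_uring_sqe *sqe;
    struct io_uring_cqe *cqe;

    io_uring_queue_init(8, &ring, IORING_SETUP_CQE32);
    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_nop(sqe);
    sqe->addr = 0x1111;     /* returned in cqe->big_cqe[0] (extra1) */
    sqe->off  = 0x2222;     /* addr2 aliases off; returned in big_cqe[1] */
    io_uring_submit(&ring);
    io_uring_wait_cqe(&ring, &cqe);
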
 
 static int io_msg_ring_prep(struct io_kiocb *req,
                            const struct io_uring_sqe *sqe)
 {
-       if (unlikely(sqe->addr || sqe->ioprio || sqe->rw_flags ||
-                    sqe->splice_fd_in || sqe->buf_index || sqe->personality))
+       if (unlikely(sqe->addr || sqe->rw_flags || sqe->splice_fd_in ||
+                    sqe->buf_index || sqe->personality))
                return -EINVAL;
 
        req->msg.user_data = READ_ONCE(sqe->off);
@@ -4506,20 +5316,15 @@ done:
        if (ret < 0)
                req_set_fail(req);
        __io_req_complete(req, issue_flags, ret, 0);
+       /* put file to avoid an attempt to IOPOLL the req */
+       io_put_file(req->file);
+       req->file = NULL;
        return 0;
 }
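
From userspace, MSG_RING posts a CQE into another ring: sqe->off carries
the target user_data and sqe->len the res value, and the io_put_file()
above keeps an IOPOLL ring from trying to poll the target ring's file. A
hedged sketch (liburing 2.2 wraps this as io_uring_prep_msg_ring();
target_fd is assumed to be the other ring's descriptor):

    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_msg_ring(sqe, target_fd, 42 /* cqe->res */,
                           0xcafe /* cqe->user_data */, 0);
    io_uring_submit(&ring);
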
 
 static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-       struct io_ring_ctx *ctx = req->ctx;
-
-       if (!req->file)
-               return -EBADF;
-
-       if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
-                    sqe->splice_fd_in))
+       if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in))
                return -EINVAL;
 
        req->sync.flags = READ_ONCE(sqe->fsync_flags);
@@ -4543,8 +5348,6 @@ static int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
        ret = vfs_fsync_range(req->file, req->sync.off,
                                end > 0 ? end : LLONG_MAX,
                                req->sync.flags & IORING_FSYNC_DATASYNC);
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
@@ -4552,10 +5355,7 @@ static int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
 static int io_fallocate_prep(struct io_kiocb *req,
                             const struct io_uring_sqe *sqe)
 {
-       if (sqe->ioprio || sqe->buf_index || sqe->rw_flags ||
-           sqe->splice_fd_in)
-               return -EINVAL;
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+       if (sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
                return -EINVAL;
 
        req->sync.off = READ_ONCE(sqe->off);
@@ -4573,9 +5373,7 @@ static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags)
                return -EAGAIN;
        ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
                                req->sync.len);
-       if (ret < 0)
-               req_set_fail(req);
-       else
+       if (ret >= 0)
                fsnotify_modify(req->file);
        io_req_complete(req, ret);
        return 0;
@@ -4586,9 +5384,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
        const char __user *fname;
        int ret;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (unlikely(sqe->ioprio || sqe->buf_index))
+       if (unlikely(sqe->buf_index))
                return -EINVAL;
        if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
@@ -4643,6 +5439,61 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return __io_openat_prep(req, sqe);
 }
 
+static int io_file_bitmap_get(struct io_ring_ctx *ctx)
+{
+       struct io_file_table *table = &ctx->file_table;
+       unsigned long nr = ctx->nr_user_files;
+       int ret;
+
+       if (table->alloc_hint >= nr)
+               table->alloc_hint = 0;
+
+       do {
+               ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint);
+               if (ret != nr) {
+                       table->alloc_hint = ret + 1;
+                       return ret;
+               }
+               if (!table->alloc_hint)
+                       break;
+
+               nr = table->alloc_hint;
+               table->alloc_hint = 0;
+       } while (1);
+
+       return -ENFILE;
+}
+
+static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
+                              struct file *file, unsigned int file_slot)
+{
+       bool alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
+       struct io_ring_ctx *ctx = req->ctx;
+       int ret;
+
+       if (alloc_slot) {
+               io_ring_submit_lock(ctx, issue_flags);
+               ret = io_file_bitmap_get(ctx);
+               if (unlikely(ret < 0)) {
+                       io_ring_submit_unlock(ctx, issue_flags);
+                       return ret;
+               }
+
+               file_slot = ret;
+       } else {
+               file_slot--;
+       }
+
+       ret = io_install_fixed_file(req, file, issue_flags, file_slot);
+       if (alloc_slot) {
+               io_ring_submit_unlock(ctx, issue_flags);
+               if (!ret)
+                       return file_slot;
+       }
+
+       return ret;
+}
+
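
Passing the special index IORING_FILE_INDEX_ALLOC instead of a concrete
slot makes the kernel pick a free slot from the bitmap above and return
it in cqe->res. A hedged userspace sketch (assumes a liburing with
io_uring_register_files_sparse(); a file table must be registered first):

    io_uring_register_files_sparse(&ring, 16);  /* 16 empty fixed slots */
    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_openat(sqe, AT_FDCWD, "data.txt", O_RDONLY, 0);
    sqe->file_index = IORING_FILE_INDEX_ALLOC;  /* ~0U: scan the bitmap */
    io_uring_submit(&ring);
    io_uring_wait_cqe(&ring, &cqe);             /* res: slot index or -ENFILE */
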
 static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct open_flags op;
@@ -4698,8 +5549,8 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
        if (!fixed)
                fd_install(ret, file);
        else
-               ret = io_install_fixed_file(req, file, issue_flags,
-                                           req->open.file_slot - 1);
+               ret = io_fixed_fd_install(req, issue_flags, file,
+                                               req->open.file_slot);
 err:
        putname(req->open.filename);
        req->flags &= ~REQ_F_NEED_CLEANUP;
@@ -4720,7 +5571,7 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
        struct io_provide_buf *p = &req->pbuf;
        u64 tmp;
 
-       if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
+       if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
            sqe->splice_fd_in)
                return -EINVAL;
 
@@ -4743,6 +5594,20 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
        if (!nbufs)
                return 0;
 
+       if (bl->buf_nr_pages) {
+               int j;
+
+               i = bl->buf_ring->tail - bl->head;
+               for (j = 0; j < bl->buf_nr_pages; j++)
+                       unpin_user_page(bl->buf_pages[j]);
+               kvfree(bl->buf_pages);
+               bl->buf_pages = NULL;
+               bl->buf_nr_pages = 0;
+               /* make sure it's seen as empty */
+               INIT_LIST_HEAD(&bl->buf_list);
+               return i;
+       }
+
        /* the head kbuf is the list itself */
        while (!list_empty(&bl->buf_list)) {
                struct io_buffer *nxt;
@@ -4764,22 +5629,23 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
        struct io_ring_ctx *ctx = req->ctx;
        struct io_buffer_list *bl;
        int ret = 0;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
-
-       io_ring_submit_lock(ctx, needs_lock);
 
-       lockdep_assert_held(&ctx->uring_lock);
+       io_ring_submit_lock(ctx, issue_flags);
 
        ret = -ENOENT;
        bl = io_buffer_get_list(ctx, p->bgid);
-       if (bl)
-               ret = __io_remove_buffers(ctx, bl, p->nbufs);
+       if (bl) {
+               ret = -EINVAL;
+               /* can't use provide/remove buffers command on mapped buffers */
+               if (!bl->buf_nr_pages)
+                       ret = __io_remove_buffers(ctx, bl, p->nbufs);
+       }
        if (ret < 0)
                req_set_fail(req);
 
        /* complete before unlock, IOPOLL may need the lock */
        __io_req_complete(req, issue_flags, ret, 0);
-       io_ring_submit_unlock(ctx, needs_lock);
+       io_ring_submit_unlock(ctx, issue_flags);
        return 0;
 }
 
@@ -4790,7 +5656,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
        struct io_provide_buf *p = &req->pbuf;
        u64 tmp;
 
-       if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+       if (sqe->rw_flags || sqe->splice_fd_in)
                return -EINVAL;
 
        tmp = READ_ONCE(sqe->fd);
@@ -4887,26 +5753,56 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
        return i ? 0 : -ENOMEM;
 }
 
+static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
+{
+       int i;
+
+       ctx->io_bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list),
+                               GFP_KERNEL);
+       if (!ctx->io_bl)
+               return -ENOMEM;
+
+       for (i = 0; i < BGID_ARRAY; i++) {
+               INIT_LIST_HEAD(&ctx->io_bl[i].buf_list);
+               ctx->io_bl[i].bgid = i;
+       }
+
+       return 0;
+}
+
 static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_provide_buf *p = &req->pbuf;
        struct io_ring_ctx *ctx = req->ctx;
        struct io_buffer_list *bl;
        int ret = 0;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
 
-       io_ring_submit_lock(ctx, needs_lock);
+       io_ring_submit_lock(ctx, issue_flags);
 
-       lockdep_assert_held(&ctx->uring_lock);
+       if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) {
+               ret = io_init_bl_list(ctx);
+               if (ret)
+                       goto err;
+       }
 
        bl = io_buffer_get_list(ctx, p->bgid);
        if (unlikely(!bl)) {
-               bl = kmalloc(sizeof(*bl), GFP_KERNEL);
+               bl = kzalloc(sizeof(*bl), GFP_KERNEL);
                if (!bl) {
                        ret = -ENOMEM;
                        goto err;
                }
-               io_buffer_add_list(ctx, bl, p->bgid);
+               INIT_LIST_HEAD(&bl->buf_list);
+               ret = io_buffer_add_list(ctx, bl, p->bgid);
+               if (ret) {
+                       kfree(bl);
+                       goto err;
+               }
+       }
+       /* can't add buffers via this command for a mapped buffer ring */
+       if (bl->buf_nr_pages) {
+               ret = -EINVAL;
+               goto err;
        }
 
        ret = io_add_buffers(ctx, p, bl);
@@ -4915,7 +5811,7 @@ err:
                req_set_fail(req);
        /* complete before unlock, IOPOLL may need the lock */
        __io_req_complete(req, issue_flags, ret, 0);
-       io_ring_submit_unlock(ctx, needs_lock);
+       io_ring_submit_unlock(ctx, issue_flags);
        return 0;
 }
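
For reference, the classic provide-buffers path this function still
serves, as a userspace sketch: buffers are handed in under a group id and
consumers opt in with IOSQE_BUFFER_SELECT, while mapped buffer rings
(registered separately) are rejected here with -EINVAL.

    /* hand 8 x 4 KiB buffers to the kernel under group 7, ids 0..7 */
    char *pool = malloc(8 * 4096);

    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_provide_buffers(sqe, pool, 4096, 8, 7, 0);
    io_uring_submit(&ring);

    /* a later recv picks one; the chosen buffer id comes back in cqe->flags */
    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_recv(sqe, sockfd, NULL, 4096, 0);
    sqe->flags |= IOSQE_BUFFER_SELECT;
    sqe->buf_group = 7;
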
 
@@ -4923,9 +5819,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
                             const struct io_uring_sqe *sqe)
 {
 #if defined(CONFIG_EPOLL)
-       if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
-               return -EINVAL;
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+       if (sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
 
        req->epoll.epfd = READ_ONCE(sqe->fd);
@@ -4969,9 +5863,7 @@ static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
 static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
-       if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in)
-               return -EINVAL;
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+       if (sqe->buf_index || sqe->off || sqe->splice_fd_in)
                return -EINVAL;
 
        req->madvise.addr = READ_ONCE(sqe->addr);
@@ -4993,8 +5885,6 @@ static int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
                return -EAGAIN;
 
        ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 #else
@@ -5004,9 +5894,7 @@ static int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
 
 static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-       if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in)
-               return -EINVAL;
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+       if (sqe->buf_index || sqe->addr || sqe->splice_fd_in)
                return -EINVAL;
 
        req->fadvise.offset = READ_ONCE(sqe->off);
@@ -5042,9 +5930,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        const char __user *path;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+       if (sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        if (req->flags & REQ_F_FIXED_FILE)
                return -EBADF;
@@ -5080,19 +5966,13 @@ static int io_statx(struct io_kiocb *req, unsigned int issue_flags)
 
        ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask,
                       ctx->buffer);
-
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
 
 static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
-           sqe->rw_flags || sqe->buf_index)
+       if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
                return -EINVAL;
        if (req->flags & REQ_F_FIXED_FILE)
                return -EBADF;
@@ -5124,7 +6004,8 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
                spin_unlock(&files->file_lock);
                goto err;
        }
-       file = fdt->fd[close->fd];
+       file = rcu_dereference_protected(fdt->fd[close->fd],
+                       lockdep_is_held(&files->file_lock));
        if (!file || file->f_op == &io_uring_fops) {
                spin_unlock(&files->file_lock);
                file = NULL;
@@ -5158,12 +6039,7 @@ err:
 
 static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-       struct io_ring_ctx *ctx = req->ctx;
-
-       if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
-                    sqe->splice_fd_in))
+       if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in))
                return -EINVAL;
 
        req->sync.off = READ_ONCE(sqe->off);
@@ -5182,13 +6058,18 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
 
        ret = sync_file_range(req->file, req->sync.off, req->sync.len,
                                req->sync.flags);
-       if (ret < 0)
-               req_set_fail(req);
        io_req_complete(req, ret);
        return 0;
 }
 
 #if defined(CONFIG_NET)
+static bool io_net_retry(struct socket *sock, int flags)
+{
+       if (!(flags & MSG_WAITALL))
+               return false;
+       return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
+}
+
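
io_net_retry() gates the new partial-transfer handling: for MSG_WAITALL
on stream-like sockets, a short transfer is banked in sr->done_io,
REQ_F_PARTIAL_IO is set, and the remainder is retried; the final
completion reports the accumulated total, or the banked progress if the
retry fails. The same idiom in plain userspace C, for comparison:

    #include <errno.h>
    #include <sys/socket.h>

    /* accumulate partial progress the way sr->done_io does */
    ssize_t send_all(int fd, const void *buf, size_t len)
    {
            size_t done = 0;

            while (done < len) {
                    ssize_t n = send(fd, (const char *)buf + done,
                                     len - done, 0);

                    if (n < 0) {
                            if (errno == EINTR)
                                    continue;
                            /* report progress if any, else the error */
                            return done ? (ssize_t)done : -1;
                    }
                    done += n;
            }
            return done;
    }
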
 static int io_setup_async_msg(struct io_kiocb *req,
                              struct io_async_msghdr *kmsg)
 {
@@ -5234,11 +6115,16 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_sr_msg *sr = &req->sr_msg;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+       if (unlikely(sqe->file_index))
                return -EINVAL;
 
        sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
        sr->len = READ_ONCE(sqe->len);
+       sr->flags = READ_ONCE(sqe->addr2);
+       if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+               return -EINVAL;
        sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
        if (sr->msg_flags & MSG_DONTWAIT)
                req->flags |= REQ_F_NOWAIT;
@@ -5247,12 +6133,14 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        if (req->ctx->compat)
                sr->msg_flags |= MSG_CMSG_COMPAT;
 #endif
+       sr->done_io = 0;
        return 0;
 }
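
IORING_RECVSEND_POLL_FIRST skips the eager issue attempt and arms poll
directly, avoiding a wasted issue when the submitter expects the socket
not to be ready. In this snapshot the flag travels in sqe->addr2 (later
kernels moved the send/recv flags into sqe->ioprio), so a hedged raw-sqe
sketch looks like:

    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_send(sqe, sockfd, buf, buflen, 0);
    sqe->addr2 = IORING_RECVSEND_POLL_FIRST;  /* poll for POLLOUT first */
    io_uring_submit(&ring);
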
 
 static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_async_msghdr iomsg, *kmsg;
+       struct io_sr_msg *sr = &req->sr_msg;
        struct socket *sock;
        unsigned flags;
        int min_ret = 0;
@@ -5271,7 +6159,11 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
                kmsg = &iomsg;
        }
 
-       flags = req->sr_msg.msg_flags;
+       if (!(req->flags & REQ_F_POLLED) &&
+           (sr->flags & IORING_RECVSEND_POLL_FIRST))
+               return io_setup_async_msg(req, kmsg);
+
+       flags = sr->msg_flags;
        if (issue_flags & IO_URING_F_NONBLOCK)
                flags |= MSG_DONTWAIT;
        if (flags & MSG_WAITALL)
@@ -5284,12 +6176,21 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
                        return io_setup_async_msg(req, kmsg);
                if (ret == -ERESTARTSYS)
                        ret = -EINTR;
+               if (ret > 0 && io_net_retry(sock, flags)) {
+                       sr->done_io += ret;
+                       req->flags |= REQ_F_PARTIAL_IO;
+                       return io_setup_async_msg(req, kmsg);
+               }
                req_set_fail(req);
        }
        /* fast path, check for non-NULL to avoid function call */
        if (kmsg->free_iov)
                kfree(kmsg->free_iov);
        req->flags &= ~REQ_F_NEED_CLEANUP;
+       if (ret >= 0)
+               ret += sr->done_io;
+       else if (sr->done_io)
+               ret = sr->done_io;
        __io_req_complete(req, issue_flags, ret, 0);
        return 0;
 }
@@ -5304,6 +6205,10 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
        int min_ret = 0;
        int ret;
 
+       if (!(req->flags & REQ_F_POLLED) &&
+           (sr->flags & IORING_RECVSEND_POLL_FIRST))
+               return -EAGAIN;
+
        sock = sock_from_file(req->file);
        if (unlikely(!sock))
                return -ENOTSOCK;
@@ -5317,7 +6222,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
        msg.msg_controllen = 0;
        msg.msg_namelen = 0;
 
-       flags = req->sr_msg.msg_flags;
+       flags = sr->msg_flags;
        if (issue_flags & IO_URING_F_NONBLOCK)
                flags |= MSG_DONTWAIT;
        if (flags & MSG_WAITALL)
@@ -5330,8 +6235,19 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
                        return -EAGAIN;
                if (ret == -ERESTARTSYS)
                        ret = -EINTR;
+               if (ret > 0 && io_net_retry(sock, flags)) {
+                       sr->len -= ret;
+                       sr->buf += ret;
+                       sr->done_io += ret;
+                       req->flags |= REQ_F_PARTIAL_IO;
+                       return -EAGAIN;
+               }
                req_set_fail(req);
        }
+       if (ret >= 0)
+               ret += sr->done_io;
+       else if (sr->done_io)
+               ret = sr->done_io;
        __io_req_complete(req, issue_flags, ret, 0);
        return 0;
 }
@@ -5423,14 +6339,6 @@ static int io_recvmsg_copy_hdr(struct io_kiocb *req,
        return __io_recvmsg_copy_hdr(req, iomsg);
 }
 
-static struct io_buffer *io_recv_buffer_select(struct io_kiocb *req,
-                                              unsigned int issue_flags)
-{
-       struct io_sr_msg *sr = &req->sr_msg;
-
-       return io_buffer_select(req, &sr->len, sr->bgid, issue_flags);
-}
-
 static int io_recvmsg_prep_async(struct io_kiocb *req)
 {
        int ret;
@@ -5445,12 +6353,16 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_sr_msg *sr = &req->sr_msg;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+       if (unlikely(sqe->file_index))
                return -EINVAL;
 
        sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
        sr->len = READ_ONCE(sqe->len);
-       sr->bgid = READ_ONCE(sqe->buf_group);
+       sr->flags = READ_ONCE(sqe->addr2);
+       if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+               return -EINVAL;
        sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
        if (sr->msg_flags & MSG_DONTWAIT)
                req->flags |= REQ_F_NOWAIT;
@@ -5463,19 +6375,12 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return 0;
 }
 
-static bool io_net_retry(struct socket *sock, int flags)
-{
-       if (!(flags & MSG_WAITALL))
-               return false;
-       return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
-}
-
 static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_async_msghdr iomsg, *kmsg;
        struct io_sr_msg *sr = &req->sr_msg;
        struct socket *sock;
-       struct io_buffer *kbuf;
+       unsigned int cflags;
        unsigned flags;
        int ret, min_ret = 0;
        bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -5493,24 +6398,30 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
                kmsg = &iomsg;
        }
 
-       if (req->flags & REQ_F_BUFFER_SELECT) {
-               kbuf = io_recv_buffer_select(req, issue_flags);
-               if (IS_ERR(kbuf))
-                       return PTR_ERR(kbuf);
-               kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
-               kmsg->fast_iov[0].iov_len = req->sr_msg.len;
-               iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov,
-                               1, req->sr_msg.len);
+       if (!(req->flags & REQ_F_POLLED) &&
+           (sr->flags & IORING_RECVSEND_POLL_FIRST))
+               return io_setup_async_msg(req, kmsg);
+
+       if (io_do_buffer_select(req)) {
+               void __user *buf;
+
+               buf = io_buffer_select(req, &sr->len, issue_flags);
+               if (!buf)
+                       return -ENOBUFS;
+               kmsg->fast_iov[0].iov_base = buf;
+               kmsg->fast_iov[0].iov_len = sr->len;
+               iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
+                               sr->len);
        }
 
-       flags = req->sr_msg.msg_flags;
+       flags = sr->msg_flags;
        if (force_nonblock)
                flags |= MSG_DONTWAIT;
        if (flags & MSG_WAITALL)
                min_ret = iov_iter_count(&kmsg->msg.msg_iter);
 
-       ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
-                                       kmsg->uaddr, flags);
+       kmsg->msg.msg_get_inq = 1;
+       ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
        if (ret < min_ret) {
                if (ret == -EAGAIN && force_nonblock)
                        return io_setup_async_msg(req, kmsg);
@@ -5534,45 +6445,54 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
                ret += sr->done_io;
        else if (sr->done_io)
                ret = sr->done_io;
-       __io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
+       cflags = io_put_kbuf(req, issue_flags);
+       if (kmsg->msg.msg_inq)
+               cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+       __io_req_complete(req, issue_flags, ret, cflags);
        return 0;
 }
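
With msg_get_inq set, the socket layer reports whether receive data is
still queued, and the completion advertises that via
IORING_CQE_F_SOCK_NONEMPTY; userspace can use it to decide whether to
re-issue immediately. A hedged sketch (queue_another_recv() is a
placeholder helper):

    io_uring_wait_cqe(&ring, &cqe);
    if (cqe->res > 0 && (cqe->flags & IORING_CQE_F_SOCK_NONEMPTY))
            queue_another_recv(&ring, sockfd);
    io_uring_cqe_seen(&ring, cqe);
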
 
 static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 {
-       struct io_buffer *kbuf;
        struct io_sr_msg *sr = &req->sr_msg;
        struct msghdr msg;
-       void __user *buf = sr->buf;
        struct socket *sock;
        struct iovec iov;
+       unsigned int cflags;
        unsigned flags;
        int ret, min_ret = 0;
        bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 
+       if (!(req->flags & REQ_F_POLLED) &&
+           (sr->flags & IORING_RECVSEND_POLL_FIRST))
+               return -EAGAIN;
+
        sock = sock_from_file(req->file);
        if (unlikely(!sock))
                return -ENOTSOCK;
 
-       if (req->flags & REQ_F_BUFFER_SELECT) {
-               kbuf = io_recv_buffer_select(req, issue_flags);
-               if (IS_ERR(kbuf))
-                       return PTR_ERR(kbuf);
-               buf = u64_to_user_ptr(kbuf->addr);
+       if (io_do_buffer_select(req)) {
+               void __user *buf;
+
+               buf = io_buffer_select(req, &sr->len, issue_flags);
+               if (!buf)
+                       return -ENOBUFS;
+               sr->buf = buf;
        }
 
-       ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter);
+       ret = import_single_range(READ, sr->buf, sr->len, &iov, &msg.msg_iter);
        if (unlikely(ret))
                goto out_free;
 
        msg.msg_name = NULL;
+       msg.msg_namelen = 0;
        msg.msg_control = NULL;
+       msg.msg_get_inq = 1;
+       msg.msg_flags = 0;
        msg.msg_controllen = 0;
-       msg.msg_namelen = 0;
        msg.msg_iocb = NULL;
-       msg.msg_flags = 0;
 
-       flags = req->sr_msg.msg_flags;
+       flags = sr->msg_flags;
        if (force_nonblock)
                flags |= MSG_DONTWAIT;
        if (flags & MSG_WAITALL)
@@ -5601,36 +6521,49 @@ out_free:
                ret += sr->done_io;
        else if (sr->done_io)
                ret = sr->done_io;
-       __io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
+       cflags = io_put_kbuf(req, issue_flags);
+       if (msg.msg_inq)
+               cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+       __io_req_complete(req, issue_flags, ret, cflags);
        return 0;
 }
 
 static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_accept *accept = &req->accept;
+       unsigned flags;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->len || sqe->buf_index)
+       if (sqe->len || sqe->buf_index)
                return -EINVAL;
 
        accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
        accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
        accept->flags = READ_ONCE(sqe->accept_flags);
        accept->nofile = rlimit(RLIMIT_NOFILE);
+       flags = READ_ONCE(sqe->ioprio);
+       if (flags & ~IORING_ACCEPT_MULTISHOT)
+               return -EINVAL;
 
        accept->file_slot = READ_ONCE(sqe->file_index);
-       if (accept->file_slot && (accept->flags & SOCK_CLOEXEC))
-               return -EINVAL;
+       if (accept->file_slot) {
+               if (accept->flags & SOCK_CLOEXEC)
+                       return -EINVAL;
+               if (flags & IORING_ACCEPT_MULTISHOT &&
+                   accept->file_slot != IORING_FILE_INDEX_ALLOC)
+                       return -EINVAL;
+       }
        if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
                return -EINVAL;
        if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
                accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+       if (flags & IORING_ACCEPT_MULTISHOT)
+               req->flags |= REQ_F_APOLL_MULTISHOT;
        return 0;
 }
 
 static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
 {
+       struct io_ring_ctx *ctx = req->ctx;
        struct io_accept *accept = &req->accept;
        bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
        unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
@@ -5638,6 +6571,7 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
        struct file *file;
        int ret, fd;
 
+retry:
        if (!fixed) {
                fd = __get_unused_fd_flags(accept->flags, accept->nofile);
                if (unlikely(fd < 0))
@@ -5649,7 +6583,89 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
                if (!fixed)
                        put_unused_fd(fd);
                ret = PTR_ERR(file);
-               if (ret == -EAGAIN && force_nonblock)
+               if (ret == -EAGAIN && force_nonblock) {
+                       /*
+                        * if it's multishot and polled, we don't need to
+                        * return EAGAIN to arm the poll infra since it
+                        * has already been done
+                        */
+                       if ((req->flags & IO_APOLL_MULTI_POLLED) ==
+                           IO_APOLL_MULTI_POLLED)
+                               ret = 0;
+                       return ret;
+               }
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+               req_set_fail(req);
+       } else if (!fixed) {
+               fd_install(fd, file);
+               ret = fd;
+       } else {
+               ret = io_fixed_fd_install(req, issue_flags, file,
+                                               accept->file_slot);
+       }
+
+       if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+               __io_req_complete(req, issue_flags, ret, 0);
+               return 0;
+       }
+       if (ret >= 0) {
+               bool filled;
+
+               spin_lock(&ctx->completion_lock);
+               filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret,
+                                        IORING_CQE_F_MORE);
+               io_commit_cqring(ctx);
+               spin_unlock(&ctx->completion_lock);
+               if (filled) {
+                       io_cqring_ev_posted(ctx);
+                       goto retry;
+               }
+               ret = -ECANCELED;
+       }
+
+       return ret;
+}
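
Userspace arms multishot accept once and then reaps one CQE per incoming
connection until the kernel clears IORING_CQE_F_MORE, at which point the
request has terminated and must be re-armed. A hedged sketch (this
snapshot carries the flag in sqe->ioprio; handle_conn() is a
placeholder):

    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_accept(sqe, listen_fd, NULL, NULL, 0);
    sqe->ioprio = IORING_ACCEPT_MULTISHOT;
    io_uring_submit(&ring);

    for (;;) {
            io_uring_wait_cqe(&ring, &cqe);
            int res = cqe->res;             /* new connection fd, or -errno */
            unsigned more = cqe->flags & IORING_CQE_F_MORE;

            io_uring_cqe_seen(&ring, cqe);
            if (res >= 0)
                    handle_conn(res);
            if (!more)
                    break;                  /* terminated: re-arm the accept */
    }
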
+
+static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       struct io_socket *sock = &req->sock;
+
+       if (sqe->addr || sqe->rw_flags || sqe->buf_index)
+               return -EINVAL;
+
+       sock->domain = READ_ONCE(sqe->fd);
+       sock->type = READ_ONCE(sqe->off);
+       sock->protocol = READ_ONCE(sqe->len);
+       sock->file_slot = READ_ONCE(sqe->file_index);
+       sock->nofile = rlimit(RLIMIT_NOFILE);
+
+       sock->flags = sock->type & ~SOCK_TYPE_MASK;
+       if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
+               return -EINVAL;
+       if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+               return -EINVAL;
+       return 0;
+}
+
+static int io_socket(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_socket *sock = &req->sock;
+       bool fixed = !!sock->file_slot;
+       struct file *file;
+       int ret, fd;
+
+       if (!fixed) {
+               fd = __get_unused_fd_flags(sock->flags, sock->nofile);
+               if (unlikely(fd < 0))
+                       return fd;
+       }
+       file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
+       if (IS_ERR(file)) {
+               if (!fixed)
+                       put_unused_fd(fd);
+               ret = PTR_ERR(file);
+               if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
                        return -EAGAIN;
                if (ret == -ERESTARTSYS)
                        ret = -EINTR;
@@ -5659,7 +6675,7 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
                ret = fd;
        } else {
                ret = io_install_fixed_file(req, file, issue_flags,
-                                           accept->file_slot - 1);
+                                           sock->file_slot - 1);
        }
        __io_req_complete(req, issue_flags, ret, 0);
        return 0;
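
The field mapping for the new IORING_OP_SOCKET is unusual: the domain
rides in sqe->fd, the type in sqe->off and the protocol in sqe->len. A
hedged raw sketch, assuming the SQE starts zeroed (liburing later wrapped
this as io_uring_prep_socket()):

    sqe = io_uring_get_sqe(&ring);
    memset(sqe, 0, sizeof(*sqe));
    sqe->opcode     = IORING_OP_SOCKET;
    sqe->fd         = AF_INET;      /* domain */
    sqe->off        = SOCK_STREAM;  /* type, may OR in SOCK_NONBLOCK */
    sqe->len        = 0;            /* protocol */
    sqe->file_index = 0;            /* 0: cqe->res is a normal fd */
    io_uring_submit(&ring);
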
@@ -5677,10 +6693,7 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_connect *conn = &req->connect;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
-           sqe->splice_fd_in)
+       if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
                return -EINVAL;
 
        conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -5753,112 +6766,11 @@ IO_NETOP_PREP_ASYNC(sendmsg);
 IO_NETOP_PREP_ASYNC(recvmsg);
 IO_NETOP_PREP_ASYNC(connect);
 IO_NETOP_PREP(accept);
+IO_NETOP_PREP(socket);
 IO_NETOP_FN(send);
 IO_NETOP_FN(recv);
 #endif /* CONFIG_NET */
 
-#ifdef CONFIG_NET_RX_BUSY_POLL
-
-#define NAPI_TIMEOUT                   (60 * SEC_CONVERSION)
-
-struct napi_entry {
-       struct list_head        list;
-       unsigned int            napi_id;
-       unsigned long           timeout;
-};
-
-/*
- * Add busy poll NAPI ID from sk.
- */
-static void io_add_napi(struct file *file, struct io_ring_ctx *ctx)
-{
-       unsigned int napi_id;
-       struct socket *sock;
-       struct sock *sk;
-       struct napi_entry *ne;
-
-       if (!net_busy_loop_on())
-               return;
-
-       sock = sock_from_file(file);
-       if (!sock)
-               return;
-
-       sk = sock->sk;
-       if (!sk)
-               return;
-
-       napi_id = READ_ONCE(sk->sk_napi_id);
-
-       /* Non-NAPI IDs can be rejected */
-       if (napi_id < MIN_NAPI_ID)
-               return;
-
-       spin_lock(&ctx->napi_lock);
-       list_for_each_entry(ne, &ctx->napi_list, list) {
-               if (ne->napi_id == napi_id) {
-                       ne->timeout = jiffies + NAPI_TIMEOUT;
-                       goto out;
-               }
-       }
-
-       ne = kmalloc(sizeof(*ne), GFP_NOWAIT);
-       if (!ne)
-               goto out;
-
-       ne->napi_id = napi_id;
-       ne->timeout = jiffies + NAPI_TIMEOUT;
-       list_add_tail(&ne->list, &ctx->napi_list);
-out:
-       spin_unlock(&ctx->napi_lock);
-}
-
-static inline void io_check_napi_entry_timeout(struct napi_entry *ne)
-{
-       if (time_after(jiffies, ne->timeout)) {
-               list_del(&ne->list);
-               kfree(ne);
-       }
-}
-
-/*
- * Busy poll if globally on and supporting sockets found
- */
-static bool io_napi_busy_loop(struct list_head *napi_list)
-{
-       struct napi_entry *ne, *n;
-
-       list_for_each_entry_safe(ne, n, napi_list, list) {
-               napi_busy_loop(ne->napi_id, NULL, NULL, true,
-                              BUSY_POLL_BUDGET);
-               io_check_napi_entry_timeout(ne);
-       }
-       return !list_empty(napi_list);
-}
-
-static void io_free_napi_list(struct io_ring_ctx *ctx)
-{
-       spin_lock(&ctx->napi_lock);
-       while (!list_empty(&ctx->napi_list)) {
-               struct napi_entry *ne =
-                       list_first_entry(&ctx->napi_list, struct napi_entry,
-                                        list);
-
-               list_del(&ne->list);
-               kfree(ne);
-       }
-       spin_unlock(&ctx->napi_lock);
-}
-#else
-static inline void io_add_napi(struct file *file, struct io_ring_ctx *ctx)
-{
-}
-
-static inline void io_free_napi_list(struct io_ring_ctx *ctx)
-{
-}
-#endif /* CONFIG_NET_RX_BUSY_POLL */
-
 struct io_poll_table {
        struct poll_table_struct pt;
        struct io_kiocb *req;
@@ -5905,7 +6817,7 @@ static void io_poll_req_insert(struct io_kiocb *req)
        struct io_ring_ctx *ctx = req->ctx;
        struct hlist_head *list;
 
-       list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
+       list = &ctx->cancel_hash[hash_long(req->cqe.user_data, ctx->cancel_hash_bits)];
        hlist_add_head(&req->hash_node, list);
 }
 
@@ -5964,23 +6876,23 @@ static void io_poll_remove_entries(struct io_kiocb *req)
        rcu_read_unlock();
 }
 
+static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags);
 /*
  * All poll tw should go through this. Checks for poll events, manages
  * references, does rewait, etc.
  *
  * Returns a negative error on failure. >0 when no action is required, which
  * is either a spurious wakeup or a served multishot CQE. 0 when it's done with
- * the request, then the mask is stored in req->result.
+ * the request, then the mask is stored in req->cqe.res.
  */
-static int io_poll_check_events(struct io_kiocb *req)
+static int io_poll_check_events(struct io_kiocb *req, bool *locked)
 {
        struct io_ring_ctx *ctx = req->ctx;
-       struct io_poll_iocb *poll = io_poll_get_single(req);
-       int v;
+       int v, ret;
 
        /* req->task == current here, checking PF_EXITING is safe */
        if (unlikely(req->task->flags & PF_EXITING))
-               io_poll_mark_cancelled(req);
+               return -ECANCELED;
 
        do {
                v = atomic_read(&req->poll_refs);
@@ -5991,30 +6903,46 @@ static int io_poll_check_events(struct io_kiocb *req)
                if (v & IO_POLL_CANCEL_FLAG)
                        return -ECANCELED;
 
-               if (!req->result) {
-                       struct poll_table_struct pt = { ._key = req->cflags };
+               if (!req->cqe.res) {
+                       struct poll_table_struct pt = { ._key = req->apoll_events };
+                       unsigned flags = locked ? 0 : IO_URING_F_UNLOCKED;
 
-                       req->result = vfs_poll(req->file, &pt) & req->cflags;
+                       if (unlikely(!io_assign_file(req, flags)))
+                               return -EBADF;
+                       req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
                }
 
-               /* multishot, just fill an CQE and proceed */
-               if (req->result && !(req->cflags & EPOLLONESHOT)) {
-                       __poll_t mask = mangle_poll(req->result & poll->events);
+               if (unlikely(!req->cqe.res))
+                       continue;
+               if (req->apoll_events & EPOLLONESHOT)
+                       return 0;
+
+               /* multishot, just fill a CQE and proceed */
+               if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+                       __poll_t mask = mangle_poll(req->cqe.res &
+                                                   req->apoll_events);
                        bool filled;
 
                        spin_lock(&ctx->completion_lock);
-                       filled = io_fill_cqe_aux(ctx, req->user_data, mask,
-                                                IORING_CQE_F_MORE);
+                       filled = io_fill_cqe_aux(ctx, req->cqe.user_data,
+                                                mask, IORING_CQE_F_MORE);
                        io_commit_cqring(ctx);
                        spin_unlock(&ctx->completion_lock);
-                       if (unlikely(!filled))
-                               return -ECANCELED;
-                       io_cqring_ev_posted(ctx);
-                       io_add_napi(req->file, ctx);
-               } else if (req->result) {
-                       return 0;
+                       if (filled) {
+                               io_cqring_ev_posted(ctx);
+                               continue;
+                       }
+                       return -ECANCELED;
                }
 
+               io_tw_lock(req->ctx, locked);
+               if (unlikely(req->task->flags & PF_EXITING))
+                       return -EFAULT;
+               ret = io_issue_sqe(req,
+                                  IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
+               if (ret)
+                       return ret;
+
                /*
                 * Release all references, retry if someone tried to restart
                 * task_work while we were executing it.
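
The ownership scheme the loop above relies on can be modeled in
isolation: a wakeup that bumps poll_refs from zero owns the request and
must keep processing until it has retired every reference that arrived
in the meantime. A standalone C11 sketch of the pattern (an illustrative
model, not the kernel code):

    #include <stdatomic.h>

    static atomic_int poll_refs;

    static void handle_events(void)
    {
            /* illustrative: consume the latest poll result here */
    }

    static void owner_process(void)
    {
            int v;

            do {
                    v = atomic_load(&poll_refs); /* refs we will retire */
                    handle_events();
                    /* if wakeups raced in, the old count exceeds v: loop */
            } while (atomic_fetch_sub(&poll_refs, v) != v);
    }

    static void wakeup(void)                    /* e.g. the poll callback */
    {
            if (atomic_fetch_add(&poll_refs, 1) == 0)
                    owner_process();            /* we won ownership */
            /* else the current owner sees our reference and iterates */
    }
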
@@ -6029,21 +6957,21 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
        struct io_ring_ctx *ctx = req->ctx;
        int ret;
 
-       ret = io_poll_check_events(req);
+       ret = io_poll_check_events(req, locked);
        if (ret > 0)
                return;
 
        if (!ret) {
-               req->result = mangle_poll(req->result & req->poll.events);
+               req->cqe.res = mangle_poll(req->cqe.res & req->poll.events);
        } else {
-               req->result = ret;
+               req->cqe.res = ret;
                req_set_fail(req);
        }
 
        io_poll_remove_entries(req);
        spin_lock(&ctx->completion_lock);
        hash_del(&req->hash_node);
-       __io_req_complete_post(req, req->result, 0);
+       __io_req_complete_post(req, req->cqe.res, 0);
        io_commit_cqring(ctx);
        spin_unlock(&ctx->completion_lock);
        io_cqring_ev_posted(ctx);
@@ -6054,7 +6982,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
        struct io_ring_ctx *ctx = req->ctx;
        int ret;
 
-       ret = io_poll_check_events(req);
+       ret = io_poll_check_events(req, locked);
        if (ret > 0)
                return;
 
@@ -6069,26 +6997,27 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
                io_req_complete_failed(req, ret);
 }
 
-static void __io_poll_execute(struct io_kiocb *req, int mask, int events)
+static void __io_poll_execute(struct io_kiocb *req, int mask, __poll_t events)
 {
-       req->result = mask;
+       req->cqe.res = mask;
        /*
         * This is useful for poll that is armed on behalf of another
         * request, and where the wakeup path could be on a different
         * CPU. We want to avoid pulling in req->apoll->events for that
         * case.
         */
-       req->cflags = events;
+       req->apoll_events = events;
        if (req->opcode == IORING_OP_POLL_ADD)
                req->io_task_work.func = io_poll_task_func;
        else
                req->io_task_work.func = io_apoll_task_func;
 
-       trace_io_uring_task_add(req->ctx, req, req->user_data, req->opcode, mask);
-       io_req_task_work_add(req, false);
+       trace_io_uring_task_add(req->ctx, req, req->cqe.user_data, req->opcode, mask);
+       io_req_task_work_add(req);
 }
 
-static inline void io_poll_execute(struct io_kiocb *req, int res, int events)
+static inline void io_poll_execute(struct io_kiocb *req, int res,
+               __poll_t events)
 {
        if (io_poll_get_ownership(req))
                __io_poll_execute(req, res, events);
@@ -6103,6 +7032,7 @@ static void io_poll_cancel_req(struct io_kiocb *req)
 
 #define wqe_to_req(wait)       ((void *)((unsigned long) (wait)->private & ~1))
 #define wqe_is_double(wait)    ((unsigned long) (wait)->private & 1)
+#define IO_ASYNC_POLL_COMMON   (EPOLLONESHOT | EPOLLPRI)
 
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
                        void *key)
@@ -6137,7 +7067,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
        }
 
        /* for instances that support it check for an event match first */
-       if (mask && !(mask & poll->events))
+       if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
                return 0;
 
        if (io_poll_get_ownership(req)) {
@@ -6223,6 +7153,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
        int v;
 
        INIT_HLIST_NODE(&req->hash_node);
+       req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
        io_init_poll_iocb(poll, mask, io_poll_wake);
        poll->file = req->file;
 
@@ -6261,7 +7192,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
                __io_poll_execute(req, mask, poll->events);
                return 0;
        }
-       io_add_napi(req->file, req->ctx);
 
        /*
         * Release ownership. If someone tried to queue a tw while it was
@@ -6294,28 +7224,34 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
        struct io_ring_ctx *ctx = req->ctx;
        struct async_poll *apoll;
        struct io_poll_table ipt;
-       __poll_t mask = EPOLLONESHOT | POLLERR | POLLPRI;
+       __poll_t mask = POLLPRI | POLLERR;
        int ret;
 
        if (!def->pollin && !def->pollout)
                return IO_APOLL_ABORTED;
-       if (!file_can_poll(req->file) || (req->flags & REQ_F_POLLED))
+       if (!file_can_poll(req->file))
                return IO_APOLL_ABORTED;
+       if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
+               return IO_APOLL_ABORTED;
+       if (!(req->flags & REQ_F_APOLL_MULTISHOT))
+               mask |= EPOLLONESHOT;
 
        if (def->pollin) {
-               mask |= POLLIN | POLLRDNORM;
+               mask |= EPOLLIN | EPOLLRDNORM;
 
                /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
                if ((req->opcode == IORING_OP_RECVMSG) &&
                    (req->sr_msg.msg_flags & MSG_ERRQUEUE))
-                       mask &= ~POLLIN;
+                       mask &= ~EPOLLIN;
        } else {
-               mask |= POLLOUT | POLLWRNORM;
+               mask |= EPOLLOUT | EPOLLWRNORM;
        }
        if (def->poll_exclusive)
                mask |= EPOLLEXCLUSIVE;
-       if (!(issue_flags & IO_URING_F_UNLOCKED) &&
-           !list_empty(&ctx->apoll_cache)) {
+       if (req->flags & REQ_F_POLLED) {
+               apoll = req->apoll;
+       } else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
+                  !list_empty(&ctx->apoll_cache)) {
                apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
                                                poll.wait.entry);
                list_del_init(&apoll->poll.wait.entry);
@@ -6335,7 +7271,7 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
        if (ret || ipt.error)
                return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
 
-       trace_io_uring_poll_arm(ctx, req, req->user_data, req->opcode,
+       trace_io_uring_poll_arm(ctx, req, req->cqe.user_data, req->opcode,
                                mask, apoll->poll.events);
        return IO_APOLL_OK;
 }
@@ -6368,24 +7304,53 @@ static __cold bool io_poll_remove_all(struct io_ring_ctx *ctx,
        return found;
 }
 
-static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr,
-                                    bool poll_only)
+static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
+                                    struct io_cancel_data *cd)
        __must_hold(&ctx->completion_lock)
 {
        struct hlist_head *list;
        struct io_kiocb *req;
 
-       list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)];
+       list = &ctx->cancel_hash[hash_long(cd->data, ctx->cancel_hash_bits)];
        hlist_for_each_entry(req, list, hash_node) {
-               if (sqe_addr != req->user_data)
+               if (cd->data != req->cqe.user_data)
                        continue;
                if (poll_only && req->opcode != IORING_OP_POLL_ADD)
                        continue;
+               if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
+                       if (cd->seq == req->work.cancel_seq)
+                               continue;
+                       req->work.cancel_seq = cd->seq;
+               }
                return req;
        }
        return NULL;
 }
 
+static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
+                                         struct io_cancel_data *cd)
+       __must_hold(&ctx->completion_lock)
+{
+       struct io_kiocb *req;
+       int i;
+
+       for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
+               struct hlist_head *list;
+
+               list = &ctx->cancel_hash[i];
+               hlist_for_each_entry(req, list, hash_node) {
+                       if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
+                           req->file != cd->file)
+                               continue;
+                       if (cd->seq == req->work.cancel_seq)
+                               continue;
+                       req->work.cancel_seq = cd->seq;
+                       return req;
+               }
+       }
+       return NULL;
+}
+
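
These lookups back the extended cancel API: IORING_ASYNC_CANCEL_FD
matches requests by file rather than user_data, IORING_ASYNC_CANCEL_ALL
keeps cancelling past the first match (work.cancel_seq stops a request
from being visited twice in one pass), and IORING_ASYNC_CANCEL_ANY
matches any request. A hedged raw-sqe sketch:

    /* cancel every pending request on sockfd */
    sqe = io_uring_get_sqe(&ring);
    memset(sqe, 0, sizeof(*sqe));
    sqe->opcode       = IORING_OP_ASYNC_CANCEL;
    sqe->fd           = sockfd;
    sqe->cancel_flags = IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_ALL;
    io_uring_submit(&ring);   /* cqe->res: number cancelled, or -ENOENT */
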
 static bool io_poll_disarm(struct io_kiocb *req)
        __must_hold(&ctx->completion_lock)
 {
@@ -6396,12 +7361,15 @@ static bool io_poll_disarm(struct io_kiocb *req)
        return true;
 }
 
-static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr,
-                         bool poll_only)
+static int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
        __must_hold(&ctx->completion_lock)
 {
-       struct io_kiocb *req = io_poll_find(ctx, sqe_addr, poll_only);
+       struct io_kiocb *req;
 
+       if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
+               req = io_poll_file_find(ctx, cd);
+       else
+               req = io_poll_find(ctx, false, cd);
        if (!req)
                return -ENOENT;
        io_poll_cancel_req(req);
@@ -6428,9 +7396,7 @@ static int io_poll_update_prep(struct io_kiocb *req,
        struct io_poll_update *upd = &req->poll_update;
        u32 flags;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+       if (sqe->buf_index || sqe->splice_fd_in)
                return -EINVAL;
        flags = READ_ONCE(sqe->len);
        if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
@@ -6460,9 +7426,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
        struct io_poll_iocb *poll = &req->poll;
        u32 flags;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->addr)
+       if (sqe->buf_index || sqe->off || sqe->addr)
                return -EINVAL;
        flags = READ_ONCE(sqe->len);
        if (flags & ~IORING_POLL_ADD_MULTI)
@@ -6471,7 +7435,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
                return -EINVAL;
 
        io_req_set_refcount(req);
-       req->cflags = poll->events = io_poll_parse_events(sqe, flags);
+       req->apoll_events = poll->events = io_poll_parse_events(sqe, flags);
        return 0;
 }
 
@@ -6492,13 +7456,14 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
 
 static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
 {
+       struct io_cancel_data cd = { .data = req->poll_update.old_user_data, };
        struct io_ring_ctx *ctx = req->ctx;
        struct io_kiocb *preq;
        int ret2, ret = 0;
        bool locked;
 
        spin_lock(&ctx->completion_lock);
-       preq = io_poll_find(ctx, req->poll_update.old_user_data, true);
+       preq = io_poll_find(ctx, true, &cd);
        if (!preq || !io_poll_disarm(preq)) {
                spin_unlock(&ctx->completion_lock);
                ret = preq ? -EALREADY : -ENOENT;
@@ -6514,7 +7479,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
                        preq->poll.events |= IO_POLL_UNMASK;
                }
                if (req->poll_update.update_user_data)
-                       preq->user_data = req->poll_update.new_user_data;
+                       preq->cqe.user_data = req->poll_update.new_user_data;
 
                ret2 = io_poll_add(preq, issue_flags);
                /* successfully updated, don't complete poll request */
@@ -6523,7 +7488,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
        }
 
        req_set_fail(preq);
-       preq->result = -ECANCELED;
+       preq->cqe.res = -ECANCELED;
        locked = !(issue_flags & IO_URING_F_UNLOCKED);
        io_req_task_complete(preq, &locked);
 out:
@@ -6551,14 +7516,14 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
        if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
                req_set_fail(req);
 
-       req->result = -ETIME;
+       req->cqe.res = -ETIME;
        req->io_task_work.func = io_req_task_complete;
-       io_req_task_work_add(req, false);
+       io_req_task_work_add(req);
        return HRTIMER_NORESTART;
 }
 
 static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
-                                          __u64 user_data)
+                                          struct io_cancel_data *cd)
        __must_hold(&ctx->timeout_lock)
 {
        struct io_timeout_data *io;
@@ -6566,9 +7531,16 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
        bool found = false;
 
        list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
-               found = user_data == req->user_data;
-               if (found)
-                       break;
+               if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
+                   cd->data != req->cqe.user_data)
+                       continue;
+               if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
+                       if (cd->seq == req->work.cancel_seq)
+                               continue;
+                       req->work.cancel_seq = cd->seq;
+               }
+               found = true;
+               break;
        }
        if (!found)
                return ERR_PTR(-ENOENT);
@@ -6580,11 +7552,14 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
        return req;
 }
 
-static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
+static int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
        __must_hold(&ctx->completion_lock)
-       __must_hold(&ctx->timeout_lock)
 {
-       struct io_kiocb *req = io_timeout_extract(ctx, user_data);
+       struct io_kiocb *req;
+
+       spin_lock_irq(&ctx->timeout_lock);
+       req = io_timeout_extract(ctx, cd);
+       spin_unlock_irq(&ctx->timeout_lock);
 
        if (IS_ERR(req))
                return PTR_ERR(req);
@@ -6617,7 +7592,7 @@ static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
        bool found = false;
 
        list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) {
-               found = user_data == req->user_data;
+               found = user_data == req->cqe.user_data;
                if (found)
                        break;
        }
@@ -6637,7 +7612,8 @@ static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
                             struct timespec64 *ts, enum hrtimer_mode mode)
        __must_hold(&ctx->timeout_lock)
 {
-       struct io_kiocb *req = io_timeout_extract(ctx, user_data);
+       struct io_cancel_data cd = { .data = user_data, };
+       struct io_kiocb *req = io_timeout_extract(ctx, &cd);
        struct io_timeout_data *data;
 
        if (IS_ERR(req))
@@ -6657,11 +7633,9 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
 {
        struct io_timeout_rem *tr = &req->timeout_rem;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
        if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
                return -EINVAL;
-       if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in)
+       if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
                return -EINVAL;
 
        tr->ltimeout = false;
@@ -6702,10 +7676,10 @@ static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
        int ret;
 
        if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE)) {
+               struct io_cancel_data cd = { .data = tr->addr, };
+
                spin_lock(&ctx->completion_lock);
-               spin_lock_irq(&ctx->timeout_lock);
-               ret = io_timeout_cancel(ctx, tr->addr);
-               spin_unlock_irq(&ctx->timeout_lock);
+               ret = io_timeout_cancel(ctx, &cd);
                spin_unlock(&ctx->completion_lock);
        } else {
                enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
@@ -6731,10 +7705,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
        unsigned flags;
        u32 off = READ_ONCE(sqe->off);
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->buf_index || sqe->len != 1 ||
-           sqe->splice_fd_in)
+       if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
                return -EINVAL;
        if (off && is_timeout_link)
                return -EINVAL;
@@ -6766,6 +7737,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
        if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
                return -EINVAL;
 
+       INIT_LIST_HEAD(&req->timeout.list);
        data->mode = io_translate_timeout_mode(flags);
        hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
 
@@ -6832,30 +7804,42 @@ add:
        return 0;
 }
 
-struct io_cancel_data {
-       struct io_ring_ctx *ctx;
-       u64 user_data;
-};
-
 static bool io_cancel_cb(struct io_wq_work *work, void *data)
 {
        struct io_kiocb *req = container_of(work, struct io_kiocb, work);
        struct io_cancel_data *cd = data;
 
-       return req->ctx == cd->ctx && req->user_data == cd->user_data;
+       if (req->ctx != cd->ctx)
+               return false;
+       if (cd->flags & IORING_ASYNC_CANCEL_ANY) {
+               ;
+       } else if (cd->flags & IORING_ASYNC_CANCEL_FD) {
+               if (req->file != cd->file)
+                       return false;
+       } else {
+               if (req->cqe.user_data != cd->data)
+                       return false;
+       }
+       if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
+               if (cd->seq == req->work.cancel_seq)
+                       return false;
+               req->work.cancel_seq = cd->seq;
+       }
+       return true;
 }
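/*
 * Matching rules implemented above: IORING_ASYNC_CANCEL_ANY matches any
 * request on the same ring, IORING_ASYNC_CANCEL_FD matches on the target
 * file, and the default is an exact user_data match. For ALL/ANY, the
 * per-request cancel_seq stamp keeps one cancel attempt from matching
 * the same request twice.
 */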
 
-static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
-                              struct io_ring_ctx *ctx)
+static int io_async_cancel_one(struct io_uring_task *tctx,
+                              struct io_cancel_data *cd)
 {
-       struct io_cancel_data data = { .ctx = ctx, .user_data = user_data, };
        enum io_wq_cancel cancel_ret;
        int ret = 0;
+       bool all;
 
        if (!tctx || !tctx->io_wq)
                return -ENOENT;
 
-       cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, &data, false);
+       all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
+       cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
        switch (cancel_ret) {
        case IO_WQ_CANCEL_OK:
                ret = 0;
@@ -6871,14 +7855,14 @@ static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
        return ret;
 }
 
-static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
+static int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd)
 {
        struct io_ring_ctx *ctx = req->ctx;
        int ret;
 
        WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
 
-       ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
+       ret = io_async_cancel_one(req->task->io_uring, cd);
        /*
         * Fall-through even for -EALREADY, as we may have poll armed
         * that need unarming.
@@ -6887,56 +7871,98 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
                return 0;
 
        spin_lock(&ctx->completion_lock);
-       ret = io_poll_cancel(ctx, sqe_addr, false);
+       ret = io_poll_cancel(ctx, cd);
        if (ret != -ENOENT)
                goto out;
-
-       spin_lock_irq(&ctx->timeout_lock);
-       ret = io_timeout_cancel(ctx, sqe_addr);
-       spin_unlock_irq(&ctx->timeout_lock);
+       if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
+               ret = io_timeout_cancel(ctx, cd);
 out:
        spin_unlock(&ctx->completion_lock);
        return ret;
 }
 
+#define CANCEL_FLAGS   (IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
+                        IORING_ASYNC_CANCEL_ANY)
+
 static int io_async_cancel_prep(struct io_kiocb *req,
                                const struct io_uring_sqe *sqe)
 {
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-       if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+       if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
                return -EINVAL;
-       if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags ||
-           sqe->splice_fd_in)
+       if (sqe->off || sqe->len || sqe->splice_fd_in)
                return -EINVAL;
 
        req->cancel.addr = READ_ONCE(sqe->addr);
+       req->cancel.flags = READ_ONCE(sqe->cancel_flags);
+       if (req->cancel.flags & ~CANCEL_FLAGS)
+               return -EINVAL;
+       if (req->cancel.flags & IORING_ASYNC_CANCEL_FD) {
+               if (req->cancel.flags & IORING_ASYNC_CANCEL_ANY)
+                       return -EINVAL;
+               req->cancel.fd = READ_ONCE(sqe->fd);
+       }
+
        return 0;
 }
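/*
 * A minimal userspace sketch of the new cancel interface as read by the
 * prep above (assumes a zeroed SQE and the <linux/io_uring.h> UAPI
 * definitions; the helper name is illustrative, not from liburing):
 */
static void prep_cancel_all_on_fd(struct io_uring_sqe *sqe, int fd)
{
	sqe->opcode = IORING_OP_ASYNC_CANCEL;
	sqe->fd = fd;			/* match requests on this file */
	sqe->cancel_flags = IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_ALL;
	/*
	 * sqe->addr (the user_data key) is ignored when matching by fd.
	 * With ALL set, the resulting CQE res is the number of requests
	 * canceled rather than 0/-ENOENT for a single match; FD combined
	 * with ANY is rejected as -EINVAL by the prep above.
	 */
}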
 
-static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
+static int __io_async_cancel(struct io_cancel_data *cd, struct io_kiocb *req,
+                            unsigned int issue_flags)
 {
-       struct io_ring_ctx *ctx = req->ctx;
-       u64 sqe_addr = req->cancel.addr;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
+       bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
+       struct io_ring_ctx *ctx = cd->ctx;
        struct io_tctx_node *node;
-       int ret;
+       int ret, nr = 0;
 
-       ret = io_try_cancel_userdata(req, sqe_addr);
-       if (ret != -ENOENT)
-               goto done;
+       do {
+               ret = io_try_cancel(req, cd);
+               if (ret == -ENOENT)
+                       break;
+               if (!all)
+                       return ret;
+               nr++;
+       } while (1);
 
        /* slow path, try all io-wq's */
-       io_ring_submit_lock(ctx, needs_lock);
+       io_ring_submit_lock(ctx, issue_flags);
        ret = -ENOENT;
        list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
                struct io_uring_task *tctx = node->task->io_uring;
 
-               ret = io_async_cancel_one(tctx, req->cancel.addr, ctx);
-               if (ret != -ENOENT)
-                       break;
+               ret = io_async_cancel_one(tctx, cd);
+               if (ret != -ENOENT) {
+                       if (!all)
+                               break;
+                       nr++;
+               }
+       }
+       io_ring_submit_unlock(ctx, issue_flags);
+       return all ? nr : ret;
+}
+
+static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_cancel_data cd = {
+               .ctx    = req->ctx,
+               .data   = req->cancel.addr,
+               .flags  = req->cancel.flags,
+               .seq    = atomic_inc_return(&req->ctx->cancel_seq),
+       };
+       int ret;
+
+       if (cd.flags & IORING_ASYNC_CANCEL_FD) {
+               if (req->flags & REQ_F_FIXED_FILE)
+                       req->file = io_file_get_fixed(req, req->cancel.fd,
+                                                       issue_flags);
+               else
+                       req->file = io_file_get_normal(req, req->cancel.fd);
+               if (!req->file) {
+                       ret = -EBADF;
+                       goto done;
+               }
+               cd.file = req->file;
        }
-       io_ring_submit_unlock(ctx, needs_lock);
+
+       ret = __io_async_cancel(&cd, req, issue_flags);
 done:
        if (ret < 0)
                req_set_fail(req);
@@ -6949,7 +7975,7 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
 {
        if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
                return -EINVAL;
-       if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+       if (sqe->rw_flags || sqe->splice_fd_in)
                return -EINVAL;
 
        req->rsrc_update.offset = READ_ONCE(sqe->off);
@@ -6963,7 +7989,6 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
 static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_ring_ctx *ctx = req->ctx;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
        struct io_uring_rsrc_update2 up;
        int ret;
 
@@ -6972,11 +7997,12 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
        up.nr = 0;
        up.tags = 0;
        up.resv = 0;
+       up.resv2 = 0;
 
-       io_ring_submit_lock(ctx, needs_lock);
+       io_ring_submit_lock(ctx, issue_flags);
        ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
                                        &up, req->rsrc_update.nr_args);
-       io_ring_submit_unlock(ctx, needs_lock);
+       io_ring_submit_unlock(ctx, issue_flags);
 
        if (ret < 0)
                req_set_fail(req);
@@ -6988,15 +8014,14 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        switch (req->opcode) {
        case IORING_OP_NOP:
-               return 0;
+               return io_nop_prep(req, sqe);
        case IORING_OP_READV:
        case IORING_OP_READ_FIXED:
        case IORING_OP_READ:
-               return io_read_prep(req, sqe);
        case IORING_OP_WRITEV:
        case IORING_OP_WRITE_FIXED:
        case IORING_OP_WRITE:
-               return io_write_prep(req, sqe);
+               return io_prep_rw(req, sqe);
        case IORING_OP_POLL_ADD:
                return io_poll_add_prep(req, sqe);
        case IORING_OP_POLL_REMOVE:
@@ -7063,6 +8088,18 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                return io_linkat_prep(req, sqe);
        case IORING_OP_MSG_RING:
                return io_msg_ring_prep(req, sqe);
+       case IORING_OP_FSETXATTR:
+               return io_fsetxattr_prep(req, sqe);
+       case IORING_OP_SETXATTR:
+               return io_setxattr_prep(req, sqe);
+       case IORING_OP_FGETXATTR:
+               return io_fgetxattr_prep(req, sqe);
+       case IORING_OP_GETXATTR:
+               return io_getxattr_prep(req, sqe);
+       case IORING_OP_SOCKET:
+               return io_socket_prep(req, sqe);
+       case IORING_OP_URING_CMD:
+               return io_uring_cmd_prep(req, sqe);
        }
 
        printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -7072,7 +8109,12 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 static int io_req_prep_async(struct io_kiocb *req)
 {
-       if (!io_op_defs[req->opcode].needs_async_setup)
+       const struct io_op_def *def = &io_op_defs[req->opcode];
+
+       /* assign early for deferred execution of non-fixed-file requests */
+       if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE))
+               req->file = io_file_get_normal(req, req->cqe.fd);
+       if (!def->needs_async_setup)
                return 0;
        if (WARN_ON_ONCE(req_has_async_data(req)))
                return -EFAULT;
@@ -7090,6 +8132,8 @@ static int io_req_prep_async(struct io_kiocb *req)
                return io_recvmsg_prep_async(req);
        case IORING_OP_CONNECT:
                return io_connect_prep_async(req);
+       case IORING_OP_URING_CMD:
+               return io_uring_cmd_prep_async(req);
        }
        printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
                    req->opcode);
@@ -7099,9 +8143,10 @@ static int io_req_prep_async(struct io_kiocb *req)
 static u32 io_get_sequence(struct io_kiocb *req)
 {
        u32 seq = req->ctx->cached_sq_head;
+       struct io_kiocb *cur;
 
        /* need original cached_sq_head, but it was increased for each req */
-       io_for_each_link(req, req)
+       io_for_each_link(cur, req)
                seq--;
        return seq;
 }
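/*
 * The walk above now uses a dedicated cursor: the old
 * io_for_each_link(req, req) advanced @req itself while counting links,
 * so the request pointer is no longer clobbered by the traversal.
 */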
@@ -7144,7 +8189,7 @@ fail:
                goto queue;
        }
 
-       trace_io_uring_defer(ctx, req, req->user_data, req->opcode);
+       trace_io_uring_defer(ctx, req, req->cqe.user_data, req->opcode);
        de->req = req;
        de->seq = seq;
        list_add_tail(&de->list, &ctx->defer_list);
@@ -7179,11 +8224,6 @@ static void io_clean_op(struct io_kiocb *req)
                        kfree(io->free_iov);
                        break;
                        }
-               case IORING_OP_SPLICE:
-               case IORING_OP_TEE:
-                       if (!(req->splice.flags & SPLICE_F_FD_IN_FIXED))
-                               io_put_file(req->splice.file_in);
-                       break;
                case IORING_OP_OPENAT:
                case IORING_OP_OPENAT2:
                        if (req->open.filename)
@@ -7211,6 +8251,12 @@ static void io_clean_op(struct io_kiocb *req)
                        if (req->statx.filename)
                                putname(req->statx.filename);
                        break;
+               case IORING_OP_SETXATTR:
+               case IORING_OP_FSETXATTR:
+               case IORING_OP_GETXATTR:
+               case IORING_OP_FGETXATTR:
+                       __io_xattr_finish(req);
+                       break;
                }
        }
        if ((req->flags & REQ_F_POLLED) && req->apoll) {
@@ -7218,11 +8264,6 @@ static void io_clean_op(struct io_kiocb *req)
                kfree(req->apoll);
                req->apoll = NULL;
        }
-       if (req->flags & REQ_F_INFLIGHT) {
-               struct io_uring_task *tctx = req->task->io_uring;
-
-               atomic_dec(&tctx->inflight_tracked);
-       }
        if (req->flags & REQ_F_CREDS)
                put_cred(req->creds);
        if (req->flags & REQ_F_ASYNC_DATA) {
@@ -7232,11 +8273,27 @@ static void io_clean_op(struct io_kiocb *req)
        req->flags &= ~IO_REQ_CLEAN_FLAGS;
 }
 
+static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags)
+{
+       if (req->file || !io_op_defs[req->opcode].needs_file)
+               return true;
+
+       if (req->flags & REQ_F_FIXED_FILE)
+               req->file = io_file_get_fixed(req, req->cqe.fd, issue_flags);
+       else
+               req->file = io_file_get_normal(req, req->cqe.fd);
+
+       return !!req->file;
+}
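/*
 * File assignment is deferred from SQE init to issue time: io_init_req()
 * only stashes the fd in req->cqe.fd, and it is resolved here (or early
 * in io_req_prep_async() for deferred non-fixed files) just before the
 * request runs.
 */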
+
 static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 {
        const struct cred *creds = NULL;
        int ret;
 
+       if (unlikely(!io_assign_file(req, issue_flags)))
+               return -EBADF;
+
        if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred()))
                creds = override_creds(req->creds);
 
@@ -7356,6 +8413,24 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
        case IORING_OP_MSG_RING:
                ret = io_msg_ring(req, issue_flags);
                break;
+       case IORING_OP_FSETXATTR:
+               ret = io_fsetxattr(req, issue_flags);
+               break;
+       case IORING_OP_SETXATTR:
+               ret = io_setxattr(req, issue_flags);
+               break;
+       case IORING_OP_FGETXATTR:
+               ret = io_fgetxattr(req, issue_flags);
+               break;
+       case IORING_OP_GETXATTR:
+               ret = io_getxattr(req, issue_flags);
+               break;
+       case IORING_OP_SOCKET:
+               ret = io_socket(req, issue_flags);
+               break;
+       case IORING_OP_URING_CMD:
+               ret = io_uring_cmd(req, issue_flags);
+               break;
        default:
                ret = -EINVAL;
                break;
@@ -7386,10 +8461,10 @@ static struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
 static void io_wq_submit_work(struct io_wq_work *work)
 {
        struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+       const struct io_op_def *def = &io_op_defs[req->opcode];
        unsigned int issue_flags = IO_URING_F_UNLOCKED;
        bool needs_poll = false;
-       struct io_kiocb *timeout;
-       int ret = 0;
+       int ret = 0, err = -ECANCELED;
 
        /* one will be dropped by ->io_free_work() after returning to io-wq */
        if (!(req->flags & REQ_F_REFCOUNT))
@@ -7397,18 +8472,21 @@ static void io_wq_submit_work(struct io_wq_work *work)
        else
                req_ref_get(req);
 
-       timeout = io_prep_linked_timeout(req);
-       if (timeout)
-               io_queue_linked_timeout(timeout);
+       io_arm_ltimeout(req);
 
        /* either cancelled or io-wq is dying, so don't touch tctx->iowq */
        if (work->flags & IO_WQ_WORK_CANCEL) {
-               io_req_task_queue_fail(req, -ECANCELED);
+fail:
+               io_req_task_queue_fail(req, err);
                return;
        }
+       if (!io_assign_file(req, issue_flags)) {
+               err = -EBADF;
+               work->flags |= IO_WQ_WORK_CANCEL;
+               goto fail;
+       }
 
        if (req->flags & REQ_F_FORCE_ASYNC) {
-               const struct io_op_def *def = &io_op_defs[req->opcode];
                bool opcode_poll = def->pollin || def->pollout;
 
                if (opcode_poll && file_can_poll(req->file)) {
@@ -7427,6 +8505,8 @@ static void io_wq_submit_work(struct io_wq_work *work)
                 * wait for request slots on the block side.
                 */
                if (!needs_poll) {
+                       if (!(req->ctx->flags & IORING_SETUP_IOPOLL))
+                               break;
                        cond_resched();
                        continue;
                }
@@ -7465,54 +8545,69 @@ static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file
        file_slot->file_ptr = file_ptr;
 }
 
-static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx,
-                                            struct io_kiocb *req, int fd)
+static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
+                                            unsigned int issue_flags)
 {
-       struct file *file;
+       struct io_ring_ctx *ctx = req->ctx;
+       struct file *file = NULL;
        unsigned long file_ptr;
 
+       io_ring_submit_lock(ctx, issue_flags);
+
        if (unlikely((unsigned int)fd >= ctx->nr_user_files))
-               return NULL;
+               goto out;
        fd = array_index_nospec(fd, ctx->nr_user_files);
        file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
        file = (struct file *) (file_ptr & FFS_MASK);
        file_ptr &= ~FFS_MASK;
        /* mask in overlapping REQ_F and FFS bits */
        req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT);
-       io_req_set_rsrc_node(req, ctx);
+       io_req_set_rsrc_node(req, ctx, 0);
+       WARN_ON_ONCE(file && !test_bit(fd, ctx->file_table.bitmap));
+out:
+       io_ring_submit_unlock(ctx, issue_flags);
        return file;
 }
 
-static struct file *io_file_get_normal(struct io_ring_ctx *ctx,
-                                      struct io_kiocb *req, int fd)
+/*
+ * Drop the file for requeue operations. Only used if req->file is the
+ * io_uring descriptor itself.
+ */
+static void io_drop_inflight_file(struct io_kiocb *req)
+{
+       if (unlikely(req->flags & REQ_F_INFLIGHT)) {
+               fput(req->file);
+               req->file = NULL;
+               req->flags &= ~REQ_F_INFLIGHT;
+       }
+}
+
+static struct file *io_file_get_normal(struct io_kiocb *req, int fd)
 {
        struct file *file = fget(fd);
 
-       trace_io_uring_file_get(ctx, req, req->user_data, fd);
+       trace_io_uring_file_get(req->ctx, req, req->cqe.user_data, fd);
 
        /* we don't allow fixed io_uring files */
-       if (file && unlikely(file->f_op == &io_uring_fops))
-               io_req_track_inflight(req);
+       if (file && file->f_op == &io_uring_fops)
+               req->flags |= REQ_F_INFLIGHT;
        return file;
 }
 
-static inline struct file *io_file_get(struct io_ring_ctx *ctx,
-                                      struct io_kiocb *req, int fd, bool fixed)
-{
-       if (fixed)
-               return io_file_get_fixed(ctx, req, fd);
-       else
-               return io_file_get_normal(ctx, req, fd);
-}
-
 static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
 {
        struct io_kiocb *prev = req->timeout.prev;
        int ret = -ENOENT;
 
        if (prev) {
-               if (!(req->task->flags & PF_EXITING))
-                       ret = io_try_cancel_userdata(req, prev->user_data);
+               if (!(req->task->flags & PF_EXITING)) {
+                       struct io_cancel_data cd = {
+                               .ctx            = req->ctx,
+                               .data           = prev->cqe.user_data,
+                       };
+
+                       ret = io_try_cancel(req, &cd);
+               }
                io_req_complete_post(req, ret ?: -ETIME, 0);
                io_put_req(prev);
        } else {
@@ -7546,7 +8641,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
        spin_unlock_irqrestore(&ctx->timeout_lock, flags);
 
        req->io_task_work.func = io_req_task_link_timeout;
-       io_req_task_work_add(req, false);
+       io_req_task_work_add(req);
        return HRTIMER_NORESTART;
 }
 
@@ -7572,10 +8667,17 @@ static void io_queue_linked_timeout(struct io_kiocb *req)
        io_put_req(req);
 }
 
-static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
+static void io_queue_async(struct io_kiocb *req, int ret)
        __must_hold(&req->ctx->uring_lock)
 {
-       struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
+       struct io_kiocb *linked_timeout;
+
+       if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) {
+               io_req_complete_failed(req, ret);
+               return;
+       }
+
+       linked_timeout = io_prep_linked_timeout(req);
 
        switch (io_arm_poll_handler(req, 0)) {
        case IO_APOLL_READY:
@@ -7586,7 +8688,7 @@ static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
                 * Queued up for async execution, worker will release
                 * submit reference when the iocb is actually submitted.
                 */
-               io_queue_async_work(req, NULL);
+               io_queue_iowq(req, NULL);
                break;
        case IO_APOLL_OK:
                break;
@@ -7596,10 +8698,9 @@ static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
                io_queue_linked_timeout(linked_timeout);
 }
 
-static inline void __io_queue_sqe(struct io_kiocb *req)
+static inline void io_queue_sqe(struct io_kiocb *req)
        __must_hold(&req->ctx->uring_lock)
 {
-       struct io_kiocb *linked_timeout;
        int ret;
 
        ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
@@ -7612,22 +8713,23 @@ static inline void __io_queue_sqe(struct io_kiocb *req)
         * We async punt it if the file wasn't marked NOWAIT, or if the file
         * doesn't support non-blocking read/write attempts
         */
-       if (likely(!ret)) {
-               linked_timeout = io_prep_linked_timeout(req);
-               if (linked_timeout)
-                       io_queue_linked_timeout(linked_timeout);
-       } else if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
-               io_queue_sqe_arm_apoll(req);
-       } else {
-               io_req_complete_failed(req, ret);
-       }
+       if (likely(!ret))
+               io_arm_ltimeout(req);
+       else
+               io_queue_async(req, ret);
 }
 
 static void io_queue_sqe_fallback(struct io_kiocb *req)
        __must_hold(&req->ctx->uring_lock)
 {
-       if (req->flags & REQ_F_FAIL) {
-               io_req_complete_fail_submit(req);
+       if (unlikely(req->flags & REQ_F_FAIL)) {
+               /*
+                * We don't submit; fail them all. For that, replace hardlinks
+                * with normal links. An extra REQ_F_LINK is tolerated.
+                */
+               req->flags &= ~REQ_F_HARDLINK;
+               req->flags |= REQ_F_LINK;
+               io_req_complete_failed(req, req->cqe.res);
        } else if (unlikely(req->ctx->drain_active)) {
                io_drain_req(req);
        } else {
@@ -7636,19 +8738,10 @@ static void io_queue_sqe_fallback(struct io_kiocb *req)
                if (unlikely(ret))
                        io_req_complete_failed(req, ret);
                else
-                       io_queue_async_work(req, NULL);
+                       io_queue_iowq(req, NULL);
        }
 }
 
-static inline void io_queue_sqe(struct io_kiocb *req)
-       __must_hold(&req->ctx->uring_lock)
-{
-       if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL))))
-               __io_queue_sqe(req);
-       else
-               io_queue_sqe_fallback(req);
-}
-
 /*
  * Check SQE restrictions (opcode and flags).
  *
@@ -7703,9 +8796,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
        req->opcode = opcode = READ_ONCE(sqe->opcode);
        /* same numerical values with corresponding REQ_F_*, safe to copy */
        req->flags = sqe_flags = READ_ONCE(sqe->flags);
-       req->user_data = READ_ONCE(sqe->user_data);
+       req->cqe.user_data = READ_ONCE(sqe->user_data);
        req->file = NULL;
-       req->fixed_rsrc_refs = NULL;
+       req->rsrc_node = NULL;
        req->task = current;
 
        if (unlikely(opcode >= IORING_OP_LAST)) {
@@ -7716,9 +8809,11 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
                /* enforce forwards compatibility on users */
                if (sqe_flags & ~SQE_VALID_FLAGS)
                        return -EINVAL;
-               if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
-                   !io_op_defs[opcode].buffer_select)
-                       return -EOPNOTSUPP;
+               if (sqe_flags & IOSQE_BUFFER_SELECT) {
+                       if (!io_op_defs[opcode].buffer_select)
+                               return -EOPNOTSUPP;
+                       req->buf_index = READ_ONCE(sqe->buf_group);
+               }
                if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS)
                        ctx->drain_disabled = true;
                if (sqe_flags & IOSQE_IO_DRAIN) {
@@ -7741,9 +8836,16 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
                }
        }
 
+       if (!io_op_defs[opcode].ioprio && sqe->ioprio)
+               return -EINVAL;
+       if (!io_op_defs[opcode].iopoll && (ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+
        if (io_op_defs[opcode].needs_file) {
                struct io_submit_state *state = &ctx->submit_state;
 
+               req->cqe.fd = READ_ONCE(sqe->fd);
+
                /*
                 * Plug now if we have more than 2 IO left after this, and the
                 * target is potentially a read/write to block based storage.
@@ -7753,11 +8855,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
                        state->need_plug = false;
                        blk_start_plug_nr_ios(&state->plug, state->submit_nr);
                }
-
-               req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd),
-                                       (sqe_flags & IOSQE_FIXED_FILE));
-               if (unlikely(!req->file))
-                       return -EBADF;
        }
 
        personality = READ_ONCE(sqe->personality);
@@ -7779,7 +8876,44 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
        return io_req_prep(req, sqe);
 }
 
-static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
+                                     struct io_kiocb *req, int ret)
+{
+       struct io_ring_ctx *ctx = req->ctx;
+       struct io_submit_link *link = &ctx->submit_state.link;
+       struct io_kiocb *head = link->head;
+
+       trace_io_uring_req_failed(sqe, ctx, req, ret);
+
+       /*
+        * Avoid breaking links in the middle as it renders links with SQPOLL
+        * unusable. Instead of failing eagerly, continue assembling the link if
+        * applicable and mark the head with REQ_F_FAIL. The link flushing code
+        * should find the flag and handle the rest.
+        */
+       req_fail_link_node(req, ret);
+       if (head && !(head->flags & REQ_F_FAIL))
+               req_fail_link_node(head, -ECANCELED);
+
+       if (!(req->flags & IO_REQ_LINK_FLAGS)) {
+               if (head) {
+                       link->last->link = req;
+                       link->head = NULL;
+                       req = head;
+               }
+               io_queue_sqe_fallback(req);
+               return ret;
+       }
+
+       if (head)
+               link->last->link = req;
+       else
+               link->head = req;
+       link->last = req;
+       return 0;
+}
+
+static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
                         const struct io_uring_sqe *sqe)
        __must_hold(&ctx->uring_lock)
 {
@@ -7787,35 +8921,11 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
        int ret;
 
        ret = io_init_req(ctx, req, sqe);
-       if (unlikely(ret)) {
-               trace_io_uring_req_failed(sqe, ctx, req, ret);
-
-               /* fail even hard links since we don't submit */
-               if (link->head) {
-                       /*
-                        * we can judge a link req is failed or cancelled by if
-                        * REQ_F_FAIL is set, but the head is an exception since
-                        * it may be set REQ_F_FAIL because of other req's failure
-                        * so let's leverage req->result to distinguish if a head
-                        * is set REQ_F_FAIL because of its failure or other req's
-                        * failure so that we can set the correct ret code for it.
-                        * init result here to avoid affecting the normal path.
-                        */
-                       if (!(link->head->flags & REQ_F_FAIL))
-                               req_fail_link_node(link->head, -ECANCELED);
-               } else if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
-                       /*
-                        * the current req is a normal req, we should return
-                        * error and thus break the submittion loop.
-                        */
-                       io_req_complete_failed(req, ret);
-                       return ret;
-               }
-               req_fail_link_node(req, ret);
-       }
+       if (unlikely(ret))
+               return io_submit_fail_init(sqe, req, ret);
 
        /* don't need @sqe from now on */
-       trace_io_uring_submit_sqe(ctx, req, req->user_data, req->opcode,
+       trace_io_uring_submit_sqe(ctx, req, req->cqe.user_data, req->opcode,
                                  req->flags, true,
                                  ctx->flags & IORING_SETUP_SQPOLL);
 
@@ -7826,29 +8936,32 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
         * submitted sync once the chain is complete. If none of those
         * conditions are true (normal request), then just queue it.
         */
-       if (link->head) {
-               struct io_kiocb *head = link->head;
-
-               if (!(req->flags & REQ_F_FAIL)) {
-                       ret = io_req_prep_async(req);
-                       if (unlikely(ret)) {
-                               req_fail_link_node(req, ret);
-                               if (!(head->flags & REQ_F_FAIL))
-                                       req_fail_link_node(head, -ECANCELED);
-                       }
-               }
-               trace_io_uring_link(ctx, req, head);
+       if (unlikely(link->head)) {
+               ret = io_req_prep_async(req);
+               if (unlikely(ret))
+                       return io_submit_fail_init(sqe, req, ret);
+
+               trace_io_uring_link(ctx, req, link->head);
                link->last->link = req;
                link->last = req;
 
-               if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK))
+               if (req->flags & IO_REQ_LINK_FLAGS)
                        return 0;
-               /* last request of a link, enqueue the link */
+               /* last request of the link, flush it */
+               req = link->head;
                link->head = NULL;
-               req = head;
-       } else if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
-               link->head = req;
-               link->last = req;
+               if (req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL))
+                       goto fallback;
+
+       } else if (unlikely(req->flags & (IO_REQ_LINK_FLAGS |
+                                         REQ_F_FORCE_ASYNC | REQ_F_FAIL))) {
+               if (req->flags & IO_REQ_LINK_FLAGS) {
+                       link->head = req;
+                       link->last = req;
+               } else {
+fallback:
+                       io_queue_sqe_fallback(req);
+               }
                return 0;
        }
 
@@ -7863,8 +8976,8 @@ static void io_submit_state_end(struct io_ring_ctx *ctx)
 {
        struct io_submit_state *state = &ctx->submit_state;
 
-       if (state->link.head)
-               io_queue_sqe(state->link.head);
+       if (unlikely(state->link.head))
+               io_queue_sqe_fallback(state->link.head);
        /* flush only after queuing links as they can generate completions */
        io_submit_flush_completions(ctx);
        if (state->plug_started)
@@ -7918,8 +9031,12 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
         *    though the application is the one updating it.
         */
        head = READ_ONCE(ctx->sq_array[sq_idx]);
-       if (likely(head < ctx->sq_entries))
+       if (likely(head < ctx->sq_entries)) {
+               /* double index for 128-byte SQEs, twice as long */
+               if (ctx->flags & IORING_SETUP_SQE128)
+                       head <<= 1;
                return &ctx->sq_sqes[head];
+       }
 
        /* drop invalid entries */
        ctx->cq_extra--;
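/*
 * With IORING_SETUP_SQE128 each submission entry occupies two 64-byte
 * struct io_uring_sqe slots, hence the index doubling above; userspace
 * has to mirror the same "head << 1" math when it fills big SQEs.
 */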
@@ -7932,54 +9049,52 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
        __must_hold(&ctx->uring_lock)
 {
        unsigned int entries = io_sqring_entries(ctx);
-       int submitted = 0;
+       unsigned int left;
+       int ret;
 
        if (unlikely(!entries))
                return 0;
        /* make sure SQ entry isn't read before tail */
-       nr = min3(nr, ctx->sq_entries, entries);
-       io_get_task_refs(nr);
+       ret = left = min3(nr, ctx->sq_entries, entries);
+       io_get_task_refs(left);
+       io_submit_state_start(&ctx->submit_state, left);
 
-       io_submit_state_start(&ctx->submit_state, nr);
        do {
                const struct io_uring_sqe *sqe;
                struct io_kiocb *req;
 
-               if (unlikely(!io_alloc_req_refill(ctx))) {
-                       if (!submitted)
-                               submitted = -EAGAIN;
+               if (unlikely(!io_alloc_req_refill(ctx)))
                        break;
-               }
                req = io_alloc_req(ctx);
                sqe = io_get_sqe(ctx);
                if (unlikely(!sqe)) {
-                       wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list);
+                       io_req_add_to_cache(req, ctx);
                        break;
                }
-               /* will complete beyond this point, count as submitted */
-               submitted++;
-               if (io_submit_sqe(ctx, req, sqe)) {
-                       /*
-                        * Continue submitting even for sqe failure if the
-                        * ring was setup with IORING_SETUP_SUBMIT_ALL
-                        */
-                       if (!(ctx->flags & IORING_SETUP_SUBMIT_ALL))
-                               break;
-               }
-       } while (submitted < nr);
 
-       if (unlikely(submitted != nr)) {
-               int ref_used = (submitted == -EAGAIN) ? 0 : submitted;
-               int unused = nr - ref_used;
+               /*
+                * Continue submitting even for sqe failure if the
+                * ring was set up with IORING_SETUP_SUBMIT_ALL
+                */
+               if (unlikely(io_submit_sqe(ctx, req, sqe)) &&
+                   !(ctx->flags & IORING_SETUP_SUBMIT_ALL)) {
+                       left--;
+                       break;
+               }
+       } while (--left);
 
-               current->io_uring->cached_refs += unused;
+       if (unlikely(left)) {
+               ret -= left;
+               /* try again if it submitted nothing and can't allocate a req */
+               if (!ret && io_req_cache_empty(ctx))
+                       ret = -EAGAIN;
+               current->io_uring->cached_refs += left;
        }
 
        io_submit_state_end(ctx);
         /* Commit SQ ring head once we've consumed and submitted all SQEs */
        io_commit_sqring(ctx);
-
-       return submitted;
+       return ret;
 }
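/*
 * The reworked accounting above: ret starts at the number of SQEs we
 * intend to consume, left counts what remains after an early break, and
 * leftover task refs go back to the cache; -EAGAIN is only returned when
 * nothing was submitted and no request could be allocated.
 */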
 
 static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
@@ -7987,23 +9102,6 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
        return READ_ONCE(sqd->state);
 }
 
-static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
-{
-       /* Tell userspace we may need a wakeup call */
-       spin_lock(&ctx->completion_lock);
-       WRITE_ONCE(ctx->rings->sq_flags,
-                  ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP);
-       spin_unlock(&ctx->completion_lock);
-}
-
-static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
-{
-       spin_lock(&ctx->completion_lock);
-       WRITE_ONCE(ctx->rings->sq_flags,
-                  ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP);
-       spin_unlock(&ctx->completion_lock);
-}
-
 static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 {
        unsigned int to_submit;
@@ -8032,13 +9130,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
                    !(ctx->flags & IORING_SETUP_R_DISABLED))
                        ret = io_submit_sqes(ctx, to_submit);
                mutex_unlock(&ctx->uring_lock);
-#ifdef CONFIG_NET_RX_BUSY_POLL
-               spin_lock(&ctx->napi_lock);
-               if (!list_empty(&ctx->napi_list) &&
-                   io_napi_busy_loop(&ctx->napi_list))
-                       ++ret;
-               spin_unlock(&ctx->napi_lock);
-#endif
+
                if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
                        wake_up(&ctx->sqo_sq_wait);
                if (creds)
@@ -8125,8 +9217,8 @@ static int io_sq_thread(void *data)
                        bool needs_sched = true;
 
                        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
-                               io_ring_set_wakeup_flag(ctx);
-
+                               atomic_or(IORING_SQ_NEED_WAKEUP,
+                                               &ctx->rings->sq_flags);
                                if ((ctx->flags & IORING_SETUP_IOPOLL) &&
                                    !wq_list_empty(&ctx->iopoll_list)) {
                                        needs_sched = false;
@@ -8137,7 +9229,7 @@ static int io_sq_thread(void *data)
                                 * Ensure the store of the wakeup flag is not
                                 * reordered with the load of the SQ tail
                                 */
-                               smp_mb();
+                               smp_mb__after_atomic();
 
                                if (io_sqring_entries(ctx)) {
                                        needs_sched = false;
@@ -8151,7 +9243,8 @@ static int io_sq_thread(void *data)
                                mutex_lock(&sqd->lock);
                        }
                        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
-                               io_ring_clear_wakeup_flag(ctx);
+                               atomic_andnot(IORING_SQ_NEED_WAKEUP,
+                                               &ctx->rings->sq_flags);
                }
 
                finish_wait(&sqd->wait, &wait);
@@ -8161,7 +9254,7 @@ static int io_sq_thread(void *data)
        io_uring_cancel_generic(true, sqd);
        sqd->thread = NULL;
        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
-               io_ring_set_wakeup_flag(ctx);
+               atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags);
        io_run_task_work();
        mutex_unlock(&sqd->lock);
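/*
 * Userspace counterpart of the wakeup protocol, as a minimal sketch
 * (assumes <unistd.h>, <sys/syscall.h>, <linux/io_uring.h>, and a
 * sq_flags pointer into the mmap'ed SQ ring; helper name illustrative):
 */
static void sq_submit_sqpoll(int ring_fd, unsigned int to_submit,
			     const unsigned int *sq_flags)
{
	/*
	 * Order the SQ tail store before loading the flag; pairs with the
	 * kernel's smp_mb__after_atomic() above.
	 */
	__atomic_thread_fence(__ATOMIC_SEQ_CST);
	if (__atomic_load_n(sq_flags, __ATOMIC_RELAXED) & IORING_SQ_NEED_WAKEUP)
		syscall(__NR_io_uring_enter, ring_fd, to_submit, 0u,
			IORING_ENTER_SQ_WAKEUP, NULL, 0);
}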
 
@@ -8176,9 +9269,6 @@ struct io_wait_queue {
        struct io_ring_ctx *ctx;
        unsigned cq_tail;
        unsigned nr_timeouts;
-#ifdef CONFIG_NET_RX_BUSY_POLL
-       unsigned busy_poll_to;
-#endif
 };
 
 static inline bool io_should_wake(struct io_wait_queue *iowq)
@@ -8204,7 +9294,8 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
         * Cannot safely flush overflowed CQEs from here, ensure we wake up
         * the task, and the next invocation will do it.
         */
-       if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->check_cq_overflow))
+       if (io_should_wake(iowq) ||
+           test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &iowq->ctx->check_cq))
                return autoremove_wake_function(curr, mode, wake_flags, key);
        return -1;
 }
@@ -8226,101 +9317,23 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
                                          ktime_t timeout)
 {
        int ret;
+       unsigned long check_cq;
 
        /* make sure we run task_work before checking for signals */
        ret = io_run_task_work_sig();
        if (ret || io_should_wake(iowq))
                return ret;
+       check_cq = READ_ONCE(ctx->check_cq);
        /* let the caller flush overflows, retry */
-       if (test_bit(0, &ctx->check_cq_overflow))
+       if (check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT))
                return 1;
-
+       if (unlikely(check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT)))
+               return -EBADR;
        if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS))
                return -ETIME;
        return 1;
 }
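/*
 * ctx->check_cq replaces the old single-purpose check_cq_overflow word:
 * IO_CHECK_CQ_OVERFLOW_BIT tells the caller to flush the overflow list
 * and retry, while IO_CHECK_CQ_DROPPED_BIT reports CQEs that were lost
 * for good as -EBADR.
 */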
 
-#ifdef CONFIG_NET_RX_BUSY_POLL
-static void io_adjust_busy_loop_timeout(struct timespec64 *ts,
-                                       struct io_wait_queue *iowq)
-{
-       unsigned busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
-       struct timespec64 pollto = ns_to_timespec64(1000 * (s64)busy_poll_to);
-
-       if (timespec64_compare(ts, &pollto) > 0) {
-               *ts = timespec64_sub(*ts, pollto);
-               iowq->busy_poll_to = busy_poll_to;
-       } else {
-               u64 to = timespec64_to_ns(ts);
-
-               do_div(to, 1000);
-               iowq->busy_poll_to = to;
-               ts->tv_sec = 0;
-               ts->tv_nsec = 0;
-       }
-}
-
-static inline bool io_busy_loop_timeout(unsigned long start_time,
-                                       unsigned long bp_usec)
-{
-       if (bp_usec) {
-               unsigned long end_time = start_time + bp_usec;
-               unsigned long now = busy_loop_current_time();
-
-               return time_after(now, end_time);
-       }
-       return true;
-}
-
-static bool io_busy_loop_end(void *p, unsigned long start_time)
-{
-       struct io_wait_queue *iowq = p;
-
-       return signal_pending(current) ||
-              io_should_wake(iowq) ||
-              io_busy_loop_timeout(start_time, iowq->busy_poll_to);
-}
-
-static void io_blocking_napi_busy_loop(struct list_head *napi_list,
-                                      struct io_wait_queue *iowq)
-{
-       unsigned long start_time =
-               list_is_singular(napi_list) ? 0 :
-               busy_loop_current_time();
-
-       do {
-               if (list_is_singular(napi_list)) {
-                       struct napi_entry *ne =
-                               list_first_entry(napi_list,
-                                                struct napi_entry, list);
-
-                       napi_busy_loop(ne->napi_id, io_busy_loop_end, iowq,
-                                      true, BUSY_POLL_BUDGET);
-                       io_check_napi_entry_timeout(ne);
-                       break;
-               }
-       } while (io_napi_busy_loop(napi_list) &&
-                !io_busy_loop_end(iowq, start_time));
-}
-
-static void io_putback_napi_list(struct io_ring_ctx *ctx,
-                                struct list_head *napi_list)
-{
-       struct napi_entry *cne, *lne;
-
-       spin_lock(&ctx->napi_lock);
-       list_for_each_entry(cne, &ctx->napi_list, list)
-               list_for_each_entry(lne, napi_list, list)
-                       if (cne->napi_id == lne->napi_id) {
-                               list_del(&lne->list);
-                               kfree(lne);
-                               break;
-                       }
-       list_splice(napi_list, &ctx->napi_list);
-       spin_unlock(&ctx->napi_lock);
-}
-#endif /* CONFIG_NET_RX_BUSY_POLL */
-
 /*
  * Wait until events become available, if we don't already have some. The
  * application must reap them itself, as they reside on the shared cq ring.
@@ -8333,9 +9346,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
        struct io_rings *rings = ctx->rings;
        ktime_t timeout = KTIME_MAX;
        int ret;
-#ifdef CONFIG_NET_RX_BUSY_POLL
-       LIST_HEAD(local_napi_list);
-#endif
 
        do {
                io_cqring_overflow_flush(ctx);
@@ -8358,29 +9368,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                        return ret;
        }
 
-#ifdef CONFIG_NET_RX_BUSY_POLL
-       iowq.busy_poll_to = 0;
-       if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
-               spin_lock(&ctx->napi_lock);
-               list_splice_init(&ctx->napi_list, &local_napi_list);
-               spin_unlock(&ctx->napi_lock);
-       }
-#endif
        if (uts) {
                struct timespec64 ts;
 
                if (get_timespec64(&ts, uts))
                        return -EFAULT;
-#ifdef CONFIG_NET_RX_BUSY_POLL
-               if (!list_empty(&local_napi_list))
-                       io_adjust_busy_loop_timeout(&ts, &iowq);
-#endif
                timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
        }
-#ifdef CONFIG_NET_RX_BUSY_POLL
-       else if (!list_empty(&local_napi_list))
-               iowq.busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
-#endif
 
        init_waitqueue_func_entry(&iowq.wq, io_wake_function);
        iowq.wq.private = current;
@@ -8390,12 +9384,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
        iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
 
        trace_io_uring_cqring_wait(ctx, min_events);
-#ifdef CONFIG_NET_RX_BUSY_POLL
-       if (iowq.busy_poll_to)
-               io_blocking_napi_busy_loop(&local_napi_list, &iowq);
-       if (!list_empty(&local_napi_list))
-               io_putback_napi_list(ctx, &local_napi_list);
-#endif
        do {
                /* if we can't even flush overflow, don't wait for more */
                if (!io_cqring_overflow_flush(ctx)) {
@@ -8405,10 +9393,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
                                                TASK_INTERRUPTIBLE);
                ret = io_cqring_wait_schedule(ctx, &iowq, timeout);
-               finish_wait(&ctx->cq_wait, &iowq.wq);
                cond_resched();
        } while (ret > 0);
 
+       finish_wait(&ctx->cq_wait, &iowq.wq);
        restore_saved_sigmask_unless(ret == -EINTR);
 
        return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
@@ -8646,17 +9634,57 @@ static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
 {
        table->files = kvcalloc(nr_files, sizeof(table->files[0]),
                                GFP_KERNEL_ACCOUNT);
-       return !!table->files;
+       if (unlikely(!table->files))
+               return false;
+
+       table->bitmap = bitmap_zalloc(nr_files, GFP_KERNEL_ACCOUNT);
+       if (unlikely(!table->bitmap)) {
+               kvfree(table->files);
+               return false;
+       }
+
+       return true;
 }
 
 static void io_free_file_tables(struct io_file_table *table)
 {
        kvfree(table->files);
+       bitmap_free(table->bitmap);
        table->files = NULL;
+       table->bitmap = NULL;
+}
+
+static inline void io_file_bitmap_set(struct io_file_table *table, int bit)
+{
+       WARN_ON_ONCE(test_bit(bit, table->bitmap));
+       __set_bit(bit, table->bitmap);
+       if (bit == table->alloc_hint)
+               table->alloc_hint++;
+}
+
+static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
+{
+       __clear_bit(bit, table->bitmap);
+       table->alloc_hint = bit;
 }
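/*
 * The new bitmap tracks which fixed-file slots are occupied, and
 * alloc_hint remembers where to start searching for a free one; this
 * supports requests that let the kernel allocate a free direct
 * descriptor slot instead of naming a fixed index.
 */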
 
 static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
+#if !defined(IO_URING_SCM_ALL)
+       int i;
+
+       for (i = 0; i < ctx->nr_user_files; i++) {
+               struct file *file = io_file_from_index(ctx, i);
+
+               if (!file)
+                       continue;
+               if (io_fixed_file_slot(&ctx->file_table, i)->file_ptr & FFS_SCM)
+                       continue;
+               io_file_bitmap_clear(&ctx->file_table, i);
+               fput(file);
+       }
+#endif
+
 #if defined(CONFIG_UNIX)
        if (ctx->ring_sock) {
                struct sock *sock = ctx->ring_sock->sk;
@@ -8665,16 +9693,6 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
                while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
                        kfree_skb(skb);
        }
-#else
-       int i;
-
-       for (i = 0; i < ctx->nr_user_files; i++) {
-               struct file *file;
-
-               file = io_file_from_index(ctx, i);
-               if (file)
-                       fput(file);
-       }
 #endif
        io_free_file_tables(&ctx->file_table);
        io_rsrc_data_free(ctx->file_data);
@@ -8819,103 +9837,66 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
        return sqd;
 }
 
-#if defined(CONFIG_UNIX)
 /*
  * Ensure the UNIX gc is aware of our file set, so we are certain that
  * the io_uring can be safely unregistered on process exit, even if we have
- * loops in the file referencing.
+ * loops in the file referencing. We account only files that can hold other
+ * files because otherwise they can't form a loop and so are not interesting
+ * for GC.
  */
-static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
+static int io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
 {
+#if defined(CONFIG_UNIX)
        struct sock *sk = ctx->ring_sock->sk;
+       struct sk_buff_head *head = &sk->sk_receive_queue;
        struct scm_fp_list *fpl;
        struct sk_buff *skb;
-       int i, nr_files;
-
-       fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
-       if (!fpl)
-               return -ENOMEM;
 
-       skb = alloc_skb(0, GFP_KERNEL);
-       if (!skb) {
-               kfree(fpl);
-               return -ENOMEM;
-       }
+       if (likely(!io_file_need_scm(file)))
+               return 0;
 
-       skb->sk = sk;
+       /*
+        * See if we can merge this file into an existing skb SCM_RIGHTS
+        * file set. If there's no room, fall back to allocating a new skb
+        * and filling it in.
+        */
+       spin_lock_irq(&head->lock);
+       skb = skb_peek(head);
+       if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD)
+               __skb_unlink(skb, head);
+       else
+               skb = NULL;
+       spin_unlock_irq(&head->lock);
 
-       nr_files = 0;
-       fpl->user = get_uid(current_user());
-       for (i = 0; i < nr; i++) {
-               struct file *file = io_file_from_index(ctx, i + offset);
+       if (!skb) {
+               fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
+               if (!fpl)
+                       return -ENOMEM;
 
-               if (!file)
-                       continue;
-               fpl->fp[nr_files] = get_file(file);
-               unix_inflight(fpl->user, fpl->fp[nr_files]);
-               nr_files++;
-       }
+               skb = alloc_skb(0, GFP_KERNEL);
+               if (!skb) {
+                       kfree(fpl);
+                       return -ENOMEM;
+               }
 
-       if (nr_files) {
+               fpl->user = get_uid(current_user());
                fpl->max = SCM_MAX_FD;
-               fpl->count = nr_files;
+               fpl->count = 0;
+
                UNIXCB(skb).fp = fpl;
+               skb->sk = sk;
                skb->destructor = unix_destruct_scm;
                refcount_add(skb->truesize, &sk->sk_wmem_alloc);
-               skb_queue_head(&sk->sk_receive_queue, skb);
-
-               for (i = 0; i < nr_files; i++)
-                       fput(fpl->fp[i]);
-       } else {
-               kfree_skb(skb);
-               free_uid(fpl->user);
-               kfree(fpl);
-       }
-
-       return 0;
-}
-
-/*
- * If UNIX sockets are enabled, fd passing can cause a reference cycle which
- * causes regular reference counting to break down. We rely on the UNIX
- * garbage collection to take care of this problem for us.
- */
-static int io_sqe_files_scm(struct io_ring_ctx *ctx)
-{
-       unsigned left, total;
-       int ret = 0;
-
-       total = 0;
-       left = ctx->nr_user_files;
-       while (left) {
-               unsigned this_files = min_t(unsigned, left, SCM_MAX_FD);
-
-               ret = __io_sqe_files_scm(ctx, this_files, total);
-               if (ret)
-                       break;
-               left -= this_files;
-               total += this_files;
-       }
-
-       if (!ret)
-               return 0;
-
-       while (total < ctx->nr_user_files) {
-               struct file *file = io_file_from_index(ctx, total);
-
-               if (file)
-                       fput(file);
-               total++;
        }
-
-       return ret;
-}
-#else
-static int io_sqe_files_scm(struct io_ring_ctx *ctx)
-{
+
+       fpl = UNIXCB(skb).fp;
+       fpl->fp[fpl->count++] = get_file(file);
+       unix_inflight(fpl->user, file);
+       skb_queue_head(head, skb);
+       fput(file);
+#endif
        return 0;
 }
-#endif
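The accounting above exists because the io_uring fd itself can be passed over AF_UNIX sockets with SCM_RIGHTS, which can create reference cycles that only the UNIX garbage collector can break. For readers unfamiliar with the mechanism, a minimal illustrative userspace sketch of SCM_RIGHTS fd passing (standard sockets API; send_fd() is a hypothetical helper, not kernel code):

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Illustrative only: pass one fd over a connected AF_UNIX socket.
 * While the message sits in the receive queue the fd is "in flight" --
 * the state io_scm_file_account() makes visible to the UNIX GC. */
static int send_fd(int sock, int fd)
{
	char dummy = 'x';
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;		/* force cmsghdr alignment */
	} u;
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;		/* fd-passing control message */
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

	return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
}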
 
 static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
 {
@@ -8926,6 +9907,11 @@ static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
        struct sk_buff *skb;
        int i;
 
+       if (!io_file_need_scm(file)) {
+               fput(file);
+               return;
+       }
+
        __skb_queue_head_init(&list);
 
        /*
@@ -8990,15 +9976,17 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
                list_del(&prsrc->list);
 
                if (prsrc->tag) {
-                       bool lock_ring = ctx->flags & IORING_SETUP_IOPOLL;
+                       if (ctx->flags & IORING_SETUP_IOPOLL)
+                               mutex_lock(&ctx->uring_lock);
 
-                       io_ring_submit_lock(ctx, lock_ring);
                        spin_lock(&ctx->completion_lock);
                        io_fill_cqe_aux(ctx, prsrc->tag, 0, 0);
                        io_commit_cqring(ctx);
                        spin_unlock(&ctx->completion_lock);
                        io_cqring_ev_posted(ctx);
-                       io_ring_submit_unlock(ctx, lock_ring);
+
+                       if (ctx->flags & IORING_SETUP_IOPOLL)
+                               mutex_unlock(&ctx->uring_lock);
                }
 
                rsrc_data->do_put(ctx, prsrc);
@@ -9052,27 +10040,31 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
        if (ret)
                return ret;
 
-       ret = -ENOMEM;
-       if (!io_alloc_file_tables(&ctx->file_table, nr_args))
-               goto out_free;
+       if (!io_alloc_file_tables(&ctx->file_table, nr_args)) {
+               io_rsrc_data_free(ctx->file_data);
+               ctx->file_data = NULL;
+               return -ENOMEM;
+       }
 
        for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
-               if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
+               struct io_fixed_file *file_slot;
+
+               if (fds && copy_from_user(&fd, &fds[i], sizeof(fd))) {
                        ret = -EFAULT;
-                       goto out_fput;
+                       goto fail;
                }
                /* allow sparse sets */
-               if (fd == -1) {
+               if (!fds || fd == -1) {
                        ret = -EINVAL;
                        if (unlikely(*io_get_tag_slot(ctx->file_data, i)))
-                               goto out_fput;
+                               goto fail;
                        continue;
                }
 
                file = fget(fd);
                ret = -EBADF;
                if (unlikely(!file))
-                       goto out_fput;
+                       goto fail;
 
                /*
                 * Don't allow io_uring instances to be registered. If UNIX
@@ -9083,86 +10075,37 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
                 */
                if (file->f_op == &io_uring_fops) {
                        fput(file);
-                       goto out_fput;
+                       goto fail;
                }
-               io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file);
-       }
-
-       ret = io_sqe_files_scm(ctx);
-       if (ret) {
-               __io_sqe_files_unregister(ctx);
-               return ret;
-       }
-
-       io_rsrc_node_switch(ctx, NULL);
-       return ret;
-out_fput:
-       for (i = 0; i < ctx->nr_user_files; i++) {
-               file = io_file_from_index(ctx, i);
-               if (file)
+               ret = io_scm_file_account(ctx, file);
+               if (ret) {
                        fput(file);
-       }
-       io_free_file_tables(&ctx->file_table);
-       ctx->nr_user_files = 0;
-out_free:
-       io_rsrc_data_free(ctx->file_data);
-       ctx->file_data = NULL;
-       return ret;
-}
-
-static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
-                               int index)
-{
-#if defined(CONFIG_UNIX)
-       struct sock *sock = ctx->ring_sock->sk;
-       struct sk_buff_head *head = &sock->sk_receive_queue;
-       struct sk_buff *skb;
-
-       /*
-        * See if we can merge this file into an existing skb SCM_RIGHTS
-        * file set. If there's no room, fall back to allocating a new skb
-        * and filling it in.
-        */
-       spin_lock_irq(&head->lock);
-       skb = skb_peek(head);
-       if (skb) {
-               struct scm_fp_list *fpl = UNIXCB(skb).fp;
-
-               if (fpl->count < SCM_MAX_FD) {
-                       __skb_unlink(skb, head);
-                       spin_unlock_irq(&head->lock);
-                       fpl->fp[fpl->count] = get_file(file);
-                       unix_inflight(fpl->user, fpl->fp[fpl->count]);
-                       fpl->count++;
-                       spin_lock_irq(&head->lock);
-                       __skb_queue_head(head, skb);
-               } else {
-                       skb = NULL;
+                       goto fail;
                }
-       }
-       spin_unlock_irq(&head->lock);
-
-       if (skb) {
-               fput(file);
-               return 0;
+               file_slot = io_fixed_file_slot(&ctx->file_table, i);
+               io_fixed_file_set(file_slot, file);
+               io_file_bitmap_set(&ctx->file_table, i);
        }
 
-       return __io_sqe_files_scm(ctx, 1, index);
-#else
+       io_rsrc_node_switch(ctx, NULL);
        return 0;
-#endif
+fail:
+       __io_sqe_files_unregister(ctx);
+       return ret;
 }
 
 static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
                                 struct io_rsrc_node *node, void *rsrc)
 {
+       u64 *tag_slot = io_get_tag_slot(data, idx);
        struct io_rsrc_put *prsrc;
 
        prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
        if (!prsrc)
                return -ENOMEM;
 
-       prsrc->tag = *io_get_tag_slot(data, idx);
+       prsrc->tag = *tag_slot;
+       *tag_slot = 0;
        prsrc->rsrc = rsrc;
        list_add(&prsrc->list, &node->rsrc_list);
        return 0;
@@ -9172,12 +10115,11 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
                                 unsigned int issue_flags, u32 slot_index)
 {
        struct io_ring_ctx *ctx = req->ctx;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
        bool needs_switch = false;
        struct io_fixed_file *file_slot;
        int ret = -EBADF;
 
-       io_ring_submit_lock(ctx, needs_lock);
+       io_ring_submit_lock(ctx, issue_flags);
        if (file->f_op == &io_uring_fops)
                goto err;
        ret = -ENXIO;
@@ -9203,22 +10145,20 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
                if (ret)
                        goto err;
                file_slot->file_ptr = 0;
+               io_file_bitmap_clear(&ctx->file_table, slot_index);
                needs_switch = true;
        }
 
-       *io_get_tag_slot(ctx->file_data, slot_index) = 0;
-       io_fixed_file_set(file_slot, file);
-       ret = io_sqe_file_register(ctx, file, slot_index);
-       if (ret) {
-               file_slot->file_ptr = 0;
-               goto err;
+       ret = io_scm_file_account(ctx, file);
+       if (!ret) {
+               *io_get_tag_slot(ctx->file_data, slot_index) = 0;
+               io_fixed_file_set(file_slot, file);
+               io_file_bitmap_set(&ctx->file_table, slot_index);
        }
-
-       ret = 0;
 err:
        if (needs_switch)
                io_rsrc_node_switch(ctx, ctx->file_data);
-       io_ring_submit_unlock(ctx, needs_lock);
+       io_ring_submit_unlock(ctx, issue_flags);
        if (ret)
                fput(file);
        return ret;
@@ -9228,12 +10168,11 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
 {
        unsigned int offset = req->close.file_slot - 1;
        struct io_ring_ctx *ctx = req->ctx;
-       bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
        struct io_fixed_file *file_slot;
        struct file *file;
-       int ret, i;
+       int ret;
 
-       io_ring_submit_lock(ctx, needs_lock);
+       io_ring_submit_lock(ctx, issue_flags);
        ret = -ENXIO;
        if (unlikely(!ctx->file_data))
                goto out;
@@ -9244,8 +10183,8 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
        if (ret)
                goto out;
 
-       i = array_index_nospec(offset, ctx->nr_user_files);
-       file_slot = io_fixed_file_slot(&ctx->file_table, i);
+       offset = array_index_nospec(offset, ctx->nr_user_files);
+       file_slot = io_fixed_file_slot(&ctx->file_table, offset);
        ret = -EBADF;
        if (!file_slot->file_ptr)
                goto out;
@@ -9256,10 +10195,11 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
                goto out;
 
        file_slot->file_ptr = 0;
+       io_file_bitmap_clear(&ctx->file_table, offset);
        io_rsrc_node_switch(ctx, ctx->file_data);
        ret = 0;
 out:
-       io_ring_submit_unlock(ctx, needs_lock);
+       io_ring_submit_unlock(ctx, issue_flags);
        return ret;
 }
 
@@ -9301,11 +10241,11 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 
                if (file_slot->file_ptr) {
                        file = (struct file *)(file_slot->file_ptr & FFS_MASK);
-                       err = io_queue_rsrc_removal(data, up->offset + done,
-                                                   ctx->rsrc_node, file);
+                       err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file);
                        if (err)
                                break;
                        file_slot->file_ptr = 0;
+                       io_file_bitmap_clear(&ctx->file_table, i);
                        needs_switch = true;
                }
                if (fd != -1) {
@@ -9327,14 +10267,14 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
                                err = -EBADF;
                                break;
                        }
-                       *io_get_tag_slot(data, up->offset + done) = tag;
-                       io_fixed_file_set(file_slot, file);
-                       err = io_sqe_file_register(ctx, file, i);
+                       err = io_scm_file_account(ctx, file);
                        if (err) {
-                               file_slot->file_ptr = 0;
                                fput(file);
                                break;
                        }
+                       *io_get_tag_slot(data, i) = tag;
+                       io_fixed_file_set(file_slot, file);
+                       io_file_bitmap_set(&ctx->file_table, i);
                }
        }
 
@@ -9411,11 +10351,10 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task,
        xa_init(&tctx->xa);
        init_waitqueue_head(&tctx->wait);
        atomic_set(&tctx->in_idle, 0);
-       atomic_set(&tctx->inflight_tracked, 0);
        task->io_uring = tctx;
        spin_lock_init(&tctx->task_lock);
        INIT_WQ_LIST(&tctx->task_list);
-       INIT_WQ_LIST(&tctx->prior_task_list);
+       INIT_WQ_LIST(&tctx->prio_task_list);
        init_task_work(&tctx->task_work, tctx_task_work);
        return 0;
 }
@@ -9593,8 +10532,8 @@ static void *io_mem_alloc(size_t size)
        return (void *) __get_free_pages(gfp, get_order(size));
 }
 
-static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
-                               size_t *sq_offset)
+static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries,
+                               unsigned int cq_entries, size_t *sq_offset)
 {
        struct io_rings *rings;
        size_t off, sq_array_size;
@@ -9602,6 +10541,10 @@ static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
        off = struct_size(rings, cqes, cq_entries);
        if (off == SIZE_MAX)
                return SIZE_MAX;
+       if (ctx->flags & IORING_SETUP_CQE32) {
+               if (check_shl_overflow(off, 1, &off))
+                       return SIZE_MAX;
+       }
 
 #ifdef CONFIG_SMP
        off = ALIGN(off, SMP_CACHE_BYTES);
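With IORING_SETUP_CQE32 each CQE occupies 32 bytes instead of 16, so the hunk above doubles the entire struct_size() result (header included) under check_shl_overflow(). A hedged userspace re-derivation of the same arithmetic, using compiler overflow builtins in place of the kernel helpers (cq_ring_bytes() is illustrative, not kernel code):

#include <stddef.h>
#include <stdint.h>

/* bytes = header + cq_entries * sizeof(struct io_uring_cqe) (16 bytes),
 * then the whole thing is doubled for 32-byte CQEs. SIZE_MAX signals
 * overflow, mirroring rings_size(). */
static size_t cq_ring_bytes(size_t header, uint32_t cq_entries, int cqe32)
{
	size_t arr, off;

	if (__builtin_mul_overflow((size_t)cq_entries, (size_t)16, &arr) ||
	    __builtin_add_overflow(header, arr, &off))
		return SIZE_MAX;		/* overflow sentinel */
	if (cqe32) {
		if (off > SIZE_MAX >> 1)	/* shift would overflow */
			return SIZE_MAX;
		off <<= 1;			/* 32-byte CQEs */
	}
	return off;
}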
@@ -9763,30 +10706,18 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
        return ret;
 }
 
-static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
-                                 struct io_mapped_ubuf **pimu,
-                                 struct page **last_hpage)
+static struct page **io_pin_pages(unsigned long ubuf, unsigned long len,
+                                 int *npages)
 {
-       struct io_mapped_ubuf *imu = NULL;
+       unsigned long start, end, nr_pages;
        struct vm_area_struct **vmas = NULL;
        struct page **pages = NULL;
-       unsigned long off, start, end, ubuf;
-       size_t size;
-       int ret, pret, nr_pages, i;
-
-       if (!iov->iov_base) {
-               *pimu = ctx->dummy_ubuf;
-               return 0;
-       }
+       int i, pret, ret = -ENOMEM;
 
-       ubuf = (unsigned long) iov->iov_base;
-       end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       end = (ubuf + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        start = ubuf >> PAGE_SHIFT;
        nr_pages = end - start;
 
-       *pimu = NULL;
-       ret = -ENOMEM;
-
        pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
        if (!pages)
                goto done;
@@ -9796,10 +10727,6 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
        if (!vmas)
                goto done;
 
-       imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
-       if (!imu)
-               goto done;
-
        ret = 0;
        mmap_read_lock(current->mm);
        pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
@@ -9817,6 +10744,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
                                break;
                        }
                }
+               *npages = nr_pages;
        } else {
                ret = pret < 0 ? pret : -EFAULT;
        }
@@ -9830,14 +10758,53 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
                        unpin_user_pages(pages, pret);
                goto done;
        }
+       ret = 0;
+done:
+       kvfree(vmas);
+       if (ret < 0) {
+               kvfree(pages);
+               pages = ERR_PTR(ret);
+       }
+       return pages;
+}
+
+static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
+                                 struct io_mapped_ubuf **pimu,
+                                 struct page **last_hpage)
+{
+       struct io_mapped_ubuf *imu = NULL;
+       struct page **pages = NULL;
+       unsigned long off;
+       size_t size;
+       int ret, nr_pages, i;
+
+       if (!iov->iov_base) {
+               *pimu = ctx->dummy_ubuf;
+               return 0;
+       }
+
+       *pimu = NULL;
+       ret = -ENOMEM;
+
+       pages = io_pin_pages((unsigned long) iov->iov_base, iov->iov_len,
+                               &nr_pages);
+       if (IS_ERR(pages)) {
+               ret = PTR_ERR(pages);
+               pages = NULL;
+               goto done;
+       }
+
+       imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
+       if (!imu)
+               goto done;
 
-       ret = io_buffer_account_pin(ctx, pages, pret, imu, last_hpage);
+       ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage);
        if (ret) {
-               unpin_user_pages(pages, pret);
+               unpin_user_pages(pages, nr_pages);
                goto done;
        }
 
-       off = ubuf & ~PAGE_MASK;
+       off = (unsigned long) iov->iov_base & ~PAGE_MASK;
        size = iov->iov_len;
        for (i = 0; i < nr_pages; i++) {
                size_t vec_len;
@@ -9850,8 +10817,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
                size -= vec_len;
        }
        /* store original address for later verification */
-       imu->ubuf = ubuf;
-       imu->ubuf_end = ubuf + iov->iov_len;
+       imu->ubuf = (unsigned long) iov->iov_base;
+       imu->ubuf_end = imu->ubuf + iov->iov_len;
        imu->nr_bvecs = nr_pages;
        *pimu = imu;
        ret = 0;
@@ -9859,7 +10826,6 @@ done:
        if (ret)
                kvfree(imu);
        kvfree(pages);
-       kvfree(vmas);
        return ret;
 }
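The io_pin_pages() split above also backs the long-standing fixed-buffer path. From userspace that path is reached with IORING_REGISTER_BUFFERS; a minimal sketch, assuming ring_fd came from io_uring_setup() and with error handling trimmed (register_one_buffer() is illustrative):

#include <linux/io_uring.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/uio.h>
#include <unistd.h>

/* Register one 64KiB fixed buffer; the kernel pins its pages through
 * io_pin_pages() above. */
static int register_one_buffer(int ring_fd)
{
	void *buf = mmap(NULL, 65536, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct iovec iov = { .iov_base = buf, .iov_len = 65536 };

	if (buf == MAP_FAILED)
		return -1;
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_BUFFERS, &iov, 1);
}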
 
@@ -9918,12 +10884,17 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
        }
 
        for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
-               ret = io_copy_iov(ctx, &iov, arg, i);
-               if (ret)
-                       break;
-               ret = io_buffer_validate(&iov);
-               if (ret)
-                       break;
+               if (arg) {
+                       ret = io_copy_iov(ctx, &iov, arg, i);
+                       if (ret)
+                               break;
+                       ret = io_buffer_validate(&iov);
+                       if (ret)
+                               break;
+               } else {
+                       memset(&iov, 0, sizeof(iov));
+               }
+
                if (!iov.iov_base && *io_get_tag_slot(data, i)) {
                        ret = -EINVAL;
                        break;
@@ -9986,7 +10957,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 
                i = array_index_nospec(offset, ctx->nr_user_bufs);
                if (ctx->user_bufs[i] != ctx->dummy_ubuf) {
-                       err = io_queue_rsrc_removal(ctx->buf_data, offset,
+                       err = io_queue_rsrc_removal(ctx->buf_data, i,
                                                    ctx->rsrc_node, ctx->user_bufs[i]);
                        if (unlikely(err)) {
                                io_buffer_unmap(ctx, &imu);
@@ -10062,19 +11033,19 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
 
 static void io_destroy_buffers(struct io_ring_ctx *ctx)
 {
+       struct io_buffer_list *bl;
+       unsigned long index;
        int i;
 
-       for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++) {
-               struct list_head *list = &ctx->io_buffers[i];
-
-               while (!list_empty(list)) {
-                       struct io_buffer_list *bl;
+       for (i = 0; i < BGID_ARRAY; i++) {
+               if (!ctx->io_bl)
+                       break;
+               __io_remove_buffers(ctx, &ctx->io_bl[i], -1U);
+       }
 
-                       bl = list_first_entry(list, struct io_buffer_list, list);
-                       __io_remove_buffers(ctx, bl, -1U);
-                       list_del(&bl->list);
-                       kfree(bl);
-               }
+       xa_for_each(&ctx->io_bl_xa, index, bl) {
+               xa_erase(&ctx->io_bl_xa, bl->bgid);
+               __io_remove_buffers(ctx, bl, -1U);
        }
 
        while (!list_empty(&ctx->io_buffers_pages)) {
@@ -10094,7 +11065,7 @@ static void io_req_caches_free(struct io_ring_ctx *ctx)
        mutex_lock(&ctx->uring_lock);
        io_flush_cached_locked_reqs(ctx, state);
 
-       while (state->free_list.next) {
+       while (!io_req_cache_empty(ctx)) {
                struct io_wq_work_node *node;
                struct io_kiocb *req;
 
@@ -10181,10 +11152,10 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
        io_req_caches_free(ctx);
        if (ctx->hash_map)
                io_wq_put_hash(ctx->hash_map);
-       io_free_napi_list(ctx);
        kfree(ctx->cancel_hash);
        kfree(ctx->dummy_ubuf);
-       kfree(ctx->io_buffers);
+       kfree(ctx->io_bl);
+       xa_destroy(&ctx->io_bl_xa);
        kfree(ctx);
 }
 
@@ -10215,7 +11186,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
         * Users may get EPOLLIN while seeing nothing in the cqring; this
         * pushes them to do the flush.
         */
-       if (io_cqring_events(ctx) || test_bit(0, &ctx->check_cq_overflow))
+       if (io_cqring_events(ctx) ||
+           test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))
                mask |= EPOLLIN | EPOLLRDNORM;
 
        return mask;
@@ -10347,8 +11319,7 @@ static __cold bool io_kill_timeouts(struct io_ring_ctx *ctx,
                }
        }
        spin_unlock_irq(&ctx->timeout_lock);
-       if (canceled != 0)
-               io_commit_cqring(ctx);
+       io_commit_cqring(ctx);
        spin_unlock(&ctx->completion_lock);
        if (canceled != 0)
                io_cqring_ev_posted(ctx);
@@ -10368,11 +11339,13 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
                io_unregister_personality(ctx, index);
        mutex_unlock(&ctx->uring_lock);
 
-       io_kill_timeouts(ctx, NULL, true);
-       io_poll_remove_all(ctx, NULL, true);
-
-       /* if we failed setting up the ctx, we might not have any rings */
-       io_iopoll_try_reap_events(ctx);
+       /* failed during ring init; it couldn't have issued any requests */
+       if (ctx->rings) {
+               io_kill_timeouts(ctx, NULL, true);
+               io_poll_remove_all(ctx, NULL, true);
+               /* if we failed setting up the ctx, we might not have any rings */
+               io_iopoll_try_reap_events(ctx);
+       }
 
        INIT_WORK(&ctx->exit_work, io_ring_exit_work);
        /*
@@ -10464,6 +11437,10 @@ static __cold void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
        struct io_task_cancel cancel = { .task = task, .all = cancel_all, };
        struct io_uring_task *tctx = task ? task->io_uring : NULL;
 
+       /* failed during ring init; it couldn't have issued any requests */
+       if (!ctx->rings)
+               return;
+
        while (1) {
                enum io_wq_cancel cret;
                bool ret = false;
@@ -10604,7 +11581,7 @@ static __cold void io_uring_clean_tctx(struct io_uring_task *tctx)
 static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
 {
        if (tracked)
-               return atomic_read(&tctx->inflight_tracked);
+               return 0;
        return percpu_counter_sum(&tctx->inflight);
 }
 
@@ -10755,6 +11732,11 @@ static int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg,
                        break;
                }
 
+               if (reg.resv) {
+                       ret = -EINVAL;
+                       break;
+               }
+
                if (reg.offset == -1U) {
                        start = 0;
                        end = IO_RINGFD_REG_MAX;
@@ -10801,7 +11783,7 @@ static int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg,
                        ret = -EFAULT;
                        break;
                }
-               if (reg.offset >= IO_RINGFD_REG_MAX) {
+               if (reg.resv || reg.data || reg.offset >= IO_RINGFD_REG_MAX) {
                        ret = -EINVAL;
                        break;
                }
@@ -10904,6 +11886,19 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
        return 0;
 }
 
+static int io_validate_ext_arg(unsigned flags, const void __user *argp, size_t argsz)
+{
+       if (flags & IORING_ENTER_EXT_ARG) {
+               struct io_uring_getevents_arg arg;
+
+               if (argsz != sizeof(arg))
+                       return -EINVAL;
+               if (copy_from_user(&arg, argp, sizeof(arg)))
+                       return -EFAULT;
+       }
+       return 0;
+}
+
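io_validate_ext_arg() above only sanity-checks the extended argument on the iopoll path; the waiting path still parses it via io_get_ext_arg() below. For reference, a hedged sketch of the structure as seen from userspace (raw syscall; assumes 5.11+ headers for IORING_ENTER_EXT_ARG; wait_one_cqe() is illustrative):

#include <linux/io_uring.h>
#include <linux/time_types.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Wait for one CQE with a 10ms timeout using the extended argument.
 * With sigmask left NULL, sigmask_sz is not consulted by the kernel. */
static long wait_one_cqe(int ring_fd)
{
	struct __kernel_timespec ts = { .tv_nsec = 10 * 1000 * 1000 };
	struct io_uring_getevents_arg arg = {
		.ts = (uint64_t)(uintptr_t)&ts,
	};

	return syscall(__NR_io_uring_enter, ring_fd, 0, 1,
		       IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG,
		       &arg, sizeof(arg));
}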
 static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
                          struct __kernel_timespec __user **ts,
                          const sigset_t __user **sig)
@@ -10928,6 +11923,8 @@ static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz
                return -EINVAL;
        if (copy_from_user(&arg, argp, sizeof(arg)))
                return -EFAULT;
+       if (arg.pad)
+               return -EINVAL;
        *sig = u64_to_user_ptr(arg.sigmask);
        *argsz = arg.sigmask_sz;
        *ts = u64_to_user_ptr(arg.ts);
@@ -10939,7 +11936,6 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                size_t, argsz)
 {
        struct io_ring_ctx *ctx;
-       int submitted = 0;
        struct fd f;
        long ret;
 
@@ -11002,39 +11998,64 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                        if (ret)
                                goto out;
                }
-               submitted = to_submit;
+               ret = to_submit;
        } else if (to_submit) {
                ret = io_uring_add_tctx_node(ctx);
                if (unlikely(ret))
                        goto out;
-               mutex_lock(&ctx->uring_lock);
-               submitted = io_submit_sqes(ctx, to_submit);
-               mutex_unlock(&ctx->uring_lock);
 
-               if (submitted != to_submit)
+               mutex_lock(&ctx->uring_lock);
+               ret = io_submit_sqes(ctx, to_submit);
+               if (ret != to_submit) {
+                       mutex_unlock(&ctx->uring_lock);
                        goto out;
+               }
+               if ((flags & IORING_ENTER_GETEVENTS) && ctx->syscall_iopoll)
+                       goto iopoll_locked;
+               mutex_unlock(&ctx->uring_lock);
        }
        if (flags & IORING_ENTER_GETEVENTS) {
-               const sigset_t __user *sig;
-               struct __kernel_timespec __user *ts;
-
-               ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
-               if (unlikely(ret))
-                       goto out;
+               int ret2;
+
+               if (ctx->syscall_iopoll) {
+                       /*
+                        * We disallow the app entering submit/complete with
+                        * polling, but we still need to lock the ring to
+                        * prevent racing with polled issue that got punted to
+                        * a workqueue.
+                        */
+                       mutex_lock(&ctx->uring_lock);
+iopoll_locked:
+                       ret2 = io_validate_ext_arg(flags, argp, argsz);
+                       if (likely(!ret2)) {
+                               min_complete = min(min_complete,
+                                                  ctx->cq_entries);
+                               ret2 = io_iopoll_check(ctx, min_complete);
+                       }
+                       mutex_unlock(&ctx->uring_lock);
+               } else {
+                       const sigset_t __user *sig;
+                       struct __kernel_timespec __user *ts;
+
+                       ret2 = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
+                       if (likely(!ret2)) {
+                               min_complete = min(min_complete,
+                                                  ctx->cq_entries);
+                               ret2 = io_cqring_wait(ctx, min_complete, sig,
+                                                     argsz, ts);
+                       }
+               }
 
-               min_complete = min(min_complete, ctx->cq_entries);
+               if (!ret) {
+                       ret = ret2;
 
-               /*
-                * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
-                * space applications don't need to do io completion events
-                * polling again, they can rely on io_sq_thread to do polling
-                * work, which can reduce cpu usage and uring_lock contention.
-                */
-               if (ctx->flags & IORING_SETUP_IOPOLL &&
-                   !(ctx->flags & IORING_SETUP_SQPOLL)) {
-                       ret = io_iopoll_check(ctx, min_complete);
-               } else {
-                       ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts);
+                       /*
+                        * EBADR indicates that one or more CQE were dropped.
+                        * Once the user has been informed we can clear the bit
+                        * as they are obviously ok with those drops.
+                        */
+                       if (unlikely(ret2 == -EBADR))
+                               clear_bit(IO_CHECK_CQ_DROPPED_BIT,
+                                         &ctx->check_cq);
                }
        }
 
@@ -11043,7 +12064,7 @@ out:
 out_fput:
        if (!(flags & IORING_ENTER_REGISTERED_RING))
                fdput(f);
-       return submitted ? submitted : ret;
+       return ret;
 }
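Per the EBADR comment above, the syscall now reports dropped CQEs exactly once and clears IO_CHECK_CQ_DROPPED_BIT afterwards. A hedged sketch of how a caller might consume that signal (enter_checked() is illustrative, not kernel code):

#include <errno.h>
#include <linux/io_uring.h>
#include <stddef.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Submit-and-wait wrapper that treats EBADR as "completions were lost":
 * the application must reconcile its in-flight requests itself, since
 * the kernel clears the dropped flag once it has been reported. */
static long enter_checked(int ring_fd, unsigned to_submit, unsigned min_complete)
{
	long ret = syscall(__NR_io_uring_enter, ring_fd, to_submit,
			   min_complete, IORING_ENTER_GETEVENTS, NULL, 0);

	if (ret < 0 && errno == EBADR) {
		/* One or more CQEs were dropped; resync application
		 * state for requests whose completions never arrived. */
	}
	return ret;
}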
 
 #ifdef CONFIG_PROC_FS
@@ -11090,10 +12111,15 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
        unsigned int sq_tail = READ_ONCE(r->sq.tail);
        unsigned int cq_head = READ_ONCE(r->cq.head);
        unsigned int cq_tail = READ_ONCE(r->cq.tail);
+       unsigned int cq_shift = 0;
        unsigned int sq_entries, cq_entries;
        bool has_lock;
+       bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
        unsigned int i;
 
+       if (is_cqe32)
+               cq_shift = 1;
+
        /*
         * we may get imprecise sqe and cqe info if uring is actively running
         * since we get cached_sq_head and cached_cq_tail without uring_lock
@@ -11126,11 +12152,18 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
        cq_entries = min(cq_tail - cq_head, ctx->cq_entries);
        for (i = 0; i < cq_entries; i++) {
                unsigned int entry = i + cq_head;
-               struct io_uring_cqe *cqe = &r->cqes[entry & cq_mask];
+               struct io_uring_cqe *cqe = &r->cqes[(entry & cq_mask) << cq_shift];
 
-               seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n",
+               if (!is_cqe32) {
+                       seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n",
                           entry & cq_mask, cqe->user_data, cqe->res,
                           cqe->flags);
+               } else {
+                       seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x, "
+                               "extra1:%llu, extra2:%llu\n",
+                               entry & cq_mask, cqe->user_data, cqe->res,
+                               cqe->flags, cqe->big_cqe[0], cqe->big_cqe[1]);
+               }
        }
 
        /*
@@ -11233,7 +12266,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
        ctx->sq_entries = p->sq_entries;
        ctx->cq_entries = p->cq_entries;
 
-       size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset);
+       size = rings_size(ctx, p->sq_entries, p->cq_entries, &sq_array_offset);
        if (size == SIZE_MAX)
                return -EOVERFLOW;
 
@@ -11248,7 +12281,10 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
        rings->sq_ring_entries = p->sq_entries;
        rings->cq_ring_entries = p->cq_entries;
 
-       size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
+       if (p->flags & IORING_SETUP_SQE128)
+               size = array_size(2 * sizeof(struct io_uring_sqe), p->sq_entries);
+       else
+               size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
        if (size == SIZE_MAX) {
                io_mem_free(ctx->rings);
                ctx->rings = NULL;
@@ -11360,10 +12396,40 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
        ctx = io_ring_ctx_alloc(p);
        if (!ctx)
                return -ENOMEM;
+
+       /*
+        * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
+        * space applications don't need to do io completion events
+        * polling again, they can rely on io_sq_thread to do polling
+        * work, which can reduce cpu usage and uring_lock contention.
+        */
+       if (ctx->flags & IORING_SETUP_IOPOLL &&
+           !(ctx->flags & IORING_SETUP_SQPOLL))
+               ctx->syscall_iopoll = 1;
+
        ctx->compat = in_compat_syscall();
        if (!capable(CAP_IPC_LOCK))
                ctx->user = get_uid(current_user());
 
+       /*
+        * For SQPOLL, we just need a wakeup, always. For !SQPOLL, if
+        * COOP_TASKRUN is set, then IPIs are never needed by the app.
+        */
+       ret = -EINVAL;
+       if (ctx->flags & IORING_SETUP_SQPOLL) {
+               /* IPI related flags don't make sense with SQPOLL */
+               if (ctx->flags & (IORING_SETUP_COOP_TASKRUN |
+                                 IORING_SETUP_TASKRUN_FLAG))
+                       goto err;
+               ctx->notify_method = TWA_SIGNAL_NO_IPI;
+       } else if (ctx->flags & IORING_SETUP_COOP_TASKRUN) {
+               ctx->notify_method = TWA_SIGNAL_NO_IPI;
+       } else {
+               if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
+                       goto err;
+               ctx->notify_method = TWA_SIGNAL;
+       }
+
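The notify_method selection above wires up the two new setup flags: IORING_SETUP_COOP_TASKRUN suppresses the task_work IPI, and IORING_SETUP_TASKRUN_FLAG additionally raises IORING_SQ_TASKRUN in the SQ ring flags so the application knows it must enter the kernel. A minimal setup sketch, assuming 5.19+ headers (setup_coop_ring() is illustrative):

#include <linux/io_uring.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Create a ring that defers task_work until the app enters the kernel.
 * Neither flag may be combined with SQPOLL, per the checks above. */
static int setup_coop_ring(unsigned entries)
{
	struct io_uring_params p;

	memset(&p, 0, sizeof(p));
	p.flags = IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG;
	return syscall(__NR_io_uring_setup, entries, &p);
}

With TASKRUN_FLAG set, a userspace completion poller should check the IORING_SQ_TASKRUN bit in the mapped SQ flags word before concluding the CQ ring is idle.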
        /*
         * This is just grabbed for accounting purposes. When a process exits,
         * the mm is exited and dropped before the files, hence we need to hang
@@ -11409,7 +12475,8 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
                        IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
                        IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
                        IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS |
-                       IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP;
+                       IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP |
+                       IORING_FEAT_LINKED_FILE;
 
        if (copy_to_user(params, p, sizeof(*p))) {
                ret = -EFAULT;
@@ -11460,10 +12527,12 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
        if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
                        IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
                        IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
-                       IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL))
+                       IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL |
+                       IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG |
+                       IORING_SETUP_SQE128 | IORING_SETUP_CQE32))
                return -EINVAL;
 
-       return  io_uring_create(entries, &p, params);
+       return io_uring_create(entries, &p, params);
 }
 
 SYSCALL_DEFINE2(io_uring_setup, u32, entries,
@@ -11620,8 +12689,6 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
        __u32 tmp;
        int err;
 
-       if (up->resv)
-               return -EINVAL;
        if (check_add_overflow(up->offset, nr_args, &tmp))
                return -EOVERFLOW;
        err = io_rsrc_node_switch_start(ctx);
@@ -11647,6 +12714,8 @@ static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
        memset(&up, 0, sizeof(up));
        if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update)))
                return -EFAULT;
+       if (up.resv || up.resv2)
+               return -EINVAL;
        return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args);
 }
 
@@ -11659,7 +12728,7 @@ static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
                return -EINVAL;
        if (copy_from_user(&up, arg, sizeof(up)))
                return -EFAULT;
-       if (!up.nr || up.resv)
+       if (!up.nr || up.resv || up.resv2)
                return -EINVAL;
        return __io_register_rsrc_update(ctx, type, &up, up.nr);
 }
@@ -11676,14 +12745,20 @@ static __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
        memset(&rr, 0, sizeof(rr));
        if (copy_from_user(&rr, arg, size))
                return -EFAULT;
-       if (!rr.nr || rr.resv || rr.resv2)
+       if (!rr.nr || rr.resv2)
+               return -EINVAL;
+       if (rr.flags & ~IORING_RSRC_REGISTER_SPARSE)
                return -EINVAL;
 
        switch (type) {
        case IORING_RSRC_FILE:
+               if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data)
+                       break;
                return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data),
                                             rr.nr, u64_to_user_ptr(rr.tags));
        case IORING_RSRC_BUFFER:
+               if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data)
+                       break;
                return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data),
                                               rr.nr, u64_to_user_ptr(rr.tags));
        }
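The IORING_RSRC_REGISTER_SPARSE branches above allow registering an empty table (data == NULL) whose slots are filled later, pairing with the new `!fds` handling in io_sqe_files_register() earlier in this diff. A hedged sketch, assuming 5.19+ headers (register_sparse_files() is illustrative):

#include <linux/io_uring.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Register a 64-slot sparse fixed-file table. Slots can later be filled
 * with IORING_REGISTER_FILES_UPDATE or IORING_OP_FILES_UPDATE. */
static int register_sparse_files(int ring_fd)
{
	struct io_uring_rsrc_register rr;

	memset(&rr, 0, sizeof(rr));
	rr.nr = 64;
	rr.flags = IORING_RSRC_REGISTER_SPARSE;	/* data/tags stay NULL */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_FILES2, &rr, sizeof(rr));
}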
@@ -11707,7 +12782,15 @@ static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
        if (len > cpumask_size())
                len = cpumask_size();
 
-       if (copy_from_user(new_mask, arg, len)) {
+       if (in_compat_syscall()) {
+               ret = compat_get_bitmap(cpumask_bits(new_mask),
+                                       (const compat_ulong_t __user *)arg,
+                                       len * 8 /* CHAR_BIT */);
+       } else {
+               ret = copy_from_user(new_mask, arg, len);
+       }
+
+       if (ret) {
                free_cpumask_var(new_mask);
                return -EFAULT;
        }
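The compat_get_bitmap() path matters because a 32-bit caller lays its CPU mask out in 32-bit words, which does not match the native unsigned-long layout on big-endian kernels. The registration call itself is unchanged; a hedged sketch (pin_iowq_cpu0() is illustrative; IORING_REGISTER_IOWQ_AFF is 5.14+):

#define _GNU_SOURCE
#include <linux/io_uring.h>
#include <sched.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Pin io-wq workers for this ring to CPU 0. nr_args carries the byte
 * length of the mask, matching the 'len' handling above. */
static int pin_iowq_cpu0(int ring_fd)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(0, &mask);
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_IOWQ_AFF, &mask, sizeof(mask));
}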
@@ -11810,6 +12893,85 @@ err:
        return ret;
 }
 
+static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
+{
+       struct io_uring_buf_ring *br;
+       struct io_uring_buf_reg reg;
+       struct io_buffer_list *bl;
+       struct page **pages;
+       int nr_pages;
+
+       if (copy_from_user(&reg, arg, sizeof(reg)))
+               return -EFAULT;
+
+       if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2])
+               return -EINVAL;
+       if (!reg.ring_addr)
+               return -EFAULT;
+       if (reg.ring_addr & ~PAGE_MASK)
+               return -EINVAL;
+       if (!is_power_of_2(reg.ring_entries))
+               return -EINVAL;
+
+       if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
+               int ret = io_init_bl_list(ctx);
+               if (ret)
+                       return ret;
+       }
+
+       bl = io_buffer_get_list(ctx, reg.bgid);
+       if (bl) {
+               /* if mapped buffer ring OR classic exists, don't allow */
+               if (bl->buf_nr_pages || !list_empty(&bl->buf_list))
+                       return -EEXIST;
+       } else {
+               bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+               if (!bl)
+                       return -ENOMEM;
+       }
+
+       pages = io_pin_pages(reg.ring_addr,
+                            struct_size(br, bufs, reg.ring_entries),
+                            &nr_pages);
+       if (IS_ERR(pages)) {
+               kfree(bl);
+               return PTR_ERR(pages);
+       }
+
+       br = page_address(pages[0]);
+       bl->buf_pages = pages;
+       bl->buf_nr_pages = nr_pages;
+       bl->nr_entries = reg.ring_entries;
+       bl->buf_ring = br;
+       bl->mask = reg.ring_entries - 1;
+       io_buffer_add_list(ctx, bl, reg.bgid);
+       return 0;
+}
+
+static int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
+{
+       struct io_uring_buf_reg reg;
+       struct io_buffer_list *bl;
+
+       if (copy_from_user(&reg, arg, sizeof(reg)))
+               return -EFAULT;
+       if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2])
+               return -EINVAL;
+
+       bl = io_buffer_get_list(ctx, reg.bgid);
+       if (!bl)
+               return -ENOENT;
+       if (!bl->buf_nr_pages)
+               return -EINVAL;
+
+       __io_remove_buffers(ctx, bl, -1U);
+       if (bl->bgid >= BGID_ARRAY) {
+               xa_erase(&ctx->io_bl_xa, bl->bgid);
+               kfree(bl);
+       }
+       return 0;
+}
+
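io_register_pbuf_ring() above expects a page-aligned mapping with a power-of-two entry count, and the ring tail overlays the first entry's resv field (see the BUILD_BUG_ONs near the end of this diff). A hedged registration sketch, assuming 5.19+ headers (setup_buf_ring() is illustrative):

#include <linux/io_uring.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Register an 8-entry provided-buffer ring under buffer group 0.
 * mmap() guarantees the page alignment checked above; nr_args must
 * be 1, per the opcode switch in __io_uring_register(). */
static struct io_uring_buf_ring *setup_buf_ring(int ring_fd)
{
	struct io_uring_buf_reg reg;
	void *ring;

	ring = mmap(NULL, 8 * sizeof(struct io_uring_buf),
		    PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (ring == MAP_FAILED)
		return NULL;

	memset(&reg, 0, sizeof(reg));
	reg.ring_addr = (unsigned long)ring;
	reg.ring_entries = 8;			/* power of two */
	reg.bgid = 0;
	if (syscall(__NR_io_uring_register, ring_fd,
		    IORING_REGISTER_PBUF_RING, &reg, 1) < 0)
		return NULL;
	return ring;
}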
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                               void __user *arg, unsigned nr_args)
        __releases(ctx->uring_lock)
@@ -11835,6 +12997,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 
        switch (opcode) {
        case IORING_REGISTER_BUFFERS:
+               ret = -EFAULT;
+               if (!arg)
+                       break;
                ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
                break;
        case IORING_UNREGISTER_BUFFERS:
@@ -11844,6 +13009,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                ret = io_sqe_buffers_unregister(ctx);
                break;
        case IORING_REGISTER_FILES:
+               ret = -EFAULT;
+               if (!arg)
+                       break;
                ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
                break;
        case IORING_UNREGISTER_FILES:
@@ -11938,6 +13106,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
        case IORING_UNREGISTER_RING_FDS:
                ret = io_ringfd_unregister(ctx, arg, nr_args);
                break;
+       case IORING_REGISTER_PBUF_RING:
+               ret = -EINVAL;
+               if (!arg || nr_args != 1)
+                       break;
+               ret = io_register_pbuf_ring(ctx, arg);
+               break;
+       case IORING_UNREGISTER_PBUF_RING:
+               ret = -EINVAL;
+               if (!arg || nr_args != 1)
+                       break;
+               ret = io_unregister_pbuf_ring(ctx, arg);
+               break;
        default:
                ret = -EINVAL;
                break;
@@ -12014,6 +13194,7 @@ static int __init io_uring_init(void)
        BUILD_BUG_SQE_ELEM(42, __u16,  personality);
        BUILD_BUG_SQE_ELEM(44, __s32,  splice_fd_in);
        BUILD_BUG_SQE_ELEM(44, __u32,  file_index);
+       BUILD_BUG_SQE_ELEM(48, __u64,  addr3);
 
        BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
                     sizeof(struct io_uring_rsrc_update));
@@ -12022,6 +13203,10 @@ static int __init io_uring_init(void)
 
        /* ->buf_index is u16 */
        BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
+       BUILD_BUG_ON(BGID_ARRAY * sizeof(struct io_buffer_list) > PAGE_SIZE);
+       BUILD_BUG_ON(offsetof(struct io_uring_buf_ring, bufs) != 0);
+       BUILD_BUG_ON(offsetof(struct io_uring_buf, resv) !=
+                    offsetof(struct io_uring_buf_ring, tail));
 
        /* should fit into one byte */
        BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
@@ -12031,6 +13216,10 @@ static int __init io_uring_init(void)
        BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
        BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int));
 
+       BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32));
+
+       BUILD_BUG_ON(sizeof(struct io_uring_cmd) > 64);
+
        req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
                                SLAB_ACCOUNT);
        return 0;
index b08f5dc31780da7ae5288c711ce09328a60f46bc..80f9b047aa1b6298523daf76638ed67ae823bc24 100644 (file)
@@ -56,7 +56,8 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter,
 {
        atomic_inc(&dio->ref);
 
-       if (dio->iocb->ki_flags & IOCB_HIPRI) {
+       /* Sync dio can't be polled reliably */
+       if ((dio->iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(dio->iocb)) {
                bio_set_polled(bio, dio->iocb);
                dio->submit.poll_bio = bio;
        }
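This iomap change stops flagging bios for completion polling when the kiocb is synchronous, and the dio hunk below likewise drops the bio_poll() fallback from the sync wait loop, leaving blk_io_schedule() alone. The affected userspace pattern is a synchronous high-priority read, sketched here (hipri_read() is illustrative; O_DIRECT buffer-alignment requirements elided):

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/uio.h>
#include <unistd.h>

/* Synchronous RWF_HIPRI read on an O_DIRECT fd: with this change the
 * kernel no longer completion-polls such I/O, since sync kiocbs can't
 * be polled reliably. */
static ssize_t hipri_read(int fd, void *buf, size_t len)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };

	return preadv2(fd, &iov, 1, 0, RWF_HIPRI);
}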
@@ -265,8 +266,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
                 * cache flushes on IO completion.
                 */
                if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
-                   (dio->flags & IOMAP_DIO_WRITE_FUA) &&
-                   blk_queue_fua(bdev_get_queue(iomap->bdev)))
+                   (dio->flags & IOMAP_DIO_WRITE_FUA) && bdev_fua(iomap->bdev))
                        use_fua = true;
        }
 
@@ -654,9 +654,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                        if (!READ_ONCE(dio->submit.waiter))
                                break;
 
-                       if (!dio->submit.poll_bio ||
-                           !bio_poll(dio->submit.poll_bio, NULL, 0))
-                               blk_io_schedule();
+                       blk_io_schedule();
                }
                __set_current_state(TASK_RUNNING);
        }
index 5b9408e3b370d00d995e82934ad9eb96719c9b8b..ac7f067b7bddb794bc40344058602162eeda3739 100644 (file)
@@ -488,7 +488,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        jbd2_journal_wait_updates(journal);
 
        commit_transaction->t_state = T_SWITCH;
-       write_unlock(&journal->j_state_lock);
 
        J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
                        journal->j_max_transaction_buffers);
@@ -508,6 +507,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
         * has reserved.  This is consistent with the existing behaviour
         * that multiple jbd2_journal_get_write_access() calls to the same
         * buffer are perfectly permissible.
+        * We use journal->j_state_lock here to serialize processing of
+        * t_reserved_list with eviction of buffers from journal_unmap_buffer().
         */
        while (commit_transaction->t_reserved_list) {
                jh = commit_transaction->t_reserved_list;
@@ -527,6 +528,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                jbd2_journal_refile_buffer(journal, jh);
        }
 
+       write_unlock(&journal->j_state_lock);
        /*
         * Now try to drop any written-back buffers from the journal's
         * checkpoint lists.  We do this *before* commit because it potentially
index fcacafa4510d173d144a00b6bc0a5936766f969e..c0cbeeaec2d1aa33d0ff97217ae02e2205c248ba 100644 (file)
@@ -1762,7 +1762,6 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
        unsigned long block, log_offset; /* logical */
        unsigned long long phys_block, block_start, block_stop; /* physical */
        loff_t byte_start, byte_stop, byte_count;
-       struct request_queue *q = bdev_get_queue(journal->j_dev);
 
        /* flags must be set to either discard or zeroout */
        if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) || !flags ||
@@ -1770,10 +1769,8 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
                        (flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
                return -EINVAL;
 
-       if (!q)
-               return -ENXIO;
-
-       if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
+       if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
+           !bdev_max_discard_sectors(journal->j_dev))
                return -EOPNOTSUPP;
 
        /*
@@ -1828,7 +1825,7 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
                        err = blkdev_issue_discard(journal->j_dev,
                                        byte_start >> SECTOR_SHIFT,
                                        byte_count >> SECTOR_SHIFT,
-                                       GFP_NOFS, 0);
+                                       GFP_NOFS);
                } else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) {
                        err = blkdev_issue_zeroout(journal->j_dev,
                                        byte_start >> SECTOR_SHIFT,
index 03a845ab4f009c30e6fc2a941bb2196f21a65200..1e7b177ece60579f5d66a461863fdd3f69baab5c 100644 (file)
@@ -110,14 +110,13 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        case FITRIM:
        {
                struct super_block *sb = inode->i_sb;
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
                struct fstrim_range range;
                s64 ret = 0;
 
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
 
-               if (!blk_queue_discard(q)) {
+               if (!bdev_max_discard_sectors(sb->s_bdev)) {
                        jfs_warn("FITRIM not supported on device");
                        return -EOPNOTSUPP;
                }
@@ -127,7 +126,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        return -EFAULT;
 
                range.minlen = max_t(unsigned int, range.minlen,
-                       q->limits.discard_granularity);
+                                    bdev_discard_granularity(sb->s_bdev));
 
                ret = jfs_ioc_trim(inode, &range);
                if (ret < 0)
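jbd2 and jfs above (like the other filesystems in this merge) now probe discard support with bdev_max_discard_sectors() instead of blk_queue_discard(). The jfs FITRIM path is driven from userspace through the generic ioctl; a hedged sketch (trim_fs() is illustrative):

#include <fcntl.h>
#include <linux/fs.h>		/* FITRIM, struct fstrim_range */
#include <sys/ioctl.h>
#include <unistd.h>

/* Trim a whole mounted filesystem; fails with EOPNOTSUPP when the
 * backing device reports no discard capability (the check above). */
static int trim_fs(const char *mountpoint)
{
	struct fstrim_range range = { .start = 0, .len = (__u64)-1 };
	int fd = open(mountpoint, O_RDONLY | O_DIRECTORY);
	int ret;

	if (fd < 0)
		return -1;
	ret = ioctl(fd, FITRIM, &range);
	close(fd);
	return ret;
}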
index f1a13a74cddf3df17f6026af202f57de9969506b..85d4f44f2ac4dfa6da236263d2ba326a78b23100 100644 (file)
@@ -372,19 +372,16 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
                }
 
                case Opt_discard:
-               {
-                       struct request_queue *q = bdev_get_queue(sb->s_bdev);
                        /* if set to 1, even copying files will cause
                         * trimming :O
                         * -> user has more control over the online trimming
                         */
                        sbi->minblks_trim = 64;
-                       if (blk_queue_discard(q))
+                       if (bdev_max_discard_sectors(sb->s_bdev))
                                *flag |= JFS_DISCARD;
                        else
                                pr_err("JFS: discard option not supported on device\n");
                        break;
-               }
 
                case Opt_nodiscard:
                        *flag &= ~JFS_DISCARD;
@@ -392,10 +389,9 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 
                case Opt_discard_minblk:
                {
-                       struct request_queue *q = bdev_get_queue(sb->s_bdev);
                        char *minblks_trim = args[0].from;
                        int rc;
-                       if (blk_queue_discard(q)) {
+                       if (bdev_max_discard_sectors(sb->s_bdev)) {
                                *flag |= JFS_DISCARD;
                                rc = kstrtouint(minblks_trim, 0,
                                                &sbi->minblks_trim);
index 61a8edc4ba8b5956edb27afda129d51a3c780866..e205fde7163abb2ab589bc85f03c5d9f270285ec 100644 (file)
@@ -1406,7 +1406,12 @@ static void __kernfs_remove(struct kernfs_node *kn)
  */
 void kernfs_remove(struct kernfs_node *kn)
 {
-       struct kernfs_root *root = kernfs_root(kn);
+       struct kernfs_root *root;
+
+       if (!kn)
+               return;
+
+       root = kernfs_root(kn);
 
        down_write(&root->kernfs_rwsem);
        __kernfs_remove(kn);
index 60e7ac62c9172f5cac831b677f184f045a4a0245..1e2076a53bed59ee08f0d7b9cb0ccfd8c2bea56e 100644 (file)
@@ -158,19 +158,41 @@ out:
  * Return : windows path string or error
  */
 
-char *convert_to_nt_pathname(char *filename)
+char *convert_to_nt_pathname(struct ksmbd_share_config *share,
+                            struct path *path)
 {
-       char *ab_pathname;
+       char *pathname, *ab_pathname, *nt_pathname;
+       int share_path_len = share->path_sz;
 
-       if (strlen(filename) == 0)
-               filename = "\\";
+       pathname = kmalloc(PATH_MAX, GFP_KERNEL);
+       if (!pathname)
+               return ERR_PTR(-EACCES);
 
-       ab_pathname = kstrdup(filename, GFP_KERNEL);
-       if (!ab_pathname)
-               return NULL;
+       ab_pathname = d_path(path, pathname, PATH_MAX);
+       if (IS_ERR(ab_pathname)) {
+               nt_pathname = ERR_PTR(-EACCES);
+               goto free_pathname;
+       }
+
+       if (strncmp(ab_pathname, share->path, share_path_len)) {
+               nt_pathname = ERR_PTR(-EACCES);
+               goto free_pathname;
+       }
+
+       nt_pathname = kzalloc(strlen(&ab_pathname[share_path_len]) + 2, GFP_KERNEL);
+       if (!nt_pathname) {
+               nt_pathname = ERR_PTR(-ENOMEM);
+               goto free_pathname;
+       }
+       if (ab_pathname[share_path_len] == '\0')
+               strcpy(nt_pathname, "/");
+       strcat(nt_pathname, &ab_pathname[share_path_len]);
+
+       ksmbd_conv_path_to_windows(nt_pathname);
 
-       ksmbd_conv_path_to_windows(ab_pathname);
-       return ab_pathname;
+free_pathname:
+       kfree(pathname);
+       return nt_pathname;
 }
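convert_to_nt_pathname() now derives the NT path from the dentry via d_path() and rejects anything outside the share root, instead of trusting a stored filename string. A hedged userspace re-derivation of just the string transformation (nt_path_from() is illustrative, not ksmbd code):

#include <stdlib.h>
#include <string.h>

/* Illustrative only: given an absolute path and the share root it must
 * live under, return a freshly allocated Windows-style relative path
 * ("/share/a/b" under "/share" -> "\a\b"), or NULL on mismatch/OOM. */
static char *nt_path_from(const char *abs_path, const char *share_root)
{
	size_t root_len = strlen(share_root);
	char *out, *p;

	if (strncmp(abs_path, share_root, root_len) != 0)
		return NULL;			/* outside the share: reject */

	out = malloc(strlen(abs_path) - root_len + 2);
	if (!out)
		return NULL;
	strcpy(out, abs_path[root_len] ? abs_path + root_len : "/");
	for (p = out; *p; p++)
		if (*p == '/')
			*p = '\\';	/* ksmbd_conv_path_to_windows() analogue */
	return out;
}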
 
 int get_nlink(struct kstat *st)
index 253366bd0951aa987d91c69143acc9a5d34efaa3..aae2a252945f871d80b1db861d711871fd05c860 100644 (file)
@@ -14,7 +14,8 @@ struct ksmbd_file;
 int match_pattern(const char *str, size_t len, const char *pattern);
 int ksmbd_validate_filename(char *filename);
 int parse_stream_name(char *filename, char **stream_name, int *s_type);
-char *convert_to_nt_pathname(char *filename);
+char *convert_to_nt_pathname(struct ksmbd_share_config *share,
+                            struct path *path);
 int get_nlink(struct kstat *st);
 void ksmbd_conv_path_to_unix(char *path);
 void ksmbd_strip_last_slash(char *path);
index 23871b18a4292275a9cf9f6ffde22307f249f6d5..8b5560574d4c59eb40b3d91c68c9e6473efd4f27 100644 (file)
@@ -1694,33 +1694,3 @@ out:
        read_unlock(&lease_list_lock);
        return ret_op;
 }
-
-int smb2_check_durable_oplock(struct ksmbd_file *fp,
-                             struct lease_ctx_info *lctx, char *name)
-{
-       struct oplock_info *opinfo = opinfo_get(fp);
-       int ret = 0;
-
-       if (opinfo && opinfo->is_lease) {
-               if (!lctx) {
-                       pr_err("open does not include lease\n");
-                       ret = -EBADF;
-                       goto out;
-               }
-               if (memcmp(opinfo->o_lease->lease_key, lctx->lease_key,
-                          SMB2_LEASE_KEY_SIZE)) {
-                       pr_err("invalid lease key\n");
-                       ret = -EBADF;
-                       goto out;
-               }
-               if (name && strcmp(fp->filename, name)) {
-                       pr_err("invalid name reconnect %s\n", name);
-                       ret = -EINVAL;
-                       goto out;
-               }
-       }
-out:
-       if (opinfo)
-               opinfo_put(opinfo);
-       return ret;
-}
index 0cf7a2b5bbc0657c44d49f52d0f92f638fb545e7..09753448f7798de7ff5af0bc1aa38096861d05b5 100644 (file)
@@ -124,6 +124,4 @@ struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn,
 int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci,
                        struct lease_ctx_info *lctx);
 void destroy_lease_table(struct ksmbd_conn *conn);
-int smb2_check_durable_oplock(struct ksmbd_file *fp,
-                             struct lease_ctx_info *lctx, char *name);
 #endif /* __KSMBD_OPLOCK_H */
index 3bf6c56c654cfbe806fd0ec634bca17f12bcb717..16c803a9d996f3f429b14f273f391d07576b2890 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/statfs.h>
 #include <linux/ethtool.h>
 #include <linux/falloc.h>
+#include <linux/mount.h>
 
 #include "glob.h"
 #include "smbfsctl.h"
@@ -2918,7 +2919,6 @@ int smb2_open(struct ksmbd_work *work)
                goto err_out;
        }
 
-       fp->filename = name;
        fp->cdoption = req->CreateDisposition;
        fp->daccess = daccess;
        fp->saccess = req->ShareAccess;
@@ -3270,14 +3270,13 @@ err_out1:
                if (!rsp->hdr.Status)
                        rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
 
-               if (!fp || !fp->filename)
-                       kfree(name);
                if (fp)
                        ksmbd_fd_put(work, fp);
                smb2_set_err_rsp(work);
                ksmbd_debug(SMB, "Error response: %x\n", rsp->hdr.Status);
        }
 
+       kfree(name);
        kfree(lc);
 
        return 0;
@@ -3895,8 +3894,6 @@ int smb2_query_dir(struct ksmbd_work *work)
                ksmbd_debug(SMB, "Search pattern is %s\n", srch_ptr);
        }
 
-       ksmbd_debug(SMB, "Directory name is %s\n", dir_fp->filename);
-
        if (srch_flag & SMB2_REOPEN || srch_flag & SMB2_RESTART_SCANS) {
                ksmbd_debug(SMB, "Restart directory scan\n");
                generic_file_llseek(dir_fp->filp, 0, SEEK_SET);
@@ -4390,9 +4387,9 @@ static int get_file_all_info(struct ksmbd_work *work,
                return -EACCES;
        }
 
-       filename = convert_to_nt_pathname(fp->filename);
-       if (!filename)
-               return -ENOMEM;
+       filename = convert_to_nt_pathname(work->tcon->share_conf, &fp->filp->f_path);
+       if (IS_ERR(filename))
+               return PTR_ERR(filename);
 
        inode = file_inode(fp->filp);
        generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat);
@@ -4999,15 +4996,17 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
        case FS_SECTOR_SIZE_INFORMATION:
        {
                struct smb3_fs_ss_info *info;
+               unsigned int sector_size =
+                       min_t(unsigned int, path.mnt->mnt_sb->s_blocksize, 4096);
 
                info = (struct smb3_fs_ss_info *)(rsp->Buffer);
 
-               info->LogicalBytesPerSector = cpu_to_le32(stfs.f_bsize);
+               info->LogicalBytesPerSector = cpu_to_le32(sector_size);
                info->PhysicalBytesPerSectorForAtomicity =
-                               cpu_to_le32(stfs.f_bsize);
-               info->PhysicalBytesPerSectorForPerf = cpu_to_le32(stfs.f_bsize);
+                               cpu_to_le32(sector_size);
+               info->PhysicalBytesPerSectorForPerf = cpu_to_le32(sector_size);
                info->FSEffPhysicalBytesPerSectorForAtomicity =
-                               cpu_to_le32(stfs.f_bsize);
+                               cpu_to_le32(sector_size);
                info->Flags = cpu_to_le32(SSINFO_FLAGS_ALIGNED_DEVICE |
                                    SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE);
                info->ByteOffsetForSectorAlignment = 0;
@@ -5683,8 +5682,7 @@ static int set_file_allocation_info(struct ksmbd_work *work,
                size = i_size_read(inode);
                rc = ksmbd_vfs_truncate(work, fp, alloc_blks * 512);
                if (rc) {
-                       pr_err("truncate failed! filename : %s, err %d\n",
-                              fp->filename, rc);
+                       pr_err("truncate failed, err %d\n", rc);
                        return rc;
                }
                if (size < alloc_blks * 512)
@@ -5714,12 +5712,10 @@ static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp,
         * truncated range.
         */
        if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC) {
-               ksmbd_debug(SMB, "filename : %s truncated to newsize %lld\n",
-                           fp->filename, newsize);
+               ksmbd_debug(SMB, "truncated to newsize %lld\n", newsize);
                rc = ksmbd_vfs_truncate(work, fp, newsize);
                if (rc) {
-                       ksmbd_debug(SMB, "truncate failed! filename : %s err %d\n",
-                                   fp->filename, rc);
+                       ksmbd_debug(SMB, "truncate failed, err %d\n", rc);
                        if (rc != -EAGAIN)
                                rc = -EBADF;
                        return rc;
@@ -5765,8 +5761,10 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
        if (parent_fp) {
                if (parent_fp->daccess & FILE_DELETE_LE) {
                        pr_err("parent dir is opened with delete access\n");
+                       ksmbd_fd_put(work, parent_fp);
                        return -ESHARE;
                }
+               ksmbd_fd_put(work, parent_fp);
        }
 next:
        return smb2_rename(work, fp, user_ns, rename_info,
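
The two added ksmbd_fd_put() calls fix a reference leak: the parent file object arrives with a reference held, so both the error return and the fall-through path must drop it. A user-space sketch of the put-on-every-path rule, with hypothetical types:

#include <stdatomic.h>
#include <stdlib.h>

struct obj { atomic_int ref; };

static void obj_put(struct obj *o)
{
        if (atomic_fetch_sub(&o->ref, 1) == 1)
                free(o);
}

/* 'parent' arrives with one reference held by the caller's lookup. */
static int check_parent(struct obj *parent, int delete_access)
{
        if (parent) {
                if (delete_access) {
                        obj_put(parent);        /* leaked before the fix */
                        return -1;              /* -ESHARE analogue */
                }
                obj_put(parent);
        }
        return 0;
}
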
index 9cebb6ba555b6d69841d1ae2eb4beb3eb2d9c300..dcdd07c6efffd58250dd37868de3a7acbb2a5476 100644 (file)
@@ -398,8 +398,7 @@ int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp, size_t count,
 
        nbytes = kernel_read(filp, rbuf, count, pos);
        if (nbytes < 0) {
-               pr_err("smb read failed for (%s), err = %zd\n",
-                      fp->filename, nbytes);
+               pr_err("smb read failed, err = %zd\n", nbytes);
                return nbytes;
        }
 
@@ -875,8 +874,7 @@ int ksmbd_vfs_truncate(struct ksmbd_work *work,
 
        err = vfs_truncate(&filp->f_path, size);
        if (err)
-               pr_err("truncate failed for filename : %s err %d\n",
-                      fp->filename, err);
+               pr_err("truncate failed, err %d\n", err);
        return err;
 }
 
index 29c1db66bd0f73844775fe4c3c4966a19e2e9547..c4d59d2735f00fd490efe3e6fe36b635cfdebee8 100644 (file)
@@ -328,7 +328,6 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
                kfree(smb_lock);
        }
 
-       kfree(fp->filename);
        if (ksmbd_stream_fd(fp))
                kfree(fp->stream.name);
        kmem_cache_free(filp_cache, fp);
@@ -497,6 +496,7 @@ struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode)
        list_for_each_entry(lfp, &ci->m_fp_list, node) {
                if (inode == file_inode(lfp->filp)) {
                        atomic_dec(&ci->m_count);
+                       lfp = ksmbd_fp_get(lfp);
                        read_unlock(&ci->m_lock);
                        return lfp;
                }
index 36239ce31afd5af201befe17693f04cbb94106ac..fcb13413fa8d9366c12e5a0d506412d3e8159060 100644 (file)
@@ -62,7 +62,6 @@ struct ksmbd_inode {
 
 struct ksmbd_file {
        struct file                     *filp;
-       char                            *filename;
        u64                             persistent_id;
        u64                             volatile_id;
 
index 3f1829b3ab5b7cda8e2a48071efd6ac23cff4bac..509657fdf4f56dd23ebd9497fbe9073fe1904608 100644 (file)
@@ -3673,18 +3673,14 @@ static struct dentry *filename_create(int dfd, struct filename *name,
 {
        struct dentry *dentry = ERR_PTR(-EEXIST);
        struct qstr last;
+       bool want_dir = lookup_flags & LOOKUP_DIRECTORY;
+       unsigned int reval_flag = lookup_flags & LOOKUP_REVAL;
+       unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL;
        int type;
        int err2;
        int error;
-       bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
 
-       /*
-        * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any
-        * other flags passed in are ignored!
-        */
-       lookup_flags &= LOOKUP_REVAL;
-
-       error = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
+       error = filename_parentat(dfd, name, reval_flag, path, &last, &type);
        if (error)
                return ERR_PTR(error);
 
@@ -3698,11 +3694,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
        /* don't fail immediately if it's r/o, at least try to report other errors */
        err2 = mnt_want_write(path->mnt);
        /*
-        * Do the final lookup.
+        * Do the final lookup.  Suppress 'create' if there is a trailing
+        * '/', and a directory wasn't requested.
         */
-       lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
+       if (last.name[last.len] && !want_dir)
+               create_flags = 0;
        inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
-       dentry = __lookup_hash(&last, path->dentry, lookup_flags);
+       dentry = __lookup_hash(&last, path->dentry, reval_flag | create_flags);
        if (IS_ERR(dentry))
                goto unlock;
 
@@ -3716,7 +3714,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
         * all is fine. Let's be bastards - you had / on the end, you've
         * been asking for (non-existent) directory. -ENOENT for you.
         */
-       if (unlikely(!is_dir && last.name[last.len])) {
+       if (unlikely(!create_flags)) {
                error = -ENOENT;
                goto fail;
        }
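
The key observation in this hunk is that last.name[last.len] is non-NUL exactly when the final component is followed by a '/', so a trailing slash on a non-directory create can suppress LOOKUP_CREATE|LOOKUP_EXCL up front, turning the old post-lookup check into a test of !create_flags. A trivial user-space analogue of the trailing-slash test:

#include <stdbool.h>
#include <string.h>

/* Mirrors the intent of last.name[last.len]: anything after the last
 * component means the caller spelled a trailing '/'. */
static bool has_trailing_slash(const char *path)
{
        size_t len = strlen(path);

        return len > 0 && path[len - 1] == '/';
}
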
index a0a36bfa3aa0543b75da0c6784dbacb50f53d805..afe2b64b14f1fa8e8bd9fa1dc1b2726f2df87faf 100644 (file)
@@ -4058,10 +4058,22 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt)
        if (err) {
                struct mount *p;
 
-               for (p = mnt; p != m; p = next_mnt(p, mnt)) {
+               /*
+                * If we had to call mnt_hold_writers() MNT_WRITE_HOLD will
+                * be set in @mnt_flags. The loop unsets MNT_WRITE_HOLD for all
+                * mounts and needs to take care to include the first mount.
+                */
+               for (p = mnt; p; p = next_mnt(p, mnt)) {
                        /* If we had to hold writers unblock them. */
                        if (p->mnt.mnt_flags & MNT_WRITE_HOLD)
                                mnt_unhold_writers(p);
+
+                       /*
+                        * We're done once the first mount we changed got
+                        * MNT_WRITE_HOLD unset.
+                        */
+                       if (p == m)
+                               break;
                }
        }
        return err;
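
The loop condition change (from "p != m" to "p", with a break after processing m) makes the unwind inclusive, so the first mount that got MNT_WRITE_HOLD set is also unblocked. A sketch of the inclusive-unwind shape, with a hypothetical list type:

struct node { struct node *next; int held; };

static void unwind(struct node *first, struct node *stop)
{
        for (struct node *p = first; p; p = p->next) {
                if (p->held)
                        p->held = 0;    /* e.g. mnt_unhold_writers() */
                if (p == stop)          /* stop *after* undoing 'stop' */
                        break;
        }
}
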
index 47a53b3362b628001e7d7fa6f21d15f748b01a09..14a72224b6571b9617d586f766e39dcb5f1772eb 100644 (file)
@@ -4,10 +4,6 @@ config NFS_FS
        depends on INET && FILE_LOCKING && MULTIUSER
        select LOCKD
        select SUNRPC
-       select CRYPTO
-       select CRYPTO_HASH
-       select XXHASH
-       select CRYPTO_XXHASH
        select NFS_ACL_SUPPORT if NFS_V3_ACL
        help
          Choose Y here if you want to access files residing on other
index bac4cf1a308efe8c9ef55b582d8fef9ace5d9430..c6b263b5faf1fccb3ac1fe22ac2fb469ae7a740a 100644 (file)
@@ -39,7 +39,7 @@
 #include <linux/sched.h>
 #include <linux/kmemleak.h>
 #include <linux/xattr.h>
-#include <linux/xxhash.h>
+#include <linux/hash.h>
 
 #include "delegation.h"
 #include "iostat.h"
@@ -350,10 +350,7 @@ out:
  * of directory cookies. Content is addressed by the value of the
  * cookie index of the first readdir entry in a page.
  *
- * The xxhash algorithm is chosen because it is fast, and is supposed
- * to result in a decent flat distribution of hashes.
- *
- * We then select only the first 18 bits to avoid issues with excessive
+ * We select only the first 18 bits to avoid issues with excessive
  * memory use for the page cache XArray. 18 bits should allow the caching
  * of 262144 pages of sequences of readdir entries. Since each page holds
  * 127 readdir entries for a typical 64-bit system, that works out to a
@@ -363,7 +360,7 @@ static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie)
 {
        if (cookie == 0)
                return 0;
-       return xxhash(&cookie, sizeof(cookie), 0) & NFS_READDIR_COOKIE_MASK;
+       return hash_64(cookie, 18);
 }
 
 static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie,
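
hash_64() is the kernel's golden-ratio multiplicative hash, so the xxhash dependency and the explicit 18-bit mask can both go away: asking for 18 bits directly yields an index in [0, 2^18). A user-space restatement, with the constant as in include/linux/hash.h:

#include <stdint.h>

#define GOLDEN_RATIO_64 0x61C8864680B583EBull

static inline uint64_t hash64(uint64_t val, unsigned int bits)
{
        return (val * GOLDEN_RATIO_64) >> (64 - bits);  /* top 'bits' bits */
}

/* hash64(cookie, 18) matches hash_64(cookie, 18) in the hunk above. */
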
@@ -1991,16 +1988,6 @@ const struct dentry_operations nfs4_dentry_operations = {
 };
 EXPORT_SYMBOL_GPL(nfs4_dentry_operations);
 
-static fmode_t flags_to_mode(int flags)
-{
-       fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
-       if ((flags & O_ACCMODE) != O_WRONLY)
-               res |= FMODE_READ;
-       if ((flags & O_ACCMODE) != O_RDONLY)
-               res |= FMODE_WRITE;
-       return res;
-}
-
 static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp)
 {
        return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp);
index e2d59bb5e6bbe788953443e6fa54b6ead01c7d7d..9a16897e8dc6b2036c30148f8793604e046097ec 100644 (file)
@@ -517,7 +517,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
                if (result.negated)
                        ctx->flags &= ~NFS_MOUNT_SOFTREVAL;
                else
-                       ctx->flags &= NFS_MOUNT_SOFTREVAL;
+                       ctx->flags |= NFS_MOUNT_SOFTREVAL;
                break;
        case Opt_posix:
                if (result.negated)
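
The one-character fix above is the classic flag-mask bug: "flags &= FLAG" clears every other bit and only keeps FLAG if it was already set, while "flags |= FLAG" sets it and preserves the rest. A standalone demonstration:

#include <assert.h>

#define FLAG_A 0x1u
#define FLAG_B 0x2u

int main(void)
{
        unsigned int wrong = FLAG_A, right = FLAG_A;

        wrong &= FLAG_B;        /* 0x0: FLAG_A lost, FLAG_B never set */
        right |= FLAG_B;        /* 0x3: both flags set */

        assert(wrong == 0x0u);
        assert(right == (FLAG_A | FLAG_B));
        return 0;
}
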
index 7eb3b08d702f89311ea44efda0125c331fc2f500..b4e46b0ffa2dc04d268827312286af190fef1ab3 100644 (file)
@@ -1180,7 +1180,6 @@ int nfs_open(struct inode *inode, struct file *filp)
        nfs_fscache_open_file(inode, filp);
        return 0;
 }
-EXPORT_SYMBOL_GPL(nfs_open);
 
 /*
  * This function is called whenever some part of NFS notices that
index 57b0497105c80306c1ddc4652c21d319326234e8..7eefa16ed381bf6f32689ce884d069ba5406d5bd 100644 (file)
@@ -42,6 +42,16 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry)
        return true;
 }
 
+static inline fmode_t flags_to_mode(int flags)
+{
+       fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
+       if ((flags & O_ACCMODE) != O_WRONLY)
+               res |= FMODE_READ;
+       if ((flags & O_ACCMODE) != O_RDONLY)
+               res |= FMODE_WRITE;
+       return res;
+}
+
 /*
  * Note: RFC 1813 doesn't limit the number of auth flavors that
  * a server can return, so make something up.
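
Moving flags_to_mode() into the shared header lets nfs4_file_open() (below) reuse it. Its truth table: O_RDONLY gives READ, O_WRONLY gives WRITE, and O_RDWR or the special O_ACCMODE value 3 gives both, with the FMODE_EXEC bit passed through unchanged. A user-space sketch of the same mapping (FMODE_EXEC handling omitted):

#include <assert.h>
#include <fcntl.h>

#define M_READ  0x1
#define M_WRITE 0x2

static int mode_of(int flags)
{
        int res = 0;

        if ((flags & O_ACCMODE) != O_WRONLY)
                res |= M_READ;
        if ((flags & O_ACCMODE) != O_RDONLY)
                res |= M_WRITE;
        return res;
}

int main(void)
{
        assert(mode_of(O_RDONLY) == M_READ);
        assert(mode_of(O_WRONLY) == M_WRITE);
        assert(mode_of(O_RDWR)   == (M_READ | M_WRITE));
        assert(mode_of(3)        == (M_READ | M_WRITE)); /* O_ACCMODE == 3 */
        return 0;
}
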
index ad3405c64b9e428815fba562792f37fcaa4a8025..e7b34f7e0614b6ff47836777c98fd21a07ba054d 100644 (file)
@@ -997,7 +997,7 @@ int __init nfs4_xattr_cache_init(void)
 
        nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
            sizeof(struct nfs4_xattr_cache), 0,
-           (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+           (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
            nfs4_xattr_cache_init_once);
        if (nfs4_xattr_cache_cachep == NULL)
                return -ENOMEM;
index d258933cf8c881ab9242f264cb3b7bc65e658047..7b861e4f0533ac0a83dfa1653e35f4ea36d7c108 100644 (file)
@@ -32,6 +32,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
        struct dentry *parent = NULL;
        struct inode *dir;
        unsigned openflags = filp->f_flags;
+       fmode_t f_mode;
        struct iattr attr;
        int err;
 
@@ -50,8 +51,9 @@ nfs4_file_open(struct inode *inode, struct file *filp)
        if (err)
                return err;
 
+       f_mode = filp->f_mode;
        if ((openflags & O_ACCMODE) == 3)
-               return nfs_open(inode, filp);
+               f_mode |= flags_to_mode(openflags);
 
        /* We can't create new files here */
        openflags &= ~(O_CREAT|O_EXCL);
@@ -59,7 +61,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
        parent = dget_parent(dentry);
        dir = d_inode(parent);
 
-       ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp);
+       ctx = alloc_nfs_open_context(file_dentry(filp), f_mode, filp);
        err = PTR_ERR(ctx);
        if (IS_ERR(ctx))
                goto out;
index e3f5b380cefe95e39ddf0151bdabc3004a27b544..a79f66432bd3985cdfc486560ac5461ed6dd306f 100644 (file)
@@ -363,6 +363,14 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
        kunmap_atomic(start);
 }
 
+static void nfs4_fattr_set_prechange(struct nfs_fattr *fattr, u64 version)
+{
+       if (!(fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) {
+               fattr->pre_change_attr = version;
+               fattr->valid |= NFS_ATTR_FATTR_PRECHANGE;
+       }
+}
+
 static void nfs4_test_and_free_stateid(struct nfs_server *server,
                nfs4_stateid *stateid,
                const struct cred *cred)
@@ -6553,7 +6561,9 @@ static void nfs4_delegreturn_release(void *calldata)
                pnfs_roc_release(&data->lr.arg, &data->lr.res,
                                 data->res.lr_ret);
        if (inode) {
-               nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
+               nfs4_fattr_set_prechange(&data->fattr,
+                                        inode_peek_iversion_raw(inode));
+               nfs_refresh_inode(inode, &data->fattr);
                nfs_iput_and_deactive(inode);
        }
        kfree(calldata);
@@ -9615,6 +9625,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
        nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0);
 
        task = rpc_run_task(&task_setup_data);
+       if (IS_ERR(task))
+               return ERR_CAST(task);
 
        status = rpc_wait_for_completion_task(task);
        if (status != 0)
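
rpc_run_task() returns an ERR_PTR on failure, and the added check stops the code from waiting on (and dereferencing) an error-encoded pointer; ERR_CAST() merely re-types it for the caller. A user-space sketch of the ERR_PTR convention, simplified from include/linux/err.h:

#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *run_task(int fail)
{
        /* hypothetical stand-in for rpc_run_task() */
        return fail ? ERR_PTR(-12) : (void *)0x1000;    /* -12 == -ENOMEM */
}

int main(void)
{
        void *task = run_task(1);

        if (IS_ERR(task)) {             /* must be checked before use */
                printf("error %ld\n", PTR_ERR(task));
                return 1;
        }
        return 0;
}
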
index 5fa11e1aca4c2759358aa37c7c36691c1bc4bcd4..6f325e10056cebecf767d4a988715c1d5b58737c 100644 (file)
@@ -347,6 +347,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (data == NULL)
                return ERR_PTR(-ENOMEM);
+       task_setup_data.task = &data->task;
        task_setup_data.callback_data = data;
 
        data->cred = get_current_cred();
index c08882f5867b2a3268cc0358f7e13704840df183..2c1b027774d41c84c6f8e334c7e4a9f6bfaefaf4 100644 (file)
@@ -236,6 +236,13 @@ nfsd_file_check_write_error(struct nfsd_file *nf)
        return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
 }
 
+static void
+nfsd_file_flush(struct nfsd_file *nf)
+{
+       if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
+               nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+}
+
 static void
 nfsd_file_do_unhash(struct nfsd_file *nf)
 {
@@ -295,19 +302,15 @@ nfsd_file_put_noref(struct nfsd_file *nf)
 void
 nfsd_file_put(struct nfsd_file *nf)
 {
-       bool is_hashed;
-
        set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
-       if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) {
+       if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
+               nfsd_file_flush(nf);
                nfsd_file_put_noref(nf);
-               return;
+       } else {
+               nfsd_file_put_noref(nf);
+               if (nf->nf_file)
+                       nfsd_file_schedule_laundrette();
        }
-
-       filemap_flush(nf->nf_file->f_mapping);
-       is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
-       nfsd_file_put_noref(nf);
-       if (is_hashed)
-               nfsd_file_schedule_laundrette();
        if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
                nfsd_file_gc();
 }
@@ -328,6 +331,7 @@ nfsd_file_dispose_list(struct list_head *dispose)
        while(!list_empty(dispose)) {
                nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
                list_del(&nf->nf_lru);
+               nfsd_file_flush(nf);
                nfsd_file_put_noref(nf);
        }
 }
@@ -341,6 +345,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose)
        while(!list_empty(dispose)) {
                nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
                list_del(&nf->nf_lru);
+               nfsd_file_flush(nf);
                if (!refcount_dec_and_test(&nf->nf_ref))
                        continue;
                if (nfsd_file_free(nf))
index 367551bddfc63e512d51d7b0012d6aa89cf2c570..b5760801d3775a723bb021f2a1145b615edadfea 100644 (file)
@@ -249,34 +249,34 @@ nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
        int w;
 
        if (!svcxdr_encode_stat(xdr, resp->status))
-               return 0;
+               return false;
 
        if (dentry == NULL || d_really_is_negative(dentry))
-               return 1;
+               return true;
        inode = d_inode(dentry);
 
        if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
-               return 0;
+               return false;
        if (xdr_stream_encode_u32(xdr, resp->mask) < 0)
-               return 0;
+               return false;
 
        rqstp->rq_res.page_len = w = nfsacl_size(
                (resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
                (resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
        while (w > 0) {
                if (!*(rqstp->rq_next_page++))
-                       return 1;
+                       return true;
                w -= PAGE_SIZE;
        }
 
        if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access,
                                   resp->mask & NFS_ACL, 0))
-               return 0;
+               return false;
        if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default,
                                   resp->mask & NFS_DFACL, NFS_ACL_DEFAULT))
-               return 0;
+               return false;
 
-       return 1;
+       return true;
 }
 
 /* ACCESS */
@@ -286,17 +286,17 @@ nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
        struct nfsd3_accessres *resp = rqstp->rq_resp;
 
        if (!svcxdr_encode_stat(xdr, resp->status))
-               return 0;
+               return false;
        switch (resp->status) {
        case nfs_ok:
                if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
-                       return 0;
+                       return false;
                if (xdr_stream_encode_u32(xdr, resp->access) < 0)
-                       return 0;
+                       return false;
                break;
        }
 
-       return 1;
+       return true;
 }
 
 /*
index fec194a666f4b771798876a2d6b22a16145047bc..87e1004b606d20d9726a8b58faabe5460eb4b1d7 100644 (file)
@@ -1052,20 +1052,20 @@ out:
 static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp)
 {
        struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
-       struct request_queue *q = bdev_get_queue(nilfs->ns_bdev);
        struct fstrim_range range;
        int ret;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(nilfs->ns_bdev))
                return -EOPNOTSUPP;
 
        if (copy_from_user(&range, argp, sizeof(range)))
                return -EFAULT;
 
-       range.minlen = max_t(u64, range.minlen, q->limits.discard_granularity);
+       range.minlen = max_t(u64, range.minlen,
+                            bdev_discard_granularity(nilfs->ns_bdev));
 
        down_read(&nilfs->ns_segctor_sem);
        ret = nilfs_sufile_trim_fs(nilfs->ns_sufile, &range);
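
This is the first of several identical FITRIM conversions in this section (ntfs3, ocfs2 and xfs follow the same shape): the request queue is no longer dereferenced, discard support is probed with bdev_max_discard_sectors(), and minlen is clamped with bdev_discard_granularity(). A condensed user-space sketch of the shared shape, with the two bdev helpers stubbed out:

#include <stdint.h>

struct fstrim_range { uint64_t start, len, minlen; };

/* Stand-ins for bdev_max_discard_sectors()/bdev_discard_granularity(). */
static unsigned int max_discard_sectors(void) { return 8192; }
static unsigned int discard_granularity(void) { return 4096; }

static int trim_fs(struct fstrim_range *range)
{
        if (!max_discard_sectors())     /* 0 means no discard support */
                return -95;             /* -EOPNOTSUPP */

        if (range->minlen < discard_granularity())
                range->minlen = discard_granularity();

        /* ... walk free extents and discard runs >= minlen ... */
        return 0;
}

int main(void)
{
        struct fstrim_range r = { 0, ~0ull, 512 };

        return trim_fs(&r);
}
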
index e385cca2004a7bb9b8b38408989a1e07fd68e6e3..77ff8e95421fa86b3b3a0b0afa1b89936717db91 100644 (file)
@@ -1100,7 +1100,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
                                ret = blkdev_issue_discard(nilfs->ns_bdev,
                                                start * sects_per_block,
                                                nblocks * sects_per_block,
-                                               GFP_NOFS, 0);
+                                               GFP_NOFS);
                                if (ret < 0) {
                                        put_bh(su_bh);
                                        goto out_sem;
@@ -1134,7 +1134,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
                        ret = blkdev_issue_discard(nilfs->ns_bdev,
                                        start * sects_per_block,
                                        nblocks * sects_per_block,
-                                       GFP_NOFS, 0);
+                                       GFP_NOFS);
                        if (!ret)
                                ndiscarded += nblocks;
                }
index dd48a8f74d577c76aa9af5844029405ac9b16684..3b4a079c9617c78438e84c5a91817b329b57b9d8 100644 (file)
@@ -672,7 +672,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
                        ret = blkdev_issue_discard(nilfs->ns_bdev,
                                                   start * sects_per_block,
                                                   nblocks * sects_per_block,
-                                                  GFP_NOFS, 0);
+                                                  GFP_NOFS);
                        if (ret < 0)
                                return ret;
                        nblocks = 0;
@@ -682,7 +682,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
                ret = blkdev_issue_discard(nilfs->ns_bdev,
                                           start * sects_per_block,
                                           nblocks * sects_per_block,
-                                          GFP_NOFS, 0);
+                                          GFP_NOFS);
        return ret;
 }
 
index 9b32b76a9c303c408709dd33f739f2867cec5244..a792e21c530993a0d721921ad945b65fcabc095d 100644 (file)
@@ -1657,6 +1657,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
        else
                mnt = path.mnt;
 
+       /*
+        * FAN_RENAME is not allowed on non-dir (for now).
+        * We shouldn't have allowed setting any dirent events in mask of
+        * non-dir, but because we always allowed it, error only if group
+        * was initialized with the new flag FAN_REPORT_TARGET_FID.
+        */
+       ret = -ENOTDIR;
+       if (inode && !S_ISDIR(inode->i_mode) &&
+           ((mask & FAN_RENAME) ||
+            ((mask & FANOTIFY_DIRENT_EVENTS) &&
+             FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID))))
+               goto path_put_and_out;
+
        /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
        if (mnt || !S_ISDIR(inode->i_mode)) {
                mask &= ~FAN_EVENT_ON_CHILD;
index 787b53b984ee17ae07aff29f2972319d437387b7..15806eeae217a048eb091d3c5cc7e4f31fef0834 100644 (file)
@@ -22,20 +22,20 @@ static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg)
 {
        struct fstrim_range __user *user_range;
        struct fstrim_range range;
-       struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
        int err;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(sbi->sb->s_bdev))
                return -EOPNOTSUPP;
 
        user_range = (struct fstrim_range __user *)arg;
        if (copy_from_user(&range, user_range, sizeof(range)))
                return -EFAULT;
 
-       range.minlen = max_t(u32, range.minlen, q->limits.discard_granularity);
+       range.minlen = max_t(u32, range.minlen,
+                            bdev_discard_granularity(sbi->sb->s_bdev));
 
        err = ntfs_trim_fs(sbi, &range);
        if (err < 0)
index 278dcf502410231b3af89241b9cc0eef0af382db..5781b9e8e3d85b54e44f682d2cc6b49b89f97669 100644 (file)
@@ -882,7 +882,6 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
        int err;
        struct ntfs_sb_info *sbi = sb->s_fs_info;
        struct block_device *bdev = sb->s_bdev;
-       struct request_queue *rq;
        struct inode *inode;
        struct ntfs_inode *ni;
        size_t i, tt;
@@ -912,15 +911,14 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
                goto out;
        }
 
-       rq = bdev_get_queue(bdev);
-       if (blk_queue_discard(rq) && rq->limits.discard_granularity) {
-               sbi->discard_granularity = rq->limits.discard_granularity;
+       if (bdev_max_discard_sectors(bdev) && bdev_discard_granularity(bdev)) {
+               sbi->discard_granularity = bdev_discard_granularity(bdev);
                sbi->discard_granularity_mask_inv =
                        ~(u64)(sbi->discard_granularity - 1);
        }
 
        /* Parse boot. */
-       err = ntfs_init_from_boot(sb, rq ? queue_logical_block_size(rq) : 512,
+       err = ntfs_init_from_boot(sb, bdev_logical_block_size(bdev),
                                  bdev_nr_bytes(bdev));
        if (err)
                goto out;
@@ -1335,7 +1333,7 @@ int ntfs_discard(struct ntfs_sb_info *sbi, CLST lcn, CLST len)
                return 0;
 
        err = blkdev_issue_discard(sb->s_bdev, start >> 9, (end - start) >> 9,
-                                  GFP_NOFS, 0);
+                                  GFP_NOFS);
 
        if (err == -EOPNOTSUPP)
                sbi->flags |= NTFS_FLAGS_NODISCARD;
index f59461d85da4595dafd9119f9df09a77fefa8ead..afd54ec661030d08587764ee48c49c0359bc22c4 100644 (file)
@@ -903,20 +903,19 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        case FITRIM:
        {
                struct super_block *sb = inode->i_sb;
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
                struct fstrim_range range;
                int ret = 0;
 
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
 
-               if (!blk_queue_discard(q))
+               if (!bdev_max_discard_sectors(sb->s_bdev))
                        return -EOPNOTSUPP;
 
                if (copy_from_user(&range, argp, sizeof(range)))
                        return -EFAULT;
 
-               range.minlen = max_t(u64, q->limits.discard_granularity,
+               range.minlen = max_t(u64, bdev_discard_granularity(sb->s_bdev),
                                     range.minlen);
                ret = ocfs2_trim_fs(sb, &range);
                if (ret < 0)
index 9648ac15164a2194fa6e28812777aac0ffc550fe..e140ea150bbb14e0ab0c83808e0b155b158bfc6a 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -804,7 +804,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
        if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
                goto out_revert_acct;
 
-       pipe->bufs = kvcalloc(pipe_bufs, sizeof(struct pipe_buffer),
+       pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
                             GFP_KERNEL_ACCOUNT);
 
        if (pipe->bufs) {
@@ -849,7 +849,7 @@ void free_pipe_info(struct pipe_inode_info *pipe)
 #endif
        if (pipe->tmp_page)
                __free_page(pipe->tmp_page);
-       kvfree(pipe->bufs);
+       kfree(pipe->bufs);
        kfree(pipe);
 }
 
@@ -1264,7 +1264,8 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
        if (nr_slots < n)
                return -EBUSY;
 
-       bufs = kvcalloc(nr_slots, sizeof(*bufs), GFP_KERNEL_ACCOUNT);
+       bufs = kcalloc(nr_slots, sizeof(*bufs),
+                      GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
        if (unlikely(!bufs))
                return -ENOMEM;
 
@@ -1291,7 +1292,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
        head = n;
        tail = 0;
 
-       kvfree(pipe->bufs);
+       kfree(pipe->bufs);
        pipe->bufs = bufs;
        pipe->ring_size = nr_slots;
        if (pipe->max_usage > nr_slots)
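
These pipe hunks move the buffer array back from kvcalloc()/kvfree() to kcalloc()/kfree(), adding __GFP_NOWARN on the user-sized resize so oversized requests fail quietly. Whatever the motivation for avoiding the vmalloc fallback here, the invariant on display is that allocator and free must be switched as a pair. Sketch with hypothetical names:

#include <stdlib.h>

struct ring { void **bufs; unsigned int nr; };

static int ring_resize(struct ring *r, unsigned int nr_slots)
{
        /* calloc() does the nr * size overflow check kcalloc() does;
         * __GFP_NOWARN has no analogue here, failure is just NULL. */
        void **bufs = calloc(nr_slots, sizeof(*bufs));

        if (!bufs)
                return -1;              /* -ENOMEM */
        free(r->bufs);                  /* kfree() pairs with kcalloc() */
        r->bufs = bufs;
        r->nr = nr_slots;
        return 0;
}
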
index 80acb6885cf90b6fce2aa5b64d7b6c3b10d6687a..962d32468eb487afd42a14da7afd7589aaeaac98 100644 (file)
@@ -759,9 +759,14 @@ static void posix_acl_fix_xattr_userns(
 }
 
 void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
+                                  struct inode *inode,
                                   void *value, size_t size)
 {
        struct user_namespace *user_ns = current_user_ns();
+
+       /* Leave ids untouched on non-idmapped mounts. */
+       if (no_idmapping(mnt_userns, i_user_ns(inode)))
+               mnt_userns = &init_user_ns;
        if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
                return;
        posix_acl_fix_xattr_userns(&init_user_ns, user_ns, mnt_userns, value,
@@ -769,9 +774,14 @@ void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
 }
 
 void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
+                                struct inode *inode,
                                 void *value, size_t size)
 {
        struct user_namespace *user_ns = current_user_ns();
+
+       /* Leave ids untouched on non-idmapped mounts. */
+       if (no_idmapping(mnt_userns, i_user_ns(inode)))
+               mnt_userns = &init_user_ns;
        if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
                return;
        posix_acl_fix_xattr_userns(user_ns, &init_user_ns, mnt_userns, value,
index 419760fd77bdd82b1fa1d1565bbcc8d13ae25967..f38bda5b83ec4418091fa52ea7e9758de56a6efe 100644 (file)
@@ -5,14 +5,10 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
-__weak void arch_freq_prepare_all(void)
-{
-}
-
 extern const struct seq_operations cpuinfo_op;
+
 static int cpuinfo_open(struct inode *inode, struct file *file)
 {
-       arch_freq_prepare_all();
        return seq_open(file, &cpuinfo_op);
 }
 
index 172c86270b3127571e745528544583ae55ff449c..913bef0d2a36c4ed0d49dfba1b18cac612cdbe5f 100644 (file)
@@ -72,7 +72,7 @@ out:
        return 0;
 }
 
-static int seq_fdinfo_open(struct inode *inode, struct file *file)
+static int proc_fdinfo_access_allowed(struct inode *inode)
 {
        bool allowed = false;
        struct task_struct *task = get_proc_task(inode);
@@ -86,6 +86,16 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file)
        if (!allowed)
                return -EACCES;
 
+       return 0;
+}
+
+static int seq_fdinfo_open(struct inode *inode, struct file *file)
+{
+       int ret = proc_fdinfo_access_allowed(inode);
+
+       if (ret)
+               return ret;
+
        return single_open(file, seq_show, inode);
 }
 
@@ -348,12 +358,23 @@ static int proc_readfdinfo(struct file *file, struct dir_context *ctx)
                                  proc_fdinfo_instantiate);
 }
 
+static int proc_open_fdinfo(struct inode *inode, struct file *file)
+{
+       int ret = proc_fdinfo_access_allowed(inode);
+
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
 const struct inode_operations proc_fdinfo_inode_operations = {
        .lookup         = proc_lookupfdinfo,
        .setattr        = proc_setattr,
 };
 
 const struct file_operations proc_fdinfo_operations = {
+       .open           = proc_open_fdinfo,
        .read           = generic_read_dir,
        .iterate_shared = proc_readfdinfo,
        .llseek         = generic_file_llseek,
index 622c844f6d118650ba573ebf78fa1fd07027af74..8879d052f96c6a0053096e61cb3d400beee78b38 100644 (file)
@@ -86,17 +86,10 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
        int error, i;
        struct bio *bio;
 
-       if (page_count <= BIO_MAX_VECS) {
-               bio = bio_alloc(sb->s_bdev, page_count, REQ_OP_READ, GFP_NOIO);
-       } else {
-               bio = bio_kmalloc(GFP_NOIO, page_count);
-               bio_set_dev(bio, sb->s_bdev);
-               bio->bi_opf = REQ_OP_READ;
-       }
-
+       bio = bio_kmalloc(page_count, GFP_NOIO);
        if (!bio)
                return -ENOMEM;
-
+       bio_init(bio, sb->s_bdev, bio->bi_inline_vecs, page_count, REQ_OP_READ);
        bio->bi_iter.bi_sector = block * (msblk->devblksize >> SECTOR_SHIFT);
 
        for (i = 0; i < page_count; ++i) {
@@ -126,7 +119,8 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
 
 out_free_bio:
        bio_free_pages(bio);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
        return error;
 }
 
@@ -190,7 +184,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
                        length |= data[0] << 8;
                }
                bio_free_pages(bio);
-               bio_put(bio);
+               bio_uninit(bio);
+               kfree(bio);
 
                compressed = SQUASHFS_COMPRESSED(length);
                length = SQUASHFS_COMPRESSED_SIZE(length);
@@ -224,7 +219,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
 
 out_free_bio:
        bio_free_pages(bio);
-       bio_put(bio);
+       bio_uninit(bio);
+       kfree(bio);
 out:
        if (res < 0) {
                ERROR("Failed to read block 0x%llx: %d\n", index, res);
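
In 5.19 bio_kmalloc() changed meaning: it now returns bare storage (with inline vecs) that the caller must set up with bio_init() and tear down with bio_uninit() plus kfree(), instead of a reference-counted bio released by bio_put(). A user-space analogue of that alloc/init/uninit/free lifecycle using a flexible array member:

#include <stdlib.h>
#include <string.h>

struct vec { void *page; unsigned int len; };

struct mini_bio {
        unsigned int nr_vecs;
        struct vec inline_vecs[];       /* storage follows the struct */
};

static struct mini_bio *mini_bio_kmalloc(unsigned int nr_vecs)
{
        return malloc(sizeof(struct mini_bio) + nr_vecs * sizeof(struct vec));
}

static void mini_bio_init(struct mini_bio *b, unsigned int nr_vecs)
{
        b->nr_vecs = nr_vecs;
        memset(b->inline_vecs, 0, nr_vecs * sizeof(struct vec));
}

static void mini_bio_uninit(struct mini_bio *b)
{
        (void)b;                        /* release anything init acquired */
}

int main(void)
{
        struct mini_bio *b = mini_bio_kmalloc(4);

        if (!b)
                return 1;
        mini_bio_init(b, 4);
        /* ... fill vecs, submit, wait ... */
        mini_bio_uninit(b);
        free(b);                        /* plain free, not a refcount put */
        return 0;
}
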
index 7f734be0e57ec9f5d69927a672d7e4b20a4b006e..5c2c94464e8b0242430dc050f96678c35511509d 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -348,9 +348,6 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat
 #  define choose_32_64(a,b) b
 #endif
 
-#define valid_dev(x)  choose_32_64(old_valid_dev(x),true)
-#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x)
-
 #ifndef INIT_STRUCT_STAT_PADDING
 #  define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st))
 #endif
@@ -359,7 +356,9 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
 {
        struct stat tmp;
 
-       if (!valid_dev(stat->dev) || !valid_dev(stat->rdev))
+       if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev))
+               return -EOVERFLOW;
+       if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev))
                return -EOVERFLOW;
 #if BITS_PER_LONG == 32
        if (stat->size > MAX_NON_LFS)
@@ -367,7 +366,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
 #endif
 
        INIT_STRUCT_STAT_PADDING(tmp);
-       tmp.st_dev = encode_dev(stat->dev);
+       tmp.st_dev = new_encode_dev(stat->dev);
        tmp.st_ino = stat->ino;
        if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
                return -EOVERFLOW;
@@ -377,7 +376,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
                return -EOVERFLOW;
        SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
        SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
-       tmp.st_rdev = encode_dev(stat->rdev);
+       tmp.st_rdev = new_encode_dev(stat->rdev);
        tmp.st_size = stat->size;
        tmp.st_atime = stat->atime.tv_sec;
        tmp.st_mtime = stat->mtime.tv_sec;
@@ -665,11 +664,13 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
 {
        struct compat_stat tmp;
 
-       if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
+       if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev))
+               return -EOVERFLOW;
+       if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev))
                return -EOVERFLOW;
 
        memset(&tmp, 0, sizeof(tmp));
-       tmp.st_dev = old_encode_dev(stat->dev);
+       tmp.st_dev = new_encode_dev(stat->dev);
        tmp.st_ino = stat->ino;
        if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
                return -EOVERFLOW;
@@ -679,7 +680,7 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
                return -EOVERFLOW;
        SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
        SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
-       tmp.st_rdev = old_encode_dev(stat->rdev);
+       tmp.st_rdev = new_encode_dev(stat->rdev);
        if ((u64) stat->size > MAX_NON_LFS)
                return -EOVERFLOW;
        tmp.st_size = stat->size;
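
The switch away from the 16-bit old encoding matters because new_encode_dev() packs a 12-bit major and 20-bit minor and degrades to the old layout when both fit in 8 bits, so any st_dev/st_rdev field of 4 bytes or more can take it; only sub-4-byte fields still need the old_valid_dev() overflow check added above. A user-space restatement of the encodings (per include/linux/kdev_t.h) with a round-trip check:

#include <assert.h>
#include <stdint.h>

static uint32_t new_encode(uint32_t major, uint32_t minor)
{
        return (minor & 0xff) | (major << 8) | ((minor & ~0xffu) << 12);
}

static void new_decode(uint32_t dev, uint32_t *major, uint32_t *minor)
{
        *major = (dev & 0xfff00) >> 8;
        *minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
}

static uint16_t old_encode(uint32_t major, uint32_t minor)
{
        return (uint16_t)((major << 8) | minor); /* valid only below 256 */
}

int main(void)
{
        uint32_t dev = new_encode(8, 0x12345), major, minor;

        new_decode(dev, &major, &minor);
        assert(major == 8 && minor == 0x12345);         /* round-trips */
        assert(new_encode(8, 1) == old_encode(8, 1));   /* small devs agree */
        return 0;
}
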
index f1d4a193602d673b8aaf435d9d2d1c25388221b1..60f57c7bc0a69a0cb931a957c354ed740aaba9f0 100644 (file)
@@ -1204,7 +1204,7 @@ static int set_bdev_super(struct super_block *s, void *data)
        s->s_dev = s->s_bdev->bd_dev;
        s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
 
-       if (blk_queue_stable_writes(s->s_bdev->bd_disk->queue))
+       if (bdev_stable_writes(s->s_bdev))
                s->s_iflags |= SB_I_STABLE_WRITES;
        return 0;
 }
index 42dcf96881b688f3474a76bb6b618eade3c2b959..a12ac0356c69cd409a856b5a551ce1768cf7ad16 100644 (file)
@@ -703,19 +703,6 @@ int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid)
 
        ktype = get_ktype(kobj);
        if (ktype) {
-               struct attribute **kattr;
-
-               /*
-                * Change owner of the default attributes associated with the
-                * ktype of @kobj.
-                */
-               for (kattr = ktype->default_attrs; kattr && *kattr; kattr++) {
-                       error = sysfs_file_change_owner(kobj, (*kattr)->name,
-                                                       kuid, kgid);
-                       if (error)
-                               return error;
-               }
-
                /*
                 * Change owner of the default groups associated with the
                 * ktype of @kobj.
index 0ed4861b038f6a3dc06fcf3d7eb26444981aa21b..b3d5f97f16cdb174b2e91ff4514caae2f96dfe26 100644 (file)
@@ -75,11 +75,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
 
        if (fileident) {
                if (adinicb || (offset + lfi < 0)) {
-                       memcpy(udf_get_fi_ident(sfi), fileident, lfi);
+                       memcpy(sfi->impUse + liu, fileident, lfi);
                } else if (offset >= 0) {
                        memcpy(fibh->ebh->b_data + offset, fileident, lfi);
                } else {
-                       memcpy(udf_get_fi_ident(sfi), fileident, -offset);
+                       memcpy(sfi->impUse + liu, fileident, -offset);
                        memcpy(fibh->ebh->b_data, fileident - offset,
                                lfi + offset);
                }
@@ -88,11 +88,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
        offset += lfi;
 
        if (adinicb || (offset + padlen < 0)) {
-               memset(udf_get_fi_ident(sfi) + lfi, 0x00, padlen);
+               memset(sfi->impUse + liu + lfi, 0x00, padlen);
        } else if (offset >= 0) {
                memset(fibh->ebh->b_data + offset, 0x00, padlen);
        } else {
-               memset(udf_get_fi_ident(sfi) + lfi, 0x00, -offset);
+               memset(sfi->impUse + liu + lfi, 0x00, -offset);
                memset(fibh->ebh->b_data, 0x00, padlen + offset);
        }
 
index 5c8c5175b385c274eaac9a239ba0749b9372ff98..e8dd03e4561e81a812e52f7730771d389d96169f 100644 (file)
@@ -25,6 +25,8 @@
 
 #include <linux/uaccess.h>
 
+#include "internal.h"
+
 static const char *
 strcmp_prefix(const char *a, const char *a_prefix)
 {
@@ -539,43 +541,76 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
 /*
  * Extended attribute SET operations
  */
-static long
-setxattr(struct user_namespace *mnt_userns, struct dentry *d,
-        const char __user *name, const void __user *value, size_t size,
-        int flags)
+
+int setxattr_copy(const char __user *name, struct xattr_ctx *ctx)
 {
        int error;
-       void *kvalue = NULL;
-       char kname[XATTR_NAME_MAX + 1];
 
-       if (flags & ~(XATTR_CREATE|XATTR_REPLACE))
+       if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
                return -EINVAL;
 
-       error = strncpy_from_user(kname, name, sizeof(kname));
-       if (error == 0 || error == sizeof(kname))
-               error = -ERANGE;
+       error = strncpy_from_user(ctx->kname->name, name,
+                               sizeof(ctx->kname->name));
+       if (error == 0 || error == sizeof(ctx->kname->name))
+               return -ERANGE;
        if (error < 0)
                return error;
 
-       if (size) {
-               if (size > XATTR_SIZE_MAX)
+       error = 0;
+       if (ctx->size) {
+               if (ctx->size > XATTR_SIZE_MAX)
                        return -E2BIG;
-               kvalue = kvmalloc(size, GFP_KERNEL);
-               if (!kvalue)
-                       return -ENOMEM;
-               if (copy_from_user(kvalue, value, size)) {
-                       error = -EFAULT;
-                       goto out;
+
+               ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
+               if (IS_ERR(ctx->kvalue)) {
+                       error = PTR_ERR(ctx->kvalue);
+                       ctx->kvalue = NULL;
                }
-               if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
-                   (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
-                       posix_acl_fix_xattr_from_user(mnt_userns, kvalue, size);
        }
 
-       error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags);
-out:
-       kvfree(kvalue);
+       return error;
+}
+
+static void setxattr_convert(struct user_namespace *mnt_userns,
+                            struct dentry *d, struct xattr_ctx *ctx)
+{
+       if (ctx->size &&
+               ((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
+               (strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)))
+               posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
+                                               ctx->kvalue, ctx->size);
+}
+
+int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+               struct xattr_ctx *ctx)
+{
+       setxattr_convert(mnt_userns, dentry, ctx);
+       return vfs_setxattr(mnt_userns, dentry, ctx->kname->name,
+                       ctx->kvalue, ctx->size, ctx->flags);
+}
+
+static long
+setxattr(struct user_namespace *mnt_userns, struct dentry *d,
+       const char __user *name, const void __user *value, size_t size,
+       int flags)
+{
+       struct xattr_name kname;
+       struct xattr_ctx ctx = {
+               .cvalue   = value,
+               .kvalue   = NULL,
+               .size     = size,
+               .kname    = &kname,
+               .flags    = flags,
+       };
+       int error;
 
+       error = setxattr_copy(name, &ctx);
+       if (error)
+               return error;
+
+       error = do_setxattr(mnt_userns, d, &ctx);
+
+       kvfree(ctx.kvalue);
        return error;
 }
 
@@ -641,43 +676,61 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
 /*
  * Extended attribute GET operations
  */
-static ssize_t
-getxattr(struct user_namespace *mnt_userns, struct dentry *d,
-        const char __user *name, void __user *value, size_t size)
+ssize_t
+do_getxattr(struct user_namespace *mnt_userns, struct dentry *d,
+       struct xattr_ctx *ctx)
 {
        ssize_t error;
-       void *kvalue = NULL;
-       char kname[XATTR_NAME_MAX + 1];
-
-       error = strncpy_from_user(kname, name, sizeof(kname));
-       if (error == 0 || error == sizeof(kname))
-               error = -ERANGE;
-       if (error < 0)
-               return error;
+       char *kname = ctx->kname->name;
 
-       if (size) {
-               if (size > XATTR_SIZE_MAX)
-                       size = XATTR_SIZE_MAX;
-               kvalue = kvzalloc(size, GFP_KERNEL);
-               if (!kvalue)
+       if (ctx->size) {
+               if (ctx->size > XATTR_SIZE_MAX)
+                       ctx->size = XATTR_SIZE_MAX;
+               ctx->kvalue = kvzalloc(ctx->size, GFP_KERNEL);
+               if (!ctx->kvalue)
                        return -ENOMEM;
        }
 
-       error = vfs_getxattr(mnt_userns, d, kname, kvalue, size);
+       error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size);
        if (error > 0) {
                if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
                    (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
-                       posix_acl_fix_xattr_to_user(mnt_userns, kvalue, error);
-               if (size && copy_to_user(value, kvalue, error))
+                       posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d),
+                                                       ctx->kvalue, error);
+               if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
                        error = -EFAULT;
-       } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
+       } else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
                /* The file system tried to returned a value bigger
                   than XATTR_SIZE_MAX bytes. Not possible. */
                error = -E2BIG;
        }
 
-       kvfree(kvalue);
+       return error;
+}
+
+static ssize_t
+getxattr(struct user_namespace *mnt_userns, struct dentry *d,
+        const char __user *name, void __user *value, size_t size)
+{
+       ssize_t error;
+       struct xattr_name kname;
+       struct xattr_ctx ctx = {
+               .value    = value,
+               .kvalue   = NULL,
+               .size     = size,
+               .kname    = &kname,
+               .flags    = 0,
+       };
+
+       error = strncpy_from_user(kname.name, name, sizeof(kname.name));
+       if (error == 0 || error == sizeof(kname.name))
+               error = -ERANGE;
+       if (error < 0)
+               return error;
+
+       error = do_getxattr(mnt_userns, d, &ctx);
 
+       kvfree(ctx.kvalue);
        return error;
 }
 
index e1afb9e503e1679d562e0c17cc28bda04eefa45a..bf4e60871068205eb30a4909654717782d6bbe84 100644 (file)
@@ -406,7 +406,7 @@ xfs_buf_alloc_pages(
 STATIC int
 _xfs_buf_map_pages(
        struct xfs_buf          *bp,
-       uint                    flags)
+       xfs_buf_flags_t         flags)
 {
        ASSERT(bp->b_flags & _XBF_PAGES);
        if (bp->b_page_count == 1) {
@@ -868,7 +868,7 @@ xfs_buf_read_uncached(
        struct xfs_buftarg      *target,
        xfs_daddr_t             daddr,
        size_t                  numblks,
-       int                     flags,
+       xfs_buf_flags_t         flags,
        struct xfs_buf          **bpp,
        const struct xfs_buf_ops *ops)
 {
@@ -903,7 +903,7 @@ int
 xfs_buf_get_uncached(
        struct xfs_buftarg      *target,
        size_t                  numblks,
-       int                     flags,
+       xfs_buf_flags_t         flags,
        struct xfs_buf          **bpp)
 {
        int                     error;
index edcb6254fa6a87b3d7dbd88413670075dab0a19d..1ee3056ff9cfe9d476da9d5fe74a54e9f9d4d056 100644 (file)
@@ -22,28 +22,28 @@ struct xfs_buf;
 
 #define XFS_BUF_DADDR_NULL     ((xfs_daddr_t) (-1LL))
 
-#define XBF_READ        (1 << 0) /* buffer intended for reading from device */
-#define XBF_WRITE       (1 << 1) /* buffer intended for writing to device */
-#define XBF_READ_AHEAD  (1 << 2) /* asynchronous read-ahead */
-#define XBF_NO_IOACCT   (1 << 3) /* bypass I/O accounting (non-LRU bufs) */
-#define XBF_ASYNC       (1 << 4) /* initiator will not wait for completion */
-#define XBF_DONE        (1 << 5) /* all pages in the buffer uptodate */
-#define XBF_STALE       (1 << 6) /* buffer has been staled, do not find it */
-#define XBF_WRITE_FAIL  (1 << 7) /* async writes have failed on this buffer */
+#define XBF_READ        (1u << 0) /* buffer intended for reading from device */
+#define XBF_WRITE       (1u << 1) /* buffer intended for writing to device */
+#define XBF_READ_AHEAD  (1u << 2) /* asynchronous read-ahead */
+#define XBF_NO_IOACCT   (1u << 3) /* bypass I/O accounting (non-LRU bufs) */
+#define XBF_ASYNC       (1u << 4) /* initiator will not wait for completion */
+#define XBF_DONE        (1u << 5) /* all pages in the buffer uptodate */
+#define XBF_STALE       (1u << 6) /* buffer has been staled, do not find it */
+#define XBF_WRITE_FAIL  (1u << 7) /* async writes have failed on this buffer */
 
 /* buffer type flags for write callbacks */
-#define _XBF_INODES     (1 << 16)/* inode buffer */
-#define _XBF_DQUOTS     (1 << 17)/* dquot buffer */
-#define _XBF_LOGRECOVERY        (1 << 18)/* log recovery buffer */
+#define _XBF_INODES     (1u << 16)/* inode buffer */
+#define _XBF_DQUOTS     (1u << 17)/* dquot buffer */
+#define _XBF_LOGRECOVERY (1u << 18)/* log recovery buffer */
 
 /* flags used only internally */
-#define _XBF_PAGES      (1 << 20)/* backed by refcounted pages */
-#define _XBF_KMEM       (1 << 21)/* backed by heap memory */
-#define _XBF_DELWRI_Q   (1 << 22)/* buffer on a delwri queue */
+#define _XBF_PAGES      (1u << 20)/* backed by refcounted pages */
+#define _XBF_KMEM       (1u << 21)/* backed by heap memory */
+#define _XBF_DELWRI_Q   (1u << 22)/* buffer on a delwri queue */
 
 /* flags used only as arguments to access routines */
-#define XBF_TRYLOCK     (1 << 30)/* lock requested, but do not wait */
-#define XBF_UNMAPPED    (1 << 31)/* do not map the buffer */
+#define XBF_TRYLOCK     (1u << 30)/* lock requested, but do not wait */
+#define XBF_UNMAPPED    (1u << 31)/* do not map the buffer */
 
 typedef unsigned int xfs_buf_flags_t;
 
@@ -58,7 +58,7 @@ typedef unsigned int xfs_buf_flags_t;
        { XBF_WRITE_FAIL,       "WRITE_FAIL" }, \
        { _XBF_INODES,          "INODES" }, \
        { _XBF_DQUOTS,          "DQUOTS" }, \
-       { _XBF_LOGRECOVERY,             "LOG_RECOVERY" }, \
+       { _XBF_LOGRECOVERY,     "LOG_RECOVERY" }, \
        { _XBF_PAGES,           "PAGES" }, \
        { _XBF_KMEM,            "KMEM" }, \
        { _XBF_DELWRI_Q,        "DELWRI_Q" }, \
@@ -247,11 +247,11 @@ xfs_buf_readahead(
        return xfs_buf_readahead_map(target, &map, 1, ops);
 }
 
-int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int flags,
-               struct xfs_buf **bpp);
+int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
+               xfs_buf_flags_t flags, struct xfs_buf **bpp);
 int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
-                         size_t numblks, int flags, struct xfs_buf **bpp,
-                         const struct xfs_buf_ops *ops);
+               size_t numblks, xfs_buf_flags_t flags, struct xfs_buf **bpp,
+               const struct xfs_buf_ops *ops);
 int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags);
 void xfs_buf_hold(struct xfs_buf *bp);
 
index 0191de8ce9cedd822ff2ec3a2191fefb37296cd2..c6fe3f6ebb6b01be44ec4a4d73de9ee669348204 100644 (file)
@@ -114,7 +114,7 @@ xfs_trim_extents(
                }
 
                trace_xfs_discard_extent(mp, agno, fbno, flen);
-               error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0);
+               error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
                if (error)
                        goto out_del_cursor;
                *blocks_trimmed += flen;
@@ -152,8 +152,8 @@ xfs_ioc_trim(
        struct xfs_mount                *mp,
        struct fstrim_range __user      *urange)
 {
-       struct request_queue    *q = bdev_get_queue(mp->m_ddev_targp->bt_bdev);
-       unsigned int            granularity = q->limits.discard_granularity;
+       unsigned int            granularity =
+               bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
        struct fstrim_range     range;
        xfs_daddr_t             start, end, minlen;
        xfs_agnumber_t          start_agno, end_agno, agno;
@@ -162,7 +162,7 @@ xfs_ioc_trim(
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-       if (!blk_queue_discard(q))
+       if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev))
                return -EOPNOTSUPP;
 
        /*
index 9de6205fe134a926838384e1b5a952cbc8ce63f2..39ae53efb3ab6c6c68ce342c2b5137f988e82d5c 100644 (file)
@@ -2594,14 +2594,13 @@ xfs_ifree_cluster(
 }
 
 /*
- * This is called to return an inode to the inode free list.
- * The inode should already be truncated to 0 length and have
- * no pages associated with it.  This routine also assumes that
- * the inode is already a part of the transaction.
+ * This is called to return an inode to the inode free list.  The inode should
+ * already be truncated to 0 length and have no pages associated with it.  This
+ * routine also assumes that the inode is already a part of the transaction.
  *
- * The on-disk copy of the inode will have been added to the list
- * of unlinked inodes in the AGI. We need to remove the inode from
- * that list atomically with respect to freeing it here.
+ * The on-disk copy of the inode will have been added to the list of unlinked
+ * inodes in the AGI. We need to remove the inode from that list atomically with
+ * respect to freeing it here.
  */
 int
 xfs_ifree(
@@ -2623,13 +2622,16 @@ xfs_ifree(
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
 
        /*
-        * Pull the on-disk inode from the AGI unlinked list.
+        * Free the inode first so that we guarantee that the AGI lock is going
+        * to be taken before we remove the inode from the unlinked list. This
+        * makes the AGI lock -> unlinked list modification order the same as
+        * used in O_TMPFILE creation.
         */
-       error = xfs_iunlink_remove(tp, pag, ip);
+       error = xfs_difree(tp, pag, ip->i_ino, &xic);
        if (error)
-               goto out;
+               return error;
 
-       error = xfs_difree(tp, pag, ip->i_ino, &xic);
+       error = xfs_iunlink_remove(tp, pag, ip);
        if (error)
                goto out;
 
index ba57323bfdcea38d0c02cc308c43260cd89d5e29..c9f55e4f095710219677be4b1478c160fa90fb57 100644 (file)
@@ -605,7 +605,7 @@ xlog_discard_busy_extents(
                error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
                                XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
                                XFS_FSB_TO_BB(mp, busyp->length),
-                               GFP_NOFS, 0, &bio);
+                               GFP_NOFS, &bio);
                if (error && error != -EOPNOTSUPP) {
                        xfs_info(mp,
         "discard failed for extent [0x%llx,%u], error %d",
index 54be9d64093edacfaea509ff2d1e73cf61a58493..a276b8111f636b72415dcb8262579c04c8ec8bbe 100644 (file)
@@ -1608,14 +1608,10 @@ xfs_fs_fill_super(
                        goto out_filestream_unmount;
        }
 
-       if (xfs_has_discard(mp)) {
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
-
-               if (!blk_queue_discard(q)) {
-                       xfs_warn(mp, "mounting with \"discard\" option, but "
-                                       "the device does not support discard");
-                       mp->m_features &= ~XFS_FEAT_DISCARD;
-               }
+       if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
+               xfs_warn(mp,
+       "mounting with \"discard\" option, but the device does not support discard");
+               mp->m_features &= ~XFS_FEAT_DISCARD;
        }
 
        if (xfs_has_reflink(mp)) {
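
The xfs_fs_fill_super() hunk above shows the conversion pattern used
throughout this series: instead of testing the removed QUEUE_FLAG_DISCARD
flag, callers check whether the device reports a non-zero discard limit.
A minimal before/after sketch for any remaining caller (illustrative, not
part of this diff):

        /* Old interface, removed by this series: */
        if (!blk_queue_discard(bdev_get_queue(bdev)))
                return -EOPNOTSUPP;

        /* New interface: discard is supported iff the limit is non-zero. */
        if (!bdev_max_discard_sectors(bdev))
                return -EOPNOTSUPP;
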
index de177842b951cafd27dd22e166b1bd23a200d4de..0c82673238f4d81377fc999f187b35cd5cbb615b 100644 (file)
@@ -175,7 +175,7 @@ xfs_trans_get_buf(
        struct xfs_buftarg      *target,
        xfs_daddr_t             blkno,
        int                     numblks,
-       uint                    flags,
+       xfs_buf_flags_t         flags,
        struct xfs_buf          **bpp)
 {
        DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
index 33c1a4f1132e7d444d2a3c402d61c05605d0bd40..9fe54f5319f2226d5a149b98e2f47a5fb7591e05 100644 (file)
@@ -3,4 +3,4 @@ ccflags-y                               += -I$(src)
 
 obj-$(CONFIG_ZONEFS_FS) += zonefs.o
 
-zonefs-y       := super.o
+zonefs-y       := super.o sysfs.o
index 3614c7834007dd0ea587f01778ee7c7b0460a4e6..b3b0b71fdf6c4c373d6d627dd036c233abc592ad 100644 (file)
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
+/*
+ * Manage the active zone count. Called with zi->i_truncate_mutex held.
+ */
+static void zonefs_account_active(struct inode *inode)
+{
+       struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+       struct zonefs_inode_info *zi = ZONEFS_I(inode);
+
+       lockdep_assert_held(&zi->i_truncate_mutex);
+
+       if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
+               return;
+
+       /*
+        * If the zone is active, that is, if it is explicitly open or
+        * partially written, check if it was already accounted as active.
+        */
+       if ((zi->i_flags & ZONEFS_ZONE_OPEN) ||
+           (zi->i_wpoffset > 0 && zi->i_wpoffset < zi->i_max_size)) {
+               if (!(zi->i_flags & ZONEFS_ZONE_ACTIVE)) {
+                       zi->i_flags |= ZONEFS_ZONE_ACTIVE;
+                       atomic_inc(&sbi->s_active_seq_files);
+               }
+               return;
+       }
+
+       /* The zone is not active. If it was, update the active count */
+       if (zi->i_flags & ZONEFS_ZONE_ACTIVE) {
+               zi->i_flags &= ~ZONEFS_ZONE_ACTIVE;
+               atomic_dec(&sbi->s_active_seq_files);
+       }
+}
+
 static inline int zonefs_zone_mgmt(struct inode *inode,
                                   enum req_opf op)
 {
@@ -35,6 +68,17 @@ static inline int zonefs_zone_mgmt(struct inode *inode,
 
        lockdep_assert_held(&zi->i_truncate_mutex);
 
+       /*
+        * With ZNS drives, closing an explicitly open zone that has not been
+        * written will change the zone state to "closed", that is, the zone
+        * will remain active. Since this can then cause explicit open
+        * operations on other zones to fail if the drive active zone resources
+        * are exceeded, make sure that the zone does not remain active by
+        * resetting it.
+        */
+       if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset)
+               op = REQ_OP_ZONE_RESET;
+
        trace_zonefs_zone_mgmt(inode, op);
        ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector,
                               zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS);
@@ -57,8 +101,13 @@ static inline void zonefs_i_size_write(struct inode *inode, loff_t isize)
         * A full zone is no longer open/active and does not need
         * explicit closing.
         */
-       if (isize >= zi->i_max_size)
-               zi->i_flags &= ~ZONEFS_ZONE_OPEN;
+       if (isize >= zi->i_max_size) {
+               struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+
+               if (zi->i_flags & ZONEFS_ZONE_ACTIVE)
+                       atomic_dec(&sbi->s_active_seq_files);
+               zi->i_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE);
+       }
 }
 
 static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
@@ -386,6 +435,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
        zonefs_update_stats(inode, data_size);
        zonefs_i_size_write(inode, data_size);
        zi->i_wpoffset = data_size;
+       zonefs_account_active(inode);
 
        return 0;
 }
@@ -497,6 +547,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
        zonefs_update_stats(inode, isize);
        truncate_setsize(inode, isize);
        zi->i_wpoffset = isize;
+       zonefs_account_active(inode);
 
 unlock:
        mutex_unlock(&zi->i_truncate_mutex);
@@ -678,13 +729,12 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
        struct inode *inode = file_inode(iocb->ki_filp);
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
        struct block_device *bdev = inode->i_sb->s_bdev;
-       unsigned int max;
+       unsigned int max = bdev_max_zone_append_sectors(bdev);
        struct bio *bio;
        ssize_t size;
        int nr_pages;
        ssize_t ret;
 
-       max = queue_max_zone_append_sectors(bdev_get_queue(bdev));
        max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
        iov_iter_truncate(from, max);
 
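bdev_max_zone_append_sectors() reports the limit in 512-byte sectors, so
zonefs_file_dio_append() converts it to bytes and rounds it down to the
filesystem block size before truncating the iterator. A worked example with
hypothetical values:

        unsigned int max = 1001;            /* device limit in sectors */

        max = max << SECTOR_SHIFT;          /* 1001 * 512 = 512512 bytes */
        max = ALIGN_DOWN(max, 4096);        /* 4 KiB blocks -> 512000 bytes */
        /* iov_iter_truncate(from, max) then caps the write at 512000 bytes */
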
@@ -855,8 +905,15 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
            (ret > 0 || ret == -EIOCBQUEUED)) {
                if (ret > 0)
                        count = ret;
+
+               /*
+                * Update the zone write pointer offset assuming the write
+                * operation succeeded. If it did not, the error recovery path
+                * will correct it. Also do active seq file accounting.
+                */
                mutex_lock(&zi->i_truncate_mutex);
                zi->i_wpoffset += count;
+               zonefs_account_active(inode);
                mutex_unlock(&zi->i_truncate_mutex);
        }
 
@@ -998,13 +1055,13 @@ inode_unlock:
        return ret;
 }
 
-static inline bool zonefs_file_use_exp_open(struct inode *inode, struct file *file)
+/*
+ * Write open accounting is done only for sequential files.
+ */
+static inline bool zonefs_seq_file_need_wro(struct inode *inode,
+                                           struct file *file)
 {
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
-       struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
-
-       if (!(sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN))
-               return false;
 
        if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
                return false;
@@ -1015,28 +1072,34 @@ static inline bool zonefs_file_use_exp_open(struct inode *inode, struct file *fi
        return true;
 }
 
-static int zonefs_open_zone(struct inode *inode)
+static int zonefs_seq_file_write_open(struct inode *inode)
 {
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
-       struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
        int ret = 0;
 
        mutex_lock(&zi->i_truncate_mutex);
 
        if (!zi->i_wr_refcnt) {
-               if (atomic_inc_return(&sbi->s_open_zones) > sbi->s_max_open_zones) {
-                       atomic_dec(&sbi->s_open_zones);
-                       ret = -EBUSY;
-                       goto unlock;
-               }
+               struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+               unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files);
 
-               if (i_size_read(inode) < zi->i_max_size) {
-                       ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
-                       if (ret) {
-                               atomic_dec(&sbi->s_open_zones);
+               if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
+
+                       if (wro > sbi->s_max_wro_seq_files) {
+                               atomic_dec(&sbi->s_wro_seq_files);
+                               ret = -EBUSY;
                                goto unlock;
                        }
-                       zi->i_flags |= ZONEFS_ZONE_OPEN;
+
+                       if (i_size_read(inode) < zi->i_max_size) {
+                               ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
+                               if (ret) {
+                                       atomic_dec(&sbi->s_wro_seq_files);
+                                       goto unlock;
+                               }
+                               zi->i_flags |= ZONEFS_ZONE_OPEN;
+                               zonefs_account_active(inode);
+                       }
                }
        }
 
@@ -1056,30 +1119,31 @@ static int zonefs_file_open(struct inode *inode, struct file *file)
        if (ret)
                return ret;
 
-       if (zonefs_file_use_exp_open(inode, file))
-               return zonefs_open_zone(inode);
+       if (zonefs_seq_file_need_wro(inode, file))
+               return zonefs_seq_file_write_open(inode);
 
        return 0;
 }
 
-static void zonefs_close_zone(struct inode *inode)
+static void zonefs_seq_file_write_close(struct inode *inode)
 {
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
+       struct super_block *sb = inode->i_sb;
+       struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
        int ret = 0;
 
        mutex_lock(&zi->i_truncate_mutex);
-       zi->i_wr_refcnt--;
-       if (!zi->i_wr_refcnt) {
-               struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
-               struct super_block *sb = inode->i_sb;
 
-               /*
-                * If the file zone is full, it is not open anymore and we only
-                * need to decrement the open count.
-                */
-               if (!(zi->i_flags & ZONEFS_ZONE_OPEN))
-                       goto dec;
+       zi->i_wr_refcnt--;
+       if (zi->i_wr_refcnt)
+               goto unlock;
 
+       /*
+        * The file zone may not be open anymore (e.g. the file was truncated to
+        * its maximum size or it was fully written). For this case, we only
+        * need to decrement the write open count.
+        */
+       if (zi->i_flags & ZONEFS_ZONE_OPEN) {
                ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
                if (ret) {
                        __zonefs_io_error(inode, false);
@@ -1091,14 +1155,23 @@ static void zonefs_close_zone(struct inode *inode)
                         */
                        if (zi->i_flags & ZONEFS_ZONE_OPEN &&
                            !(sb->s_flags & SB_RDONLY)) {
-                               zonefs_warn(sb, "closing zone failed, remounting filesystem read-only\n");
+                               zonefs_warn(sb,
+                                       "closing zone at %llu failed %d\n",
+                                       zi->i_zsector, ret);
+                               zonefs_warn(sb,
+                                       "remounting filesystem read-only\n");
                                sb->s_flags |= SB_RDONLY;
                        }
+                       goto unlock;
                }
+
                zi->i_flags &= ~ZONEFS_ZONE_OPEN;
-dec:
-               atomic_dec(&sbi->s_open_zones);
+               zonefs_account_active(inode);
        }
+
+       atomic_dec(&sbi->s_wro_seq_files);
+
+unlock:
        mutex_unlock(&zi->i_truncate_mutex);
 }
 
@@ -1110,8 +1183,8 @@ static int zonefs_file_release(struct inode *inode, struct file *file)
         * the zone has gone offline or read-only). Make sure we don't fail the
         * close(2) for user-space.
         */
-       if (zonefs_file_use_exp_open(inode, file))
-               zonefs_close_zone(inode);
+       if (zonefs_seq_file_need_wro(inode, file))
+               zonefs_seq_file_write_close(inode);
 
        return 0;
 }
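
With the explicit_open mount option, the s_wro_seq_files counter caps the
number of sequential files concurrently open for writing at the device open
zone limit, and an open(2) beyond that limit fails with EBUSY. A userspace
sketch of the visible behavior, assuming a hypothetical mount point and a
limit that has already been reached:

        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <errno.h>
        #include <stdio.h>

        int main(void)
        {
                int fd = open("/mnt/zonefs/seq/0", O_WRONLY | O_DIRECT);

                if (fd < 0 && errno == EBUSY)
                        printf("device open zone limit reached\n");
                return 0;
        }
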
@@ -1142,6 +1215,7 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb)
        inode_init_once(&zi->i_vnode);
        mutex_init(&zi->i_truncate_mutex);
        zi->i_wr_refcnt = 0;
+       zi->i_flags = 0;
 
        return &zi->i_vnode;
 }
@@ -1293,12 +1367,13 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode,
        inc_nlink(parent);
 }
 
-static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
-                                  enum zonefs_ztype type)
+static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
+                                 enum zonefs_ztype type)
 {
        struct super_block *sb = inode->i_sb;
        struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
+       int ret = 0;
 
        inode->i_ino = zone->start >> sbi->s_zone_sectors_shift;
        inode->i_mode = S_IFREG | sbi->s_perm;
@@ -1323,6 +1398,29 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
        sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes);
        sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits;
        sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits;
+
+       mutex_lock(&zi->i_truncate_mutex);
+
+       /*
+        * For sequential zones, make sure that any open zone is closed first
+        * to ensure that the initial number of open zones is 0, in sync with
+        * the open zone accounting done when the mount option
+        * ZONEFS_MNTOPT_EXPLICIT_OPEN is used.
+        */
+       if (type == ZONEFS_ZTYPE_SEQ &&
+           (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
+            zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
+               ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
+               if (ret)
+                       goto unlock;
+       }
+
+       zonefs_account_active(inode);
+
+unlock:
+       mutex_unlock(&zi->i_truncate_mutex);
+
+       return ret;
 }
 
 static struct dentry *zonefs_create_inode(struct dentry *parent,
@@ -1332,6 +1430,7 @@ static struct dentry *zonefs_create_inode(struct dentry *parent,
        struct inode *dir = d_inode(parent);
        struct dentry *dentry;
        struct inode *inode;
+       int ret;
 
        dentry = d_alloc_name(parent, name);
        if (!dentry)
@@ -1342,10 +1441,16 @@ static struct dentry *zonefs_create_inode(struct dentry *parent,
                goto dput;
 
        inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
-       if (zone)
-               zonefs_init_file_inode(inode, zone, type);
-       else
+       if (zone) {
+               ret = zonefs_init_file_inode(inode, zone, type);
+               if (ret) {
+                       iput(inode);
+                       goto dput;
+               }
+       } else {
                zonefs_init_dir_inode(dir, inode, type);
+       }
+
        d_add(dentry, inode);
        dir->i_size++;
 
@@ -1652,14 +1757,18 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_gid = GLOBAL_ROOT_GID;
        sbi->s_perm = 0640;
        sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO;
-       sbi->s_max_open_zones = bdev_max_open_zones(sb->s_bdev);
-       atomic_set(&sbi->s_open_zones, 0);
-       if (!sbi->s_max_open_zones &&
+
+       atomic_set(&sbi->s_wro_seq_files, 0);
+       sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev);
+       if (!sbi->s_max_wro_seq_files &&
            sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
                zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n");
                sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
        }
 
+       atomic_set(&sbi->s_active_seq_files, 0);
+       sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev);
+
        ret = zonefs_read_super(sb);
        if (ret)
                return ret;
@@ -1674,6 +1783,10 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
        if (ret)
                goto cleanup;
 
+       ret = zonefs_sysfs_register(sb);
+       if (ret)
+               goto cleanup;
+
        zonefs_info(sb, "Mounting %u zones",
                    blkdev_nr_zones(sb->s_bdev->bd_disk));
 
@@ -1719,6 +1832,8 @@ static void zonefs_kill_super(struct super_block *sb)
 
        if (sb->s_root)
                d_genocide(sb->s_root);
+
+       zonefs_sysfs_unregister(sb);
        kill_block_super(sb);
        kfree(sbi);
 }
@@ -1766,16 +1881,26 @@ static int __init zonefs_init(void)
                return ret;
 
        ret = register_filesystem(&zonefs_type);
-       if (ret) {
-               zonefs_destroy_inodecache();
-               return ret;
-       }
+       if (ret)
+               goto destroy_inodecache;
+
+       ret = zonefs_sysfs_init();
+       if (ret)
+               goto unregister_fs;
 
        return 0;
+
+unregister_fs:
+       unregister_filesystem(&zonefs_type);
+destroy_inodecache:
+       zonefs_destroy_inodecache();
+
+       return ret;
 }
 
 static void __exit zonefs_exit(void)
 {
+       zonefs_sysfs_exit();
        zonefs_destroy_inodecache();
        unregister_filesystem(&zonefs_type);
 }
diff --git a/fs/zonefs/sysfs.c b/fs/zonefs/sysfs.c
new file mode 100644 (file)
index 0000000..9cb6755
--- /dev/null
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple file system for zoned block devices exposing zones as files.
+ *
+ * Copyright (C) 2022 Western Digital Corporation or its affiliates.
+ */
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/blkdev.h>
+
+#include "zonefs.h"
+
+struct zonefs_sysfs_attr {
+       struct attribute attr;
+       ssize_t (*show)(struct zonefs_sb_info *sbi, char *buf);
+};
+
+static inline struct zonefs_sysfs_attr *to_attr(struct attribute *attr)
+{
+       return container_of(attr, struct zonefs_sysfs_attr, attr);
+}
+
+#define ZONEFS_SYSFS_ATTR_RO(name) \
+static struct zonefs_sysfs_attr zonefs_sysfs_attr_##name = __ATTR_RO(name)
+
+#define ATTR_LIST(name) &zonefs_sysfs_attr_##name.attr
+
+static ssize_t zonefs_sysfs_attr_show(struct kobject *kobj,
+                                     struct attribute *attr, char *buf)
+{
+       struct zonefs_sb_info *sbi =
+               container_of(kobj, struct zonefs_sb_info, s_kobj);
+       struct zonefs_sysfs_attr *zonefs_attr =
+               container_of(attr, struct zonefs_sysfs_attr, attr);
+
+       if (!zonefs_attr->show)
+               return 0;
+
+       return zonefs_attr->show(sbi, buf);
+}
+
+static ssize_t max_wro_seq_files_show(struct zonefs_sb_info *sbi, char *buf)
+{
+       return sysfs_emit(buf, "%u\n", sbi->s_max_wro_seq_files);
+}
+ZONEFS_SYSFS_ATTR_RO(max_wro_seq_files);
+
+static ssize_t nr_wro_seq_files_show(struct zonefs_sb_info *sbi, char *buf)
+{
+       return sysfs_emit(buf, "%d\n", atomic_read(&sbi->s_wro_seq_files));
+}
+ZONEFS_SYSFS_ATTR_RO(nr_wro_seq_files);
+
+static ssize_t max_active_seq_files_show(struct zonefs_sb_info *sbi, char *buf)
+{
+       return sysfs_emit(buf, "%u\n", sbi->s_max_active_seq_files);
+}
+ZONEFS_SYSFS_ATTR_RO(max_active_seq_files);
+
+static ssize_t nr_active_seq_files_show(struct zonefs_sb_info *sbi, char *buf)
+{
+       return sysfs_emit(buf, "%d\n", atomic_read(&sbi->s_active_seq_files));
+}
+ZONEFS_SYSFS_ATTR_RO(nr_active_seq_files);
+
+static struct attribute *zonefs_sysfs_attrs[] = {
+       ATTR_LIST(max_wro_seq_files),
+       ATTR_LIST(nr_wro_seq_files),
+       ATTR_LIST(max_active_seq_files),
+       ATTR_LIST(nr_active_seq_files),
+       NULL,
+};
+ATTRIBUTE_GROUPS(zonefs_sysfs);
+
+static void zonefs_sysfs_sb_release(struct kobject *kobj)
+{
+       struct zonefs_sb_info *sbi =
+               container_of(kobj, struct zonefs_sb_info, s_kobj);
+
+       complete(&sbi->s_kobj_unregister);
+}
+
+static const struct sysfs_ops zonefs_sysfs_attr_ops = {
+       .show   = zonefs_sysfs_attr_show,
+};
+
+static struct kobj_type zonefs_sb_ktype = {
+       .default_groups = zonefs_sysfs_groups,
+       .sysfs_ops      = &zonefs_sysfs_attr_ops,
+       .release        = zonefs_sysfs_sb_release,
+};
+
+static struct kobject *zonefs_sysfs_root;
+
+int zonefs_sysfs_register(struct super_block *sb)
+{
+       struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+       int ret;
+
+       init_completion(&sbi->s_kobj_unregister);
+       ret = kobject_init_and_add(&sbi->s_kobj, &zonefs_sb_ktype,
+                                  zonefs_sysfs_root, "%s", sb->s_id);
+       if (ret) {
+               kobject_put(&sbi->s_kobj);
+               wait_for_completion(&sbi->s_kobj_unregister);
+               return ret;
+       }
+
+       sbi->s_sysfs_registered = true;
+
+       return 0;
+}
+
+void zonefs_sysfs_unregister(struct super_block *sb)
+{
+       struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+
+       if (!sbi || !sbi->s_sysfs_registered)
+               return;
+
+       kobject_del(&sbi->s_kobj);
+       kobject_put(&sbi->s_kobj);
+       wait_for_completion(&sbi->s_kobj_unregister);
+}
+
+int __init zonefs_sysfs_init(void)
+{
+       zonefs_sysfs_root = kobject_create_and_add("zonefs", fs_kobj);
+       if (!zonefs_sysfs_root)
+               return -ENOMEM;
+
+       return 0;
+}
+
+void zonefs_sysfs_exit(void)
+{
+       kobject_put(zonefs_sysfs_root);
+       zonefs_sysfs_root = NULL;
+}
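
zonefs_sysfs_root is created under fs_kobj and each super block registers
using its s_id, so the new attributes appear under /sys/fs/zonefs/<device>/.
A minimal userspace sketch reading one of them (hypothetical device name):

        #include <stdio.h>

        int main(void)
        {
                unsigned int nr;
                FILE *f = fopen("/sys/fs/zonefs/nvme0n1/nr_active_seq_files",
                                "r");

                if (!f)
                        return 1;
                if (fscanf(f, "%u", &nr) == 1)
                        printf("active sequential files: %u\n", nr);
                fclose(f);
                return 0;
        }
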
index 7b147907c328ed2615b718c4cb17e2e18f148dad..4b3de66c323342ab8ac10f4691e492197e7e6998 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/uuid.h>
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
+#include <linux/kobject.h>
 
 /*
  * Maximum length of file names: this only needs to be large enough to fit
@@ -39,6 +40,7 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone)
 }
 
 #define ZONEFS_ZONE_OPEN       (1 << 0)
+#define ZONEFS_ZONE_ACTIVE     (1 << 1)
 
 /*
  * In-memory inode data.
@@ -182,8 +184,15 @@ struct zonefs_sb_info {
        loff_t                  s_blocks;
        loff_t                  s_used_blocks;
 
-       unsigned int            s_max_open_zones;
-       atomic_t                s_open_zones;
+       unsigned int            s_max_wro_seq_files;
+       atomic_t                s_wro_seq_files;
+
+       unsigned int            s_max_active_seq_files;
+       atomic_t                s_active_seq_files;
+
+       bool                    s_sysfs_registered;
+       struct kobject          s_kobj;
+       struct completion       s_kobj_unregister;
 };
 
 static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb)
@@ -198,4 +207,9 @@ static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb)
 #define zonefs_warn(sb, format, args...)       \
        pr_warn("zonefs (%s) WARNING: " format, sb->s_id, ## args)
 
+int zonefs_sysfs_register(struct super_block *sb);
+void zonefs_sysfs_unregister(struct super_block *sb);
+int zonefs_sysfs_init(void);
+void zonefs_sysfs_exit(void);
+
 #endif
index 3f7f01f03869059f1390b91a0d63831060cafb50..c4b78c21d793056f8087711b83bfcdedc801f90c 100644 (file)
@@ -509,7 +509,6 @@ extern int unregister_acpi_notifier(struct notifier_block *);
  * External Functions
  */
 
-int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device);
 struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle);
 acpi_status acpi_bus_get_status_handle(acpi_handle handle,
                                       unsigned long long *sta);
index edb0e2a602a89e66d70e196637d8c90a794e075b..ba1f860af38ba2ffd3e87ff778489c8fd116b61b 100644 (file)
 #include <linux/panic.h>
 #include <linux/printk.h>
 
+struct warn_args;
+struct pt_regs;
+
+void __warn(const char *file, int line, void *caller, unsigned taint,
+           struct pt_regs *regs, struct warn_args *args);
+
 #ifdef CONFIG_BUG
 
 #ifdef CONFIG_GENERIC_BUG
@@ -110,11 +116,6 @@ extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
 #endif
 
 /* used internally by panic.c */
-struct warn_args;
-struct pt_regs;
-
-void __warn(const char *file, int line, void *caller, unsigned taint,
-           struct pt_regs *regs, struct warn_args *args);
 
 #ifndef WARN_ON
 #define WARN_ON(condition) ({                                          \
index c08758b6b364206d656a15979cc166e0e4fb3e58..c05d2ce9b6cd85bb9c2c439cf35160b4cdbcc838 100644 (file)
@@ -269,6 +269,7 @@ bool hv_isolation_type_snp(void);
 u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
 void hyperv_cleanup(void);
 bool hv_query_ext_cap(u64 cap_query);
+void hv_setup_dma_ops(struct device *dev, bool coherent);
 void *hv_map_memory(void *addr, unsigned long size);
 void hv_unmap_memory(void *addr);
 #else /* CONFIG_HYPERV */
index fd7feb5c789485864f0e6a596eaa209f04819996..eee6f7763a39f87e63faf4b2ed7ce32877d45733 100644 (file)
@@ -565,10 +565,14 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
 #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)       \
        do {                                                    \
                unsigned long _sz = huge_page_size(h);          \
-               if (_sz == PMD_SIZE)                            \
-                       tlb_flush_pmd_range(tlb, address, _sz); \
-               else if (_sz == PUD_SIZE)                       \
+               if (_sz >= P4D_SIZE)                            \
+                       tlb_flush_p4d_range(tlb, address, _sz); \
+               else if (_sz >= PUD_SIZE)                       \
                        tlb_flush_pud_range(tlb, address, _sz); \
+               else if (_sz >= PMD_SIZE)                       \
+                       tlb_flush_pmd_range(tlb, address, _sz); \
+               else                                            \
+                       tlb_flush_pte_range(tlb, address, _sz); \
                __tlb_remove_tlb_entry(tlb, ptep, address);     \
        } while (0)
 
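The cascade now uses >= tests so that hugepage sizes falling between
page-table levels (for example contiguous-hint hugepages, an assumption
about the intended users) still get a range flush at the nearest level at
or below their size, and sizes below PMD_SIZE flush a PTE range instead of
nothing. A standalone sketch of the selection with hypothetical level
sizes:

        /* Illustrative sizes only: PMD = 2 MiB, PUD = 1 GiB. */
        #define EX_PMD_SIZE     (2UL << 20)
        #define EX_PUD_SIZE     (1UL << 30)

        static const char *flush_level(unsigned long sz)
        {
                if (sz >= EX_PUD_SIZE)
                        return "pud";
                if (sz >= EX_PMD_SIZE)
                        return "pmd";
                return "pte";
        }

        /*
         * flush_level(32UL << 20) == "pmd": a 32 MiB page sits between the
         * PMD and PUD levels and is flushed as a 32 MiB PMD-level range.
         */
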
index 8fc637379899fd38b37bdbecaca6358a2f917dc8..df30f11b4a46011fdbcefc3d0a9287ebc62e8daa 100644 (file)
@@ -143,7 +143,7 @@ static inline void put_unaligned_be48(const u64 val, void *p)
 
 static inline u64 __get_unaligned_be48(const u8 *p)
 {
-       return (u64)p[0] << 40 | (u64)p[1] << 32 | p[2] << 24 |
+       return (u64)p[0] << 40 | (u64)p[1] << 32 | (u64)p[2] << 24 |
                p[3] << 16 | p[4] << 8 | p[5];
 }
 
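The added cast fixes a sign-extension bug: p[2] is a u8 promoted to signed
int, so for values >= 0x80 the shift into bit 31 yields a negative int that
is sign-extended when widened to u64, corrupting the upper 32 bits of the
result. The lower-order terms cannot reach bit 31 and need no cast. A
worked example:

        u8 b = 0x80;
        u64 bad  = b << 24;      /* int 0x80000000 sign-extends to
                                  * 0xffffffff80000000 */
        u64 good = (u64)b << 24; /* 0x0000000080000000 as intended */
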
index 73f2a93248578f740c1fc23e7048726c6ccc76d9..4048669bf756493bbc878163a3cd79cdb57c76cc 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
 /*
  * Daire McNamara,<daire.mcnamara@microchip.com>
- * Copyright (C) 2020 Microchip Technology Inc.  All rights reserved.
+ * Copyright (C) 2020-2022 Microchip Technology Inc.  All rights reserved.
  */
 
 #ifndef _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_
@@ -42,4 +42,7 @@
 #define CLK_ATHENA     31
 #define CLK_CFM                32
 
+#define CLK_RTCREF     33
+#define CLK_MSSPLL     34
+
 #endif /* _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ */
index d06134ac6245f5fbb055b490873e5162b7f7280b..cece70231138855972c5f471b58f3e8632cc1d1a 100644 (file)
@@ -339,7 +339,7 @@ static inline void audit_uring_entry(u8 op)
 }
 static inline void audit_uring_exit(int success, long code)
 {
-       if (unlikely(!audit_dummy_context()))
+       if (unlikely(audit_context()))
                __audit_uring_exit(success, code);
 }
 static inline void audit_syscall_entry(int major, unsigned long a0,
index 87ce24d238f3459e3174805282b7cf37e38624df..2bd073fa6bb53ebd0efad023e3ba8c836c56b691 100644 (file)
@@ -17,8 +17,6 @@
 #include <linux/backing-dev-defs.h>
 #include <linux/slab.h>
 
-struct blkcg;
-
 static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
 {
        kref_get(&bdi->refcnt);
@@ -154,7 +152,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
                                    struct cgroup_subsys_state *memcg_css,
                                    gfp_t gfp);
 void wb_memcg_offline(struct mem_cgroup *memcg);
-void wb_blkcg_offline(struct blkcg *blkcg);
+void wb_blkcg_offline(struct cgroup_subsys_state *css);
 
 /**
  * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
@@ -378,7 +376,7 @@ static inline void wb_memcg_offline(struct mem_cgroup *memcg)
 {
 }
 
-static inline void wb_blkcg_offline(struct blkcg *blkcg)
+static inline void wb_blkcg_offline(struct cgroup_subsys_state *css)
 {
 }
 
index 278cc81cc1e7fa7c264bcdda2c7e1dacbeedf95b..1cf3738ef1ea6d794271bc619a3072c1474ce46a 100644 (file)
@@ -269,6 +269,7 @@ struct folio_iter {
        size_t offset;
        size_t length;
        /* private: for use by the iterator */
+       struct folio *_next;
        size_t _seg_count;
        int _i;
 };
@@ -283,6 +284,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio,
                        PAGE_SIZE * (bvec->bv_page - &fi->folio->page);
        fi->_seg_count = bvec->bv_len;
        fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count);
+       fi->_next = folio_next(fi->folio);
        fi->_i = i;
 }
 
@@ -290,9 +292,10 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio)
 {
        fi->_seg_count -= fi->length;
        if (fi->_seg_count) {
-               fi->folio = folio_next(fi->folio);
+               fi->folio = fi->_next;
                fi->offset = 0;
                fi->length = min(folio_size(fi->folio), fi->_seg_count);
+               fi->_next = folio_next(fi->folio);
        } else if (fi->_i + 1 < bio->bi_vcnt) {
                bio_first_folio(fi, bio, fi->_i + 1);
        } else {
@@ -405,9 +408,7 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
                             unsigned int opf, gfp_t gfp_mask,
                             struct bio_set *bs);
-struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
-               unsigned short nr_vecs, unsigned int opf, struct bio_set *bs);
-struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs);
+struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask);
 extern void bio_put(struct bio *);
 
 struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
@@ -782,6 +783,12 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
                bio->bi_opf |= REQ_NOWAIT;
 }
 
+static inline void bio_clear_polled(struct bio *bio)
+{
+       /* can't support alloc cache if we turn off polling */
+       bio->bi_opf &= ~(REQ_POLLED | REQ_ALLOC_CACHE);
+}
+
 struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
                unsigned int nr_pages, unsigned int opf, gfp_t gfp);
 
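The new _next field makes the folio iterator read its successor while the
current folio is still known to be valid, so the loop body may drop the
last reference to fi->folio without the next bio_next_folio() call touching
freed memory. This is the same idea as list_for_each_entry_safe(); a
minimal sketch with a hypothetical node type:

        struct node {
                struct node *next;
        };

        static void walk(struct node *head, void (*maybe_free)(struct node *))
        {
                struct node *cur, *next;

                for (cur = head; cur; cur = next) {
                        next = cur->next;       /* saved while cur is valid */
                        maybe_free(cur);        /* may free cur */
                }
        }
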
index 652cd05b0924c3d687716a0f4bb42e5c7f8d331e..9f40dbc65f82c0e8ad44eaefb7b25a27accc5bf3 100644 (file)
  *                   Nauman Rafique <nauman@google.com>
  */
 
-#include <linux/cgroup.h>
-#include <linux/percpu.h>
-#include <linux/percpu_counter.h>
-#include <linux/u64_stats_sync.h>
-#include <linux/seq_file.h>
-#include <linux/radix-tree.h>
-#include <linux/blkdev.h>
-#include <linux/atomic.h>
-#include <linux/kthread.h>
-#include <linux/fs.h>
+#include <linux/types.h>
+
+struct bio;
+struct cgroup_subsys_state;
+struct request_queue;
 
 #define FC_APPID_LEN              129
 
 #ifdef CONFIG_BLK_CGROUP
-
-enum blkg_iostat_type {
-       BLKG_IOSTAT_READ,
-       BLKG_IOSTAT_WRITE,
-       BLKG_IOSTAT_DISCARD,
-
-       BLKG_IOSTAT_NR,
-};
-
-struct blkcg_gq;
-struct blkg_policy_data;
-
-struct blkcg {
-       struct cgroup_subsys_state      css;
-       spinlock_t                      lock;
-       refcount_t                      online_pin;
-
-       struct radix_tree_root          blkg_tree;
-       struct blkcg_gq __rcu           *blkg_hint;
-       struct hlist_head               blkg_list;
-
-       struct blkcg_policy_data        *cpd[BLKCG_MAX_POLS];
-
-       struct list_head                all_blkcgs_node;
-#ifdef CONFIG_BLK_CGROUP_FC_APPID
-       char                            fc_app_id[FC_APPID_LEN];
-#endif
-#ifdef CONFIG_CGROUP_WRITEBACK
-       struct list_head                cgwb_list;
-#endif
-};
-
-struct blkg_iostat {
-       u64                             bytes[BLKG_IOSTAT_NR];
-       u64                             ios[BLKG_IOSTAT_NR];
-};
-
-struct blkg_iostat_set {
-       struct u64_stats_sync           sync;
-       struct blkg_iostat              cur;
-       struct blkg_iostat              last;
-};
-
-/* association between a blk cgroup and a request queue */
-struct blkcg_gq {
-       /* Pointer to the associated request_queue */
-       struct request_queue            *q;
-       struct list_head                q_node;
-       struct hlist_node               blkcg_node;
-       struct blkcg                    *blkcg;
-
-       /* all non-root blkcg_gq's are guaranteed to have access to parent */
-       struct blkcg_gq                 *parent;
-
-       /* reference count */
-       struct percpu_ref               refcnt;
-
-       /* is this blkg online? protected by both blkcg and q locks */
-       bool                            online;
-
-       struct blkg_iostat_set __percpu *iostat_cpu;
-       struct blkg_iostat_set          iostat;
-
-       struct blkg_policy_data         *pd[BLKCG_MAX_POLS];
-
-       spinlock_t                      async_bio_lock;
-       struct bio_list                 async_bios;
-       union {
-               struct work_struct      async_bio_work;
-               struct work_struct      free_work;
-       };
-
-       atomic_t                        use_delay;
-       atomic64_t                      delay_nsec;
-       atomic64_t                      delay_start;
-       u64                             last_delay;
-       int                             last_use;
-
-       struct rcu_head                 rcu_head;
-};
-
 extern struct cgroup_subsys_state * const blkcg_root_css;
 
-void blkcg_destroy_blkgs(struct blkcg *blkcg);
 void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
 void blkcg_maybe_throttle_current(void);
-
-static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
-{
-       return css ? container_of(css, struct blkcg, css) : NULL;
-}
-
-/**
- * bio_blkcg - grab the blkcg associated with a bio
- * @bio: target bio
- *
- * This returns the blkcg associated with a bio, %NULL if not associated.
- * Callers are expected to either handle %NULL or know association has been
- * done prior to calling this.
- */
-static inline struct blkcg *bio_blkcg(struct bio *bio)
-{
-       if (bio && bio->bi_blkg)
-               return bio->bi_blkg->blkcg;
-       return NULL;
-}
-
-static inline bool blk_cgroup_congested(void)
-{
-       struct cgroup_subsys_state *css;
-       bool ret = false;
-
-       rcu_read_lock();
-       css = kthread_blkcg();
-       if (!css)
-               css = task_css(current, io_cgrp_id);
-       while (css) {
-               if (atomic_read(&css->cgroup->congestion_count)) {
-                       ret = true;
-                       break;
-               }
-               css = css->parent;
-       }
-       rcu_read_unlock();
-       return ret;
-}
-
-/**
- * blkcg_parent - get the parent of a blkcg
- * @blkcg: blkcg of interest
- *
- * Return the parent blkcg of @blkcg.  Can be called anytime.
- */
-static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
-{
-       return css_to_blkcg(blkcg->css.parent);
-}
-
-/**
- * blkcg_pin_online - pin online state
- * @blkcg: blkcg of interest
- *
- * While pinned, a blkcg is kept online.  This is primarily used to
- * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
- * while an associated cgwb is still active.
- */
-static inline void blkcg_pin_online(struct blkcg *blkcg)
-{
-       refcount_inc(&blkcg->online_pin);
-}
-
-/**
- * blkcg_unpin_online - unpin online state
- * @blkcg: blkcg of interest
- *
- * This is primarily used to impedance-match blkg and cgwb lifetimes so
- * that blkg doesn't go offline while an associated cgwb is still active.
- * When this count goes to zero, all active cgwbs have finished so the
- * blkcg can continue destruction by calling blkcg_destroy_blkgs().
- */
-static inline void blkcg_unpin_online(struct blkcg *blkcg)
-{
-       do {
-               if (!refcount_dec_and_test(&blkcg->online_pin))
-                       break;
-               blkcg_destroy_blkgs(blkcg);
-               blkcg = blkcg_parent(blkcg);
-       } while (blkcg);
-}
+bool blk_cgroup_congested(void);
+void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css);
+void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css);
+struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css);
+struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio);
 
 #else  /* CONFIG_BLK_CGROUP */
 
-struct blkcg {
-};
-
-struct blkcg_gq {
-};
-
 #define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
 
 static inline void blkcg_maybe_throttle_current(void) { }
 static inline bool blk_cgroup_congested(void) { return false; }
-
-#ifdef CONFIG_BLOCK
 static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
-static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
-#endif /* CONFIG_BLOCK */
-
-#endif /* CONFIG_BLK_CGROUP */
-
-#ifdef CONFIG_BLK_CGROUP_FC_APPID
-/*
- * Sets the fc_app_id field associted to blkcg
- * @app_id: application identifier
- * @cgrp_id: cgroup id
- * @app_id_len: size of application identifier
- */
-static inline int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
-{
-       struct cgroup *cgrp;
-       struct cgroup_subsys_state *css;
-       struct blkcg *blkcg;
-       int ret  = 0;
-
-       if (app_id_len > FC_APPID_LEN)
-               return -EINVAL;
-
-       cgrp = cgroup_get_from_id(cgrp_id);
-       if (!cgrp)
-               return -ENOENT;
-       css = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
-       if (!css) {
-               ret = -ENOENT;
-               goto out_cgrp_put;
-       }
-       blkcg = css_to_blkcg(css);
-       /*
-        * There is a slight race condition on setting the appid.
-        * Worst case an I/O may not find the right id.
-        * This is no different from the I/O we let pass while obtaining
-        * the vmid from the fabric.
-        * Adding the overhead of a lock is not necessary.
-        */
-       strlcpy(blkcg->fc_app_id, app_id, app_id_len);
-       css_put(css);
-out_cgrp_put:
-       cgroup_put(cgrp);
-       return ret;
-}
-
-/**
- * blkcg_get_fc_appid - get the fc app identifier associated with a bio
- * @bio: target bio
- *
- * On success return the fc_app_id, on failure return NULL
- */
-static inline char *blkcg_get_fc_appid(struct bio *bio)
+static inline struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
 {
-       if (bio && bio->bi_blkg &&
-               (bio->bi_blkg->blkcg->fc_app_id[0] != '\0'))
-               return bio->bi_blkg->blkcg->fc_app_id;
        return NULL;
 }
-#else
-static inline int blkcg_set_fc_appid(char *buf, u64 id, size_t len) { return -EINVAL; }
-static inline char *blkcg_get_fc_appid(struct bio *bio) { return NULL; }
-#endif /*CONFIG_BLK_CGROUP_FC_APPID*/
+#endif /* CONFIG_BLK_CGROUP */
+
+int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len);
+char *blkcg_get_fc_appid(struct bio *bio);
+
 #endif /* _BLK_CGROUP_H */
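
struct blkcg and its inline helpers are now private to the block layer, so
code elsewhere deals only in cgroup_subsys_state pointers. A sketch of
updating a hypothetical external caller of the removed bio_blkcg():

        /* Before (removed by this series): */
        struct blkcg *blkcg = bio_blkcg(bio);

        /* After: only the css is visible outside block/. */
        struct cgroup_subsys_state *css = bio_blkcg_css(bio);
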
index 7aa5c54901a93adcbca8881bdd68d759bc47781e..9f07061418db01cb60ac33a45a48bacb54329855 100644 (file)
@@ -163,7 +163,6 @@ struct request {
                struct rb_node rb_node; /* sort/lookup */
                struct bio_vec special_vec;
                void *completion_data;
-               int error_count; /* for legacy drivers, don't use */
        };
 
 
index 1973ef9bd40fcfbdff0bc7817272e8963f59c80a..c007d58d2703b29415037b94c7bc480635e1f35c 100644 (file)
@@ -44,7 +44,7 @@ struct block_device {
        unsigned long           bd_stamp;
        bool                    bd_read_only;   /* read-only policy */
        dev_t                   bd_dev;
-       int                     bd_openers;
+       atomic_t                bd_openers;
        struct inode *          bd_inode;       /* will die */
        struct super_block *    bd_super;
        void *                  bd_claiming;
@@ -246,9 +246,8 @@ typedef unsigned int blk_qc_t;
 struct bio {
        struct bio              *bi_next;       /* request queue link */
        struct block_device     *bi_bdev;
-       unsigned int            bi_opf;         /* bottom bits req flags,
-                                                * top bits REQ_OP. Use
-                                                * accessors.
+       unsigned int            bi_opf;         /* bottom bits REQ_OP, top bits
+                                                * req_flags.
                                                 */
        unsigned short          bi_flags;       /* BIO_* below */
        unsigned short          bi_ioprio;
@@ -329,7 +328,6 @@ enum {
        BIO_QOS_MERGED,         /* but went through rq_qos merge path */
        BIO_REMAPPED,
        BIO_ZONE_WRITE_LOCKED,  /* Owns a zoned device zone write lock */
-       BIO_PERCPU_CACHE,       /* can participate in per-cpu alloc cache */
        BIO_FLAG_LAST
 };
 
@@ -409,15 +407,17 @@ enum req_flag_bits {
         * work item to avoid such priority inversions.
         */
        __REQ_CGROUP_PUNT,
+       __REQ_POLLED,           /* caller polls for completion using bio_poll */
+       __REQ_ALLOC_CACHE,      /* allocate IO from cache if available */
+       __REQ_SWAP,             /* swap I/O */
+       __REQ_DRV,              /* for driver use */
 
-       /* command specific flags for REQ_OP_WRITE_ZEROES: */
+       /*
+        * Command specific flags, keep last:
+        */
+       /* for REQ_OP_WRITE_ZEROES: */
        __REQ_NOUNMAP,          /* do not free blocks when zeroing */
 
-       __REQ_POLLED,           /* caller polls for completion using bio_poll */
-
-       /* for driver use */
-       __REQ_DRV,
-       __REQ_SWAP,             /* swapping request. */
        __REQ_NR_BITS,          /* stops here */
 };
 
@@ -439,6 +439,7 @@ enum req_flag_bits {
 
 #define REQ_NOUNMAP            (1ULL << __REQ_NOUNMAP)
 #define REQ_POLLED             (1ULL << __REQ_POLLED)
+#define REQ_ALLOC_CACHE                (1ULL << __REQ_ALLOC_CACHE)
 
 #define REQ_DRV                        (1ULL << __REQ_DRV)
 #define REQ_SWAP               (1ULL << __REQ_SWAP)
index 60d01613899711c3fedca8001fbc0571bd32f096..5bdf2ac9142c95a8d66d4fbd75eba47acd77142b 100644 (file)
@@ -176,6 +176,21 @@ static inline bool disk_live(struct gendisk *disk)
        return !inode_unhashed(disk->part0->bd_inode);
 }
 
+/**
+ * disk_openers - returns how many openers there are for a disk
+ * @disk: disk to check
+ *
+ * This returns the number of openers for a disk.  Note that this value is only
+ * stable if disk->open_mutex is held.
+ *
+ * Note: Due to a quirk in the block layer open code, each open partition is
+ * only counted once even if there are multiple openers.
+ */
+static inline unsigned int disk_openers(struct gendisk *disk)
+{
+       return atomic_read(&disk->part0->bd_openers);
+}
+
 /*
  * The gendisk is refcounted by the part0 block_device, and the bd_device
  * therein is also used for device model presentation in sysfs.
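
disk_openers() gives drivers one sanctioned way to check how many times a
disk is held open, for example to refuse destructive operations while other
openers exist. A hypothetical use (not from this diff); disk->open_mutex
must be held for the value to be stable:

        if (disk_openers(disk) > 1)
                return -EBUSY;  /* someone else still has the disk open */
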
@@ -248,6 +263,7 @@ struct queue_limits {
        unsigned int            io_opt;
        unsigned int            max_discard_sectors;
        unsigned int            max_hw_discard_sectors;
+       unsigned int            max_secure_erase_sectors;
        unsigned int            max_write_zeroes_sectors;
        unsigned int            max_zone_append_sectors;
        unsigned int            discard_granularity;
@@ -540,10 +556,8 @@ struct request_queue {
 #define QUEUE_FLAG_NONROT      6       /* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT                QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT     7       /* do disk/partitions IO accounting */
-#define QUEUE_FLAG_DISCARD     8       /* supports DISCARD */
 #define QUEUE_FLAG_NOXMERGES   9       /* No extended merges */
 #define QUEUE_FLAG_ADD_RANDOM  10      /* Contributes to random pool */
-#define QUEUE_FLAG_SECERASE    11      /* supports secure erase */
 #define QUEUE_FLAG_SAME_FORCE  12      /* force complete on same CPU */
 #define QUEUE_FLAG_DEAD                13      /* queue tear-down finished */
 #define QUEUE_FLAG_INIT_DONE   14      /* queue is initialized */
@@ -582,11 +596,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
        test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags)
 #define blk_queue_io_stat(q)   test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
 #define blk_queue_add_random(q)        test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
-#define blk_queue_discard(q)   test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
 #define blk_queue_zone_resetall(q)     \
        test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
-#define blk_queue_secure_erase(q) \
-       (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
 #define blk_queue_dax(q)       test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
 #define blk_queue_pci_p2pdma(q)        \
        test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
@@ -602,7 +613,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
                             REQ_FAILFAST_DRIVER))
 #define blk_queue_quiesced(q)  test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
 #define blk_queue_pm_only(q)   atomic_read(&(q)->pm_only)
-#define blk_queue_fua(q)       test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
 #define blk_queue_registered(q)        test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
 #define blk_queue_nowait(q)    test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
 
@@ -950,6 +960,8 @@ extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_discard_segments(struct request_queue *,
                unsigned short);
+void blk_queue_max_secure_erase_sectors(struct request_queue *q,
+               unsigned int max_sectors);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,
                unsigned int max_discard_sectors);
@@ -1090,13 +1102,12 @@ static inline long nr_blockdev_pages(void)
 
 extern void blk_io_schedule(void);
 
-#define BLKDEV_DISCARD_SECURE  (1 << 0)        /* issue a secure erase */
-
-extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-               sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
-extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-               sector_t nr_sects, gfp_t gfp_mask, int flags,
-               struct bio **biop);
+int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+               sector_t nr_sects, gfp_t gfp_mask);
+int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+               sector_t nr_sects, gfp_t gfp_mask, struct bio **biop);
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+               sector_t nr_sects, gfp_t gfp);
 
 #define BLKDEV_ZERO_NOUNMAP    (1 << 0)  /* do not free blocks */
 #define BLKDEV_ZERO_NOFALLBACK (1 << 1)  /* don't write explicit zeroes */
@@ -1115,7 +1126,7 @@ static inline int sb_issue_discard(struct super_block *sb, sector_t block,
                                              SECTOR_SHIFT),
                                    nr_blocks << (sb->s_blocksize_bits -
                                                  SECTOR_SHIFT),
-                                   gfp_mask, flags);
+                                   gfp_mask);
 }
 static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
                sector_t nr_blocks, gfp_t gfp_mask)
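
Secure erase is no longer requested via a flag to blkdev_issue_discard();
the flags argument is dropped and former BLKDEV_DISCARD_SECURE users switch
to the dedicated helper. A before/after sketch for such a caller:

        /* Before: */
        err = blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL,
                                   BLKDEV_DISCARD_SECURE);

        /* After: */
        err = blkdev_issue_secure_erase(bdev, sector, nr_sects, GFP_KERNEL);
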
@@ -1189,6 +1200,12 @@ static inline unsigned int queue_max_zone_append_sectors(const struct request_qu
        return min(l->max_zone_append_sectors, l->max_sectors);
 }
 
+static inline unsigned int
+bdev_max_zone_append_sectors(struct block_device *bdev)
+{
+       return queue_max_zone_append_sectors(bdev_get_queue(bdev));
+}
+
 static inline unsigned queue_logical_block_size(const struct request_queue *q)
 {
        int retval = 512;
@@ -1246,84 +1263,54 @@ bdev_zone_write_granularity(struct block_device *bdev)
        return queue_zone_write_granularity(bdev_get_queue(bdev));
 }
 
-static inline int queue_alignment_offset(const struct request_queue *q)
-{
-       if (q->limits.misaligned)
-               return -1;
+int bdev_alignment_offset(struct block_device *bdev);
+unsigned int bdev_discard_alignment(struct block_device *bdev);
 
-       return q->limits.alignment_offset;
+static inline unsigned int bdev_max_discard_sectors(struct block_device *bdev)
+{
+       return bdev_get_queue(bdev)->limits.max_discard_sectors;
 }
 
-static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
+static inline unsigned int bdev_discard_granularity(struct block_device *bdev)
 {
-       unsigned int granularity = max(lim->physical_block_size, lim->io_min);
-       unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
-               << SECTOR_SHIFT;
+       return bdev_get_queue(bdev)->limits.discard_granularity;
+}
 
-       return (granularity + lim->alignment_offset - alignment) % granularity;
+static inline unsigned int
+bdev_max_secure_erase_sectors(struct block_device *bdev)
+{
+       return bdev_get_queue(bdev)->limits.max_secure_erase_sectors;
 }
 
-static inline int bdev_alignment_offset(struct block_device *bdev)
+static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
 {
        struct request_queue *q = bdev_get_queue(bdev);
 
-       if (q->limits.misaligned)
-               return -1;
-       if (bdev_is_partition(bdev))
-               return queue_limit_alignment_offset(&q->limits,
-                               bdev->bd_start_sect);
-       return q->limits.alignment_offset;
+       if (q)
+               return q->limits.max_write_zeroes_sectors;
+
+       return 0;
 }
 
-static inline int queue_discard_alignment(const struct request_queue *q)
+static inline bool bdev_nonrot(struct block_device *bdev)
 {
-       if (q->limits.discard_misaligned)
-               return -1;
-
-       return q->limits.discard_alignment;
+       return blk_queue_nonrot(bdev_get_queue(bdev));
 }
 
-static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
+static inline bool bdev_stable_writes(struct block_device *bdev)
 {
-       unsigned int alignment, granularity, offset;
-
-       if (!lim->max_discard_sectors)
-               return 0;
-
-       /* Why are these in bytes, not sectors? */
-       alignment = lim->discard_alignment >> SECTOR_SHIFT;
-       granularity = lim->discard_granularity >> SECTOR_SHIFT;
-       if (!granularity)
-               return 0;
-
-       /* Offset of the partition start in 'granularity' sectors */
-       offset = sector_div(sector, granularity);
-
-       /* And why do we do this modulus *again* in blkdev_issue_discard()? */
-       offset = (granularity + alignment - offset) % granularity;
-
-       /* Turn it back into bytes, gaah */
-       return offset << SECTOR_SHIFT;
+       return test_bit(QUEUE_FLAG_STABLE_WRITES,
+                       &bdev_get_queue(bdev)->queue_flags);
 }
 
-static inline int bdev_discard_alignment(struct block_device *bdev)
+static inline bool bdev_write_cache(struct block_device *bdev)
 {
-       struct request_queue *q = bdev_get_queue(bdev);
-
-       if (bdev_is_partition(bdev))
-               return queue_limit_discard_alignment(&q->limits,
-                               bdev->bd_start_sect);
-       return q->limits.discard_alignment;
+       return test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags);
 }
 
-static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
+static inline bool bdev_fua(struct block_device *bdev)
 {
-       struct request_queue *q = bdev_get_queue(bdev);
-
-       if (q)
-               return q->limits.max_write_zeroes_sectors;
-
-       return 0;
+       return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags);
 }
 
 static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
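
The same hunk replaces a number of queue-level feature tests with
block_device-level wrappers, so callers no longer reach through
bdev_get_queue(). A migration sketch for a hypothetical caller:

        /* Before: */
        if (blk_queue_nonrot(bdev_get_queue(bdev)))
                use_ssd_strategy();

        /* After: */
        if (bdev_nonrot(bdev))
                use_ssd_strategy();
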
@@ -1491,9 +1478,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
                wake_up_process(waiter);
 }
 
-unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
-               unsigned int op);
-void disk_end_io_acct(struct gendisk *disk, unsigned int op,
+unsigned long bdev_start_io_acct(struct block_device *bdev,
+                                unsigned int sectors, unsigned int op,
+                                unsigned long start_time);
+void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
                unsigned long start_time);
 
 void bio_start_io_acct_time(struct bio *bio, unsigned long start_time);
index 22501a293fa545b4d744ed6dc054f133fc8e799b..623e22492afa50de750593485e0e264519c44779 100644 (file)
@@ -27,12 +27,10 @@ struct blk_trace {
        atomic_t dropped;
 };
 
-struct blkcg;
-
 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
 extern void blk_trace_shutdown(struct request_queue *);
-extern __printf(3, 4)
-void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *fmt, ...);
+__printf(3, 4) void __blk_trace_note_message(struct blk_trace *bt,
+               struct cgroup_subsys_state *css, const char *fmt, ...);
 
 /**
  * blk_add_trace_msg - Add a (simple) message to the blktrace stream
@@ -47,14 +45,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
  *     NOTE: Can not use 'static inline' due to presence of var args...
  *
  **/
-#define blk_add_cgroup_trace_msg(q, cg, fmt, ...)                      \
+#define blk_add_cgroup_trace_msg(q, css, fmt, ...)                     \
        do {                                                            \
                struct blk_trace *bt;                                   \
                                                                        \
                rcu_read_lock();                                        \
                bt = rcu_dereference((q)->blk_trace);                   \
                if (unlikely(bt))                                       \
-                       __trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\
+                       __blk_trace_note_message(bt, css, fmt, ##__VA_ARGS__);\
                rcu_read_unlock();                                      \
        } while (0)
 #define blk_add_trace_msg(q, fmt, ...)                                 \
index bdb5298735ce93d164ab59bdddf1a41cc0761e3b..ecc3d3ec41cf3ea25f5a6cba3544810981fcbcfa 100644 (file)
@@ -2085,6 +2085,8 @@ void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
                                       struct net_device *netdev);
 bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
 
+void unpriv_ebpf_notify(int new_state);
+
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
 
index c1fc4af47f696f79e6a5064d3940ee4c71bded89..3a9d2d7cc6b7253de5f49e158d93af31db2ea920 100644 (file)
@@ -570,9 +570,11 @@ static inline u32 type_flag(u32 type)
        return type & ~BPF_BASE_TYPE_MASK;
 }
 
+/* only use after check_attach_btf_id() */
 static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
 {
-       return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
+       return prog->type == BPF_PROG_TYPE_EXT ?
+               prog->aux->dst_prog->type : prog->type;
 }
 
 #endif /* _LINUX_BPF_VERIFIER_H */
index efd8205282da74e12c4cb3e1fcb99d899f8fed64..cb0d6cd1c12f24e1dd8681b5f9f0302675bec7d5 100644 (file)
@@ -72,6 +72,24 @@ enum cc_attr {
         * Examples include TDX guest & SEV.
         */
        CC_ATTR_GUEST_UNROLL_STRING_IO,
+
+       /**
+        * @CC_ATTR_GUEST_SEV_SNP: Guest SNP is active.
+        *
+        * The platform/OS is running as a guest/virtual machine and actively
+        * using AMD SEV-SNP features.
+        */
+       CC_ATTR_GUEST_SEV_SNP,
+
+       /**
+        * @CC_ATTR_HOTPLUG_DISABLED: Hotplug is not supported or disabled.
+        *
+        * The platform/OS is running as a guest/virtual machine that does
+        * not support the CPU hotplug feature.
+        *
+        * Examples include TDX Guest.
+        */
+       CC_ATTR_HOTPLUG_DISABLED,
 };
 
 #ifdef CONFIG_ARCH_HAS_CC_PLATFORM
index 0a89f111e00e1c730c283785a5155f4ceaf02d2d..67caa909e3e615670ec415376bc59d57f64ad15d 100644 (file)
@@ -77,7 +77,6 @@ struct cdrom_device_ops {
        int (*tray_move) (struct cdrom_device_info *, int);
        int (*lock_door) (struct cdrom_device_info *, int);
        int (*select_speed) (struct cdrom_device_info *, int);
-       int (*select_disc) (struct cdrom_device_info *, int);
        int (*get_last_session) (struct cdrom_device_info *,
                                 struct cdrom_multisession *);
        int (*get_mcn) (struct cdrom_device_info *,
index 3431011f364dd4cec482bb58b93c345fb815b276..cba8a6ffc3290d068d51f5d734c3547e4de23c4f 100644 (file)
@@ -287,6 +287,9 @@ struct ceph_osd_linger_request {
        rados_watcherrcb_t errcb;
        void *data;
 
+       struct ceph_pagelist *request_pl;
+       struct page **notify_id_pages;
+
        struct page ***preply_pages;
        size_t *preply_len;
 };
index 6a511a1078ca069c4fa0e120b781c4203571afc8..eacb7dd7b3af34a6ea82c17eda3752b4cfb072b2 100644 (file)
@@ -558,6 +558,7 @@ extern const char *const cper_proc_error_type_strs[4];
 u64 cper_next_record_id(void);
 const char *cper_severity_str(unsigned int);
 const char *cper_mem_err_type_str(unsigned int);
+const char *cper_mem_err_status_str(u64 status);
 void cper_print_bits(const char *prefix, unsigned int bits,
                     const char * const strs[], unsigned int strs_size);
 void cper_mem_err_pack(const struct cper_sec_mem_err *,
@@ -568,5 +569,7 @@ void cper_print_proc_arm(const char *pfx,
                         const struct cper_sec_proc_arm *proc);
 void cper_print_proc_ia(const char *pfx,
                        const struct cper_sec_proc_ia *proc);
+int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg);
+int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg);
 
 #endif
index 9cf51e41e697269d69759c46c6ca414b147ce222..54dc2f9a2d56e9f43cac9ef4e30600152c2230fd 100644 (file)
@@ -167,7 +167,7 @@ static inline int suspend_disable_secondary_cpus(void) { return 0; }
 static inline void suspend_enable_secondary_cpus(void) { }
 #endif /* !CONFIG_PM_SLEEP_SMP */
 
-void cpu_startup_entry(enum cpuhp_state state);
+void __noreturn cpu_startup_entry(enum cpuhp_state state);
 
 void cpu_idle_poll_ctrl(bool enable);
 
index 35c7d6db4139e46df2d8da6b8a0e147c012fc71d..d5595d57f4e53680a7e8b2a6e1bf6435a7c14501 100644 (file)
@@ -1199,7 +1199,6 @@ static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
                        struct cpufreq_governor *old_gov) { }
 #endif
 
-extern void arch_freq_prepare_all(void);
 extern unsigned int arch_freq_get_on_cpu(int cpu);
 
 #ifndef arch_set_freq_scale
diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h
deleted file mode 100644 (file)
index 19fa0b5..0000000
+++ /dev/null
@@ -1,266 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Pointer to dma-buf-mapped memory, plus helpers.
- */
-
-#ifndef __DMA_BUF_MAP_H__
-#define __DMA_BUF_MAP_H__
-
-#include <linux/io.h>
-#include <linux/string.h>
-
-/**
- * DOC: overview
- *
- * Calling dma-buf's vmap operation returns a pointer to the buffer's memory.
- * Depending on the location of the buffer, users may have to access it with
- * I/O operations or memory load/store operations. For example, copying to
- * system memory could be done with memcpy(), copying to I/O memory would be
- * done with memcpy_toio().
- *
- * .. code-block:: c
- *
- *     void *vaddr = ...; // pointer to system memory
- *     memcpy(vaddr, src, len);
- *
- *     void *vaddr_iomem = ...; // pointer to I/O memory
- *     memcpy_toio(vaddr, _iomem, src, len);
- *
- * When using dma-buf's vmap operation, the returned pointer is encoded as
- * :c:type:`struct dma_buf_map <dma_buf_map>`.
- * :c:type:`struct dma_buf_map <dma_buf_map>` stores the buffer's address in
- * system or I/O memory and a flag that signals the required method of
- * accessing the buffer. Use the returned instance and the helper functions
- * to access the buffer's memory in the correct way.
- *
- * The type :c:type:`struct dma_buf_map <dma_buf_map>` and its helpers are
- * actually independent from the dma-buf infrastructure. When sharing buffers
- * among devices, drivers have to know the location of the memory to access
- * the buffers in a safe way. :c:type:`struct dma_buf_map <dma_buf_map>`
- * solves this problem for dma-buf and its users. If other drivers or
- * sub-systems require similar functionality, the type could be generalized
- * and moved to a more prominent header file.
- *
- * Open-coding access to :c:type:`struct dma_buf_map <dma_buf_map>` is
- * considered bad style. Rather then accessing its fields directly, use one
- * of the provided helper functions, or implement your own. For example,
- * instances of :c:type:`struct dma_buf_map <dma_buf_map>` can be initialized
- * statically with DMA_BUF_MAP_INIT_VADDR(), or at runtime with
- * dma_buf_map_set_vaddr(). These helpers will set an address in system memory.
- *
- * .. code-block:: c
- *
- *     struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(0xdeadbeaf);
- *
- *     dma_buf_map_set_vaddr(&map, 0xdeadbeaf);
- *
- * To set an address in I/O memory, use dma_buf_map_set_vaddr_iomem().
- *
- * .. code-block:: c
- *
- *     dma_buf_map_set_vaddr_iomem(&map, 0xdeadbeaf);
- *
- * Instances of struct dma_buf_map do not have to be cleaned up, but
- * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings
- * always refer to system memory.
- *
- * .. code-block:: c
- *
- *     dma_buf_map_clear(&map);
- *
- * Test if a mapping is valid with either dma_buf_map_is_set() or
- * dma_buf_map_is_null().
- *
- * .. code-block:: c
- *
- *     if (dma_buf_map_is_set(&map) != dma_buf_map_is_null(&map))
- *             // always true
- *
- * Instances of :c:type:`struct dma_buf_map <dma_buf_map>` can be compared
- * for equality with dma_buf_map_is_equal(). Mappings the point to different
- * memory spaces, system or I/O, are never equal. That's even true if both
- * spaces are located in the same address space, both mappings contain the
- * same address value, or both mappings refer to NULL.
- *
- * .. code-block:: c
- *
- *     struct dma_buf_map sys_map; // refers to system memory
- *     struct dma_buf_map io_map; // refers to I/O memory
- *
- *     if (dma_buf_map_is_equal(&sys_map, &io_map))
- *             // always false
- *
- * A set up instance of struct dma_buf_map can be used to access or manipulate
- * the buffer memory. Depending on the location of the memory, the provided
- * helpers will pick the correct operations. Data can be copied into the memory
- * with dma_buf_map_memcpy_to(). The address can be manipulated with
- * dma_buf_map_incr().
- *
- * .. code-block:: c
- *
- *     const void *src = ...; // source buffer
- *     size_t len = ...; // length of src
- *
- *     dma_buf_map_memcpy_to(&map, src, len);
- *     dma_buf_map_incr(&map, len); // go to first byte after the memcpy
- */
-
-/**
- * struct dma_buf_map - Pointer to vmap'ed dma-buf memory.
- * @vaddr_iomem:       The buffer's address if in I/O memory
- * @vaddr:             The buffer's address if in system memory
- * @is_iomem:          True if the dma-buf memory is located in I/O
- *                     memory, or false otherwise.
- */
-struct dma_buf_map {
-       union {
-               void __iomem *vaddr_iomem;
-               void *vaddr;
-       };
-       bool is_iomem;
-};
-
-/**
- * DMA_BUF_MAP_INIT_VADDR - Initializes struct dma_buf_map to an address in system memory
- * @vaddr_:    A system-memory address
- */
-#define DMA_BUF_MAP_INIT_VADDR(vaddr_) \
-       { \
-               .vaddr = (vaddr_), \
-               .is_iomem = false, \
-       }
-
-/**
- * dma_buf_map_set_vaddr - Sets a dma-buf mapping structure to an address in system memory
- * @map:       The dma-buf mapping structure
- * @vaddr:     A system-memory address
- *
- * Sets the address and clears the I/O-memory flag.
- */
-static inline void dma_buf_map_set_vaddr(struct dma_buf_map *map, void *vaddr)
-{
-       map->vaddr = vaddr;
-       map->is_iomem = false;
-}
-
-/**
- * dma_buf_map_set_vaddr_iomem - Sets a dma-buf mapping structure to an address in I/O memory
- * @map:               The dma-buf mapping structure
- * @vaddr_iomem:       An I/O-memory address
- *
- * Sets the address and the I/O-memory flag.
- */
-static inline void dma_buf_map_set_vaddr_iomem(struct dma_buf_map *map,
-                                              void __iomem *vaddr_iomem)
-{
-       map->vaddr_iomem = vaddr_iomem;
-       map->is_iomem = true;
-}
-
-/**
- * dma_buf_map_is_equal - Compares two dma-buf mapping structures for equality
- * @lhs:       The dma-buf mapping structure
- * @rhs:       A dma-buf mapping structure to compare with
- *
- * Two dma-buf mapping structures are equal if they both refer to the same type of memory
- * and to the same address within that memory.
- *
- * Returns:
- * True is both structures are equal, or false otherwise.
- */
-static inline bool dma_buf_map_is_equal(const struct dma_buf_map *lhs,
-                                       const struct dma_buf_map *rhs)
-{
-       if (lhs->is_iomem != rhs->is_iomem)
-               return false;
-       else if (lhs->is_iomem)
-               return lhs->vaddr_iomem == rhs->vaddr_iomem;
-       else
-               return lhs->vaddr == rhs->vaddr;
-}
-
-/**
- * dma_buf_map_is_null - Tests for a dma-buf mapping to be NULL
- * @map:       The dma-buf mapping structure
- *
- * Depending on the state of struct dma_buf_map.is_iomem, tests if the
- * mapping is NULL.
- *
- * Returns:
- * True if the mapping is NULL, or false otherwise.
- */
-static inline bool dma_buf_map_is_null(const struct dma_buf_map *map)
-{
-       if (map->is_iomem)
-               return !map->vaddr_iomem;
-       return !map->vaddr;
-}
-
-/**
- * dma_buf_map_is_set - Tests is the dma-buf mapping has been set
- * @map:       The dma-buf mapping structure
- *
- * Depending on the state of struct dma_buf_map.is_iomem, tests if the
- * mapping has been set.
- *
- * Returns:
- * True if the mapping is been set, or false otherwise.
- */
-static inline bool dma_buf_map_is_set(const struct dma_buf_map *map)
-{
-       return !dma_buf_map_is_null(map);
-}
-
-/**
- * dma_buf_map_clear - Clears a dma-buf mapping structure
- * @map:       The dma-buf mapping structure
- *
- * Clears all fields to zero; including struct dma_buf_map.is_iomem. So
- * mapping structures that were set to point to I/O memory are reset for
- * system memory. Pointers are cleared to NULL. This is the default.
- */
-static inline void dma_buf_map_clear(struct dma_buf_map *map)
-{
-       if (map->is_iomem) {
-               map->vaddr_iomem = NULL;
-               map->is_iomem = false;
-       } else {
-               map->vaddr = NULL;
-       }
-}
-
-/**
- * dma_buf_map_memcpy_to - Memcpy into dma-buf mapping
- * @dst:       The dma-buf mapping structure
- * @src:       The source buffer
- * @len:       The number of byte in src
- *
- * Copies data into a dma-buf mapping. The source buffer is in system
- * memory. Depending on the buffer's location, the helper picks the correct
- * method of accessing the memory.
- */
-static inline void dma_buf_map_memcpy_to(struct dma_buf_map *dst, const void *src, size_t len)
-{
-       if (dst->is_iomem)
-               memcpy_toio(dst->vaddr_iomem, src, len);
-       else
-               memcpy(dst->vaddr, src, len);
-}
-
-/**
- * dma_buf_map_incr - Increments the address stored in a dma-buf mapping
- * @map:       The dma-buf mapping structure
- * @incr:      The number of bytes to increment
- *
- * Increments the address stored in a dma-buf mapping. Depending on the
- * buffer's location, the correct value will be updated.
- */
-static inline void dma_buf_map_incr(struct dma_buf_map *map, size_t incr)
-{
-       if (map->is_iomem)
-               map->vaddr_iomem += incr;
-       else
-               map->vaddr += incr;
-}
-
-#endif /* __DMA_BUF_MAP_H__ */
index fec374f69e125506a507e92ec9486e53c79c0de2..ec7f25def39290cfb331f0350f431cd8afd249b6 100644 (file)
@@ -61,6 +61,21 @@ to_dma_fence_array(struct dma_fence *fence)
        return container_of(fence, struct dma_fence_array, base);
 }
 
+/**
+ * dma_fence_array_for_each - iterate over all fences in array
+ * @fence: current fence
+ * @index: index into the array
+ * @head: potential dma_fence_array object
+ *
+ * Test if @head is a dma_fence_array object and, if so, iterate over all fences
+ * in the array. If not, just iterate over the single fence @head itself.
+ *
+ * For a deep dive iterator see dma_fence_unwrap_for_each().
+ */
+#define dma_fence_array_for_each(fence, index, head)                   \
+       for (index = 0, fence = dma_fence_array_first(head); fence;     \
+            ++(index), fence = dma_fence_array_next(head, index))
+
 struct dma_fence_array *dma_fence_array_create(int num_fences,
                                               struct dma_fence **fences,
                                               u64 context, unsigned seqno,
@@ -68,4 +83,8 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
 
 bool dma_fence_match_context(struct dma_fence *fence, u64 context);
 
+struct dma_fence *dma_fence_array_first(struct dma_fence *head);
+struct dma_fence *dma_fence_array_next(struct dma_fence *head,
+                                      unsigned int index);
+
 #endif /* __LINUX_DMA_FENCE_ARRAY_H */
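
A hedged sketch of the new iterator; a plain fence that is not a dma_fence_array is visited exactly once (the function name is hypothetical):

    static unsigned int count_array_fences(struct dma_fence *head)
    {
            struct dma_fence *fence;
            unsigned int index;
            unsigned int count = 0;

            dma_fence_array_for_each(fence, index, head)
                    count++;

            return count;
    }
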
index 10d51bcdf7b7946a675203d0e37110454e9256c4..4bdf0b96da28312c8e33755ad6ffa1befdd022ae 100644 (file)
@@ -112,6 +112,8 @@ static inline void dma_fence_chain_free(struct dma_fence_chain *chain)
  *
  * Iterate over all fences in the chain. We keep a reference to the current
  * fence while inside the loop which must be dropped when breaking out.
+ *
+ * For a deep dive iterator see dma_fence_unwrap_for_each().
  */
 #define dma_fence_chain_for_each(iter, head)   \
        for (iter = dma_fence_get(head); iter; \
diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h
new file mode 100644 (file)
index 0000000..77e335a
--- /dev/null
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * fence-unwrap: unwrap and iterate over fences in chain and array containers
+ *
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ * Authors:
+ *     Christian König <christian.koenig@amd.com>
+ */
+
+#ifndef __LINUX_DMA_FENCE_UNWRAP_H
+#define __LINUX_DMA_FENCE_UNWRAP_H
+
+#include <linux/dma-fence-chain.h>
+#include <linux/dma-fence-array.h>
+
+/**
+ * struct dma_fence_unwrap - cursor into the container structure
+ *
+ * Should be used with dma_fence_unwrap_for_each() iterator macro.
+ */
+struct dma_fence_unwrap {
+       /**
+        * @chain: potential dma_fence_chain, but can be other fence as well
+        */
+       struct dma_fence *chain;
+       /**
+        * @array: potential dma_fence_array, but can be other fence as well
+        */
+       struct dma_fence *array;
+       /**
+        * @index: last returned index if @array is really a dma_fence_array
+        */
+       unsigned int index;
+};
+
+/* Internal helper to start new array iteration, don't use directly */
+static inline struct dma_fence *
+__dma_fence_unwrap_array(struct dma_fence_unwrap *cursor)
+{
+       cursor->array = dma_fence_chain_contained(cursor->chain);
+       cursor->index = 0;
+       return dma_fence_array_first(cursor->array);
+}
+
+/**
+ * dma_fence_unwrap_first - return the first fence from fence containers
+ * @head: the entrypoint into the containers
+ * @cursor: current position inside the containers
+ *
+ * Unwraps potential dma_fence_chain/dma_fence_array containers and returns the
+ * first fence.
+ */
+static inline struct dma_fence *
+dma_fence_unwrap_first(struct dma_fence *head, struct dma_fence_unwrap *cursor)
+{
+       cursor->chain = dma_fence_get(head);
+       return __dma_fence_unwrap_array(cursor);
+}
+
+/**
+ * dma_fence_unwrap_next - return the next fence from the fence containers
+ * @cursor: current position inside the containers
+ *
+ * Continues unwrapping the dma_fence_chain/dma_fence_array containers and
+ * returns the next fence from them.
+ */
+static inline struct dma_fence *
+dma_fence_unwrap_next(struct dma_fence_unwrap *cursor)
+{
+       struct dma_fence *tmp;
+
+       ++cursor->index;
+       tmp = dma_fence_array_next(cursor->array, cursor->index);
+       if (tmp)
+               return tmp;
+
+       cursor->chain = dma_fence_chain_walk(cursor->chain);
+       return __dma_fence_unwrap_array(cursor);
+}
+
+/**
+ * dma_fence_unwrap_for_each - iterate over all fences in containers
+ * @fence: current fence
+ * @cursor: current position inside the containers
+ * @head: starting point for the iterator
+ *
+ * Unwrap dma_fence_chain and dma_fence_array containers and deep dive into all
+ * potential fences in them. If @head is just a normal fence, only that one is
+ * returned.
+ */
+#define dma_fence_unwrap_for_each(fence, cursor, head)                 \
+       for (fence = dma_fence_unwrap_first(head, cursor); fence;       \
+            fence = dma_fence_unwrap_next(cursor))
+
+#endif
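
A hedged usage sketch of the deep-dive iterator; the cursor manages the chain references itself as long as the loop runs to completion (the function name is hypothetical):

    #include <linux/dma-fence-unwrap.h>

    static unsigned int count_leaf_fences(struct dma_fence *head)
    {
            struct dma_fence_unwrap cursor;
            struct dma_fence *f;
            unsigned int count = 0;

            dma_fence_unwrap_for_each(f, &cursor, head)
                    count++;

            return count;
    }
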
index ccd4d3f91c98c247cb772df2a94e104b231a6bf7..db424f3dc3f2fc3550ffceceef226439a0cd2c8c 100644 (file)
@@ -213,6 +213,8 @@ struct capsule_info {
        size_t                  page_bytes_remain;
 };
 
+int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
+                           size_t hdr_bytes);
 int __efi_capsule_setup_info(struct capsule_info *cap_info);
 
 /*
@@ -383,6 +385,7 @@ void efi_native_runtime_setup(void);
 #define EFI_LOAD_FILE_PROTOCOL_GUID            EFI_GUID(0x56ec3091, 0x954c, 0x11d2,  0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b)
 #define EFI_LOAD_FILE2_PROTOCOL_GUID           EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e,  0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d)
 #define EFI_RT_PROPERTIES_TABLE_GUID           EFI_GUID(0xeb66918a, 0x7eef, 0x402a,  0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9)
+#define EFI_DXE_SERVICES_TABLE_GUID            EFI_GUID(0x05ad34ba, 0x6f02, 0x4214,  0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9)
 
 #define EFI_IMAGE_SECURITY_DATABASE_GUID       EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596,  0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f)
 #define EFI_SHIM_LOCK_GUID                     EFI_GUID(0x605dab50, 0xe046, 0x4300,  0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23)
@@ -390,6 +393,7 @@ void efi_native_runtime_setup(void);
 #define EFI_CERT_SHA256_GUID                   EFI_GUID(0xc1c41626, 0x504c, 0x4092, 0xac, 0xa9, 0x41, 0xf9, 0x36, 0x93, 0x43, 0x28)
 #define EFI_CERT_X509_GUID                     EFI_GUID(0xa5c059a1, 0x94e4, 0x4aa7, 0x87, 0xb5, 0xab, 0x15, 0x5c, 0x2b, 0xf0, 0x72)
 #define EFI_CERT_X509_SHA256_GUID              EFI_GUID(0x3bd2a492, 0x96c0, 0x4079, 0xb4, 0x20, 0xfc, 0xf9, 0x8e, 0xf1, 0x03, 0xed)
+#define EFI_CC_BLOB_GUID                       EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42)
 
 /*
  * This GUID is used to pass to the kernel proper the struct screen_info
@@ -405,6 +409,20 @@ void efi_native_runtime_setup(void);
 #define LINUX_EFI_MEMRESERVE_TABLE_GUID                EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5,  0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2)
 #define LINUX_EFI_INITRD_MEDIA_GUID            EFI_GUID(0x5568e427, 0x68fc, 0x4f3d,  0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68)
 #define LINUX_EFI_MOK_VARIABLE_TABLE_GUID      EFI_GUID(0xc451ed2b, 0x9694, 0x45d3,  0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89)
+#define LINUX_EFI_COCO_SECRET_AREA_GUID                EFI_GUID(0xadf956ad, 0xe98c, 0x484c,  0xae, 0x11, 0xb5, 0x1c, 0x7d, 0x33, 0x64, 0x47)
+
+#define RISCV_EFI_BOOT_PROTOCOL_GUID           EFI_GUID(0xccd15fec, 0x6f73, 0x4eec,  0x83, 0x95, 0x3e, 0x69, 0xe4, 0xb9, 0x40, 0xbf)
+
+/*
+ * This GUID may be installed onto the kernel image's handle as a NULL protocol
+ * to signal to the stub that the placement of the image should be respected,
+ * and moving the image in physical memory is undesirable. To ensure
+ * compatibility with 64k pages kernels with virtually mapped stacks, and to
+ * avoid defeating physical randomization, this protocol should only be
+ * installed if the image was placed at a randomized 128k aligned address in
+ * memory.
+ */
+#define LINUX_EFI_LOADED_IMAGE_FIXED_GUID      EFI_GUID(0xf5a37b6d, 0x3344, 0x42a5,  0xb6, 0xbb, 0x97, 0x86, 0x48, 0xc1, 0x89, 0x0a)
 
 /* OEM GUIDs */
 #define DELLEMC_EFI_RCI2_TABLE_GUID            EFI_GUID(0x2d9f28a2, 0xa886, 0x456a,  0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
@@ -435,6 +453,7 @@ typedef struct {
 } efi_config_table_type_t;
 
 #define EFI_SYSTEM_TABLE_SIGNATURE ((u64)0x5453595320494249ULL)
+#define EFI_DXE_SERVICES_TABLE_SIGNATURE ((u64)0x565245535f455844ULL)
 
 #define EFI_2_30_SYSTEM_TABLE_REVISION  ((2 << 16) | (30))
 #define EFI_2_20_SYSTEM_TABLE_REVISION  ((2 << 16) | (20))
@@ -596,6 +615,7 @@ extern struct efi {
        unsigned long                   tpm_log;                /* TPM2 Event Log table */
        unsigned long                   tpm_final_log;          /* TPM2 Final Events Log table */
        unsigned long                   mokvar_table;           /* MOK variable config table */
+       unsigned long                   coco_secret;            /* Confidential computing secret table */
 
        efi_get_time_t                  *get_time;
        efi_set_time_t                  *set_time;
@@ -1335,4 +1355,12 @@ extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
 static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { }
 #endif
 
+struct linux_efi_coco_secret_area {
+       u64     base_pa;
+       u64     size;
+};
+
+/* Header of a populated EFI secret area */
+#define EFI_SECRET_TABLE_HEADER_GUID   EFI_GUID(0x1e74f542, 0x71dd, 0x4d66,  0x96, 0x3e, 0xef, 0x42, 0x87, 0xff, 0x17, 0x3b)
+
 #endif /* _LINUX_EFI_H */
index f8e206e82476c30f067ac96f0ca4036dc693638c..346a8b56cdc831f798b0c59eab56160446d6de74 100644 (file)
@@ -84,15 +84,6 @@ static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *re
 #endif
 }
 
-static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
-{
-#ifdef ELF_CORE_COPY_KERNEL_REGS
-       ELF_CORE_COPY_KERNEL_REGS((*elfregs), regs);
-#else
-       elf_core_copy_regs(elfregs, regs);
-#endif
-}
-
 static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs)
 {
 #if defined (ELF_CORE_COPY_TASK_REGS)
index bbde95387a23af8daf91f0926069a2090c35bd0f..87b5af1d9fbe037dbcbe404547fac061544c3abb 100644 (file)
@@ -1953,6 +1953,7 @@ struct dir_context {
 #define REMAP_FILE_ADVISORY            (REMAP_FILE_CAN_SHORTEN)
 
 struct iov_iter;
+struct io_uring_cmd;
 
 struct file_operations {
        struct module *owner;
@@ -1995,6 +1996,7 @@ struct file_operations {
                                   struct file *file_out, loff_t pos_out,
                                   loff_t len, unsigned int remap_flags);
        int (*fadvise)(struct file *, loff_t, loff_t, int);
+       int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 } __randomize_layout;
 
 struct inode_operations {
index 6727fb0db6197e93655398332c2a769d75ae3369..e25539072463ba0dc43e872b79abc02b3316d48d 100644 (file)
@@ -573,7 +573,6 @@ int fscache_write(struct netfs_cache_resources *cres,
 
 /**
  * fscache_clear_page_bits - Clear the PG_fscache bits from a set of pages
- * @cookie: The cookie representing the cache object
  * @mapping: The netfs inode to use as the source
  * @start: The start position in @mapping
  * @len: The amount of data to unlock
@@ -582,8 +581,7 @@ int fscache_write(struct netfs_cache_resources *cres,
  * Clear the PG_fscache flag from a sequence of pages and wake up anyone who's
  * waiting.
  */
-static inline void fscache_clear_page_bits(struct fscache_cookie *cookie,
-                                          struct address_space *mapping,
+static inline void fscache_clear_page_bits(struct address_space *mapping,
                                           loff_t start, size_t len,
                                           bool caching)
 {
index 761f8f1885c79e0544bb9207a8e6643525e91134..3e3d36fc210982d5d23fae91408d6b602fc1517f 100644 (file)
@@ -613,9 +613,11 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 #ifdef CONFIG_NUMA
 struct page *alloc_pages(gfp_t gfp, unsigned int order);
 struct folio *folio_alloc(gfp_t gfp, unsigned order);
-extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
+struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
                        struct vm_area_struct *vma, unsigned long addr,
                        bool hugepage);
+struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
+               unsigned long addr, bool hugepage);
 #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
        alloc_pages_vma(gfp_mask, order, vma, addr, true)
 #else
@@ -627,8 +629,10 @@ static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
 {
        return __folio_alloc_node(gfp, order, numa_node_id());
 }
-#define alloc_pages_vma(gfp_mask, order, vma, addr, false)\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, hugepage) \
        alloc_pages(gfp_mask, order)
+#define vma_alloc_folio(gfp, order, vma, addr, hugepage)               \
+       folio_alloc(gfp, order)
 #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
        alloc_pages(gfp_mask, order)
 #endif
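
vma_alloc_folio() is the folio counterpart of alloc_pages_vma(): it honors the VMA's NUMA policy on CONFIG_NUMA kernels and falls back to folio_alloc() otherwise. A hedged sketch (the wrapper name is hypothetical):

    static struct folio *my_anon_folio(struct vm_area_struct *vma,
                                       unsigned long addr)
    {
            /* order-0 allocation, not a transparent huge page */
            return vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, addr, false);
    }
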
index c3aa8b330e1c67f57e92faab99e8a4c471f39a31..e71f6e1bfafed8f9ab90bd0f4368bd03272470e9 100644 (file)
@@ -688,7 +688,7 @@ void acpi_dev_remove_driver_gpios(struct acpi_device *adev);
 int devm_acpi_dev_add_driver_gpios(struct device *dev,
                                   const struct acpi_gpio_mapping *gpios);
 
-struct gpio_desc *acpi_get_and_request_gpiod(char *path, int pin, char *label);
+struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, char *label);
 
 #else  /* CONFIG_GPIOLIB && CONFIG_ACPI */
 
@@ -705,6 +705,12 @@ static inline int devm_acpi_dev_add_driver_gpios(struct device *dev,
        return -ENXIO;
 }
 
+static inline struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin,
+                                                          char *label)
+{
+       return ERR_PTR(-ENOSYS);
+}
+
 #endif /* CONFIG_GPIOLIB && CONFIG_ACPI */
 
 
index 98c93510640e9398c09714692dd73564ae314dad..cb689264f3e9170599667174243e670462812b79 100644 (file)
@@ -221,6 +221,15 @@ struct gpio_irq_chip {
         */
        bool per_parent_data;
 
+       /**
+        * @initialized:
+        *
+        * Flag to track the initialization of the GPIO chip irq members.
+        * It ensures the irq members are not used before they have been
+        * initialized.
+        */
+       bool initialized;
+
        /**
         * @init_hw: optional routine to initialize hardware before
         * an IRQ chip will be added. This is quite useful when
@@ -579,6 +588,22 @@ void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset);
 void gpiochip_disable_irq(struct gpio_chip *gc, unsigned int offset);
 void gpiochip_enable_irq(struct gpio_chip *gc, unsigned int offset);
 
+/* irq_data versions of the above */
+int gpiochip_irq_reqres(struct irq_data *data);
+void gpiochip_irq_relres(struct irq_data *data);
+
+/* Paste this in your irq_chip structure */
+#define        GPIOCHIP_IRQ_RESOURCE_HELPERS                                   \
+               .irq_request_resources  = gpiochip_irq_reqres,          \
+               .irq_release_resources  = gpiochip_irq_relres
+
+static inline void gpio_irq_chip_set_chip(struct gpio_irq_chip *girq,
+                                         const struct irq_chip *chip)
+{
+       /* Yes, dropping const is ugly, but it isn't like we have a choice */
+       girq->chip = (struct irq_chip *)chip;
+}
+
 /* Line status inquiry for drivers */
 bool gpiochip_line_is_open_drain(struct gpio_chip *gc, unsigned int offset);
 bool gpiochip_line_is_open_source(struct gpio_chip *gc, unsigned int offset);
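
These helpers support converting GPIO drivers to const irq_chip structures: the chip picks up the request/release callbacks from the macro, gpiolib no longer writes to the structure, and the driver installs it with gpio_irq_chip_set_chip(). A hedged sketch of the intended pattern with hypothetical driver names; the matching IRQCHIP_IMMUTABLE flag is added in the irq.h hunk below:

    static void my_gpio_irq_mask(struct irq_data *d)
    {
            struct gpio_chip *gc = irq_data_get_irq_chip_data(d);

            /* ... mask the interrupt in hardware ... */
            gpiochip_disable_irq(gc, irqd_to_hwirq(d));
    }

    static void my_gpio_irq_unmask(struct irq_data *d)
    {
            struct gpio_chip *gc = irq_data_get_irq_chip_data(d);

            gpiochip_enable_irq(gc, irqd_to_hwirq(d));
            /* ... unmask the interrupt in hardware ... */
    }

    static const struct irq_chip my_gpio_irq_chip = {
            .name           = "my-gpio",
            .irq_mask       = my_gpio_irq_mask,
            .irq_unmask     = my_gpio_irq_unmask,
            .flags          = IRQCHIP_IMMUTABLE,
            GPIOCHIP_IRQ_RESOURCE_HELPERS,
    };

    /* in probe: gpio_irq_chip_set_chip(&gc->irq, &my_gpio_irq_chip); */
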
index 53c1b6082a4cd9f42fc14e8259e4818b8c7e3e20..ac2a1d758a80eb4177b599f7780bfe4106021db0 100644 (file)
@@ -169,6 +169,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
                                                long freed);
 bool isolate_huge_page(struct page *page, struct list_head *list);
 int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
+int get_huge_page_for_hwpoison(unsigned long pfn, int flags);
 void putback_active_hugepage(struct page *page);
 void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
 void free_huge_page(struct page *page);
@@ -378,6 +379,11 @@ static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
        return 0;
 }
 
+static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+{
+       return 0;
+}
+
 static inline void putback_active_hugepage(struct page *page)
 {
 }
index 1814e698d86115910cdc4b766ffc7f6fd09a9c3f..4a2f6cc5a4927fdc0280b2b5b86d47938b8bd768 100644 (file)
@@ -5,11 +5,37 @@
 #include <linux/sched.h>
 #include <linux/xarray.h>
 
+enum io_uring_cmd_flags {
+       IO_URING_F_COMPLETE_DEFER       = 1,
+       IO_URING_F_UNLOCKED             = 2,
+       /* int's last bit, sign checks are usually faster than a bit test */
+       IO_URING_F_NONBLOCK             = INT_MIN,
+
+       /* ctx state flags, for URING_CMD */
+       IO_URING_F_SQE128               = 4,
+       IO_URING_F_CQE32                = 8,
+       IO_URING_F_IOPOLL               = 16,
+};
+
+struct io_uring_cmd {
+       struct file     *file;
+       const void      *cmd;
+       /* callback to defer completions to task context */
+       void (*task_work_cb)(struct io_uring_cmd *cmd);
+       u32             cmd_op;
+       u32             pad;
+       u8              pdu[32]; /* available inline for free use */
+};
+
 #if defined(CONFIG_IO_URING)
+void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
+void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+                       void (*task_work_cb)(struct io_uring_cmd *));
 struct sock *io_uring_get_socket(struct file *file);
 void __io_uring_cancel(bool cancel_all);
 void __io_uring_free(struct task_struct *tsk);
 void io_uring_unreg_ringfd(void);
+const char *io_uring_get_opcode(u8 opcode);
 
 static inline void io_uring_files_cancel(void)
 {
@@ -29,6 +55,14 @@ static inline void io_uring_free(struct task_struct *tsk)
                __io_uring_free(tsk);
 }
 #else
+static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
+               ssize_t ret2)
+{
+}
+static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+                       void (*task_work_cb)(struct io_uring_cmd *))
+{
+}
 static inline struct sock *io_uring_get_socket(struct file *file)
 {
        return NULL;
@@ -42,6 +76,10 @@ static inline void io_uring_files_cancel(void)
 static inline void io_uring_free(struct task_struct *tsk)
 {
 }
+static inline const char *io_uring_get_opcode(u8 opcode)
+{
+       return "";
+}
 #endif
 
 #endif
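
Together with the new file_operations->uring_cmd hook (see the fs.h hunk above), this lets a character device accept passthrough commands submitted through io_uring. A hedged sketch of a handler; the opcode and all names are hypothetical:

    static int my_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
    {
            switch (ioucmd->cmd_op) {
            case 0x1:       /* hypothetical device opcode */
                    /* complete inline: ret = 0, res2 = 0 */
                    io_uring_cmd_done(ioucmd, 0, 0);
                    return 0;
            default:
                    return -ENOTTY;
            }
    }

    static const struct file_operations my_fops = {
            .owner          = THIS_MODULE,
            .uring_cmd      = my_uring_cmd,
    };
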
index f92788ccdba270e4a665df40c4e83026a9698792..505308253d23ce49fed4b8f947437170ffa5571c 100644 (file)
@@ -569,6 +569,7 @@ struct irq_chip {
  * IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND:  Invokes __enable_irq()/__disable_irq() for wake irqs
  *                                    in the suspend path if they are in disabled state
  * IRQCHIP_AFFINITY_PRE_STARTUP:      Default affinity update before startup
+ * IRQCHIP_IMMUTABLE:                Don't ever change anything in this chip
  */
 enum {
        IRQCHIP_SET_TYPE_MASKED                 = (1 <<  0),
@@ -582,6 +583,7 @@ enum {
        IRQCHIP_SUPPORTS_NMI                    = (1 <<  8),
        IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND        = (1 <<  9),
        IRQCHIP_AFFINITY_PRE_STARTUP            = (1 << 10),
+       IRQCHIP_IMMUTABLE                       = (1 << 11),
 };
 
 #include <linux/irqdesc.h>
index 12d91f0dedf90caa46f6b8ceef81e876c97fb926..728691365464c1585b23338184c95ba90f1b65c1 100644 (file)
 #define GICR_PIDR2                     GICD_PIDR2
 
 #define GICR_CTLR_ENABLE_LPIS          (1UL << 0)
+#define GICR_CTLR_CES                  (1UL << 1)
+#define GICR_CTLR_IR                   (1UL << 2)
 #define GICR_CTLR_RWP                  (1UL << 3)
 
 #define GICR_TYPER_CPU_NUMBER(r)       (((r) >> 8) & 0xffff)
index 08ba5995aa8bbe84cc8bd802303346aeb60f09cb..fe6efb24d151a6fa90cc817e6f99e633cca85d4c 100644 (file)
 }                                      \
 )
 
-/**
- * lower_48_bits() - return bits 0-47 of a number
- * @n: the number we're accessing
- */
-static inline u64 lower_48_bits(u64 n)
-{
-       return n & ((1ull << 48) - 1);
-}
-
 /**
  * upper_32_bits - return bits 32-63 of a number
  * @n: the number we're accessing
@@ -294,7 +285,7 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte)
        return buf;
 }
 
-extern int hex_to_bin(char ch);
+extern int hex_to_bin(unsigned char ch);
 extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
 extern char *bin2hex(char *dst, const void *src, size_t count);
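
hex_to_bin() now takes an unsigned char, avoiding sign-extension surprises for input bytes above 0x7f. A hedged sketch of typical use (the helper name is hypothetical); the function returns the nibble value, or -1 on invalid input:

    static int parse_hex_byte(const unsigned char *s, u8 *out)
    {
            int hi = hex_to_bin(s[0]);
            int lo = hex_to_bin(s[1]);

            if (hi < 0 || lo < 0)
                    return -EINVAL;

            *out = (hi << 4) | lo;
            return 0;
    }
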
 
index f49e64222628ae3a1a15df5c3a111c9f434b135c..726857a4b68054130800b7e22c6904ff324352c2 100644 (file)
@@ -204,6 +204,22 @@ static __always_inline __must_check bool kfence_free(void *addr)
  */
 bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs);
 
+#ifdef CONFIG_PRINTK
+struct kmem_obj_info;
+/**
+ * __kfence_obj_info() - fill kmem_obj_info struct
+ * @kpp: kmem_obj_info to be filled
+ * @object: the object
+ *
+ * Return:
+ * * false - not a KFENCE object
+ * * true - a KFENCE object, filled @kpp
+ *
+ * Copies information to @kpp for KFENCE objects.
+ */
+bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
+#endif
+
 #else /* CONFIG_KFENCE */
 
 static inline bool is_kfence_address(const void *addr) { return false; }
@@ -221,6 +237,14 @@ static inline bool __must_check kfence_handle_page_fault(unsigned long addr, boo
        return false;
 }
 
+#ifdef CONFIG_PRINTK
+struct kmem_obj_info;
+static inline bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
+{
+       return false;
+}
+#endif
+
 #endif
 
 #endif /* _LINUX_KFENCE_H */
index c7b47399b36ae7eacb57270df504504fb9aeb4dd..57fb972fea05ba7e4be249565c55101a87cc826b 100644 (file)
@@ -120,7 +120,6 @@ extern char *kobject_get_path(struct kobject *kobj, gfp_t flag);
 struct kobj_type {
        void (*release)(struct kobject *kobj);
        const struct sysfs_ops *sysfs_ops;
-       struct attribute **default_attrs;       /* use default_groups instead */
        const struct attribute_group **default_groups;
        const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
        const void *(*namespace)(struct kobject *kobj);
index de5d75bafd6651240d86bed578eef16d77f290a9..30e5bec81d2b6246c4cfdb6ef0aecfb46f046e2b 100644 (file)
@@ -222,9 +222,5 @@ void kthread_associate_blkcg(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *kthread_blkcg(void);
 #else
 static inline void kthread_associate_blkcg(struct cgroup_subsys_state *css) { }
-static inline struct cgroup_subsys_state *kthread_blkcg(void)
-{
-       return NULL;
-}
 #endif
 #endif /* _LINUX_KTHREAD_H */
index 3f9b22c4983a85704667a89e274c364326980574..34eed5f85ed607432bd40559d1f9c6356cb6f083 100644 (file)
@@ -315,7 +315,10 @@ struct kvm_vcpu {
        int cpu;
        int vcpu_id; /* id given by userspace at creation */
        int vcpu_idx; /* index in kvm->vcpus array */
-       int srcu_idx;
+       int ____srcu_idx; /* Don't use this directly.  You've been warned. */
+#ifdef CONFIG_PROVE_RCU
+       int srcu_depth;
+#endif
        int mode;
        u64 requests;
        unsigned long guest_debug;
@@ -840,6 +843,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm)
        unlikely(__ret);                                        \
 })
 
+static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PROVE_RCU
+       WARN_ONCE(vcpu->srcu_depth++,
+                 "KVM: Illegal vCPU srcu_idx LOCK, depth=%d", vcpu->srcu_depth - 1);
+#endif
+       vcpu->____srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+}
+
+static inline void kvm_vcpu_srcu_read_unlock(struct kvm_vcpu *vcpu)
+{
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->____srcu_idx);
+
+#ifdef CONFIG_PROVE_RCU
+       WARN_ONCE(--vcpu->srcu_depth,
+                 "KVM: Illegal vCPU srcu_idx UNLOCK, depth=%d", vcpu->srcu_depth);
+#endif
+}
+
 static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
 {
        return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
@@ -2197,6 +2219,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
 void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
                                            unsigned long start, unsigned long end);
 
+void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);
+
 #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
 int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
 #else
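
The wrappers hide the srcu_idx bookkeeping and, under CONFIG_PROVE_RCU, warn on unbalanced lock/unlock via the new srcu_depth counter. A hedged before/after sketch of a conversion (the function name is hypothetical):

    static void my_walk_memslots(struct kvm_vcpu *vcpu)
    {
            /* was: vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); */
            kvm_vcpu_srcu_read_lock(vcpu);

            /* ... read memslots under SRCU ... */

            /* was: srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); */
            kvm_vcpu_srcu_read_unlock(vcpu);
    }
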
index 9b1d3d8b12520165836d4e51b6938a4a15913364..732de90146261feadadb7a8eca01341fb9d66f33 100644 (file)
@@ -820,7 +820,6 @@ struct ata_port {
        unsigned int            cbl;    /* cable type; ATA_CBL_xxx */
 
        struct ata_queued_cmd   qcmd[ATA_MAX_QUEUE + 1];
-       unsigned long           sas_tag_allocated; /* for sas tag allocation only */
        u64                     qc_active;
        int                     nr_active_links; /* #links with active qcs */
        unsigned int            sas_last_tag;   /* track next tag hw expects */
@@ -1111,7 +1110,7 @@ extern void ata_unpack_xfermask(unsigned long xfer_mask,
                        unsigned long *udma_mask);
 extern u8 ata_xfer_mask2mode(unsigned long xfer_mask);
 extern unsigned long ata_xfer_mode2mask(u8 xfer_mode);
-extern int ata_xfer_mode2shift(unsigned long xfer_mode);
+extern int ata_xfer_mode2shift(u8 xfer_mode);
 extern const char *ata_mode_string(unsigned long xfer_mask);
 extern unsigned long ata_id_xfermask(const u16 *id);
 extern int ata_std_qc_defer(struct ata_queued_cmd *qc);
index acb1ad2356f1b29c6fe4dcb1e8dc1d371d8c2684..1feab6136b5b583397cc6267f563037812d6cb7d 100644 (file)
 
 /* SYM_ALIAS -- use only if you have to */
 #ifndef SYM_ALIAS
-#define SYM_ALIAS(alias, name, sym_type, linkage)                      \
-       linkage(alias) ASM_NL                                           \
-       .set alias, name ASM_NL                                         \
-       .type alias sym_type ASM_NL                                     \
-       .set .L__sym_size_##alias, .L__sym_size_##name ASM_NL           \
-       .size alias, .L__sym_size_##alias
+#define SYM_ALIAS(alias, name, linkage)                        \
+       linkage(alias) ASM_NL                           \
+       .set alias, name ASM_NL
 #endif
 
 /* === code annotations === */
  */
 #ifndef SYM_FUNC_ALIAS
 #define SYM_FUNC_ALIAS(alias, name)                                    \
-       SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL)
+       SYM_ALIAS(alias, name, SYM_L_GLOBAL)
 #endif
 
 /*
  */
 #ifndef SYM_FUNC_ALIAS_LOCAL
 #define SYM_FUNC_ALIAS_LOCAL(alias, name)                              \
-       SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL)
+       SYM_ALIAS(alias, name, SYM_L_LOCAL)
 #endif
 
 /*
  */
 #ifndef SYM_FUNC_ALIAS_WEAK
 #define SYM_FUNC_ALIAS_WEAK(alias, name)                               \
-       SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
+       SYM_ALIAS(alias, name, SYM_L_WEAK)
 #endif
 
 /* SYM_CODE_START -- use for non-C (special) functions */
index 6d635e8306d645a820b54fe91ee4a5c874b634e3..975e33b793a774ecc72580c46c0123da725273f1 100644 (file)
@@ -44,9 +44,9 @@ static inline void local_lock_debug_init(local_lock_t *l)
 }
 #else /* CONFIG_DEBUG_LOCK_ALLOC */
 # define LOCAL_LOCK_DEBUG_INIT(lockname)
-# define local_lock_acquire(__ll)  do { typecheck(local_lock_t *, __ll); } while (0)
-# define local_lock_release(__ll)  do { typecheck(local_lock_t *, __ll); } while (0)
-# define local_lock_debug_init(__ll)  do { typecheck(local_lock_t *, __ll); } while (0)
+static inline void local_lock_acquire(local_lock_t *l) { }
+static inline void local_lock_release(local_lock_t *l) { }
+static inline void local_lock_debug_init(local_lock_t *l) { }
 #endif /* !CONFIG_DEBUG_LOCK_ALLOC */
 
 #define INIT_LOCAL_LOCK(lockname)      { LOCAL_LOCK_DEBUG_INIT(lockname) }
index a68dce3873fccf80a3308b5b7d1743bf001bd2ee..89b14729d59f941ce851162e1fa184b217260466 100644 (file)
@@ -1012,6 +1012,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 }
 
 void mem_cgroup_flush_stats(void);
+void mem_cgroup_flush_stats_delayed(void);
 
 void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
                              int val);
@@ -1455,6 +1456,10 @@ static inline void mem_cgroup_flush_stats(void)
 {
 }
 
+static inline void mem_cgroup_flush_stats_delayed(void)
+{
+}
+
 static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
                                            enum node_stat_item idx, int val)
 {
index e34edb77533427f63c4563a33110862fad03413e..9f44254af8ce9e6d229adfe027e3ba5955ce50e8 100644 (file)
@@ -3197,6 +3197,14 @@ extern int sysctl_memory_failure_recovery;
 extern void shake_page(struct page *p);
 extern atomic_long_t num_poisoned_pages __read_mostly;
 extern int soft_offline_page(unsigned long pfn, int flags);
+#ifdef CONFIG_MEMORY_FAILURE
+extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
+#else
+static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+{
+       return 0;
+}
+#endif
 
 #ifndef arch_memory_failure
 static inline int arch_memory_failure(unsigned long pfn, int flags)
index 71101d1ec825e9d4ba63f4aaae168209d8b724fa..de5c64bbdb725818faf590107f0ecde475152852 100644 (file)
@@ -175,7 +175,7 @@ void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq);
 int mmc_wait_for_cmd(struct mmc_host *host, struct mmc_command *cmd,
                int retries);
 
-int mmc_hw_reset(struct mmc_host *host);
+int mmc_hw_reset(struct mmc_card *card);
 int mmc_sw_reset(struct mmc_host *host);
 void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card);
 
index 962b14d403e8fc4a8144ed0e02f2593883843ecd..46ffab808f037bfbd26a1fccf28cf06cd48953f2 100644 (file)
@@ -1397,13 +1397,16 @@ static inline unsigned long *section_to_usemap(struct mem_section *ms)
 
 static inline struct mem_section *__nr_to_section(unsigned long nr)
 {
+       unsigned long root = SECTION_NR_TO_ROOT(nr);
+
+       if (unlikely(root >= NR_SECTION_ROOTS))
+               return NULL;
+
 #ifdef CONFIG_SPARSEMEM_EXTREME
-       if (!mem_section)
+       if (!mem_section || !mem_section[root])
                return NULL;
 #endif
-       if (!mem_section[SECTION_NR_TO_ROOT(nr)])
-               return NULL;
-       return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
+       return &mem_section[root][nr & SECTION_ROOT_MASK];
 }
 extern size_t mem_section_usage_size(void);
 
index 151607e9d64ad0d9f9a2c46849f93591f8a777b0..955aee14b0f7e5791ec6348cbe9d6d3d1c1e94d4 100644 (file)
@@ -389,10 +389,8 @@ struct mtd_info {
        /* List of partitions attached to this MTD device */
        struct list_head partitions;
 
-       union {
-               struct mtd_part part;
-               struct mtd_master master;
-       };
+       struct mtd_part part;
+       struct mtd_master master;
 };
 
 static inline struct mtd_info *mtd_get_master(struct mtd_info *mtd)
index 2c6b9e4162254f7116ed95ee88eb03afcc0fe64f..7c2d77d75a888cdb2b317cc89296681643b0423e 100644 (file)
@@ -169,7 +169,7 @@ enum {
 #define NETIF_F_HW_HSR_FWD     __NETIF_F(HW_HSR_FWD)
 #define NETIF_F_HW_HSR_DUP     __NETIF_F(HW_HSR_DUP)
 
-/* Finds the next feature with the highest number of the range of start till 0.
+/* Finds the next feature with the highest number in the range [start-1, 0].
  */
 static inline int find_next_netdev_feature(u64 feature, unsigned long start)
 {
@@ -188,7 +188,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start)
        for ((bit) = find_next_netdev_feature((mask_addr),              \
                                              NETDEV_FEATURE_COUNT);    \
             (bit) >= 0;                                                \
-            (bit) = find_next_netdev_feature((mask_addr), (bit) - 1))
+            (bit) = find_next_netdev_feature((mask_addr), (bit)))
 
 /* Features valid for ethtool to change */
 /* = all defined minus driver/device-class-related */
index 59e27a2b7bf04a28d98d987dedd46dc11d4b8bc8..f736c020cde27761b913be89deb210100d599ea0 100644 (file)
@@ -199,10 +199,10 @@ struct net_device_stats {
  * Try to fit them in a single cache line, for dev_get_stats() sake.
  */
 struct net_device_core_stats {
-       local_t         rx_dropped;
-       local_t         tx_dropped;
-       local_t         rx_nohandler;
-} __aligned(4 * sizeof(local_t));
+       unsigned long   rx_dropped;
+       unsigned long   tx_dropped;
+       unsigned long   rx_nohandler;
+} __aligned(4 * sizeof(unsigned long));
 
 #include <linux/cache.h>
 #include <linux/skbuff.h>
@@ -900,7 +900,7 @@ struct net_device_path_stack {
 
 struct net_device_path_ctx {
        const struct net_device *dev;
-       const u8                *daddr;
+       u8                      daddr[ETH_ALEN];
 
        int                     num_vlans;
        struct {
@@ -3843,15 +3843,15 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev,
        return false;
 }
 
-struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev);
+struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev);
 
-static inline struct net_device_core_stats *dev_core_stats(struct net_device *dev)
+static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev)
 {
        /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
        struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats);
 
        if (likely(p))
-               return this_cpu_ptr(p);
+               return p;
 
        return netdev_core_stats_alloc(dev);
 }
@@ -3859,14 +3859,11 @@ static inline struct net_device_core_stats *dev_core_stats(struct net_device *de
 #define DEV_CORE_STATS_INC(FIELD)                                              \
 static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev)                \
 {                                                                              \
-       struct net_device_core_stats *p;                                        \
+       struct net_device_core_stats __percpu *p;                               \
                                                                                \
-       preempt_disable();                                                      \
        p = dev_core_stats(dev);                                                \
-                                                                               \
        if (p)                                                                  \
-               local_inc(&p->FIELD);                                           \
-       preempt_enable();                                                       \
+               this_cpu_inc(p->FIELD);                                         \
 }
 DEV_CORE_STATS_INC(rx_dropped)
 DEV_CORE_STATS_INC(tx_dropped)
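
The core stats move from local_t to plain per-CPU unsigned longs, so the generated increment helpers no longer need a preempt_disable()/preempt_enable() pair. A hedged caller sketch (names hypothetical):

    static void my_rx_drop(struct net_device *dev, struct sk_buff *skb)
    {
            /* per-CPU increment, safe wherever this_cpu_inc() is */
            dev_core_stats_rx_dropped_inc(dev);
            kfree_skb(skb);
    }
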
index 49ba486aea5fd831f707ed3bc5153814bbe394c0..2863e5a69c6abdd208284c0238d135f297037b9f 100644 (file)
@@ -1694,6 +1694,7 @@ struct nfs_unlinkdata {
 struct nfs_renamedata {
        struct nfs_renameargs   args;
        struct nfs_renameres    res;
+       struct rpc_task         task;
        const struct cred       *cred;
        struct inode            *old_dir;
        struct dentry           *old_dentry;
index f626a445d1a872647c27e6c09139dca1d99af676..29ec3e3481ff61901899e6b2c836bf4a1fbe6e65 100644 (file)
@@ -137,6 +137,7 @@ enum {
        NVME_REG_CMBMSC = 0x0050,       /* Controller Memory Buffer Memory
                                         * Space Control
                                         */
+       NVME_REG_CRTO   = 0x0068,       /* Controller Ready Timeouts */
        NVME_REG_PMRCAP = 0x0e00,       /* Persistent Memory Capabilities */
        NVME_REG_PMRCTL = 0x0e04,       /* Persistent Memory Region Control */
        NVME_REG_PMRSTS = 0x0e08,       /* Persistent Memory Region Status */
@@ -161,6 +162,9 @@ enum {
 #define NVME_CMB_BIR(cmbloc)   ((cmbloc) & 0x7)
 #define NVME_CMB_OFST(cmbloc)  (((cmbloc) >> 12) & 0xfffff)
 
+#define NVME_CRTO_CRIMT(crto)  ((crto) >> 16)
+#define NVME_CRTO_CRWMT(crto)  ((crto) & 0xffff)
+
 enum {
        NVME_CMBSZ_SQS          = 1 << 0,
        NVME_CMBSZ_CQS          = 1 << 1,
@@ -204,8 +208,10 @@ enum {
        NVME_CC_SHN_MASK        = 3 << NVME_CC_SHN_SHIFT,
        NVME_CC_IOSQES          = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
        NVME_CC_IOCQES          = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
-       NVME_CAP_CSS_NVM        = 1 << 0,
-       NVME_CAP_CSS_CSI        = 1 << 6,
+       NVME_CC_CRIME           = 1 << 24,
+};
+
+enum {
        NVME_CSTS_RDY           = 1 << 0,
        NVME_CSTS_CFS           = 1 << 1,
        NVME_CSTS_NSSRO         = 1 << 4,
@@ -214,10 +220,23 @@ enum {
        NVME_CSTS_SHST_OCCUR    = 1 << 2,
        NVME_CSTS_SHST_CMPLT    = 2 << 2,
        NVME_CSTS_SHST_MASK     = 3 << 2,
+};
+
+enum {
        NVME_CMBMSC_CRE         = 1 << 0,
        NVME_CMBMSC_CMSE        = 1 << 1,
 };
 
+enum {
+       NVME_CAP_CSS_NVM        = 1 << 0,
+       NVME_CAP_CSS_CSI        = 1 << 6,
+};
+
+enum {
+       NVME_CAP_CRMS_CRIMS     = 1ULL << 59,
+       NVME_CAP_CRMS_CRWMS     = 1ULL << 60,
+};
+
 struct nvme_id_power_state {
        __le16                  max_power;      /* centiwatts */
        __u8                    rsvd2;
@@ -405,6 +424,21 @@ struct nvme_id_ns {
        __u8                    vs[3712];
 };
 
+/* I/O Command Set Independent Identify Namespace Data Structure */
+struct nvme_id_ns_cs_indep {
+       __u8                    nsfeat;
+       __u8                    nmic;
+       __u8                    rescap;
+       __u8                    fpi;
+       __le32                  anagrpid;
+       __u8                    nsattr;
+       __u8                    rsvd9;
+       __le16                  nvmsetid;
+       __le16                  endgid;
+       __u8                    nstat;
+       __u8                    rsvd15[4081];
+};
+
 struct nvme_zns_lbafe {
        __le64                  zsze;
        __u8                    zdes;
@@ -469,6 +503,7 @@ enum {
        NVME_ID_CNS_NS_DESC_LIST        = 0x03,
        NVME_ID_CNS_CS_NS               = 0x05,
        NVME_ID_CNS_CS_CTRL             = 0x06,
+       NVME_ID_CNS_NS_CS_INDEP         = 0x08,
        NVME_ID_CNS_NS_PRESENT_LIST     = 0x10,
        NVME_ID_CNS_NS_PRESENT          = 0x11,
        NVME_ID_CNS_CTRL_NS_LIST        = 0x12,
@@ -522,6 +557,10 @@ enum {
        NVME_NS_DPS_PI_TYPE3    = 3,
 };
 
+enum {
+       NVME_NSTAT_NRDY         = 1 << 0,
+};
+
 enum {
        NVME_NVM_NS_16B_GUARD   = 0,
        NVME_NVM_NS_32B_GUARD   = 1,
@@ -1583,6 +1622,7 @@ enum {
        NVME_SC_NS_WRITE_PROTECTED      = 0x20,
        NVME_SC_CMD_INTERRUPTED         = 0x21,
        NVME_SC_TRANSIENT_TR_ERR        = 0x22,
+       NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY = 0x24,
        NVME_SC_INVALID_IO_CMD_SET      = 0x2C,
 
        NVME_SC_LBA_RANGE               = 0x80,
@@ -1679,9 +1719,11 @@ enum {
        /*
         * Path-related Errors:
         */
+       NVME_SC_INTERNAL_PATH_ERROR     = 0x300,
        NVME_SC_ANA_PERSISTENT_LOSS     = 0x301,
        NVME_SC_ANA_INACCESSIBLE        = 0x302,
        NVME_SC_ANA_TRANSITION          = 0x303,
+       NVME_SC_CTRL_PATH_ERROR         = 0x360,
        NVME_SC_HOST_PATH_ERROR         = 0x370,
        NVME_SC_HOST_ABORTED_CMD        = 0x371,
 
index 586d35720f135f62b9d7434beccacb2e22a5bd2d..b9c1474a571e1c2e9c66c82b09135584de8aa480 100644 (file)
@@ -40,6 +40,8 @@ struct unwind_hint {
 
 #ifdef CONFIG_STACK_VALIDATION
 
+#include <asm/asm.h>
+
 #ifndef __ASSEMBLY__
 
 #define UNWIND_HINT(sp_reg, sp_offset, type, end)              \
@@ -137,7 +139,7 @@ struct unwind_hint {
 
 .macro STACK_FRAME_NON_STANDARD func:req
        .pushsection .discard.func_stack_frame_non_standard, "aw"
-               .long \func - .
+       _ASM_PTR \func
        .popsection
 .endm
 
index 060e8d2031814404175c8635c9a6184ed2d7e885..1766e1de695600968b9501c7d51d6a4db00b53a3 100644 (file)
@@ -34,15 +34,19 @@ posix_acl_xattr_count(size_t size)
 
 #ifdef CONFIG_FS_POSIX_ACL
 void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
+                                  struct inode *inode,
                                   void *value, size_t size);
 void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
+                                  struct inode *inode,
                                 void *value, size_t size);
 #else
 static inline void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
+                                                struct inode *inode,
                                                 void *value, size_t size)
 {
 }
 static inline void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
+                                              struct inode *inode,
                                               void *value, size_t size)
 {
 }
index e7c39c200e2b0cc6c353c9d5205713964318d584..1a32036c918cd1e22e99bdbdd305769cfc74f429 100644 (file)
@@ -196,6 +196,7 @@ void synchronize_rcu_tasks_rude(void);
 void exit_tasks_rcu_start(void);
 void exit_tasks_rcu_finish(void);
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
+#define rcu_tasks_classic_qs(t, preempt) do { } while (0)
 #define rcu_tasks_qs(t, preempt) do { } while (0)
 #define rcu_note_voluntary_context_switch(t) do { } while (0)
 #define call_rcu_tasks call_rcu
index d5e3c00b74e12546c44faad9e00f85a8501ccf98..b3278f8184d588d4232f07c85e486c89d0dc7cfa 100644 (file)
@@ -1443,6 +1443,7 @@ struct task_struct {
        int                             pagefault_disabled;
 #ifdef CONFIG_MMU
        struct task_struct              *oom_reaper_list;
+       struct timer_list               oom_reaper_timer;
 #endif
 #ifdef CONFIG_VMAP_STACK
        struct vm_struct                *stack_vm_area;
@@ -2117,6 +2118,47 @@ static inline void cond_resched_rcu(void)
 #endif
 }
 
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
+extern bool preempt_model_none(void);
+extern bool preempt_model_voluntary(void);
+extern bool preempt_model_full(void);
+
+#else
+
+static inline bool preempt_model_none(void)
+{
+       return IS_ENABLED(CONFIG_PREEMPT_NONE);
+}
+static inline bool preempt_model_voluntary(void)
+{
+       return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY);
+}
+static inline bool preempt_model_full(void)
+{
+       return IS_ENABLED(CONFIG_PREEMPT);
+}
+
+#endif
+
+static inline bool preempt_model_rt(void)
+{
+       return IS_ENABLED(CONFIG_PREEMPT_RT);
+}
+
+/*
+ * Does the preemption model allow non-cooperative preemption?
+ *
+ * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with
+ * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the
+ * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the
+ * PREEMPT_NONE model.
+ */
+static inline bool preempt_model_preemptible(void)
+{
+       return preempt_model_full() || preempt_model_rt();
+}
+
 /*
  * Does a critical section need to be broken due to another
  * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
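
The preempt_model_*() predicates let code ask at runtime which model a CONFIG_PREEMPT_DYNAMIC kernel is actually using, instead of testing the build-time Kconfig. A hedged sketch; the loop helpers are hypothetical:

    static void my_process_queue(void)
    {
            while (my_more_work()) {        /* hypothetical */
                    my_do_one_item();       /* hypothetical */

                    /* break explicitly only where preemption won't do it */
                    if (!preempt_model_preemptible())
                            cond_resched();
            }
    }
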
index a80356e9dc69ab5723a41654e37cc869e045e319..1ad1f4bfa02556554db80ae5a493be3d08ce994e 100644 (file)
@@ -136,6 +136,14 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 #endif /* CONFIG_MEMCG */
 
 #ifdef CONFIG_MMU
+#ifndef arch_get_mmap_end
+#define arch_get_mmap_end(addr)        (TASK_SIZE)
+#endif
+
+#ifndef arch_get_mmap_base
+#define arch_get_mmap_base(addr, base) (base)
+#endif
+
 extern void arch_pick_mmap_layout(struct mm_struct *mm,
                                  struct rlimit *rlim_stack);
 extern unsigned long
index 3c8b34876744bda628be4c1084e39fc06dbdb938..66b689f6cfcb01afeb02f004f2835d7a1c7f9ee8 100644 (file)
@@ -355,14 +355,23 @@ static inline void clear_notify_signal(void)
        smp_mb__after_atomic();
 }
 
+/*
+ * Returns 'true' if kick_process() is needed to force a transition from
+ * user -> kernel to guarantee expedient run of TWA_SIGNAL based task_work.
+ */
+static inline bool __set_notify_signal(struct task_struct *task)
+{
+       return !test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
+              !wake_up_state(task, TASK_INTERRUPTIBLE);
+}
+
 /*
  * Called to break out of interruptible wait loops, and enter the
  * exit_to_user_mode_loop().
  */
 static inline void set_notify_signal(struct task_struct *task)
 {
-       if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
-           !wake_up_state(task, TASK_INTERRUPTIBLE))
+       if (__set_notify_signal(task))
                kick_process(task);
 }
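
The split lets a caller set TIF_NOTIFY_SIGNAL without unconditionally IPI'ing a task that is busy in userspace. A hedged sketch (the wrapper name is hypothetical):

static void notify_lazily_sketch(struct task_struct *task)
{
	/* Flag + wakeup as in set_notify_signal(), but without the
	 * kick_process() IPI: a task running in userspace will only
	 * notice on its next transition into the kernel.
	 */
	__set_notify_signal(task);
}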
 
index 6f85f5d957efea06c7fc19c78181feda9e57aead..17311ad9f9af247967d0704aef7f84fd2bce1a07 100644 (file)
@@ -50,6 +50,9 @@ struct linger {
 struct msghdr {
        void            *msg_name;      /* ptr to socket address structure */
        int             msg_namelen;    /* size of socket address structure */
+
+       int             msg_inq;        /* output, data left in socket */
+
        struct iov_iter msg_iter;       /* data */
 
        /*
@@ -62,8 +65,9 @@ struct msghdr {
                void __user     *msg_control_user;
        };
        bool            msg_control_is_user : 1;
-       __kernel_size_t msg_controllen; /* ancillary data buffer length */
+       bool            msg_get_inq : 1;/* return INQ after receive */
        unsigned int    msg_flags;      /* flags on received message */
+       __kernel_size_t msg_controllen; /* ancillary data buffer length */
        struct kiocb    *msg_iocb;      /* ptr to iocb for async requests */
 };
 
@@ -434,6 +438,7 @@ extern struct file *do_accept(struct file *file, unsigned file_flags,
 extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
                         int __user *upeer_addrlen, int flags);
 extern int __sys_socket(int family, int type, int protocol);
+extern struct file *__sys_socket_file(int family, int type, int protocol);
 extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
 extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
                              int addrlen, int file_flags);
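
A sketch of the kernel-side contract for the new msghdr fields: a protocol's recvmsg() fills msg_inq only when the caller asked for it via msg_get_inq (the helpers below are hypothetical):

static int proto_recvmsg_sketch(struct sock *sk, struct msghdr *msg)
{
	int copied = copy_queued_data(sk, msg);		/* hypothetical */

	if (msg->msg_get_inq)
		msg->msg_inq = queued_unread_bytes(sk);	/* hypothetical */
	return copied;
}
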
index cb1f4351e8baa2d6ddf9c45b1d8f93c9013de0c8..e3014319d1ade73060534db1865429416dea581f 100644 (file)
@@ -47,11 +47,9 @@ struct srcu_data {
  */
 struct srcu_node {
        spinlock_t __private lock;
-       unsigned long srcu_have_cbs[4];         /* GP seq for children */
-                                               /*  having CBs, but only */
-                                               /*  is > ->srcu_gq_seq. */
-       unsigned long srcu_data_have_cbs[4];    /* Which srcu_data structs */
-                                               /*  have CBs for given GP? */
+       unsigned long srcu_have_cbs[4];         /* GP seq for children having CBs, but only */
+                                               /*  if greater than ->srcu_gp_seq. */
+       unsigned long srcu_data_have_cbs[4];    /* Which srcu_data structs have CBs for given GP? */
        unsigned long srcu_gp_seq_needed_exp;   /* Furthest future exp GP. */
        struct srcu_node *srcu_parent;          /* Next up in tree. */
        int grplo;                              /* Least CPU for node. */
@@ -62,18 +60,24 @@ struct srcu_node {
  * Per-SRCU-domain structure, similar in function to rcu_state.
  */
 struct srcu_struct {
-       struct srcu_node node[NUM_RCU_NODES];   /* Combining tree. */
+       struct srcu_node *node;                 /* Combining tree. */
        struct srcu_node *level[RCU_NUM_LVLS + 1];
                                                /* First node at each level. */
+       int srcu_size_state;                    /* Small-to-big transition state. */
        struct mutex srcu_cb_mutex;             /* Serialize CB preparation. */
-       spinlock_t __private lock;              /* Protect counters */
+       spinlock_t __private lock;              /* Protect counters and size state. */
        struct mutex srcu_gp_mutex;             /* Serialize GP work. */
        unsigned int srcu_idx;                  /* Current rdr array element. */
        unsigned long srcu_gp_seq;              /* Grace-period seq #. */
        unsigned long srcu_gp_seq_needed;       /* Latest gp_seq needed. */
        unsigned long srcu_gp_seq_needed_exp;   /* Furthest future exp GP. */
+       unsigned long srcu_gp_start;            /* Last GP start timestamp (jiffies) */
        unsigned long srcu_last_gp_end;         /* Last GP end timestamp (ns) */
+       unsigned long srcu_size_jiffies;        /* Current contention-measurement interval. */
+       unsigned long srcu_n_lock_retries;      /* Contention events in current interval. */
+       unsigned long srcu_n_exp_nodelay;       /* # expedited no-delays in current GP phase. */
        struct srcu_data __percpu *sda;         /* Per-CPU srcu_data array. */
+       bool sda_is_static;                     /* May ->sda be passed to free_percpu()? */
        unsigned long srcu_barrier_seq;         /* srcu_barrier seq #. */
        struct mutex srcu_barrier_mutex;        /* Serialize barrier ops. */
        struct completion srcu_barrier_completion;
@@ -81,10 +85,23 @@ struct srcu_struct {
        atomic_t srcu_barrier_cpu_cnt;          /* # CPUs not yet posting a */
                                                /*  callback for the barrier */
                                                /*  operation. */
+       unsigned long reschedule_jiffies;       /* Time of last reschedule (jiffies). */
+       unsigned long reschedule_count;         /* # reschedules within that jiffy. */
        struct delayed_work work;
        struct lockdep_map dep_map;
 };
 
+/* Values for size state variable (->srcu_size_state). */
+#define SRCU_SIZE_SMALL                0
+#define SRCU_SIZE_ALLOC                1
+#define SRCU_SIZE_WAIT_BARRIER 2
+#define SRCU_SIZE_WAIT_CALL    3
+#define SRCU_SIZE_WAIT_CBS1    4
+#define SRCU_SIZE_WAIT_CBS2    5
+#define SRCU_SIZE_WAIT_CBS3    6
+#define SRCU_SIZE_WAIT_CBS4    7
+#define SRCU_SIZE_BIG          8
+
 /* Values for state variable (bottom bits of ->srcu_gp_seq). */
 #define SRCU_STATE_IDLE                0
 #define SRCU_STATE_SCAN1       1
@@ -121,6 +138,7 @@ struct srcu_struct {
 #ifdef MODULE
 # define __DEFINE_SRCU(name, is_static)                                        \
        is_static struct srcu_struct name;                              \
+       extern struct srcu_struct * const __srcu_struct_##name;         \
        struct srcu_struct * const __srcu_struct_##name                 \
                __section("___srcu_struct_ptrs") = &name
 #else
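
The size state only moves forward, from SRCU_SIZE_SMALL through the WAIT_* stages to SRCU_SIZE_BIG, as the combining tree is allocated and older grace periods drain. A minimal reader-side sketch (the helper name is hypothetical; the acquire load mirrors how the tree code checks the state):

static bool srcu_tree_ready_sketch(struct srcu_struct *ssp)
{
	return smp_load_acquire(&ssp->srcu_size_state) == SRCU_SIZE_BIG;
}
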
index 3e56a9751c062804b7f5f3db993144c0d4438a65..df53bed9d71f1de74af1410515ab91104b908909 100644 (file)
@@ -180,13 +180,13 @@ extern int static_call_text_reserved(void *start, void *end);
 
 extern long __static_call_return0(void);
 
-#define __DEFINE_STATIC_CALL(name, _func, _func_init)                  \
+#define DEFINE_STATIC_CALL(name, _func)                                        \
        DECLARE_STATIC_CALL(name, _func);                               \
        struct static_call_key STATIC_CALL_KEY(name) = {                \
-               .func = _func_init,                                     \
+               .func = _func,                                          \
                .type = 1,                                              \
        };                                                              \
-       ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func_init)
+       ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)
 
 #define DEFINE_STATIC_CALL_NULL(name, _func)                           \
        DECLARE_STATIC_CALL(name, _func);                               \
@@ -196,6 +196,14 @@ extern long __static_call_return0(void);
        };                                                              \
        ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
 
+#define DEFINE_STATIC_CALL_RET0(name, _func)                           \
+       DECLARE_STATIC_CALL(name, _func);                               \
+       struct static_call_key STATIC_CALL_KEY(name) = {                \
+               .func = __static_call_return0,                          \
+               .type = 1,                                              \
+       };                                                              \
+       ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)
+
 #define static_call_cond(name) (void)__static_call(name)
 
 #define EXPORT_STATIC_CALL(name)                                       \
@@ -217,12 +225,12 @@ extern long __static_call_return0(void);
 
 static inline int static_call_init(void) { return 0; }
 
-#define __DEFINE_STATIC_CALL(name, _func, _func_init)                  \
+#define DEFINE_STATIC_CALL(name, _func)                                        \
        DECLARE_STATIC_CALL(name, _func);                               \
        struct static_call_key STATIC_CALL_KEY(name) = {                \
-               .func = _func_init,                                     \
+               .func = _func,                                          \
        };                                                              \
-       ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func_init)
+       ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)
 
 #define DEFINE_STATIC_CALL_NULL(name, _func)                           \
        DECLARE_STATIC_CALL(name, _func);                               \
@@ -231,6 +239,12 @@ static inline int static_call_init(void) { return 0; }
        };                                                              \
        ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
 
+#define DEFINE_STATIC_CALL_RET0(name, _func)                           \
+       DECLARE_STATIC_CALL(name, _func);                               \
+       struct static_call_key STATIC_CALL_KEY(name) = {                \
+               .func = __static_call_return0,                          \
+       };                                                              \
+       ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)
 
 #define static_call_cond(name) (void)__static_call(name)
 
@@ -248,10 +262,7 @@ static inline int static_call_text_reserved(void *start, void *end)
        return 0;
 }
 
-static inline long __static_call_return0(void)
-{
-       return 0;
-}
+extern long __static_call_return0(void);
 
 #define EXPORT_STATIC_CALL(name)                                       \
        EXPORT_SYMBOL(STATIC_CALL_KEY(name));                           \
@@ -281,11 +292,14 @@ static inline long __static_call_return0(void)
                .func = _func_init,                                     \
        }
 
+#define DEFINE_STATIC_CALL(name, _func)                                        \
+       __DEFINE_STATIC_CALL(name, _func, _func)
+
 #define DEFINE_STATIC_CALL_NULL(name, _func)                           \
-       DECLARE_STATIC_CALL(name, _func);                               \
-       struct static_call_key STATIC_CALL_KEY(name) = {                \
-               .func = NULL,                                           \
-       }
+       __DEFINE_STATIC_CALL(name, _func, NULL)
+
+#define DEFINE_STATIC_CALL_RET0(name, _func)                           \
+       __DEFINE_STATIC_CALL(name, _func, __static_call_return0)
 
 static inline void __static_call_nop(void) { }
 
@@ -327,10 +341,4 @@ static inline int static_call_text_reserved(void *start, void *end)
 
 #endif /* CONFIG_HAVE_STATIC_CALL */
 
-#define DEFINE_STATIC_CALL(name, _func)                                        \
-       __DEFINE_STATIC_CALL(name, _func, _func)
-
-#define DEFINE_STATIC_CALL_RET0(name, _func)                           \
-       __DEFINE_STATIC_CALL(name, _func, __static_call_return0)
-
 #endif /* _LINUX_STATIC_CALL_H */
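
With DEFINE_STATIC_CALL_RET0() now available in all three configurations, a call site can start out as a cheap "return 0" and be patched later. A sketch with a hypothetical hook:

static int my_hook_fn(int arg);			/* type template */

DEFINE_STATIC_CALL_RET0(my_hook, my_hook_fn);

static int my_hook_fn(int arg)
{
	return arg * 2;
}

static int run_hook_sketch(void)
{
	int before = static_call(my_hook)(21);	/* 0: still RET0 */

	static_call_update(my_hook, my_hook_fn);
	return before + static_call(my_hook)(21);	/* 0 + 42 */
}
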
index 24eea1b05ca2745fc35af41f6d337fdbf1053cde..29917850f07946b57dfdbd8c5063220c547d5d5d 100644 (file)
@@ -270,5 +270,6 @@ struct plat_stmmacenet_data {
        int msi_rx_base_vec;
        int msi_tx_base_vec;
        bool use_phy_wol;
+       bool sph_disable;
 };
 #endif
index 267b7aeaf1a690f02600876d9ab1ece501b1fbfb..90501404fa49fefd578b1c9cd0c9b549032bb408 100644 (file)
@@ -160,6 +160,7 @@ struct rpc_add_xprt_test {
 #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT     (1UL << 9)
 #define RPC_CLNT_CREATE_SOFTERR                (1UL << 10)
 #define RPC_CLNT_CREATE_REUSEPORT      (1UL << 11)
+#define RPC_CLNT_CREATE_CONNECTED      (1UL << 12)
 
 struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt *,
index a5dda4987e8ba6e014a2a60d4c97b707febdea36..217711fc9cace111ace19e531537b8ab5ab3c41d 100644 (file)
@@ -395,6 +395,7 @@ struct svc_deferred_req {
        size_t                  addrlen;
        struct sockaddr_storage daddr;  /* where reply must come from */
        size_t                  daddrlen;
+       void                    *xprt_ctxt;
        struct cache_deferred_req handle;
        size_t                  xprt_hlen;
        int                     argslen;
index 45a9530d383999b01606912b0faddde7f0fe069f..522bbf937957132e121ae9e175917e34cde9bc6e 100644 (file)
@@ -144,7 +144,7 @@ struct rpc_xprt_ops {
        unsigned short  (*get_srcport)(struct rpc_xprt *xprt);
        int             (*buf_alloc)(struct rpc_task *task);
        void            (*buf_free)(struct rpc_task *task);
-       void            (*prepare_request)(struct rpc_rqst *req);
+       int             (*prepare_request)(struct rpc_rqst *req);
        int             (*send_request)(struct rpc_rqst *req);
        void            (*wait_for_reply_request)(struct rpc_task *task);
        void            (*timer)(struct rpc_xprt *xprt, struct rpc_task *task);
@@ -358,10 +358,9 @@ int                        xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
 void                   xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
 void                   xprt_free_slot(struct rpc_xprt *xprt,
                                       struct rpc_rqst *req);
-void                   xprt_request_prepare(struct rpc_rqst *req);
 bool                   xprt_prepare_transmit(struct rpc_task *task);
 void                   xprt_request_enqueue_transmit(struct rpc_task *task);
-void                   xprt_request_enqueue_receive(struct rpc_task *task);
+int                    xprt_request_enqueue_receive(struct rpc_task *task);
 void                   xprt_request_wait_receive(struct rpc_task *task);
 void                   xprt_request_dequeue_xprt(struct rpc_task *task);
 bool                   xprt_request_need_retransmit(struct rpc_task *task);
index a4b1af581f69ec055fbc0828765fad10bfd62a8e..248f4ac9564258524cd2ea897252023718c415fe 100644 (file)
@@ -59,6 +59,15 @@ struct crc64_pi_tuple {
        __u8   ref_tag[6];
 };
 
+/**
+ * lower_48_bits() - return bits 0-47 of a number
+ * @n: the number we're accessing
+ */
+static inline u64 lower_48_bits(u64 n)
+{
+       return n & ((1ull << 48) - 1);
+}
+
 static inline u64 ext_pi_ref_tag(struct request *rq)
 {
        unsigned int shift = ilog2(queue_logical_block_size(rq->q));
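
A trivial usage sketch for the new helper, in the spirit of the reference-tag code around it (the wrapper is hypothetical):

static u64 ref_tag_sketch(u64 lba)
{
	/* Keep bits 0-47, the width of the ext PI reference tag. */
	return lower_48_bits(lba);
}
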
index 897494b597ba4f488784fd847c1a68752f3a49aa..795ef5a6842946fcf22d96e6e8e6100a64c7afd8 100644 (file)
@@ -17,6 +17,7 @@ enum task_work_notify_mode {
        TWA_NONE,
        TWA_RESUME,
        TWA_SIGNAL,
+       TWA_SIGNAL_NO_IPI,
 };
 
 static inline bool task_work_pending(struct task_struct *task)
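
A caller-side sketch for the new mode: queue task_work that may wait until the target's next kernel entry instead of paying for an IPI (callback setup elided; the wrapper is hypothetical):

static int queue_lazy_work_sketch(struct task_struct *task,
				  struct callback_head *twork)
{
	return task_work_add(task, twork, TWA_SIGNAL_NO_IPI);
}
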
index 78a98bdff76d6a7e9f3ef630e90c2a5176ace9e7..fe1e467ba046f86b3222b1ceed41b6f2b7f85f97 100644 (file)
@@ -177,6 +177,7 @@ static inline u64 ktime_get_raw_ns(void)
 extern u64 ktime_get_mono_fast_ns(void);
 extern u64 ktime_get_raw_fast_ns(void);
 extern u64 ktime_get_boot_fast_ns(void);
+extern u64 ktime_get_tai_fast_ns(void);
 extern u64 ktime_get_real_fast_ns(void);
 
 /*
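
A sketch of the intended use of the new accessor: CLOCK_TAI timestamps from contexts (e.g. NMI) where the locking accessors are off limits; note the _fast variants can briefly lag a concurrent clock update:

static u64 stamp_event_tai_sketch(void)
{
	return ktime_get_tai_fast_ns();
}
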
index fda13c9d1256c35f59b45f1f825e13d3ea494adc..648f00105f588dac3e713aab419c21ac8e056e99 100644 (file)
@@ -196,14 +196,6 @@ extern void init_timers(void);
 struct hrtimer;
 extern enum hrtimer_restart it_real_fn(struct hrtimer *);
 
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-struct ctl_table;
-
-extern unsigned int sysctl_timer_migration;
-int timer_migration_handler(struct ctl_table *table, int write,
-                           void *buffer, size_t *lenp, loff_t *ppos);
-#endif
-
 unsigned long __round_jiffies(unsigned long j, int cpu);
 unsigned long __round_jiffies_relative(unsigned long j, int cpu);
 unsigned long round_jiffies(unsigned long j);
index 059b18eb1f1fab1c3af72b9369273a2782ba641d..5745c90c880054ac2a734d43142f8aa4d81431e5 100644 (file)
@@ -75,7 +75,7 @@
  * By default we use get_cycles() for this purpose, but individual
  * architectures may override this in their asm/timex.h header file.
  */
-#define random_get_entropy()   get_cycles()
+#define random_get_entropy()   ((unsigned long)get_cycles())
 #endif
 
 /*
index 63fa4196e51cb2522704ec1f542817e8eb29d0f0..7038104463e481486424da66e909bcaf0604be2e 100644 (file)
@@ -118,7 +118,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp);
        _torture_stop_kthread("Stopping " #n " task", &(tp))
 
 #ifdef CONFIG_PREEMPTION
-#define torture_preempt_schedule() preempt_schedule()
+#define torture_preempt_schedule() __preempt_schedule()
 #else
 #define torture_preempt_schedule()     do { } while (0)
 #endif
index 033fe3e1714112da0f802fed57bf175ef4558cc1..7c25b88d79f9002353308c296161b62c18309554 100644 (file)
@@ -15,7 +15,7 @@
 #define BDO_MODE_CARRIER2      (5 << 28)
 #define BDO_MODE_CARRIER3      (6 << 28)
 #define BDO_MODE_EYE           (7 << 28)
-#define BDO_MODE_TESTDATA      (8 << 28)
+#define BDO_MODE_TESTDATA      (8U << 28)
 
 #define BDO_MODE_MASK(mode)    ((mode) & 0xf0000000)
 
index 74a4a0f17b28bd8768e51bfd4f5ce5eed50a4ed3..48f2dd3c568c8334c8591c9dde23d3f4704949f8 100644 (file)
@@ -133,6 +133,8 @@ struct vfio_pci_core_device {
        struct mutex            ioeventfds_lock;
        struct list_head        ioeventfds_list;
        struct vfio_pci_vf_token        *vf_token;
+       struct list_head                sriov_pfs_item;
+       struct vfio_pci_core_device     *sriov_pf_core_dev;
        struct notifier_block   nb;
        struct mutex            vma_lock;
        struct list_head        vma_list;
index dafdc7f48c01b08a84574543af5ff9613b6ab57c..b341dd62aa4da9843f2af1d3132c7132dcc48341 100644 (file)
@@ -23,8 +23,6 @@ struct virtio_shm_region {
  *       any of @get/@set, @get_status/@set_status, or @get_features/
  *       @finalize_features are NOT safe to be called from an atomic
  *       context.
- * @enable_cbs: enable the callbacks
- *      vdev: the virtio_device
  * @get: read the value of a configuration field
  *     vdev: the virtio_device
  *     offset: the offset of the configuration field
@@ -78,7 +76,6 @@ struct virtio_shm_region {
  */
 typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops {
-       void (*enable_cbs)(struct virtio_device *vdev);
        void (*get)(struct virtio_device *vdev, unsigned offset,
                    void *buf, unsigned len);
        void (*set)(struct virtio_device *vdev, unsigned offset,
@@ -233,9 +230,6 @@ void virtio_device_ready(struct virtio_device *dev)
 {
        unsigned status = dev->config->get_status(dev);
 
-       if (dev->config->enable_cbs)
-                  dev->config->enable_cbs(dev);
-
        BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
        dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
 }
index 3b1df7da402d60cad6cfb5ac7c701058d60e2686..b159c278996128036f30b2bcbe8a51a3fa6aac3d 100644 (file)
@@ -26,7 +26,7 @@ struct notifier_block;                /* in notifier.h */
 #define VM_KASAN               0x00000080      /* has allocated kasan shadow memory */
 #define VM_FLUSH_RESET_PERMS   0x00000100      /* reset direct map and flush TLB on unmap, can't be freed in atomic context */
 #define VM_MAP_PUT_PAGES       0x00000200      /* put pages and free array in vfree */
-#define VM_NO_HUGE_VMAP                0x00000400      /* force PAGE_SIZE pte mapping */
+#define VM_ALLOW_HUGE_VMAP     0x00000400      /* Allow for huge pages on archs with HAVE_ARCH_HUGE_VMALLOC */
 
 #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
        !defined(CONFIG_KASAN_VMALLOC)
@@ -153,7 +153,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
                        const void *caller) __alloc_size(1);
 void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
                int node, const void *caller) __alloc_size(1);
-void *vmalloc_no_huge(unsigned long size) __alloc_size(1);
+void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
 
 extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
 extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2);
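
The polarity flips from opt-out (vmalloc_no_huge()/VM_NO_HUGE_VMAP) to opt-in (vmalloc_huge()/VM_ALLOW_HUGE_VMAP). A sketch of an allocation that explicitly opts in (honored only on HAVE_ARCH_HUGE_VMALLOC architectures):

static void *alloc_big_table_sketch(void)
{
	return vmalloc_huge(64UL << 20, GFP_KERNEL);	/* 64 MiB */
}
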
index 7c93f5177532f1876f918f1088737feed79ae319..9c0ad64b8d292d49746c542e0c1b6bb261475e32 100644 (file)
@@ -72,6 +72,7 @@ struct rpcif {
        enum rpcif_type type;
        enum rpcif_data_dir dir;
        u8 bus_size;
+       u8 xfer_size;
        void *buffer;
        u32 xferlen;
        u32 smcr;
index 5cb095b09a9407bc8e23f4979a0dca68f12b94bc..69ef31cea58223317e1b6fc89a036466a7ca92b0 100644 (file)
@@ -578,6 +578,7 @@ enum {
 #define HCI_ERROR_CONNECTION_TIMEOUT   0x08
 #define HCI_ERROR_REJ_LIMITED_RESOURCES        0x0d
 #define HCI_ERROR_REJ_BAD_ADDR         0x0f
+#define HCI_ERROR_INVALID_PARAMETERS   0x12
 #define HCI_ERROR_REMOTE_USER_TERM     0x13
 #define HCI_ERROR_REMOTE_LOW_RESOURCES 0x14
 #define HCI_ERROR_REMOTE_POWER_OFF     0x15
index d5377740e99cf0eac12b2cbaa0a76db951e7e7d4..62d7b81b1cb74621d70a21f8295f57227b388dfe 100644 (file)
@@ -36,6 +36,9 @@
 /* HCI priority */
 #define HCI_PRIO_MAX   7
 
+/* HCI maximum id value */
+#define HCI_MAX_ID 10000
+
 /* HCI Core structures */
 struct inquiry_data {
        bdaddr_t        bdaddr;
@@ -1156,7 +1159,7 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role);
 
 void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active);
 
-void hci_le_conn_failed(struct hci_conn *conn, u8 status);
+void hci_conn_failed(struct hci_conn *conn, u8 status);
 
 /*
  * hci_conn_get() and hci_conn_put() are used to control the life-time of an
index 90cd02ff77ef67f7f65e2c53127c4510c23bd4a9..9c5637d41d95168052686caf7b3ff51b517e6b9b 100644 (file)
@@ -4,8 +4,6 @@
 
 #include <linux/skbuff.h>
 
-#define ESP_SKB_FRAG_MAXSIZE (PAGE_SIZE << SKB_FRAG_PAGE_ORDER)
-
 struct ip_esp_hdr;
 
 static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb)
index aa33e1092e2c4d6fb85334eb0477a0f5559a5807..9f65f1bfbd24646c4d9a37db85360126d038fc42 100644 (file)
@@ -59,6 +59,8 @@ struct flow_dissector_key_vlan {
                __be16  vlan_tci;
        };
        __be16  vlan_tpid;
+       __be16  vlan_eth_type;
+       u16     padding;
 };
 
 struct flow_dissector_mpls_lse {
index f72ec113ae568a93b31d0fa415f0e94a26fe94d5..98e1ec1a14f0382d1f4f8e85fe5ac2a056d2d6bc 100644 (file)
@@ -425,7 +425,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
 }
 
 int __inet_hash_connect(struct inet_timewait_death_row *death_row,
-                       struct sock *sk, u32 port_offset,
+                       struct sock *sk, u64 port_offset,
                        int (*check_established)(struct inet_timewait_death_row *,
                                                 struct sock *, __u16,
                                                 struct inet_timewait_sock **));
index 463ae5d33eb09c40caeb4d039af268609b5e563b..5b47545f22d39eb2dd9725ac37bd7d7a9016a03c 100644 (file)
@@ -71,7 +71,6 @@ struct inet_timewait_sock {
                                tw_tos          : 8;
        u32                     tw_txhash;
        u32                     tw_priority;
-       u32                     tw_bslot; /* bind bucket slot */
        struct timer_list       tw_timer;
        struct inet_bind_bucket *tw_tb;
 };
@@ -110,6 +109,8 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo
 
 void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
 
+void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family);
+
 static inline
 struct net *twsk_net(const struct inet_timewait_sock *twsk)
 {
index 3984f2c39c4ba8b4d2a4e4dab6d743f0c9faf798..0161137914cf9d2111fde635b00604a5cf3d768c 100644 (file)
@@ -56,6 +56,7 @@ struct inet_skb_parm {
 #define IPSKB_DOREDIRECT       BIT(5)
 #define IPSKB_FRAG_PMTU                BIT(6)
 #define IPSKB_L3SLAVE          BIT(7)
+#define IPSKB_NOPOLICY         BIT(8)
 
        u16                     frag_max_size;
 };
index a38c4f1e4e5c641dcede4d7fedfcdbfadbac430e..74b369bddf49e82aaa722d412ac407e7f0c29224 100644 (file)
@@ -58,7 +58,7 @@ struct ip6_tnl {
 
        /* These fields used only by GRE */
        __u32 i_seqno;  /* The last seen seqno  */
-       __u32 o_seqno;  /* The last output seqno */
+       atomic_t o_seqno;       /* The last output seqno */
        int hlen;       /* tun_hlen + encap_hlen */
        int tun_hlen;   /* Precalculated header length */
        int encap_hlen; /* Encap header length (FOU,GUE) */
index 0219fe907b261952ec1f140d105cd74d7eda6b40..c24fa934221dde1c59ae6519cee783233d19af48 100644 (file)
@@ -116,7 +116,7 @@ struct ip_tunnel {
 
        /* These four fields used only by GRE */
        u32             i_seqno;        /* The last seen seqno  */
-       u32             o_seqno;        /* The last output seqno */
+       atomic_t        o_seqno;        /* The last output seqno */
        int             tun_hlen;       /* Precalculated header length */
 
        /* These four fields used only by ERSPAN */
@@ -243,11 +243,18 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id)
 static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
                                       int proto,
                                       __be32 daddr, __be32 saddr,
-                                      __be32 key, __u8 tos, int oif,
+                                      __be32 key, __u8 tos,
+                                      struct net *net, int oif,
                                       __u32 mark, __u32 tun_inner_hash)
 {
        memset(fl4, 0, sizeof(*fl4));
-       fl4->flowi4_oif = oif;
+
+       if (oif) {
+               fl4->flowi4_l3mdev = l3mdev_master_upper_ifindex_by_index_rcu(net, oif);
+               /* Legacy VRF/l3mdev use case */
+               fl4->flowi4_oif = fl4->flowi4_l3mdev ? 0 : oif;
+       }
+
        fl4->daddr = daddr;
        fl4->saddr = saddr;
        fl4->flowi4_tos = tos;
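
A sketch of an updated caller, which now hands in the netns so the helper can resolve an l3mdev master itself (argument values are placeholders):

static void tunnel_flow_sketch(struct net_device *dev, struct sk_buff *skb,
			       __be32 daddr, __be32 saddr, __be32 key, __u8 tos)
{
	struct flowi4 fl4;

	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, daddr, saddr, key, tos,
			    dev_net(dev), dev->ifindex, skb->mark, 0);
}
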
index d37268fe6825d3cad0e413463b524a8fca322c6e..82800d521c3de186bf04a0e6f7eb4b29735a074e 100644 (file)
@@ -36,8 +36,6 @@ struct mctp_hdr {
 #define MCTP_HDR_TAG_SHIFT     0
 #define MCTP_HDR_TAG_MASK      GENMASK(2, 0)
 
-#define MCTP_HEADER_MAXLEN     4
-
 #define MCTP_INITIAL_DEFAULT_NET       1
 
 static inline bool mctp_address_unicast(mctp_eid_t eid)
index 3d83b64471d32391fb632e8c25e12a8ec7d1b42e..b4af4837d80b4ed47d05474432d5b8ebb42322e7 100644 (file)
@@ -75,8 +75,8 @@ struct netns_ipv6 {
        struct list_head        fib6_walkers;
        rwlock_t                fib6_walker_lock;
        spinlock_t              fib6_gc_lock;
-       unsigned int             ip6_rt_gc_expire;
-       unsigned long            ip6_rt_last_gc;
+       atomic_t                ip6_rt_gc_expire;
+       unsigned long           ip6_rt_last_gc;
        unsigned char           flowlabel_has_excl;
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
        bool                    fib6_has_custom_rules;
index d7d2495f83c27cc6707fed26f8e433dd6d1eb295..dac91aa38c5af389648e84971b0ad17947ef844c 100644 (file)
@@ -4,8 +4,8 @@
 
 #include <linux/types.h>
 
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
                               __be16 dport);
 u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
                   __be16 sport, __be16 dport);
index 748cf87a4d7ea5c92b4fd48dd3302b8ad64944fe..3e02709a1df656931942be4851a115dd6bef8b4c 100644 (file)
@@ -14,6 +14,7 @@ struct tcf_pedit {
        struct tc_action        common;
        unsigned char           tcfp_nkeys;
        unsigned char           tcfp_flags;
+       u32                     tcfp_off_max_hint;
        struct tc_pedit_key     *tcfp_keys;
        struct tcf_pedit_key_ex *tcfp_keys_ex;
 };
index 70ca4a5e330a2002acffd7ddd4f685a758c7fbc4..cc1295037533a7741e454f7c040f77a21deae02b 100644 (file)
@@ -480,6 +480,7 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
                      u32 cookie);
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
 struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+                                           const struct tcp_request_sock_ops *af_ops,
                                            struct sock *sk, struct sk_buff *skb);
 #ifdef CONFIG_SYN_COOKIES
 
@@ -620,6 +621,7 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
 void tcp_reset(struct sock *sk, struct sk_buff *skb);
 void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
 void tcp_fin(struct sock *sk);
+void tcp_check_space(struct sock *sk);
 
 /* tcp_timer.c */
 void tcp_init_xmit_timers(struct sock *);
@@ -1042,6 +1044,7 @@ struct rate_sample {
        int  losses;            /* number of packets marked lost upon ACK */
        u32  acked_sacked;      /* number of packets newly (S)ACKed upon ACK */
        u32  prior_in_flight;   /* in flight before this ACK */
+       u32  last_end_seq;      /* end_seq of most recently ACKed packet */
        bool is_app_limited;    /* is sample from packet with bubble in pipe? */
        bool is_retrans;        /* is sample from retransmission? */
        bool is_ack_delayed;    /* is this (likely) a delayed ACK? */
@@ -1164,6 +1167,11 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
                  bool is_sack_reneg, struct rate_sample *rs);
 void tcp_rate_check_app_limited(struct sock *sk);
 
+static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
+{
+       return t1 > t2 || (t1 == t2 && after(seq1, seq2));
+}
+
 /* These functions determine how the current flow behaves in respect of SACK
  * handling. SACK is negotiated with the peer, and therefore it can vary
  * between different flows.
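
A sketch of the new ordering helper: timestamps decide, and the sequence numbers break ties between packets sent with the same stamp (the wrapper is hypothetical):

static bool sent_after_sketch(const struct sk_buff *a,
			      const struct sk_buff *b)
{
	return tcp_skb_sent_after(tcp_skb_timestamp_us(a),
				  tcp_skb_timestamp_us(b),
				  TCP_SKB_CB(a)->end_seq,
				  TCP_SKB_CB(b)->end_seq);
}
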
index 6fb899ff5afce9d73c7f02748bd14ffc4cdebcfe..d2efddce65d46b10f9c0983e4d597eb1685ba47f 100644 (file)
@@ -1093,6 +1093,18 @@ static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb,
        return false;
 }
 
+static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb,
+                                            int dir, unsigned short family)
+{
+       if (dir != XFRM_POLICY_OUT && family == AF_INET) {
+               /* same dst may be used for traffic originating from
+                * devices with different policy settings.
+                */
+               return IPCB(skb)->flags & IPSKB_NOPOLICY;
+       }
+       return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY);
+}
+
 static inline int __xfrm_policy_check2(struct sock *sk, int dir,
                                       struct sk_buff *skb,
                                       unsigned int family, int reverse)
@@ -1104,7 +1116,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
                return __xfrm_policy_check(sk, ndir, skb, family);
 
        return __xfrm_check_nopolicy(net, skb, dir) ||
-              (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
+              __xfrm_check_dev_nopolicy(skb, dir, family) ||
               __xfrm_policy_check(sk, ndir, skb, family);
 }
 
index 5554ee75e7da8bc5f14a6498d937545b819f080a..647722e847b4131bd3cde64e15b2a834ef04a664 100644 (file)
@@ -97,6 +97,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
                  u16 queue_id, u16 flags);
 int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
                         struct net_device *dev, u16 queue_id);
+int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs);
 void xp_destroy(struct xsk_buff_pool *pool);
 void xp_get_pool(struct xsk_buff_pool *pool);
 bool xp_put_pool(struct xsk_buff_pool *pool);
index e76c94697c1bc557af1f8351e26d3128fcdd3bfc..d0a24779c52dc44cb14c4fea56f8243e1b0f22a0 100644 (file)
@@ -53,8 +53,10 @@ enum {
 
 #define ISID_SIZE                      6
 
-/* Connection suspend "bit" */
-#define ISCSI_SUSPEND_BIT              1
+/* Connection flags */
+#define ISCSI_CONN_FLAG_SUSPEND_TX     BIT(0)
+#define ISCSI_CONN_FLAG_SUSPEND_RX     BIT(1)
+#define ISCSI_CONN_FLAG_BOUND          BIT(2)
 
 #define ISCSI_ITT_MASK                 0x1fff
 #define ISCSI_TOTAL_CMDS_MAX           4096
@@ -211,8 +213,7 @@ struct iscsi_conn {
        struct list_head        cmdqueue;       /* data-path cmd queue */
        struct list_head        requeue;        /* tasks needing another run */
        struct work_struct      xmitwork;       /* per-conn. xmit workqueue */
-       unsigned long           suspend_tx;     /* suspend Tx */
-       unsigned long           suspend_rx;     /* suspend Rx */
+       unsigned long           flags;          /* ISCSI_CONN_FLAGs */
 
        /* negotiated params */
        unsigned                max_recv_dlength; /* initiator_max_recv_dsl*/
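
The two suspend words collapse into bits of a single flags word. Since the new flags are bit masks, a plain mask test suffices for a read-side sketch (the helper is hypothetical; the real paths use atomic ops on conn->flags):

static bool tx_suspended_sketch(struct iscsi_conn *conn)
{
	return READ_ONCE(conn->flags) & ISCSI_CONN_FLAG_SUSPEND_TX;
}
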
index 38e4a67f5922ce4321050a36da21a0419a312419..9acb8422f68024cee9e70946b012541a460b38aa 100644 (file)
@@ -211,6 +211,8 @@ struct iscsi_cls_conn {
        struct mutex ep_mutex;
        struct iscsi_endpoint *ep;
 
+       /* Used when accessing flags and queueing work. */
+       spinlock_t lock;
        unsigned long flags;
        struct work_struct cleanup_work;
 
@@ -295,7 +297,7 @@ extern void iscsi_host_for_each_session(struct Scsi_Host *shost,
 struct iscsi_endpoint {
        void *dd_data;                  /* LLD private data */
        struct device dev;
-       uint64_t id;
+       int id;
        struct iscsi_cls_conn *conn;
 };
 
index 7b2bf9b1fe697e8d9670e1f25eb07485271598ca..de26c992f82146cd4ae7ca8b5fde03563fbc70a2 100644 (file)
@@ -681,7 +681,6 @@ struct ocelot_vcap_id {
 
 struct ocelot_vcap_filter {
        struct list_head list;
-       struct list_head trap_list;
 
        enum ocelot_vcap_filter_type type;
        int block_id;
@@ -695,6 +694,7 @@ struct ocelot_vcap_filter {
        struct ocelot_vcap_stats stats;
        /* For VCAP IS1 and IS2 */
        bool take_ts;
+       bool is_trap;
        unsigned long ingress_port_mask;
        /* For VCAP ES0 */
        struct ocelot_vcap_port ingress_port;
index b7e9b58d3c78807271d48abe1719096936a64838..6d4cc49584c6372a127525a9b47710d4bcd64d8b 100644 (file)
@@ -284,6 +284,7 @@ int snd_card_disconnect(struct snd_card *card);
 void snd_card_disconnect_sync(struct snd_card *card);
 int snd_card_free(struct snd_card *card);
 int snd_card_free_when_closed(struct snd_card *card);
+int snd_card_free_on_error(struct device *dev, int ret);
 void snd_card_set_id(struct snd_card *card, const char *id);
 int snd_card_register(struct snd_card *card);
 int snd_card_info_init(void);
index 653dfffb3ac845400c19b9f57243af70aae4437f..8d79cebf95f328357cd83598374d29a85603df9b 100644 (file)
@@ -51,6 +51,11 @@ struct snd_dma_device {
 #define SNDRV_DMA_TYPE_DEV_SG  SNDRV_DMA_TYPE_DEV /* no SG-buf support */
 #define SNDRV_DMA_TYPE_DEV_WC_SG       SNDRV_DMA_TYPE_DEV_WC
 #endif
+/* fallback types, don't use those directly */
+#ifdef CONFIG_SND_DMA_SGBUF
+#define SNDRV_DMA_TYPE_DEV_SG_FALLBACK         10
+#define SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK      11
+#endif
 
 /*
  * info for buffer allocation
index a52080407b98c539760f3823cc2a3c0f1765de90..766dc6f009c0b7e2bc02bfde8dcf2de5048a1c3b 100644 (file)
@@ -179,6 +179,10 @@ struct snd_soc_component_driver {
                                  struct snd_pcm_hw_params *params);
        bool use_dai_pcm_id;    /* use DAI link PCM ID as PCM device number */
        int be_pcm_base;        /* base device ID for all BE PCMs */
+
+#ifdef CONFIG_DEBUG_FS
+       const char *debugfs_prefix;
+#endif
 };
 
 struct snd_soc_component {
index 675f3a1fe613944464ab5c14fe841d590f91f17e..773963a1e0b5362d4e5b4423f8534c23dc6d7957 100644 (file)
@@ -14,7 +14,7 @@
 #define TRANSPORT_FLAG_PASSTHROUGH_ALUA                0x2
 #define TRANSPORT_FLAG_PASSTHROUGH_PGR          0x4
 
-struct request_queue;
+struct block_device;
 struct scatterlist;
 
 struct target_backend_ops {
@@ -117,7 +117,7 @@ sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
 bool target_sense_desc_format(struct se_device *dev);
 sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
 bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
-                                      struct request_queue *q);
+                                      struct block_device *bdev);
 
 static inline bool target_dev_configured(struct se_device *se_dev)
 {
index cddf5b6fbeb452183d1d9ff5d497e31dd9334a9b..66fcc5a1a5b1c0321414418031a7894f543cdef4 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/tracepoint.h>
 #include <uapi/linux/io_uring.h>
+#include <linux/io_uring.h>
 
 struct io_wq_work;
 
@@ -147,7 +148,7 @@ TRACE_EVENT(io_uring_queue_async_work,
        TP_PROTO(void *ctx, void * req, unsigned long long user_data, u8 opcode,
                unsigned int flags, struct io_wq_work *work, int rw),
 
-       TP_ARGS(ctx, req, user_data, flags, opcode, work, rw),
+       TP_ARGS(ctx, req, user_data, opcode, flags, work, rw),
 
        TP_STRUCT__entry (
                __field(  void *,                       ctx             )
@@ -169,8 +170,9 @@ TRACE_EVENT(io_uring_queue_async_work,
                __entry->rw             = rw;
        ),
 
-       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p",
-               __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p",
+               __entry->ctx, __entry->req, __entry->user_data,
+               io_uring_get_opcode(__entry->opcode),
                __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
 );
 
@@ -205,8 +207,9 @@ TRACE_EVENT(io_uring_defer,
                __entry->opcode = opcode;
        ),
 
-       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d",
-               __entry->ctx, __entry->req, __entry->data, __entry->opcode)
+       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s",
+               __entry->ctx, __entry->req, __entry->data,
+               io_uring_get_opcode(__entry->opcode))
 );
 
 /**
@@ -305,9 +308,9 @@ TRACE_EVENT(io_uring_fail_link,
                __entry->link           = link;
        ),
 
-       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p",
-               __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
-               __entry->link)
+       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p",
+               __entry->ctx, __entry->req, __entry->user_data,
+               io_uring_get_opcode(__entry->opcode), __entry->link)
 );
 
 /**
@@ -318,13 +321,16 @@ TRACE_EVENT(io_uring_fail_link,
  * @user_data:         user data associated with the request
  * @res:               result of the request
  * @cflags:            completion flags
+ * @extra1:            extra 64-bit data for CQE32
+ * @extra2:            extra 64-bit data for CQE32
  *
  */
 TRACE_EVENT(io_uring_complete,
 
-       TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags),
+       TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags,
+                u64 extra1, u64 extra2),
 
-       TP_ARGS(ctx, req, user_data, res, cflags),
+       TP_ARGS(ctx, req, user_data, res, cflags, extra1, extra2),
 
        TP_STRUCT__entry (
                __field(  void *,       ctx             )
@@ -332,6 +338,8 @@ TRACE_EVENT(io_uring_complete,
                __field(  u64,          user_data       )
                __field(  int,          res             )
                __field(  unsigned,     cflags          )
+               __field(  u64,          extra1          )
+               __field(  u64,          extra2          )
        ),
 
        TP_fast_assign(
@@ -340,12 +348,17 @@ TRACE_EVENT(io_uring_complete,
                __entry->user_data      = user_data;
                __entry->res            = res;
                __entry->cflags         = cflags;
+               __entry->extra1         = extra1;
+               __entry->extra2         = extra2;
        ),
 
-       TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x",
+       TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x "
+                 "extra1 %llu extra2 %llu",
                __entry->ctx, __entry->req,
                __entry->user_data,
-               __entry->res, __entry->cflags)
+               __entry->res, __entry->cflags,
+               (unsigned long long) __entry->extra1,
+               (unsigned long long) __entry->extra2)
 );
 
 /**
@@ -389,9 +402,9 @@ TRACE_EVENT(io_uring_submit_sqe,
                __entry->sq_thread      = sq_thread;
        ),
 
-       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, "
+       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
                  "non block %d, sq_thread %d", __entry->ctx, __entry->req,
-                 __entry->user_data, __entry->opcode,
+                 __entry->user_data, io_uring_get_opcode(__entry->opcode),
                  __entry->flags, __entry->force_nonblock, __entry->sq_thread)
 );
 
@@ -433,8 +446,9 @@ TRACE_EVENT(io_uring_poll_arm,
                __entry->events         = events;
        ),
 
-       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x",
-                 __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x",
+                 __entry->ctx, __entry->req, __entry->user_data,
+                 io_uring_get_opcode(__entry->opcode),
                  __entry->mask, __entry->events)
 );
 
@@ -470,8 +484,9 @@ TRACE_EVENT(io_uring_task_add,
                __entry->mask           = mask;
        ),
 
-       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x",
-               __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x",
+               __entry->ctx, __entry->req, __entry->user_data,
+               io_uring_get_opcode(__entry->opcode),
                __entry->mask)
 );
 
@@ -506,7 +521,7 @@ TRACE_EVENT(io_uring_req_failed,
                __field( u16,                   personality     )
                __field( u32,                   file_index      )
                __field( u64,                   pad1            )
-               __field( u64,                   pad2            )
+               __field( u64,                   addr3           )
                __field( int,                   error           )
        ),
 
@@ -520,27 +535,69 @@ TRACE_EVENT(io_uring_req_failed,
                __entry->off            = sqe->off;
                __entry->addr           = sqe->addr;
                __entry->len            = sqe->len;
-               __entry->op_flags       = sqe->rw_flags;
+               __entry->op_flags       = sqe->poll32_events;
                __entry->buf_index      = sqe->buf_index;
                __entry->personality    = sqe->personality;
                __entry->file_index     = sqe->file_index;
                __entry->pad1           = sqe->__pad2[0];
-               __entry->pad2           = sqe->__pad2[1];
+               __entry->addr3          = sqe->addr3;
                __entry->error          = error;
        ),
 
        TP_printk("ring %p, req %p, user_data 0x%llx, "
-               "op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
+                 "opcode %s, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
                  "len=%u, rw_flags=0x%x, buf_index=%d, "
-                 "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
+                 "personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, "
+                 "error=%d",
                  __entry->ctx, __entry->req, __entry->user_data,
-                 __entry->opcode, __entry->flags, __entry->ioprio,
+                 io_uring_get_opcode(__entry->opcode),
+                 __entry->flags, __entry->ioprio,
                  (unsigned long long)__entry->off,
                  (unsigned long long) __entry->addr, __entry->len,
                  __entry->op_flags,
                  __entry->buf_index, __entry->personality, __entry->file_index,
                  (unsigned long long) __entry->pad1,
-                 (unsigned long long) __entry->pad2, __entry->error)
+                 (unsigned long long) __entry->addr3, __entry->error)
+);
+
+
+/**
+ * io_uring_cqe_overflow - a CQE overflowed
+ *
+ * @ctx:               pointer to a ring context structure
+ * @user_data:         user data associated with the request
+ * @res:               CQE result
+ * @cflags:            CQE flags
+ * @ocqe:              pointer to the overflow cqe (if available)
+ *
+ */
+TRACE_EVENT(io_uring_cqe_overflow,
+
+       TP_PROTO(void *ctx, unsigned long long user_data, s32 res, u32 cflags,
+                void *ocqe),
+
+       TP_ARGS(ctx, user_data, res, cflags, ocqe),
+
+       TP_STRUCT__entry (
+               __field(  void *,               ctx             )
+               __field(  unsigned long long,   user_data       )
+               __field(  s32,                  res             )
+               __field(  u32,                  cflags          )
+               __field(  void *,               ocqe            )
+       ),
+
+       TP_fast_assign(
+               __entry->ctx            = ctx;
+               __entry->user_data      = user_data;
+               __entry->res            = res;
+               __entry->cflags         = cflags;
+               __entry->ocqe           = ocqe;
+       ),
+
+       TP_printk("ring %p, user_data 0x%llx, res %d, flags %x, "
+                 "overflow_cqe %p",
+                 __entry->ctx, __entry->user_data, __entry->res,
+                 __entry->cflags, __entry->ocqe)
 );
 
 #endif /* _TRACE_IO_URING_H */
index 65e7867563214270ec80bdbad1da5d8a824a4f79..fbb99a61f714cbebb91ba9280ce44f812ece32de 100644 (file)
@@ -222,11 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt,
 TRACE_EVENT(sched_switch,
 
        TP_PROTO(bool preempt,
-                unsigned int prev_state,
                 struct task_struct *prev,
-                struct task_struct *next),
+                struct task_struct *next,
+                unsigned int prev_state),
 
-       TP_ARGS(preempt, prev_state, prev, next),
+       TP_ARGS(preempt, prev, next, prev_state),
 
        TP_STRUCT__entry(
                __array(        char,   prev_comm,      TASK_COMM_LEN   )
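
Any probe attached to sched_switch must follow the reordered prototype, with prev_state moved to the end. A module-style sketch:

static void probe_sched_switch_sketch(void *data, bool preempt,
				      struct task_struct *prev,
				      struct task_struct *next,
				      unsigned int prev_state)
{
	/* prev_state is now the final argument. */
}
/* registered via register_trace_sched_switch(probe_sched_switch_sketch, NULL) */
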
index 0f34f13ebd55853adbe773640564badcbcf8b5a7..3995c58a1c514ccafdb2049cdc0b25566a8f227a 100644 (file)
@@ -1004,7 +1004,6 @@ DEFINE_RPC_XPRT_LIFETIME_EVENT(connect);
 DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto);
 DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done);
 DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force);
-DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_cleanup);
 DEFINE_RPC_XPRT_LIFETIME_EVENT(destroy);
 
 DECLARE_EVENT_CLASS(rpc_xprt_event,
@@ -2016,17 +2015,18 @@ DECLARE_EVENT_CLASS(svc_deferred_event,
        TP_STRUCT__entry(
                __field(const void *, dr)
                __field(u32, xid)
-               __string(addr, dr->xprt->xpt_remotebuf)
+               __array(__u8, addr, INET6_ADDRSTRLEN + 10)
        ),
 
        TP_fast_assign(
                __entry->dr = dr;
                __entry->xid = be32_to_cpu(*(__be32 *)(dr->args +
                                                       (dr->xprt_hlen>>2)));
-               __assign_str(addr, dr->xprt->xpt_remotebuf);
+               snprintf(__entry->addr, sizeof(__entry->addr) - 1,
+                        "%pISpc", (struct sockaddr *)&dr->addr);
        ),
 
-       TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr,
+       TP_printk("addr=%s dr=%p xid=0x%08x", __entry->addr, __entry->dr,
                __entry->xid)
 );
 
index 6ad031c71be748528a4cc28d6324a4c8353de161..2e713a7d9aa3a45c11c551d038d8ea4d087017d6 100644 (file)
@@ -48,6 +48,7 @@ DEFINE_EVENT(timer_class, timer_init,
  * timer_start - called when the timer is started
  * @timer:     pointer to struct timer_list
  * @expires:   the timers expiry time
+ * @flags:     the timer's flags
  */
 TRACE_EVENT(timer_start,
 
@@ -84,6 +85,7 @@ TRACE_EVENT(timer_start,
 /**
  * timer_expire_entry - called immediately before the timer callback
  * @timer:     pointer to struct timer_list
+ * @baseclk:   value of timer_base::clk when timer expires
  *
  * Allows to determine the timer latency.
  */
@@ -190,7 +192,8 @@ TRACE_EVENT(hrtimer_init,
 
 /**
  * hrtimer_start - called when the hrtimer is started
- * @hrtimer: pointer to struct hrtimer
+ * @hrtimer:   pointer to struct hrtimer
+ * @mode:      the hrtimer's mode
  */
 TRACE_EVENT(hrtimer_start,
 
index 804ff8d98f710c873c4876fcf3d6878cec4728f5..011e594e4a0da66f652dc77c8a468aa815591c65 100644 (file)
 #define CDROMREADALL           0x5318  /* read all 2646 bytes */
 
 /* 
- * These ioctls are (now) only in ide-cd.c for controlling 
+ * These ioctls were only in (now removed) ide-cd.c for controlling
  * drive spindown time.  They should be implemented in the
  * Uniform driver, via generic packet commands, GPCMD_MODE_SELECT_10,
  * GPCMD_MODE_SENSE_10 and the GPMODE_POWER_PAGE...
index 8e4a2ca0bcbf70abfa3735982bc2fd9f87508fb2..b1523cb8ab307d7454160a5d5b3f1d1c0fc1b596 100644 (file)
@@ -92,7 +92,7 @@ struct dma_buf_sync {
  * between them in actual uapi, they're just different numbers.
  */
 #define DMA_BUF_SET_NAME       _IOW(DMA_BUF_BASE, 1, const char *)
-#define DMA_BUF_SET_NAME_A     _IOW(DMA_BUF_BASE, 1, u32)
-#define DMA_BUF_SET_NAME_B     _IOW(DMA_BUF_BASE, 1, u64)
+#define DMA_BUF_SET_NAME_A     _IOW(DMA_BUF_BASE, 1, __u32)
+#define DMA_BUF_SET_NAME_B     _IOW(DMA_BUF_BASE, 1, __u64)
 
 #endif
index 787c657bfae8cd92509529aeb641e042ac7c6070..7ce993e6786ccae99474795844a01cc4e6b0ae8d 100644 (file)
@@ -42,7 +42,7 @@ typedef __s64 Elf64_Sxword;
 
 
 /* ARM MTE memory tag segment type */
-#define PT_ARM_MEMTAG_MTE      (PT_LOPROC + 0x1)
+#define PT_AARCH64_MEMTAG_MTE  (PT_LOPROC + 0x2)
 
 /*
  * Extended Numbering
index 4c14e8be7267761bf3e9e3aa76a1f1ebe4c11bbc..3a49913d006c9bf6e502ea209336d24d2924effd 100644 (file)
@@ -182,7 +182,7 @@ struct fb_fix_screeninfo {
  *
  * For pseudocolor: offset and length should be the same for all color
  * components. Offset specifies the position of the least significant bit
- * of the pallette index in a pixel value. Length indicates the number
+ * of the palette index in a pixel value. Length indicates the number
  * of available palette entries (i.e. # of entries = 1 << length).
  */
 struct fb_bitfield {
index 7989d9483ea75e2bbaaf78c1fd3d3bca741678ff..dff8e7f17074851211eba754d7eeb19da806931b 100644 (file)
 /* Select an area of screen to be copied */
 #define KEY_SELECTIVE_SCREENSHOT       0x27a
 
+/* Move the focus to the next or previous user-controllable element within a UI container */
+#define KEY_NEXT_ELEMENT               0x27b
+#define KEY_PREVIOUS_ELEMENT           0x27c
+
+/* Toggle Autopilot engagement */
+#define KEY_AUTOPILOT_ENGAGE_TOGGLE    0x27d
+
+/* Shortcut Keys */
+#define KEY_MARK_WAYPOINT              0x27e
+#define KEY_SOS                                0x27f
+#define KEY_NAV_CHART                  0x280
+#define KEY_FISHING_CHART              0x281
+#define KEY_SINGLE_RANGE_RADAR         0x282
+#define KEY_DUAL_RANGE_RADAR           0x283
+#define KEY_RADAR_OVERLAY              0x284
+#define KEY_TRADITIONAL_SONAR          0x285
+#define KEY_CLEARVU_SONAR              0x286
+#define KEY_SIDEVU_SONAR               0x287
+#define KEY_NAV_INFO                   0x288
+#define KEY_BRIGHTNESS_MENU            0x289
+
 /*
  * Some keyboards have keys which do not have a defined meaning, these keys
  * are intended to be programmed / bound to macros by the user. For most
index 784adc6f6ed2e6ff47c8c02a754067bf7af4b74a..53e7dae92e42e4d9a373d2ef61ab925f0a8d64d8 100644 (file)
@@ -22,6 +22,7 @@ struct io_uring_sqe {
        union {
                __u64   off;    /* offset into file */
                __u64   addr2;
+               __u32   cmd_op;
        };
        union {
                __u64   addr;   /* pointer to buffer or iovecs */
@@ -45,6 +46,7 @@ struct io_uring_sqe {
                __u32           rename_flags;
                __u32           unlink_flags;
                __u32           hardlink_flags;
+               __u32           xattr_flags;
        };
        __u64   user_data;      /* data to be passed back at completion time */
        /* pack this to avoid bogus arm OABI complaints */
@@ -60,9 +62,28 @@ struct io_uring_sqe {
                __s32   splice_fd_in;
                __u32   file_index;
        };
-       __u64   __pad2[2];
+       union {
+               struct {
+                       __u64   addr3;
+                       __u64   __pad2[1];
+               };
+               /*
+                * If the ring is initialized with IORING_SETUP_SQE128, then
+                * this field is used for 80 bytes of arbitrary command data
+                */
+               __u8    cmd[0];
+       };
 };
 
+/*
+ * If sqe->file_index is set to this for opcodes that instantiate a new
+ * direct descriptor (like openat/openat2/accept), then io_uring will allocate
+ * an available direct descriptor instead of having the application pass one
+ * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
+ * if the space is full.
+ */
+#define IORING_FILE_INDEX_ALLOC                (~0U)
+
 enum {
        IOSQE_FIXED_FILE_BIT,
        IOSQE_IO_DRAIN_BIT,
@@ -102,8 +123,25 @@ enum {
 #define IORING_SETUP_ATTACH_WQ (1U << 5)       /* attach to existing wq */
 #define IORING_SETUP_R_DISABLED        (1U << 6)       /* start with ring disabled */
 #define IORING_SETUP_SUBMIT_ALL        (1U << 7)       /* continue submit on error */
+/*
+ * Cooperative task running. When requests complete, they often require
+ * forcing the submitter to transition to the kernel to complete. If this
+ * flag is set, work will be done when the task transitions anyway, rather
+ * than force an inter-processor interrupt reschedule. This avoids interrupting
+ * a task running in userspace, and saves an IPI.
+ */
+#define IORING_SETUP_COOP_TASKRUN      (1U << 8)
+/*
+ * If COOP_TASKRUN is set, get notified if task work is available for
+ * running and a kernel transition would be needed to run it. This sets
+ * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN.
+ */
+#define IORING_SETUP_TASKRUN_FLAG      (1U << 9)
 
-enum {
+#define IORING_SETUP_SQE128            (1U << 10) /* SQEs are 128 byte */
+#define IORING_SETUP_CQE32             (1U << 11) /* CQEs are 32 byte */
+
+enum io_uring_op {
        IORING_OP_NOP,
        IORING_OP_READV,
        IORING_OP_WRITEV,
@@ -145,6 +183,12 @@ enum {
        IORING_OP_SYMLINKAT,
        IORING_OP_LINKAT,
        IORING_OP_MSG_RING,
+       IORING_OP_FSETXATTR,
+       IORING_OP_SETXATTR,
+       IORING_OP_FGETXATTR,
+       IORING_OP_GETXATTR,
+       IORING_OP_SOCKET,
+       IORING_OP_URING_CMD,
 
        /* this goes last, obviously */
        IORING_OP_LAST,
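
A userspace sketch of opting in to cooperative task running (io_uring_setup() is an assumed raw-syscall wrapper; error handling elided). With IORING_SETUP_TASKRUN_FLAG, the kernel raises IORING_SQ_TASKRUN in the mapped SQ-ring flags when an io_uring_enter() is needed to process pending work:

#include <linux/io_uring.h>

static int setup_coop_ring_sketch(void)
{
	struct io_uring_params p = { 0 };

	p.flags = IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG;
	return io_uring_setup(256, &p);	/* assumed syscall wrapper */
}
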
@@ -187,6 +231,33 @@ enum {
 #define IORING_POLL_UPDATE_EVENTS      (1U << 1)
 #define IORING_POLL_UPDATE_USER_DATA   (1U << 2)
 
+/*
+ * ASYNC_CANCEL flags.
+ *
+ * IORING_ASYNC_CANCEL_ALL     Cancel all requests that match the given key
+ * IORING_ASYNC_CANCEL_FD      Key off 'fd' for cancelation rather than the
+ *                             request 'user_data'
+ * IORING_ASYNC_CANCEL_ANY     Match any request
+ */
+#define IORING_ASYNC_CANCEL_ALL        (1U << 0)
+#define IORING_ASYNC_CANCEL_FD (1U << 1)
+#define IORING_ASYNC_CANCEL_ANY        (1U << 2)
+
+/*
+ * send/sendmsg and recv/recvmsg flags (sqe->addr2)
+ *
+ * IORING_RECVSEND_POLL_FIRST  If set, arm poll upfront and skip the
+ *                             initial transfer attempt, rather than
+ *                             first attempting the send or receive and
+ *                             arming poll only on an -EAGAIN result.
+ */
+#define IORING_RECVSEND_POLL_FIRST     (1U << 0)
+
+/*
+ * accept flags stored in sqe->ioprio
+ */
+#define IORING_ACCEPT_MULTISHOT        (1U << 0)
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */
@@ -194,6 +265,12 @@ struct io_uring_cqe {
        __u64   user_data;      /* sqe->data submission passed back */
        __s32   res;            /* result code for this event */
        __u32   flags;
+
+       /*
+        * If the ring is initialized with IORING_SETUP_CQE32, then this field
+        * contains 16 bytes of padding, doubling the size of the CQE.
+        */
+       __u64 big_cqe[];
 };
 
 /*
@@ -201,9 +278,11 @@ struct io_uring_cqe {
  *
  * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
  * IORING_CQE_F_MORE   If set, parent SQE will generate more CQE entries
+ * IORING_CQE_F_SOCK_NONEMPTY  If set, more data to read after socket recv
  */
 #define IORING_CQE_F_BUFFER            (1U << 0)
 #define IORING_CQE_F_MORE              (1U << 1)
+#define IORING_CQE_F_SOCK_NONEMPTY     (1U << 2)
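Decoding these on the completion side is mechanical; a sketch using the IORING_CQE_BUFFER_SHIFT constant defined just below:

        if (cqe->flags & IORING_CQE_F_BUFFER) {
                unsigned int bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
                /* the data landed in provided buffer 'bid' */
        }
        if (!(cqe->flags & IORING_CQE_F_MORE)) {
                /* a multishot request has terminated; re-arm if still needed */
        }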
 
 enum {
        IORING_CQE_BUFFER_SHIFT         = 16,
@@ -236,6 +315,7 @@ struct io_sqring_offsets {
  */
 #define IORING_SQ_NEED_WAKEUP  (1U << 0) /* needs io_uring_enter wakeup */
 #define IORING_SQ_CQ_OVERFLOW  (1U << 1) /* CQ ring is overflown */
+#define IORING_SQ_TASKRUN      (1U << 2) /* task should enter the kernel */
 
 struct io_cqring_offsets {
        __u32 head;
@@ -296,6 +376,7 @@ struct io_uring_params {
 #define IORING_FEAT_NATIVE_WORKERS     (1U << 9)
 #define IORING_FEAT_RSRC_TAGS          (1U << 10)
 #define IORING_FEAT_CQE_SKIP           (1U << 11)
+#define IORING_FEAT_LINKED_FILE                (1U << 12)
 
 /*
  * io_uring_register(2) opcodes and arguments
@@ -332,6 +413,10 @@ enum {
        IORING_REGISTER_RING_FDS                = 20,
        IORING_UNREGISTER_RING_FDS              = 21,
 
+       /* register ring based provide buffer group */
+       IORING_REGISTER_PBUF_RING               = 22,
+       IORING_UNREGISTER_PBUF_RING             = 23,
+
        /* this goes last */
        IORING_REGISTER_LAST
 };
@@ -349,9 +434,15 @@ struct io_uring_files_update {
        __aligned_u64 /* __s32 * */ fds;
 };
 
+/*
+ * Register a fully sparse file space, rather than pass in an array of all
+ * -1 file descriptors.
+ */
+#define IORING_RSRC_REGISTER_SPARSE    (1U << 0)
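A sketch of registering a 64-slot sparse file table through the raw syscall (glibc has no io_uring_register() wrapper, hence syscall(2); recent liburing wraps the same operation as io_uring_register_files_sparse()):

        struct io_uring_rsrc_register reg = {
                .nr = 64,
                .flags = IORING_RSRC_REGISTER_SPARSE,  /* no fd array needed */
        };

        ret = syscall(__NR_io_uring_register, ring_fd,
                      IORING_REGISTER_FILES2, &reg, sizeof(reg));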
+
 struct io_uring_rsrc_register {
        __u32 nr;
-       __u32 resv;
+       __u32 flags;
        __u64 resv2;
        __aligned_u64 data;
        __aligned_u64 tags;
@@ -403,6 +494,38 @@ struct io_uring_restriction {
        __u32 resv2[3];
 };
 
+struct io_uring_buf {
+       __u64   addr;
+       __u32   len;
+       __u16   bid;
+       __u16   resv;
+};
+
+struct io_uring_buf_ring {
+       union {
+               /*
+                * To avoid spilling into more pages than we need to, the
+                * ring tail is overlaid with the io_uring_buf->resv field.
+                */
+               struct {
+                       __u64   resv1;
+                       __u32   resv2;
+                       __u16   resv3;
+                       __u16   tail;
+               };
+               struct io_uring_buf     bufs[0];
+       };
+};
+
+/* argument for IORING_(UN)REGISTER_PBUF_RING */
+struct io_uring_buf_reg {
+       __u64   ring_addr;
+       __u32   ring_entries;
+       __u16   bgid;
+       __u16   pad;
+       __u64   resv[3];
+};
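Registering such a ring is then one opcode; a hedged sketch in raw syscall form (ring_mem is an assumed, application-allocated, page-aligned mapping; liburing later wraps this as io_uring_register_buf_ring()):

        struct io_uring_buf_reg reg = {
                .ring_addr = (unsigned long)ring_mem,
                .ring_entries = 8,              /* must be a power of 2 */
                .bgid = 0,                      /* buffer group ID */
        };

        ret = syscall(__NR_io_uring_register, ring_fd,
                      IORING_REGISTER_PBUF_RING, &reg, 1);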
+
 /*
  * io_uring_restriction->opcode values
  */
index 91a6fe4e02c08c4b6ac6f1fb91f8f9e3fce85c0f..6a184d260c7f2e17d05831e702410175b18e550e 100644 (file)
@@ -445,7 +445,13 @@ struct kvm_run {
 #define KVM_SYSTEM_EVENT_RESET          2
 #define KVM_SYSTEM_EVENT_CRASH          3
                        __u32 type;
-                       __u64 flags;
+                       __u32 ndata;
+                       union {
+#ifndef __KERNEL__
+                               __u64 flags;
+#endif
+                               __u64 data[16];
+                       };
                } system_event;
                /* KVM_EXIT_S390_STSI */
                struct {
@@ -1144,6 +1150,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_MEM_OP_EXTENSION 211
 #define KVM_CAP_PMU_CAPABILITY 212
 #define KVM_CAP_DISABLE_QUIRKS2 213
+/* #define KVM_CAP_VM_TSC_CONTROL 214 */
+#define KVM_CAP_SYSTEM_EVENT_DATA 215
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
index 98e60801195e29a5e38d7f2b2b3fb40d39d34364..6f63527dd2ed6ad964ee54c4d72e6f5a78e245ed 100644 (file)
@@ -1,11 +1,6 @@
 /* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */
 /*
- * include/linux/loop.h
- *
- * Written by Theodore Ts'o, 3/29/93.
- *
- * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
- * permitted under the GNU General Public License.
+ * Copyright 1993 by Theodore Ts'o.
  */
 #ifndef _UAPI_LINUX_LOOP_H
 #define _UAPI_LINUX_LOOP_H
index b2e43185e3b5562cc17015d5d5274aa4552af92b..2f76cba6716637baff53e167a6141b68420d75c3 100644 (file)
@@ -70,6 +70,28 @@ struct nvme_passthru_cmd64 {
        __u64   result;
 };
 
+/* same as struct nvme_passthru_cmd64, minus the 8-byte result field */
+struct nvme_uring_cmd {
+       __u8    opcode;
+       __u8    flags;
+       __u16   rsvd1;
+       __u32   nsid;
+       __u32   cdw2;
+       __u32   cdw3;
+       __u64   metadata;
+       __u64   addr;
+       __u32   metadata_len;
+       __u32   data_len;
+       __u32   cdw10;
+       __u32   cdw11;
+       __u32   cdw12;
+       __u32   cdw13;
+       __u32   cdw14;
+       __u32   cdw15;
+       __u32   timeout_ms;
+       __u32   rsvd2;
+};
+
 #define nvme_admin_cmd nvme_passthru_cmd
 
 #define NVME_IOCTL_ID          _IO('N', 0x40)
@@ -83,4 +105,10 @@ struct nvme_passthru_cmd64 {
 #define NVME_IOCTL_IO64_CMD    _IOWR('N', 0x48, struct nvme_passthru_cmd64)
 #define NVME_IOCTL_IO64_CMD_VEC        _IOWR('N', 0x49, struct nvme_passthru_cmd64)
 
+/* io_uring async commands: */
+#define NVME_URING_CMD_IO      _IOWR('N', 0x80, struct nvme_uring_cmd)
+#define NVME_URING_CMD_IO_VEC  _IOWR('N', 0x81, struct nvme_uring_cmd)
+#define NVME_URING_CMD_ADMIN   _IOWR('N', 0x82, struct nvme_uring_cmd)
+#define NVME_URING_CMD_ADMIN_VEC _IOWR('N', 0x83, struct nvme_uring_cmd)
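A hedged sketch of an async NVMe read through this path: the ring must be created with IORING_SETUP_SQE128 so the command fits in the big SQE, and the fd is the NVMe generic character device (the /dev/ng0n1 name and the ng_fd/nsid/slba/nlb/buf identifiers are assumptions for illustration):

        struct io_uring_sqe *sqe = io_uring_get_sqe(&ring); /* SQE128 ring */
        struct nvme_uring_cmd *cmd;

        memset(sqe, 0, 2 * sizeof(*sqe));       /* clear the full 128-byte SQE */
        sqe->opcode = IORING_OP_URING_CMD;
        sqe->fd = ng_fd;                        /* e.g. /dev/ng0n1 */
        sqe->cmd_op = NVME_URING_CMD_IO;

        cmd = (struct nvme_uring_cmd *)sqe->cmd;
        cmd->opcode = 0x02;                     /* NVMe read */
        cmd->nsid = nsid;
        cmd->addr = (__u64)(unsigned long)buf;
        cmd->data_len = nlb * 512;              /* assumes 512-byte LBAs */
        cmd->cdw10 = (__u32)slba;               /* starting LBA, low 32 bits */
        cmd->cdw11 = (__u32)(slba >> 32);
        cmd->cdw12 = nlb - 1;                   /* 0-based block count */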
+
 #endif /* _UAPI_LINUX_NVME_IOCTL_H */
index 283c5a7b3f2c813f72c81181010a67af551026cb..db6c8588c1d0c130360123ed156de2414a11dd92 100644 (file)
@@ -184,7 +184,7 @@ struct rfkill_event_ext {
 #define RFKILL_IOC_NOINPUT     1
 #define RFKILL_IOCTL_NOINPUT   _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT)
 #define RFKILL_IOC_MAX_SIZE    2
-#define RFKILL_IOCTL_MAX_SIZE  _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_EXT_SIZE, __u32)
+#define RFKILL_IOCTL_MAX_SIZE  _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_MAX_SIZE, __u32)
 
 /* and that's all userspace gets */
 
diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h
new file mode 100644 (file)
index 0000000..256aaef
--- /dev/null
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Userspace interface for AMD SEV and SNP guest driver.
+ *
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * SEV API specification is available at: https://developer.amd.com/sev/
+ */
+
+#ifndef __UAPI_LINUX_SEV_GUEST_H_
+#define __UAPI_LINUX_SEV_GUEST_H_
+
+#include <linux/types.h>
+
+struct snp_report_req {
+       /* user data that should be included in the report */
+       __u8 user_data[64];
+
+       /* The vmpl level to be included in the report */
+       __u32 vmpl;
+
+       /* Must be zero filled */
+       __u8 rsvd[28];
+};
+
+struct snp_report_resp {
+       /* response data, see SEV-SNP spec for the format */
+       __u8 data[4000];
+};
+
+struct snp_derived_key_req {
+       __u32 root_key_select;
+       __u32 rsvd;
+       __u64 guest_field_select;
+       __u32 vmpl;
+       __u32 guest_svn;
+       __u64 tcb_version;
+};
+
+struct snp_derived_key_resp {
+       /* response data, see SEV-SNP spec for the format */
+       __u8 data[64];
+};
+
+struct snp_guest_request_ioctl {
+       /* message version number (must be non-zero) */
+       __u8 msg_version;
+
+       /* Request and response structure address */
+       __u64 req_data;
+       __u64 resp_data;
+
+       /* firmware error code on failure (see psp-sev.h) */
+       __u64 fw_err;
+};
+
+struct snp_ext_report_req {
+       struct snp_report_req data;
+
+       /* where to copy the certificate blob */
+       __u64 certs_address;
+
+       /* length of the certificate blob */
+       __u32 certs_len;
+};
+
+#define SNP_GUEST_REQ_IOC_TYPE 'S'
+
+/* Get SNP attestation report */
+#define SNP_GET_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x0, struct snp_guest_request_ioctl)
+
+/* Get a derived key from the root */
+#define SNP_GET_DERIVED_KEY _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x1, struct snp_guest_request_ioctl)
+
+/* Get SNP extended report as defined in the GHCB specification version 2. */
+#define SNP_GET_EXT_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x2, struct snp_guest_request_ioctl)
+
+#endif /* __UAPI_LINUX_SEV_GUEST_H_ */
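A hedged usage sketch (the /dev/sev-guest path matches the misc device the driver registers, and nonce is assumed caller-provided):

        struct snp_report_req req = { .vmpl = 0 };
        struct snp_report_resp resp = { };
        struct snp_guest_request_ioctl guest = {
                .msg_version = 1,
                .req_data = (__u64)(unsigned long)&req,
                .resp_data = (__u64)(unsigned long)&resp,
        };
        int fd = open("/dev/sev-guest", O_RDWR);

        memcpy(req.user_data, nonce, sizeof(req.user_data)); /* 64-byte nonce */
        if (fd < 0 || ioctl(fd, SNP_GET_REPORT, &guest) < 0)
                fprintf(stderr, "report failed, fw_err=0x%llx\n",
                        (unsigned long long)guest.fw_err);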
index 3021ea25a2849ed351c0c064fdf4a4780a9f7b29..7837ba4fe7289024c10942a7793411e0e3b27e7f 100644 (file)
@@ -1,4 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_STDDEF_H
+#define _UAPI_LINUX_STDDEF_H
+
 #include <linux/compiler_types.h>
 
 #ifndef __always_inline
@@ -41,3 +44,4 @@
                struct { } __empty_ ## NAME; \
                TYPE NAME[]; \
        }
+#endif
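The guard makes it safe for other uapi headers to include this file for __DECLARE_FLEX_ARRAY(), whose tail is visible above; its point is to let a flexible array member sit inside a union. An illustrative (hypothetical) use:

        struct example_msg {
                __u32 type;
                __u32 len;
                union {
                        __DECLARE_FLEX_ARRAY(__u8, raw);
                        __DECLARE_FLEX_ARRAY(__u32, words);
                };
        };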
index 80d76b75bccd9e2f96b20db96189319f9546acd5..7aa2eb76620508fdc915533f74973d76308d3ef5 100644 (file)
  * Virtio Transitional IDs
  */
 
-#define VIRTIO_TRANS_ID_NET            1000 /* transitional virtio net */
-#define VIRTIO_TRANS_ID_BLOCK          1001 /* transitional virtio block */
-#define VIRTIO_TRANS_ID_BALLOON                1002 /* transitional virtio balloon */
-#define VIRTIO_TRANS_ID_CONSOLE                1003 /* transitional virtio console */
-#define VIRTIO_TRANS_ID_SCSI           1004 /* transitional virtio SCSI */
-#define VIRTIO_TRANS_ID_RNG            1005 /* transitional virtio rng */
-#define VIRTIO_TRANS_ID_9P             1009 /* transitional virtio 9p console */
+#define VIRTIO_TRANS_ID_NET            0x1000 /* transitional virtio net */
+#define VIRTIO_TRANS_ID_BLOCK          0x1001 /* transitional virtio block */
+#define VIRTIO_TRANS_ID_BALLOON                0x1002 /* transitional virtio balloon */
+#define VIRTIO_TRANS_ID_CONSOLE                0x1003 /* transitional virtio console */
+#define VIRTIO_TRANS_ID_SCSI           0x1004 /* transitional virtio SCSI */
+#define VIRTIO_TRANS_ID_RNG            0x1005 /* transitional virtio rng */
+#define VIRTIO_TRANS_ID_9P             0x1009 /* transitional virtio 9p */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
index 471d71935e90a55b825cf3ccbc8e656ac89e2df8..847a82bfe0e3a6a20116775c8684d7cd10e8d67b 100644 (file)
@@ -114,7 +114,8 @@ obj-$(CONFIG_CPU_PM) += cpu_pm.o
 obj-$(CONFIG_BPF) += bpf/
 obj-$(CONFIG_KCSAN) += kcsan/
 obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
-obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o
+obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o
+obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o
 obj-$(CONFIG_CFI_CLANG) += cfi.o
 
 obj-$(CONFIG_PERF_EVENTS) += events/
index ea2ee1181921e13592f19aae0fc7a0d4f7857ac5..f3a2abd6d1a1922c8e19e44e3b26a49064e0f521 100644 (file)
@@ -1959,6 +1959,12 @@ void __audit_uring_exit(int success, long code)
 {
        struct audit_context *ctx = audit_context();
 
+       if (ctx->dummy) {
+               if (ctx->context != AUDIT_CTX_URING)
+                       return;
+               goto out;
+       }
+
        if (ctx->context == AUDIT_CTX_SYSCALL) {
                /*
                 * NOTE: See the note in __audit_uring_entry() about the case
index d56ee177d5f8fb4e5d397f65d14f5736a19d444d..2dfe1079f7727ad836feca67861c3c9e7356fddb 100644 (file)
@@ -27,6 +27,7 @@ config BPF_SYSCALL
        bool "Enable bpf() system call"
        select BPF
        select IRQ_WORK
+       select TASKS_RCU if PREEMPTION
        select TASKS_TRACE_RCU
        select BINARY_PRINTF
        select NET_SOCK_MSG if NET
index 9390bfd9f1cd382e6e08a9d12df051fd3722c636..71a418858a5e0d9861131c600bec5efd62b5ae4a 100644 (file)
@@ -3390,8 +3390,11 @@ static struct notifier_block cpuset_track_online_nodes_nb = {
  */
 void __init cpuset_init_smp(void)
 {
-       cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
-       top_cpuset.mems_allowed = node_states[N_MEMORY];
+       /*
+        * cpus_allowed/mems_allowed set to v2 values in the initial
+        * cpuset_bind() call will be reset to v1 values in another
+        * cpuset_bind() call when v1 cpuset is mounted.
+        */
        top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
 
        cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
diff --git a/kernel/configs/x86_debug.config b/kernel/configs/x86_debug.config
new file mode 100644 (file)
index 0000000..dcd86f3
--- /dev/null
@@ -0,0 +1,18 @@
+CONFIG_X86_DEBUG_FPU=y
+CONFIG_LOCK_STAT=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_VMACACHE=y
+CONFIG_DEBUG_VM_RB=y
+CONFIG_DEBUG_SLAB=y
+CONFIG_DEBUG_KMEMLEAK=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_SLUB_DEBUG_ON=y
+CONFIG_KMEMCHECK=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
+CONFIG_GCOV_KERNEL=y
+CONFIG_LOCKDEP=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_SCHEDSTATS=y
+CONFIG_VMLINUX_VALIDATION=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
index 5797c2a7a93f415dd622c8aaf34bb98e9e7d7e61..bbad5e375d3ba1e6d841979cb82195ab6d427ede 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/cpuset.h>
 #include <linux/random.h>
+#include <linux/cc_platform.h>
 
 #include <trace/events/power.h>
 #define CREATE_TRACE_POINTS
@@ -71,7 +72,6 @@ struct cpuhp_cpu_state {
        bool                    rollback;
        bool                    single;
        bool                    bringup;
-       int                     cpu;
        struct hlist_node       *node;
        struct hlist_node       *last;
        enum cpuhp_state        cb_state;
@@ -475,7 +475,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
 #endif
 
 static inline enum cpuhp_state
-cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
 {
        enum cpuhp_state prev_state = st->state;
        bool bringup = st->state < target;
@@ -486,14 +486,15 @@ cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
        st->target = target;
        st->single = false;
        st->bringup = bringup;
-       if (cpu_dying(st->cpu) != !bringup)
-               set_cpu_dying(st->cpu, !bringup);
+       if (cpu_dying(cpu) != !bringup)
+               set_cpu_dying(cpu, !bringup);
 
        return prev_state;
 }
 
 static inline void
-cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
+cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
+                 enum cpuhp_state prev_state)
 {
        bool bringup = !st->bringup;
 
@@ -520,8 +521,8 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
        }
 
        st->bringup = bringup;
-       if (cpu_dying(st->cpu) != !bringup)
-               set_cpu_dying(st->cpu, !bringup);
+       if (cpu_dying(cpu) != !bringup)
+               set_cpu_dying(cpu, !bringup);
 }
 
 /* Regular hotplug invocation of the AP hotplug thread */
@@ -541,15 +542,16 @@ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
        wait_for_ap_thread(st, st->bringup);
 }
 
-static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
+                        enum cpuhp_state target)
 {
        enum cpuhp_state prev_state;
        int ret;
 
-       prev_state = cpuhp_set_state(st, target);
+       prev_state = cpuhp_set_state(cpu, st, target);
        __cpuhp_kick_ap(st);
        if ((ret = st->result)) {
-               cpuhp_reset_state(st, prev_state);
+               cpuhp_reset_state(cpu, st, prev_state);
                __cpuhp_kick_ap(st);
        }
 
@@ -581,7 +583,7 @@ static int bringup_wait_for_ap(unsigned int cpu)
        if (st->target <= CPUHP_AP_ONLINE_IDLE)
                return 0;
 
-       return cpuhp_kick_ap(st, st->target);
+       return cpuhp_kick_ap(cpu, st, st->target);
 }
 
 static int bringup_cpu(unsigned int cpu)
@@ -704,7 +706,7 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
                         ret, cpu, cpuhp_get_step(st->state)->name,
                         st->state);
 
-               cpuhp_reset_state(st, prev_state);
+               cpuhp_reset_state(cpu, st, prev_state);
                if (can_rollback_cpu(st))
                        WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
                                                            prev_state));
@@ -715,15 +717,6 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 /*
  * The cpu hotplug threads manage the bringup and teardown of the cpus
  */
-static void cpuhp_create(unsigned int cpu)
-{
-       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-
-       init_completion(&st->done_up);
-       init_completion(&st->done_down);
-       st->cpu = cpu;
-}
-
 static int cpuhp_should_run(unsigned int cpu)
 {
        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
@@ -875,7 +868,7 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
        cpuhp_lock_release(true);
 
        trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
-       ret = cpuhp_kick_ap(st, st->target);
+       ret = cpuhp_kick_ap(cpu, st, st->target);
        trace_cpuhp_exit(cpu, st->state, prev_state, ret);
 
        return ret;
@@ -883,15 +876,27 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
 
 static struct smp_hotplug_thread cpuhp_threads = {
        .store                  = &cpuhp_state.thread,
-       .create                 = &cpuhp_create,
        .thread_should_run      = cpuhp_should_run,
        .thread_fn              = cpuhp_thread_fun,
        .thread_comm            = "cpuhp/%u",
        .selfparking            = true,
 };
 
+static __init void cpuhp_init_state(void)
+{
+       struct cpuhp_cpu_state *st;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               st = per_cpu_ptr(&cpuhp_state, cpu);
+               init_completion(&st->done_up);
+               init_completion(&st->done_down);
+       }
+}
+
 void __init cpuhp_threads_init(void)
 {
+       cpuhp_init_state();
        BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
        kthread_unpark(this_cpu_read(cpuhp_state.thread));
 }
@@ -1107,7 +1112,7 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
                         ret, cpu, cpuhp_get_step(st->state)->name,
                         st->state);
 
-               cpuhp_reset_state(st, prev_state);
+               cpuhp_reset_state(cpu, st, prev_state);
 
                if (st->state < prev_state)
                        WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
@@ -1134,7 +1139,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 
        cpuhp_tasks_frozen = tasks_frozen;
 
-       prev_state = cpuhp_set_state(st, target);
+       prev_state = cpuhp_set_state(cpu, st, target);
        /*
         * If the current CPU state is in the range of the AP hotplug thread,
         * then we need to kick the thread.
@@ -1165,7 +1170,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
        ret = cpuhp_down_callbacks(cpu, st, target);
        if (ret && st->state < prev_state) {
                if (st->state == CPUHP_TEARDOWN_CPU) {
-                       cpuhp_reset_state(st, prev_state);
+                       cpuhp_reset_state(cpu, st, prev_state);
                        __cpuhp_kick_ap(st);
                } else {
                        WARN(1, "DEAD callback error for CPU%d", cpu);
@@ -1186,6 +1191,12 @@ out:
 
 static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
 {
+       /*
+        * If the platform does not support hotplug, report it explicitly to
+        * differentiate it from a transient offlining failure.
+        */
+       if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED))
+               return -EOPNOTSUPP;
        if (cpu_hotplug_disabled)
                return -EBUSY;
        return _cpu_down(cpu, 0, target);
@@ -1352,7 +1363,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 
        cpuhp_tasks_frozen = tasks_frozen;
 
-       cpuhp_set_state(st, target);
+       cpuhp_set_state(cpu, st, target);
        /*
         * If the current CPU state is in the range of the AP hotplug thread,
         * then we need to kick the thread once more.
index 4632b0f4f72eb08c1cea667b9d0e248e5dd1a7b3..8a6cd53dbe8ce11018eb9c05729256576e7d917e 100644 (file)
@@ -114,6 +114,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
                dma_direct_sync_single_for_cpu(dev, addr, size, dir);
 
        if (unlikely(is_swiotlb_buffer(dev, phys)))
-               swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
+               swiotlb_tbl_unmap_single(dev, phys, size, dir,
+                                        attrs | DMA_ATTR_SKIP_CPU_SYNC);
 }
 #endif /* _KERNEL_DMA_DIRECT_H */
index e57a224d6b794f27eb74f1a57c62f9a2b1555641..93c3b86e781c143f73092f3e48d7ed073471ce82 100644 (file)
@@ -392,7 +392,7 @@ DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
 DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
 void dynamic_irqentry_exit_cond_resched(void)
 {
-       if (!static_key_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
+       if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
                return;
        raw_irqentry_exit_cond_resched();
 }
index cfde994ce61c8a8b6b47ba9d5d9153d1590c8297..7f1e4c5897e7558db09ddb178fbd77bac3dec4aa 100644 (file)
@@ -574,8 +574,7 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
                              enum event_type_t event_type);
 
 static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
-                            enum event_type_t event_type,
-                            struct task_struct *task);
+                            enum event_type_t event_type);
 
 static void update_context_time(struct perf_event_context *ctx);
 static u64 perf_event_time(struct perf_event *event);
@@ -781,7 +780,6 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx,
 static inline void update_cgrp_time_from_event(struct perf_event *event)
 {
        struct perf_cgroup_info *info;
-       struct perf_cgroup *cgrp;
 
        /*
         * ensure we access cgroup data only when needed and
@@ -790,21 +788,19 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
        if (!is_cgroup_event(event))
                return;
 
-       cgrp = perf_cgroup_from_task(current, event->ctx);
+       info = this_cpu_ptr(event->cgrp->info);
        /*
         * Do not update time when cgroup is not active
         */
-       if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) {
-               info = this_cpu_ptr(event->cgrp->info);
+       if (info->active)
                __update_cgrp_time(info, perf_clock(), true);
-       }
 }
 
 static inline void
-perf_cgroup_set_timestamp(struct task_struct *task,
-                         struct perf_event_context *ctx)
+perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx)
 {
-       struct perf_cgroup *cgrp;
+       struct perf_event_context *ctx = &cpuctx->ctx;
+       struct perf_cgroup *cgrp = cpuctx->cgrp;
        struct perf_cgroup_info *info;
        struct cgroup_subsys_state *css;
 
@@ -813,10 +809,10 @@ perf_cgroup_set_timestamp(struct task_struct *task,
         * ensure we do not access cgroup data
         * unless we have the cgroup pinned (css_get)
         */
-       if (!task || !ctx->nr_cgroups)
+       if (!cgrp)
                return;
 
-       cgrp = perf_cgroup_from_task(task, ctx);
+       WARN_ON_ONCE(!ctx->nr_cgroups);
 
        for (css = &cgrp->css; css; css = css->parent) {
                cgrp = container_of(css, struct perf_cgroup, css);
@@ -828,17 +824,12 @@ perf_cgroup_set_timestamp(struct task_struct *task,
 
 static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list);
 
-#define PERF_CGROUP_SWOUT      0x1 /* cgroup switch out every event */
-#define PERF_CGROUP_SWIN       0x2 /* cgroup switch in events based on task */
-
 /*
  * reschedule events based on the cgroup constraint of task.
- *
- * mode SWOUT : schedule out everything
- * mode SWIN : schedule in based on cgroup for next
  */
-static void perf_cgroup_switch(struct task_struct *task, int mode)
+static void perf_cgroup_switch(struct task_struct *task)
 {
+       struct perf_cgroup *cgrp;
        struct perf_cpu_context *cpuctx, *tmp;
        struct list_head *list;
        unsigned long flags;
@@ -849,35 +840,31 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
         */
        local_irq_save(flags);
 
+       cgrp = perf_cgroup_from_task(task, NULL);
+
        list = this_cpu_ptr(&cgrp_cpuctx_list);
        list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) {
                WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
+               if (READ_ONCE(cpuctx->cgrp) == cgrp)
+                       continue;
 
                perf_ctx_lock(cpuctx, cpuctx->task_ctx);
                perf_pmu_disable(cpuctx->ctx.pmu);
 
-               if (mode & PERF_CGROUP_SWOUT) {
-                       cpu_ctx_sched_out(cpuctx, EVENT_ALL);
-                       /*
-                        * must not be done before ctxswout due
-                        * to event_filter_match() in event_sched_out()
-                        */
-                       cpuctx->cgrp = NULL;
-               }
+               cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+               /*
+                * must not be done before ctxswout due
+                * to update_cgrp_time_from_cpuctx() in
+                * ctx_sched_out()
+                */
+               cpuctx->cgrp = cgrp;
+               /*
+                * set cgrp before ctxsw in to allow
+                * perf_cgroup_set_timestamp() in ctx_sched_in()
+                * to not have to pass task around
+                */
+               cpu_ctx_sched_in(cpuctx, EVENT_ALL);
 
-               if (mode & PERF_CGROUP_SWIN) {
-                       WARN_ON_ONCE(cpuctx->cgrp);
-                       /*
-                        * set cgrp before ctxsw in to allow
-                        * event_filter_match() to not have to pass
-                        * task around
-                        * we pass the cpuctx->ctx to perf_cgroup_from_task()
-                        * because cgorup events are only per-cpu
-                        */
-                       cpuctx->cgrp = perf_cgroup_from_task(task,
-                                                            &cpuctx->ctx);
-                       cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
-               }
                perf_pmu_enable(cpuctx->ctx.pmu);
                perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
        }
@@ -885,58 +872,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
        local_irq_restore(flags);
 }
 
-static inline void perf_cgroup_sched_out(struct task_struct *task,
-                                        struct task_struct *next)
-{
-       struct perf_cgroup *cgrp1;
-       struct perf_cgroup *cgrp2 = NULL;
-
-       rcu_read_lock();
-       /*
-        * we come here when we know perf_cgroup_events > 0
-        * we do not need to pass the ctx here because we know
-        * we are holding the rcu lock
-        */
-       cgrp1 = perf_cgroup_from_task(task, NULL);
-       cgrp2 = perf_cgroup_from_task(next, NULL);
-
-       /*
-        * only schedule out current cgroup events if we know
-        * that we are switching to a different cgroup. Otherwise,
-        * do no touch the cgroup events.
-        */
-       if (cgrp1 != cgrp2)
-               perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
-
-       rcu_read_unlock();
-}
-
-static inline void perf_cgroup_sched_in(struct task_struct *prev,
-                                       struct task_struct *task)
-{
-       struct perf_cgroup *cgrp1;
-       struct perf_cgroup *cgrp2 = NULL;
-
-       rcu_read_lock();
-       /*
-        * we come here when we know perf_cgroup_events > 0
-        * we do not need to pass the ctx here because we know
-        * we are holding the rcu lock
-        */
-       cgrp1 = perf_cgroup_from_task(task, NULL);
-       cgrp2 = perf_cgroup_from_task(prev, NULL);
-
-       /*
-        * only need to schedule in cgroup events if we are changing
-        * cgroup during ctxsw. Cgroup events were not scheduled
-        * out of ctxsw out if that was not the case.
-        */
-       if (cgrp1 != cgrp2)
-               perf_cgroup_switch(task, PERF_CGROUP_SWIN);
-
-       rcu_read_unlock();
-}
-
 static int perf_cgroup_ensure_storage(struct perf_event *event,
                                struct cgroup_subsys_state *css)
 {
@@ -1032,22 +967,10 @@ perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ct
         */
        cpuctx = container_of(ctx, struct perf_cpu_context, ctx);
 
-       /*
-        * Since setting cpuctx->cgrp is conditional on the current @cgrp
-        * matching the event's cgroup, we must do this for every new event,
-        * because if the first would mismatch, the second would not try again
-        * and we would leave cpuctx->cgrp unset.
-        */
-       if (ctx->is_active && !cpuctx->cgrp) {
-               struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
-
-               if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
-                       cpuctx->cgrp = cgrp;
-       }
-
        if (ctx->nr_cgroups++)
                return;
 
+       cpuctx->cgrp = perf_cgroup_from_task(current, ctx);
        list_add(&cpuctx->cgrp_cpuctx_entry,
                        per_cpu_ptr(&cgrp_cpuctx_list, event->cpu));
 }
@@ -1069,9 +992,7 @@ perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *c
        if (--ctx->nr_cgroups)
                return;
 
-       if (ctx->is_active && cpuctx->cgrp)
-               cpuctx->cgrp = NULL;
-
+       cpuctx->cgrp = NULL;
        list_del(&cpuctx->cgrp_cpuctx_entry);
 }
 
@@ -1100,16 +1021,6 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx,
 {
 }
 
-static inline void perf_cgroup_sched_out(struct task_struct *task,
-                                        struct task_struct *next)
-{
-}
-
-static inline void perf_cgroup_sched_in(struct task_struct *prev,
-                                       struct task_struct *task)
-{
-}
-
 static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
                                      struct perf_event_attr *attr,
                                      struct perf_event *group_leader)
@@ -1118,13 +1029,7 @@ static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
 }
 
 static inline void
-perf_cgroup_set_timestamp(struct task_struct *task,
-                         struct perf_event_context *ctx)
-{
-}
-
-static inline void
-perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
+perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx)
 {
 }
 
@@ -1147,6 +1052,10 @@ static inline void
 perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
 {
 }
+
+static void perf_cgroup_switch(struct task_struct *task)
+{
+}
 #endif
 
 /*
@@ -2713,8 +2622,7 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 static void
 ctx_sched_in(struct perf_event_context *ctx,
             struct perf_cpu_context *cpuctx,
-            enum event_type_t event_type,
-            struct task_struct *task);
+            enum event_type_t event_type);
 
 static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
                               struct perf_event_context *ctx,
@@ -2730,15 +2638,14 @@ static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
 }
 
 static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
-                               struct perf_event_context *ctx,
-                               struct task_struct *task)
+                               struct perf_event_context *ctx)
 {
-       cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task);
+       cpu_ctx_sched_in(cpuctx, EVENT_PINNED);
        if (ctx)
-               ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
-       cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
+               ctx_sched_in(ctx, cpuctx, EVENT_PINNED);
+       cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
        if (ctx)
-               ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
+               ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
 }
 
 /*
@@ -2788,7 +2695,7 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
        else if (ctx_event_type & EVENT_PINNED)
                cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
 
-       perf_event_sched_in(cpuctx, task_ctx, current);
+       perf_event_sched_in(cpuctx, task_ctx);
        perf_pmu_enable(cpuctx->ctx.pmu);
 }
 
@@ -3011,7 +2918,7 @@ static void __perf_event_enable(struct perf_event *event,
                return;
 
        if (!event_filter_match(event)) {
-               ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
+               ctx_sched_in(ctx, cpuctx, EVENT_TIME);
                return;
        }
 
@@ -3020,7 +2927,7 @@ static void __perf_event_enable(struct perf_event *event,
         * then don't put it on unless the group is on.
         */
        if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
-               ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
+               ctx_sched_in(ctx, cpuctx, EVENT_TIME);
                return;
        }
 
@@ -3668,7 +3575,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
         * cgroup event are system-wide mode only
         */
        if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-               perf_cgroup_sched_out(task, next);
+               perf_cgroup_switch(next);
 }
 
 /*
@@ -3865,8 +3772,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
 static void
 ctx_sched_in(struct perf_event_context *ctx,
             struct perf_cpu_context *cpuctx,
-            enum event_type_t event_type,
-            struct task_struct *task)
+            enum event_type_t event_type)
 {
        int is_active = ctx->is_active;
 
@@ -3878,7 +3784,7 @@ ctx_sched_in(struct perf_event_context *ctx,
        if (is_active ^ EVENT_TIME) {
                /* start ctx time */
                __update_context_time(ctx, false);
-               perf_cgroup_set_timestamp(task, ctx);
+               perf_cgroup_set_timestamp(cpuctx);
                /*
                 * CPU-release for the below ->is_active store,
                 * see __load_acquire() in perf_event_time_now()
@@ -3909,12 +3815,11 @@ ctx_sched_in(struct perf_event_context *ctx,
 }
 
 static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
-                            enum event_type_t event_type,
-                            struct task_struct *task)
+                            enum event_type_t event_type)
 {
        struct perf_event_context *ctx = &cpuctx->ctx;
 
-       ctx_sched_in(ctx, cpuctx, event_type, task);
+       ctx_sched_in(ctx, cpuctx, event_type);
 }
 
 static void perf_event_context_sched_in(struct perf_event_context *ctx,
@@ -3956,7 +3861,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
         */
        if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
                cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-       perf_event_sched_in(cpuctx, ctx, task);
+       perf_event_sched_in(cpuctx, ctx);
 
        if (cpuctx->sched_cb_usage && pmu->sched_task)
                pmu->sched_task(cpuctx->task_ctx, true);
@@ -3984,16 +3889,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
        struct perf_event_context *ctx;
        int ctxn;
 
-       /*
-        * If cgroup events exist on this CPU, then we need to check if we have
-        * to switch in PMU state; cgroup event are system-wide mode only.
-        *
-        * Since cgroup events are CPU events, we must schedule these in before
-        * we schedule in the task events.
-        */
-       if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-               perf_cgroup_sched_in(prev, task);
-
        for_each_task_context_nr(ctxn) {
                ctx = task->perf_event_ctxp[ctxn];
                if (likely(!ctx))
@@ -4267,7 +4162,7 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
        if (cpu_event)
                rotate_ctx(&cpuctx->ctx, cpu_event);
 
-       perf_event_sched_in(cpuctx, task_ctx, current);
+       perf_event_sched_in(cpuctx, task_ctx);
 
        perf_pmu_enable(cpuctx->ctx.pmu);
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
@@ -4339,7 +4234,7 @@ static void perf_event_enable_on_exec(int ctxn)
                clone_ctx = unclone_ctx(ctx);
                ctx_resched(cpuctx, ctx, event_type);
        } else {
-               ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
+               ctx_sched_in(ctx, cpuctx, EVENT_TIME);
        }
        perf_ctx_unlock(cpuctx, ctx);
 
@@ -6352,7 +6247,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 again:
        mutex_lock(&event->mmap_mutex);
        if (event->rb) {
-               if (event->rb->nr_pages != nr_pages) {
+               if (data_page_nr(event->rb) != nr_pages) {
                        ret = -EINVAL;
                        goto unlock;
                }
@@ -11635,6 +11530,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
        event->state            = PERF_EVENT_STATE_INACTIVE;
 
+       if (parent_event)
+               event->event_caps = parent_event->event_caps;
+
        if (event->attr.sigtrap)
                atomic_set(&event->event_limit, 1);
 
@@ -12319,6 +12217,9 @@ SYSCALL_DEFINE5(perf_event_open,
                 * Do not allow to attach to a group in a different task
                 * or CPU context. If we're moving SW events, we'll fix
                 * this up later, so allow that.
+                *
+                * Racy, not holding group_leader->ctx->mutex, see comment with
+                * perf_event_ctx_lock().
                 */
                if (!move_group && group_leader->ctx != ctx)
                        goto err_context;
@@ -12384,6 +12285,7 @@ SYSCALL_DEFINE5(perf_event_open,
                        } else {
                                perf_event_ctx_unlock(group_leader, gctx);
                                move_group = 0;
+                               goto not_move_group;
                        }
                }
 
@@ -12400,7 +12302,17 @@ SYSCALL_DEFINE5(perf_event_open,
                }
        } else {
                mutex_lock(&ctx->mutex);
+
+               /*
+                * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx,
+                * see the group_leader && !move_group test earlier.
+                */
+               if (group_leader && group_leader->ctx != ctx) {
+                       err = -EINVAL;
+                       goto err_locked;
+               }
        }
+not_move_group:
 
        if (ctx->task == TASK_TOMBSTONE) {
                err = -ESRCH;
@@ -13562,7 +13474,7 @@ static int __perf_cgroup_move(void *info)
 {
        struct task_struct *task = info;
        rcu_read_lock();
-       perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
+       perf_cgroup_switch(task);
        rcu_read_unlock();
        return 0;
 }
index 082832738c8fd97c9382fd70cc5266e1a6fcf082..5150d5f84c033e5c2e7594db13e3d40f334d4eb3 100644 (file)
@@ -116,6 +116,11 @@ static inline int page_order(struct perf_buffer *rb)
 }
 #endif
 
+static inline int data_page_nr(struct perf_buffer *rb)
+{
+       return rb->nr_pages << page_order(rb);
+}
+
 static inline unsigned long perf_data_size(struct perf_buffer *rb)
 {
        return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
index 52868716ec358673c3c67fb130278f162c895283..fb35b926024caeda34d6678f4bc98db6b70069b2 100644 (file)
@@ -859,11 +859,6 @@ void rb_free(struct perf_buffer *rb)
 }
 
 #else
-static int data_page_nr(struct perf_buffer *rb)
-{
-       return rb->nr_pages << page_order(rb);
-}
-
 static struct page *
 __perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
 {
index 9796897560ab18ae96ef0979076bfed083204a7e..35a3beff140b6dccb6b4f1e7971df9eb6aa85bf7 100644 (file)
@@ -792,6 +792,7 @@ void __mmdrop(struct mm_struct *mm)
        mmu_notifier_subscriptions_destroy(mm);
        check_mm(mm);
        put_user_ns(mm->user_ns);
+       mm_pasid_drop(mm);
        free_mm(mm);
 }
 EXPORT_SYMBOL_GPL(__mmdrop);
@@ -1190,7 +1191,6 @@ static inline void __mmput(struct mm_struct *mm)
        }
        if (mm->binfmt)
                module_put(mm->binfmt->module);
-       mm_pasid_drop(mm);
        mmdrop(mm);
 }
 
index f7ff8919dc9bbb1ab2a907c1b28a5605dc894251..d9a5c1d65a79db075f002cbd82b346429c4fb26b 100644 (file)
@@ -258,7 +258,7 @@ static int __irq_build_affinity_masks(unsigned int startvec,
        nodemask_t nodemsk = NODE_MASK_NONE;
        struct node_vectors *node_vectors;
 
-       if (!cpumask_weight(cpu_mask))
+       if (cpumask_empty(cpu_mask))
                return 0;
 
        nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);
@@ -269,8 +269,9 @@ static int __irq_build_affinity_masks(unsigned int startvec,
         */
        if (numvecs <= nodes) {
                for_each_node_mask(n, nodemsk) {
-                       cpumask_or(&masks[curvec].mask, &masks[curvec].mask,
-                                  node_to_cpumask[n]);
+                       /* Ensure that only CPUs which are in both masks are set */
+                       cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
+                       cpumask_or(&masks[curvec].mask, &masks[curvec].mask, nmsk);
                        if (++curvec == last_affv)
                                curvec = firstvec;
                }
index 54af0deb239b8751679505dfb6295d99dc060cb7..e6b8e564b37f049237a39b2d65c486d5b686fd3d 100644 (file)
@@ -1573,17 +1573,12 @@ static struct device *irq_get_parent_device(struct irq_data *data)
 int irq_chip_pm_get(struct irq_data *data)
 {
        struct device *dev = irq_get_parent_device(data);
-       int retval;
+       int retval = 0;
 
-       if (IS_ENABLED(CONFIG_PM) && dev) {
-               retval = pm_runtime_get_sync(dev);
-               if (retval < 0) {
-                       pm_runtime_put_noidle(dev);
-                       return retval;
-               }
-       }
+       if (IS_ENABLED(CONFIG_PM) && dev)
+               retval = pm_runtime_resume_and_get(dev);
 
-       return 0;
+       return retval;
 }
 
 /**
index 2b43f5f5033d13cca9af2bd4c70c96fe641757c7..bc8e40cf2b65adc4c8ac0e1a56909b4dc2f078dc 100644 (file)
@@ -58,6 +58,7 @@ static const struct irq_bit_descr irqchip_flags[] = {
        BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI),
        BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI),
        BIT_MASK_DESCR(IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND),
+       BIT_MASK_DESCR(IRQCHIP_IMMUTABLE),
 };
 
 static void
index 99cbdf55a8bda0db2494aa4433a62d0ee060fe07..f09c60393e559f72ba8f4e26980fd2e73cb140e2 100644 (file)
@@ -29,12 +29,14 @@ extern struct irqaction chained_action;
  * IRQTF_WARNED    - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
  * IRQTF_AFFINITY  - irq thread is requested to adjust affinity
  * IRQTF_FORCED_THREAD  - irq action is force threaded
+ * IRQTF_READY     - signals that irq thread is ready
  */
 enum {
        IRQTF_RUNTHREAD,
        IRQTF_WARNED,
        IRQTF_AFFINITY,
        IRQTF_FORCED_THREAD,
+       IRQTF_READY,
 };
 
 /*
index 0cd02efa3a742ed77fbcafb49869bf45e82520fa..dd76323ea3fd7410d56dc648441595e3474e6426 100644 (file)
@@ -181,7 +181,7 @@ struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode,
                goto err_free_bitmap;
 
        work_ctx->irq_count = num_irqs;
-       init_irq_work(&work_ctx->work, irq_sim_handle_irq);
+       work_ctx->work = IRQ_WORK_INIT_HARD(irq_sim_handle_irq);
 
        return work_ctx->domain;
 
index 939d21cd55c3891a4d7208a519fa85ecc98bc080..d323b180b0f371b1176e1926e2d3130497e146b8 100644 (file)
@@ -407,6 +407,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
        lockdep_set_class(&desc->lock, &irq_desc_lock_class);
        mutex_init(&desc->request_mutex);
        init_rcu_head(&desc->rcu);
+       init_waitqueue_head(&desc->wait_for_threads);
 
        desc_set_defaults(irq, desc, node, affinity, owner);
        irqd_set(&desc->irq_data, flags);
@@ -575,6 +576,7 @@ int __init early_irq_init(void)
                raw_spin_lock_init(&desc[i].lock);
                lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
                mutex_init(&desc[i].request_mutex);
+               init_waitqueue_head(&desc[i].wait_for_threads);
                desc_set_defaults(i, &desc[i], node, NULL, NULL);
        }
        return arch_early_irq_init();
@@ -699,7 +701,6 @@ EXPORT_SYMBOL_GPL(generic_handle_irq_safe);
  */
 int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
 {
-       WARN_ON_ONCE(!in_hardirq());
        return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
 }
 EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
index c03f71d5ec1016541494c37211846ce572b1611a..8c396319d5ac2f4d3619bbdde7fa98dc625e776d 100644 (file)
@@ -222,11 +222,16 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 {
        struct irq_desc *desc = irq_data_to_desc(data);
        struct irq_chip *chip = irq_data_get_irq_chip(data);
+       const struct cpumask  *prog_mask;
        int ret;
 
+       static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
+       static struct cpumask tmp_mask;
+
        if (!chip || !chip->irq_set_affinity)
                return -EINVAL;
 
+       raw_spin_lock(&tmp_mask_lock);
        /*
         * If this is a managed interrupt and housekeeping is enabled on
         * it check whether the requested affinity mask intersects with
@@ -248,24 +253,34 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
         */
        if (irqd_affinity_is_managed(data) &&
            housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) {
-               const struct cpumask *hk_mask, *prog_mask;
-
-               static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
-               static struct cpumask tmp_mask;
+               const struct cpumask *hk_mask;
 
                hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
 
-               raw_spin_lock(&tmp_mask_lock);
                cpumask_and(&tmp_mask, mask, hk_mask);
                if (!cpumask_intersects(&tmp_mask, cpu_online_mask))
                        prog_mask = mask;
                else
                        prog_mask = &tmp_mask;
-               ret = chip->irq_set_affinity(data, prog_mask, force);
-               raw_spin_unlock(&tmp_mask_lock);
        } else {
-               ret = chip->irq_set_affinity(data, mask, force);
+               prog_mask = mask;
        }
+
+       /*
+        * Make sure we only provide online CPUs to the irqchip,
+        * unless we are being asked to force the affinity (in which
+        * case we do as we are told).
+        */
+       cpumask_and(&tmp_mask, prog_mask, cpu_online_mask);
+       if (!force && !cpumask_empty(&tmp_mask))
+               ret = chip->irq_set_affinity(data, &tmp_mask, force);
+       else if (force)
+               ret = chip->irq_set_affinity(data, mask, force);
+       else
+               ret = -EINVAL;
+
+       raw_spin_unlock(&tmp_mask_lock);
+
        switch (ret) {
        case IRQ_SET_MASK_OK:
        case IRQ_SET_MASK_OK_DONE:
@@ -1248,6 +1263,31 @@ static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action)
        raw_spin_unlock_irq(&desc->lock);
 }
 
+/*
+ * Internal function to notify that an interrupt thread is ready.
+ */
+static void irq_thread_set_ready(struct irq_desc *desc,
+                                struct irqaction *action)
+{
+       set_bit(IRQTF_READY, &action->thread_flags);
+       wake_up(&desc->wait_for_threads);
+}
+
+/*
+ * Internal function to wake up an interrupt thread and wait until it is
+ * ready.
+ */
+static void wake_up_and_wait_for_irq_thread_ready(struct irq_desc *desc,
+                                                 struct irqaction *action)
+{
+       if (!action || !action->thread)
+               return;
+
+       wake_up_process(action->thread);
+       wait_event(desc->wait_for_threads,
+                  test_bit(IRQTF_READY, &action->thread_flags));
+}
+
 /*
  * Interrupt handler thread
  */
@@ -1259,6 +1299,8 @@ static int irq_thread(void *data)
        irqreturn_t (*handler_fn)(struct irq_desc *desc,
                        struct irqaction *action);
 
+       irq_thread_set_ready(desc, action);
+
        sched_set_fifo(current);
 
        if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD,
@@ -1683,8 +1725,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
        }
 
        if (!shared) {
-               init_waitqueue_head(&desc->wait_for_threads);
-
                /* Setup the type (level, edge polarity) if configured: */
                if (new->flags & IRQF_TRIGGER_MASK) {
                        ret = __irq_set_trigger(desc,
@@ -1780,14 +1820,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
        irq_setup_timings(desc, new);
 
-       /*
-        * Strictly no need to wake it up, but hung_task complains
-        * when no hard interrupt wakes the thread up.
-        */
-       if (new->thread)
-               wake_up_process(new->thread);
-       if (new->secondary)
-               wake_up_process(new->secondary->thread);
+       wake_up_and_wait_for_irq_thread_ready(desc, new);
+       wake_up_and_wait_for_irq_thread_ready(desc, new->secondary);
 
        register_irq_proc(irq, desc);
        new->dir = NULL;
index bbfb26489aa1c4aceb73dcc99d810bfa3cd2e267..1698e77645acf78ec96107e849e880475dc5da52 100644 (file)
@@ -286,7 +286,7 @@ void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk)
 int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk,
                             unsigned int *mapped_cpu)
 {
-       unsigned int bit, cpu, end = m->alloc_end;
+       unsigned int bit, cpu, end;
        struct cpumap *cm;
 
        if (cpumask_empty(msk))
index 2bdfce5edafd025344c38c47c91e97be623fbac4..a9ee535293eb264d52b1b52d861c1a26cd3fd101 100644 (file)
@@ -818,6 +818,21 @@ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag
                irqd_clr_can_reserve(irqd);
                if (vflags & VIRQ_NOMASK_QUIRK)
                        irqd_set_msi_nomask_quirk(irqd);
+
+               /*
+                * If the interrupt is managed but no CPU is available to
+                * service it, shut it down until better times. Note that
+                * we only do this on the !RESERVE path as x86 (the only
+                * architecture using this flag) deals with this in a
+                * different way by using a catch-all vector.
+                */
+               if ((vflags & VIRQ_ACTIVATE) &&
+                   irqd_affinity_is_managed(irqd) &&
+                   !cpumask_intersects(irq_data_get_affinity_mask(irqd),
+                                       cpu_online_mask)) {
+                           irqd_set_managed_shutdown(irqd);
+                           return 0;
+                   }
        }
 
        if (!(vflags & VIRQ_ACTIVATE))
index f7df715ec28e6de930c7b00d7f2789801761c07a..7afa40fe5cc43ed51bd78ebc4f049685428425dd 100644 (file)
@@ -137,7 +137,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
        if (!irq_work_claim(work))
                return false;
 
-       kasan_record_aux_stack(work);
+       kasan_record_aux_stack_noalloc(work);
 
        preempt_disable();
        if (cpu != smp_processor_id()) {
index 475524bd900ab73bb473d8428438a34141922c7a..b3732b210593086b6402090df6ed3cf1c4a53d1b 100644 (file)
@@ -475,8 +475,11 @@ static int kcov_mmap(struct file *filep, struct vm_area_struct *vma)
        vma->vm_flags |= VM_DONTEXPAND;
        for (off = 0; off < size; off += PAGE_SIZE) {
                page = vmalloc_to_page(kcov->area + off);
-               if (vm_insert_page(vma, vma->vm_start + off, page))
-                       WARN_ONCE(1, "vm_insert_page() failed");
+               res = vm_insert_page(vma, vma->vm_start + off, page);
+               if (res) {
+                       pr_warn_once("kcov: vm_insert_page() failed\n");
+                       return res;
+               }
        }
        return 0;
 exit:
index 68480f731192ebe325f607953dbdcc70d2dfbe18..be4b54c2c615c696c6533fb388dc32e4473c5e7f 100644 (file)
@@ -1078,7 +1078,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu)
                return;
        memset(&prstatus, 0, sizeof(prstatus));
        prstatus.common.pr_pid = current->pid;
-       elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
+       elf_core_copy_regs(&prstatus.pr_reg, regs);
        buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
                              &prstatus, sizeof(prstatus));
        final_note(buf);
index dbe57df2e199e789d7014932a60ef9156a447bc8..dd58c0be9ce25675b3523c801f5b96e7963fa780 100644 (file)
@@ -2126,7 +2126,7 @@ static void kretprobe_rethook_handler(struct rethook_node *rh, void *data,
        struct kprobe_ctlblk *kcb;
 
        /* The data must NOT be null. This means rethook data structure is broken. */
-       if (WARN_ON_ONCE(!data))
+       if (WARN_ON_ONCE(!data) || !rp->handler)
                return;
 
        __this_cpu_write(current_kprobe, &rp->kp);
index 50265f69a1354f174ca91d92dc9a440e3cb5a943..544fd4097406892d9280b9acb37303e231d0b141 100644 (file)
@@ -1522,5 +1522,4 @@ struct cgroup_subsys_state *kthread_blkcg(void)
        }
        return NULL;
 }
-EXPORT_SYMBOL(kthread_blkcg);
 #endif
index bf8e341e75b4fd0a270d386f65de42c94dd69ac2..1c630e573548df34a28a9061e87b961c2a879573 100644 (file)
@@ -77,31 +77,56 @@ config TASKS_RCU_GENERIC
          This option enables generic infrastructure code supporting
          task-based RCU implementations.  Not for manual selection.
 
+config FORCE_TASKS_RCU
+       bool "Force selection of TASKS_RCU"
+       depends on RCU_EXPERT
+       select TASKS_RCU
+       default n
+       help
+         This option force-enables a task-based RCU implementation
+         that uses only voluntary context switch (not preemption!),
+         idle, and user-mode execution as quiescent states.  Not for
+         manual selection in most cases.
+
 config TASKS_RCU
-       def_bool PREEMPTION
+       bool
+       default n
+       select IRQ_WORK
+
+config FORCE_TASKS_RUDE_RCU
+       bool "Force selection of Tasks Rude RCU"
+       depends on RCU_EXPERT
+       select TASKS_RUDE_RCU
+       default n
        help
-         This option enables a task-based RCU implementation that uses
-         only voluntary context switch (not preemption!), idle, and
-         user-mode execution as quiescent states.  Not for manual selection.
+         This option force-enables a task-based RCU implementation
+         that uses only context switch (including preemption) and
+         user-mode execution as quiescent states.  It forces IPIs and
+         context switches on all online CPUs, including idle ones,
+         so use with caution.  Not for manual selection in most cases.
 
 config TASKS_RUDE_RCU
-       def_bool 0
+       bool
+       default n
+       select IRQ_WORK
+
+config FORCE_TASKS_TRACE_RCU
+       bool "Force selection of Tasks Trace RCU"
+       depends on RCU_EXPERT
+       select TASKS_TRACE_RCU
+       default n
        help
          This option enables a task-based RCU implementation that uses
-         only context switch (including preemption) and user-mode
-         execution as quiescent states.  It forces IPIs and context
-         switches on all online CPUs, including idle ones, so use
-         with caution.
+         explicit rcu_read_lock_trace() read-side markers, and allows
+         these readers to appear in the idle loop as well as on the
+         CPU hotplug code paths.  It can force IPIs on online CPUs,
+         including idle ones, so use with caution.  Not for manual
+         selection in most cases.
 
 config TASKS_TRACE_RCU
-       def_bool 0
+       bool
+       default n
        select IRQ_WORK
-       help
-         This option enables a task-based RCU implementation that uses
-         explicit rcu_read_lock_trace() read-side markers, and allows
-         these readers to appear in the idle loop as well as on the CPU
-         hotplug code paths.  It can force IPIs on online CPUs, including
-         idle ones, so use with caution.
 
 config RCU_STALL_COMMON
        def_bool TREE_RCU
@@ -195,6 +220,20 @@ config RCU_BOOST_DELAY
 
          Accept the default if unsure.
 
+config RCU_EXP_KTHREAD
+       bool "Perform RCU expedited work in a real-time kthread"
+       depends on RCU_BOOST && RCU_EXPERT
+       default !PREEMPT_RT && NR_CPUS <= 32
+       help
+         Use this option to further reduce the latencies of expedited
+         grace periods at the expense of being more disruptive.
+
+         This option is disabled by default on PREEMPT_RT=y kernels which
+         disable expedited grace periods after boot by unconditionally
+         setting rcupdate.rcu_normal_after_boot=1.
+
+         Accept the default if unsure.
+
 config RCU_NOCB_CPU
        bool "Offload RCU callback processing from boot-selected CPUs"
        depends on TREE_RCU
@@ -225,7 +264,7 @@ config RCU_NOCB_CPU
 
 config TASKS_TRACE_RCU_READ_MB
        bool "Tasks Trace RCU readers use memory barriers in user and idle"
-       depends on RCU_EXPERT
+       depends on RCU_EXPERT && TASKS_TRACE_RCU
        default PREEMPT_RT || NR_CPUS < 8
        help
          Use this option to further reduce the number of IPIs sent
index 4fd64999300fc229bea06793861adae6042a697b..9b64e55d4f6159f44eac3017ded4b6c67a1803a1 100644 (file)
@@ -28,9 +28,6 @@ config RCU_SCALE_TEST
        depends on DEBUG_KERNEL
        select TORTURE_TEST
        select SRCU
-       select TASKS_RCU
-       select TASKS_RUDE_RCU
-       select TASKS_TRACE_RCU
        default n
        help
          This option provides a kernel module that runs performance
@@ -47,9 +44,6 @@ config RCU_TORTURE_TEST
        depends on DEBUG_KERNEL
        select TORTURE_TEST
        select SRCU
-       select TASKS_RCU
-       select TASKS_RUDE_RCU
-       select TASKS_TRACE_RCU
        default n
        help
          This option provides a kernel module that runs torture tests
@@ -66,9 +60,6 @@ config RCU_REF_SCALE_TEST
        depends on DEBUG_KERNEL
        select TORTURE_TEST
        select SRCU
-       select TASKS_RCU
-       select TASKS_RUDE_RCU
-       select TASKS_TRACE_RCU
        default n
        help
          This option provides a kernel module that runs performance tests
@@ -91,6 +82,20 @@ config RCU_CPU_STALL_TIMEOUT
          RCU grace period persists, additional CPU stall warnings are
          printed at more widely spaced intervals.
 
+config RCU_EXP_CPU_STALL_TIMEOUT
+       int "Expedited RCU CPU stall timeout in milliseconds"
+       depends on RCU_STALL_COMMON
+       range 0 21000
+       default 20 if ANDROID
+       default 0 if !ANDROID
+       help
+         If a given expedited RCU grace period extends more than the
+         specified number of milliseconds, a CPU stall warning is printed.
+         If the RCU grace period persists, additional CPU stall warnings
+         are printed at more widely spaced intervals.  A value of zero
+         says to use the RCU_CPU_STALL_TIMEOUT value converted from
+         seconds to milliseconds.
+
 config RCU_TRACE
        bool "Enable tracing for RCU"
        depends on DEBUG_KERNEL
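
The zero value in the new RCU_EXP_CPU_STALL_TIMEOUT option does not
disable the check; it defers to the seconds-based RCU_CPU_STALL_TIMEOUT
value converted to milliseconds. A small standalone sketch of that
documented fallback rule (an illustration only, not the kernel's
rcu_exp_jiffies_till_stall_check()):

    #include <stdio.h>

    /* Effective expedited stall timeout in ms, per the help text:
     * a nonzero exp_ms wins; zero falls back to stall_s in seconds. */
    static long exp_stall_timeout_ms(long exp_ms, long stall_s)
    {
            if (exp_ms)
                    return exp_ms;          /* explicit milliseconds */
            return stall_s * 1000;          /* fall back to seconds knob */
    }

    int main(void)
    {
            printf("ANDROID default:  %ld ms\n", exp_stall_timeout_ms(20, 21));
            printf("!ANDROID default: %ld ms\n", exp_stall_timeout_ms(0, 21));
            return 0;
    }
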
index 24b5f2c2de87b4c37b0d3c52c3189f26d669b455..152492d52715647f953c97c264954cb3496de786 100644 (file)
@@ -210,7 +210,9 @@ static inline bool rcu_stall_is_suppressed_at_boot(void)
 extern int rcu_cpu_stall_ftrace_dump;
 extern int rcu_cpu_stall_suppress;
 extern int rcu_cpu_stall_timeout;
+extern int rcu_exp_cpu_stall_timeout;
 int rcu_jiffies_till_stall_check(void);
+int rcu_exp_jiffies_till_stall_check(void);
 
 static inline bool rcu_stall_is_suppressed(void)
 {
@@ -523,6 +525,8 @@ static inline bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) { ret
 static inline void show_rcu_gp_kthreads(void) { }
 static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
 static inline void rcu_fwd_progress_check(unsigned long j) { }
+static inline void rcu_gp_slow_register(atomic_t *rgssp) { }
+static inline void rcu_gp_slow_unregister(atomic_t *rgssp) { }
 #else /* #ifdef CONFIG_TINY_RCU */
 bool rcu_dynticks_zero_in_eqs(int cpu, int *vp);
 unsigned long rcu_get_gp_seq(void);
@@ -534,14 +538,19 @@ int rcu_get_gp_kthreads_prio(void);
 void rcu_fwd_progress_check(unsigned long j);
 void rcu_force_quiescent_state(void);
 extern struct workqueue_struct *rcu_gp_wq;
+#ifdef CONFIG_RCU_EXP_KTHREAD
+extern struct kthread_worker *rcu_exp_gp_kworker;
+extern struct kthread_worker *rcu_exp_par_gp_kworker;
+#else /* !CONFIG_RCU_EXP_KTHREAD */
 extern struct workqueue_struct *rcu_par_gp_wq;
+#endif /* CONFIG_RCU_EXP_KTHREAD */
+void rcu_gp_slow_register(atomic_t *rgssp);
+void rcu_gp_slow_unregister(atomic_t *rgssp);
 #endif /* #else #ifdef CONFIG_TINY_RCU */
 
 #ifdef CONFIG_RCU_NOCB_CPU
-bool rcu_is_nocb_cpu(int cpu);
 void rcu_bind_current_to_nocb(void);
 #else
-static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
 static inline void rcu_bind_current_to_nocb(void) { }
 #endif
 
index 81145c3ece25fab1f089a2e0eeaee80776d55c23..c54ea2b6a36bc2e969ee59b33b18bec0413fad2d 100644 (file)
@@ -505,10 +505,10 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
                WRITE_ONCE(rsclp->tails[j], rsclp->tails[RCU_DONE_TAIL]);
 
        /*
-        * Callbacks moved, so clean up the misordered ->tails[] pointers
-        * that now point into the middle of the list of ready-to-invoke
-        * callbacks.  The overall effect is to copy down the later pointers
-        * into the gap that was created by the now-ready segments.
+        * Callbacks moved, so there might be an empty RCU_WAIT_TAIL
+        * and a non-empty RCU_NEXT_READY_TAIL.  If so, copy the
+        * RCU_NEXT_READY_TAIL segment to fill the RCU_WAIT_TAIL gap
+        * created by the now-ready-to-invoke segments.
         */
        for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
                if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
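
The rewritten comment describes a concrete situation: after ready
callbacks are merged into RCU_DONE_TAIL, the RCU_WAIT_TAIL segment can
be left empty while RCU_NEXT_READY_TAIL still holds callbacks, so the
later boundary is copied down to close the gap. A toy model of that
copy-down using integer segment boundaries instead of list pointers
(names and indices illustrative, not the kernel's rcu_segcblist):

    #include <stdio.h>

    enum { DONE, WAIT, NEXT_READY, NEXT, NSEG };

    /* tails[s] is one past the last callback (by index) in segment s. */
    static void copy_down(int tails[NSEG])
    {
            int i = NEXT_READY, j;

            for (j = WAIT; i < NEXT; i++, j++) {
                    if (tails[j] == tails[NEXT])
                            break;          /* nothing left to slide down */
                    tails[j] = tails[i];
            }
    }

    int main(void)
    {
            /* 5 done CBs, empty WAIT, 3 CBs in NEXT_READY, 1 in NEXT. */
            int tails[NSEG] = { 5, 5, 8, 9 };

            copy_down(tails);
            for (int s = 0; s < NSEG; s++)
                    printf("tails[%d] = %d\n", s, tails[s]);
            /* Prints 5, 8, 8, 9: NEXT_READY's callbacks now wait in WAIT. */
            return 0;
    }
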
index 5e4f1f83d38e72663d3760f46d5c0084cd3bb6c0..277a5bfb37d4830bfbc1bdf7407e49548dec3fbe 100644 (file)
@@ -268,6 +268,8 @@ static struct rcu_scale_ops srcud_ops = {
        .name           = "srcud"
 };
 
+#ifdef CONFIG_TASKS_RCU
+
 /*
  * Definitions for RCU-tasks scalability testing.
  */
@@ -295,6 +297,16 @@ static struct rcu_scale_ops tasks_ops = {
        .name           = "tasks"
 };
 
+#define TASKS_OPS &tasks_ops,
+
+#else // #ifdef CONFIG_TASKS_RCU
+
+#define TASKS_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_RCU
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+
 /*
  * Definitions for RCU-tasks-trace scalability testing.
  */
@@ -324,6 +336,14 @@ static struct rcu_scale_ops tasks_tracing_ops = {
        .name           = "tasks-tracing"
 };
 
+#define TASKS_TRACING_OPS &tasks_tracing_ops,
+
+#else // #ifdef CONFIG_TASKS_TRACE_RCU
+
+#define TASKS_TRACING_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU
+
 static unsigned long rcuscale_seq_diff(unsigned long new, unsigned long old)
 {
        if (!cur_ops->gp_diff)
@@ -797,7 +817,7 @@ rcu_scale_init(void)
        long i;
        int firsterr = 0;
        static struct rcu_scale_ops *scale_ops[] = {
-               &rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops, &tasks_tracing_ops
+               &rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_TRACING_OPS
        };
 
        if (!torture_init_begin(scale_type, verbose))
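
Note the trick that makes this initializer work: TASKS_OPS and
TASKS_TRACING_OPS expand either to an array element plus its trailing
comma or to nothing at all, so scale_ops[] compiles whether or not the
corresponding RCU flavors are configured. A standalone sketch of the
same pattern (toy names, not the kernel's):

    #include <stdio.h>

    struct ops { const char *name; };

    static struct ops rcu_like_ops   = { "rcu" };
    static struct ops tasks_like_ops = { "tasks" };

    /* Flip this to 0 and the array below still compiles: the macro
     * carries its own trailing comma, or vanishes entirely. */
    #define HAVE_TASKS_FLAVOR 1
    #if HAVE_TASKS_FLAVOR
    #define TASKS_OPS &tasks_like_ops,
    #else
    #define TASKS_OPS
    #endif

    static struct ops *scale_ops[] = {
            &rcu_like_ops, TASKS_OPS
    };

    int main(void)
    {
            for (size_t i = 0; i < sizeof(scale_ops) / sizeof(scale_ops[0]); i++)
                    printf("%s\n", scale_ops[i]->name);
            return 0;
    }
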
index 55d049c39608f581bc8157fc0fbb6264c2ba0bde..7120165a93426c282c7d9f6d3a87a4738ac52905 100644 (file)
@@ -737,6 +737,50 @@ static struct rcu_torture_ops busted_srcud_ops = {
        .name           = "busted_srcud"
 };
 
+/*
+ * Definitions for trivial CONFIG_PREEMPT=n-only torture testing.
+ * This implementation does not necessarily work well with CPU hotplug.
+ */
+
+static void synchronize_rcu_trivial(void)
+{
+       int cpu;
+
+       for_each_online_cpu(cpu) {
+               rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu));
+               WARN_ON_ONCE(raw_smp_processor_id() != cpu);
+       }
+}
+
+static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
+{
+       preempt_disable();
+       return 0;
+}
+
+static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
+{
+       preempt_enable();
+}
+
+static struct rcu_torture_ops trivial_ops = {
+       .ttype          = RCU_TRIVIAL_FLAVOR,
+       .init           = rcu_sync_torture_init,
+       .readlock       = rcu_torture_read_lock_trivial,
+       .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
+       .readunlock     = rcu_torture_read_unlock_trivial,
+       .readlock_held  = torture_readlock_not_held,
+       .get_gp_seq     = rcu_no_completed,
+       .sync           = synchronize_rcu_trivial,
+       .exp_sync       = synchronize_rcu_trivial,
+       .fqs            = NULL,
+       .stats          = NULL,
+       .irq_capable    = 1,
+       .name           = "trivial"
+};
+
+#ifdef CONFIG_TASKS_RCU
+
 /*
  * Definitions for RCU-tasks torture testing.
  */
@@ -780,47 +824,16 @@ static struct rcu_torture_ops tasks_ops = {
        .name           = "tasks"
 };
 
-/*
- * Definitions for trivial CONFIG_PREEMPT=n-only torture testing.
- * This implementation does not necessarily work well with CPU hotplug.
- */
+#define TASKS_OPS &tasks_ops,
 
-static void synchronize_rcu_trivial(void)
-{
-       int cpu;
+#else // #ifdef CONFIG_TASKS_RCU
 
-       for_each_online_cpu(cpu) {
-               rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu));
-               WARN_ON_ONCE(raw_smp_processor_id() != cpu);
-       }
-}
+#define TASKS_OPS
 
-static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
-{
-       preempt_disable();
-       return 0;
-}
+#endif // #else #ifdef CONFIG_TASKS_RCU
 
-static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
-{
-       preempt_enable();
-}
 
-static struct rcu_torture_ops trivial_ops = {
-       .ttype          = RCU_TRIVIAL_FLAVOR,
-       .init           = rcu_sync_torture_init,
-       .readlock       = rcu_torture_read_lock_trivial,
-       .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
-       .readunlock     = rcu_torture_read_unlock_trivial,
-       .readlock_held  = torture_readlock_not_held,
-       .get_gp_seq     = rcu_no_completed,
-       .sync           = synchronize_rcu_trivial,
-       .exp_sync       = synchronize_rcu_trivial,
-       .fqs            = NULL,
-       .stats          = NULL,
-       .irq_capable    = 1,
-       .name           = "trivial"
-};
+#ifdef CONFIG_TASKS_RUDE_RCU
 
 /*
  * Definitions for rude RCU-tasks torture testing.
@@ -851,6 +864,17 @@ static struct rcu_torture_ops tasks_rude_ops = {
        .name           = "tasks-rude"
 };
 
+#define TASKS_RUDE_OPS &tasks_rude_ops,
+
+#else // #ifdef CONFIG_TASKS_RUDE_RCU
+
+#define TASKS_RUDE_OPS
+
+#endif // #else #ifdef CONFIG_TASKS_RUDE_RCU
+
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+
 /*
  * Definitions for tracing RCU-tasks torture testing.
  */
@@ -893,6 +917,15 @@ static struct rcu_torture_ops tasks_tracing_ops = {
        .name           = "tasks-tracing"
 };
 
+#define TASKS_TRACING_OPS &tasks_tracing_ops,
+
+#else // #ifdef CONFIG_TASKS_TRACE_RCU
+
+#define TASKS_TRACING_OPS
+
+#endif // #else #ifdef CONFIG_TASKS_TRACE_RCU
+
+
 static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
 {
        if (!cur_ops->gp_diff)
@@ -1178,7 +1211,7 @@ rcu_torture_writer(void *arg)
                         " GP expediting controlled from boot/sysfs for %s.\n",
                         torture_type, cur_ops->name);
        if (WARN_ONCE(nsynctypes == 0,
-                     "rcu_torture_writer: No update-side primitives.\n")) {
+                     "%s: No update-side primitives.\n", __func__)) {
                /*
                 * No update-side primitives, so don't try updating.
                 * The resulting test won't be testing much, hence the
@@ -1186,6 +1219,7 @@ rcu_torture_writer(void *arg)
                 */
                rcu_torture_writer_state = RTWS_STOPPING;
                torture_kthread_stopping("rcu_torture_writer");
+               return 0;
        }
 
        do {
@@ -1322,6 +1356,17 @@ rcu_torture_fakewriter(void *arg)
        VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task started");
        set_user_nice(current, MAX_NICE);
 
+       if (WARN_ONCE(nsynctypes == 0,
+                     "%s: No update-side primitives.\n", __func__)) {
+               /*
+                * No update-side primitives, so don't try updating.
+                * The resulting test won't be testing much, hence the
+                * above WARN_ONCE().
+                */
+               torture_kthread_stopping("rcu_torture_fakewriter");
+               return 0;
+       }
+
        do {
                torture_hrtimeout_jiffies(torture_random(&rand) % 10, &rand);
                if (cur_ops->cb_barrier != NULL &&
@@ -2916,10 +2961,12 @@ rcu_torture_cleanup(void)
                        pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
                        cur_ops->cb_barrier();
                }
+               rcu_gp_slow_unregister(NULL);
                return;
        }
        if (!cur_ops) {
                torture_cleanup_end();
+               rcu_gp_slow_unregister(NULL);
                return;
        }
 
@@ -3016,6 +3063,7 @@ rcu_torture_cleanup(void)
        else
                rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
        torture_cleanup_end();
+       rcu_gp_slow_unregister(&rcu_fwd_cb_nodelay);
 }
 
 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
@@ -3096,9 +3144,9 @@ rcu_torture_init(void)
        int flags = 0;
        unsigned long gp_seq = 0;
        static struct rcu_torture_ops *torture_ops[] = {
-               &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
-               &busted_srcud_ops, &tasks_ops, &tasks_rude_ops,
-               &tasks_tracing_ops, &trivial_ops,
+               &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops,
+               TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
+               &trivial_ops,
        };
 
        if (!torture_init_begin(torture_type, verbose))
@@ -3320,6 +3368,7 @@ rcu_torture_init(void)
        if (object_debug)
                rcu_test_debug_objects();
        torture_init_end();
+       rcu_gp_slow_register(&rcu_fwd_cb_nodelay);
        return 0;
 
 unwind:
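
One detail worth spelling out from the moved synchronize_rcu_trivial():
on a CONFIG_PREEMPT=n kernel, migrating the current task onto each
online CPU in turn forces a context switch on every CPU, and a context
switch everywhere is a full set of quiescent states for
preempt-disabled readers. The same migration loop can be written in
userspace with sched_setaffinity(); a Linux-only sketch (an analogue of
the loop's shape, with no grace-period guarantee for user code):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Hop the calling thread across every online CPU, one at a time. */
    static int hop_all_cpus(void)
    {
            long ncpus = sysconf(_SC_NPROCESSORS_ONLN);

            for (long cpu = 0; cpu < ncpus; cpu++) {
                    cpu_set_t set;

                    CPU_ZERO(&set);
                    CPU_SET(cpu, &set);
                    if (sched_setaffinity(0, sizeof(set), &set))
                            return -1;      /* CPU may have gone away */
                    printf("now on CPU %d\n", sched_getcpu());
            }
            return 0;
    }

    int main(void)
    {
            return hop_all_cpus() ? 1 : 0;
    }
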
index 5489ff7f478e4372858693a56be3e95fd148e577..909644abee67fc72a47566c063e1f7493c0bbf95 100644 (file)
@@ -207,6 +207,8 @@ static struct ref_scale_ops srcu_ops = {
        .name           = "srcu"
 };
 
+#ifdef CONFIG_TASKS_RCU
+
 // Definitions for RCU Tasks ref scale testing: Empty read markers.
 // These definitions also work for RCU Rude readers.
 static void rcu_tasks_ref_scale_read_section(const int nloops)
@@ -232,6 +234,16 @@ static struct ref_scale_ops rcu_tasks_ops = {
        .name           = "rcu-tasks"
 };
 
+#define RCU_TASKS_OPS &rcu_tasks_ops,
+
+#else // #ifdef CONFIG_TASKS_RCU
+
+#define RCU_TASKS_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_RCU
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+
 // Definitions for RCU Tasks Trace ref scale testing.
 static void rcu_trace_ref_scale_read_section(const int nloops)
 {
@@ -261,6 +273,14 @@ static struct ref_scale_ops rcu_trace_ops = {
        .name           = "rcu-trace"
 };
 
+#define RCU_TRACE_OPS &rcu_trace_ops,
+
+#else // #ifdef CONFIG_TASKS_TRACE_RCU
+
+#define RCU_TRACE_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU
+
 // Definitions for reference count
 static atomic_t refcnt;
 
@@ -790,7 +810,7 @@ ref_scale_init(void)
        long i;
        int firsterr = 0;
        static struct ref_scale_ops *scale_ops[] = {
-               &rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, &refcnt_ops, &rwlock_ops,
+               &rcu_ops, &srcu_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
                &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
        };
 
index 6833d888718169fe307536c738764b0dbbd5e26d..50ba70f019dea0996b1ec28e96191eb9e73c98ff 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/smp.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/srcu.h>
 
 #include "rcu.h"
@@ -38,6 +39,35 @@ module_param(exp_holdoff, ulong, 0444);
 static ulong counter_wrap_check = (ULONG_MAX >> 2);
 module_param(counter_wrap_check, ulong, 0444);
 
+/*
+ * Control conversion to SRCU_SIZE_BIG:
+ *    0: Don't convert at all.
+ *    1: Convert at init_srcu_struct() time.
+ *    2: Convert when rcutorture invokes srcu_torture_stats_print().
+ *    3: Decide at boot time based on system shape (default).
+ *    0x1x: Convert when excessive contention is encountered.
+ */
+#define SRCU_SIZING_NONE       0
+#define SRCU_SIZING_INIT       1
+#define SRCU_SIZING_TORTURE    2
+#define SRCU_SIZING_AUTO       3
+#define SRCU_SIZING_CONTEND    0x10
+#define SRCU_SIZING_IS(x) ((convert_to_big & ~SRCU_SIZING_CONTEND) == x)
+#define SRCU_SIZING_IS_NONE() (SRCU_SIZING_IS(SRCU_SIZING_NONE))
+#define SRCU_SIZING_IS_INIT() (SRCU_SIZING_IS(SRCU_SIZING_INIT))
+#define SRCU_SIZING_IS_TORTURE() (SRCU_SIZING_IS(SRCU_SIZING_TORTURE))
+#define SRCU_SIZING_IS_CONTEND() (convert_to_big & SRCU_SIZING_CONTEND)
+static int convert_to_big = SRCU_SIZING_AUTO;
+module_param(convert_to_big, int, 0444);
+
+/* Number of CPUs to trigger init_srcu_struct()-time transition to big. */
+static int big_cpu_lim __read_mostly = 128;
+module_param(big_cpu_lim, int, 0444);
+
+/* Contention events per jiffy to initiate transition to big. */
+static int small_contention_lim __read_mostly = 100;
+module_param(small_contention_lim, int, 0444);
+
 /* Early-boot callback-management, so early that no lock is required! */
 static LIST_HEAD(srcu_boot_list);
 static bool __read_mostly srcu_init_done;
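
The srcutree.convert_to_big encoding above packs two independent things
into one integer module parameter: a mode in the low bits (0 through 3)
and an 0x10 "also convert on contention" flag, which is why
SRCU_SIZING_IS() masks the flag off before comparing. A quick
standalone check of the decoding:

    #include <stdio.h>

    #define SRCU_SIZING_NONE        0
    #define SRCU_SIZING_INIT        1
    #define SRCU_SIZING_CONTEND     0x10

    static int convert_to_big;

    /* Mode compare ignores the flag bit; the flag is tested alone. */
    #define SRCU_SIZING_IS(x)        ((convert_to_big & ~SRCU_SIZING_CONTEND) == (x))
    #define SRCU_SIZING_IS_CONTEND() (convert_to_big & SRCU_SIZING_CONTEND)

    int main(void)
    {
            convert_to_big = SRCU_SIZING_NONE | SRCU_SIZING_CONTEND;  /* 0x10 */
            printf("NONE? %d  contend? %d\n",
                   SRCU_SIZING_IS(SRCU_SIZING_NONE), !!SRCU_SIZING_IS_CONTEND());

            convert_to_big = SRCU_SIZING_INIT;                        /* 0x01 */
            printf("INIT? %d  contend? %d\n",
                   SRCU_SIZING_IS(SRCU_SIZING_INIT), !!SRCU_SIZING_IS_CONTEND());
            return 0;
    }
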
@@ -48,39 +78,90 @@ static void process_srcu(struct work_struct *work);
 static void srcu_delay_timer(struct timer_list *t);
 
 /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
-#define spin_lock_rcu_node(p)                                  \
-do {                                                                   \
-       spin_lock(&ACCESS_PRIVATE(p, lock));                    \
-       smp_mb__after_unlock_lock();                                    \
+#define spin_lock_rcu_node(p)                                                  \
+do {                                                                           \
+       spin_lock(&ACCESS_PRIVATE(p, lock));                                    \
+       smp_mb__after_unlock_lock();                                            \
 } while (0)
 
 #define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock))
 
-#define spin_lock_irq_rcu_node(p)                                      \
-do {                                                                   \
-       spin_lock_irq(&ACCESS_PRIVATE(p, lock));                        \
-       smp_mb__after_unlock_lock();                                    \
+#define spin_lock_irq_rcu_node(p)                                              \
+do {                                                                           \
+       spin_lock_irq(&ACCESS_PRIVATE(p, lock));                                \
+       smp_mb__after_unlock_lock();                                            \
 } while (0)
 
-#define spin_unlock_irq_rcu_node(p)                                    \
+#define spin_unlock_irq_rcu_node(p)                                            \
        spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
 
-#define spin_lock_irqsave_rcu_node(p, flags)                   \
-do {                                                                   \
-       spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags);     \
-       smp_mb__after_unlock_lock();                                    \
+#define spin_lock_irqsave_rcu_node(p, flags)                                   \
+do {                                                                           \
+       spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags);                     \
+       smp_mb__after_unlock_lock();                                            \
 } while (0)
 
-#define spin_unlock_irqrestore_rcu_node(p, flags)                      \
-       spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \
+#define spin_trylock_irqsave_rcu_node(p, flags)                                        \
+({                                                                             \
+       bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
+                                                                               \
+       if (___locked)                                                          \
+               smp_mb__after_unlock_lock();                                    \
+       ___locked;                                                              \
+})
+
+#define spin_unlock_irqrestore_rcu_node(p, flags)                              \
+       spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)                 \
 
 /*
- * Initialize SRCU combining tree.  Note that statically allocated
+ * Initialize SRCU per-CPU data.  Note that statically allocated
  * srcu_struct structures might already have srcu_read_lock() and
  * srcu_read_unlock() running against them.  So if the is_static parameter
  * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
  */
-static void init_srcu_struct_nodes(struct srcu_struct *ssp)
+static void init_srcu_struct_data(struct srcu_struct *ssp)
+{
+       int cpu;
+       struct srcu_data *sdp;
+
+       /*
+        * Initialize the per-CPU srcu_data array, which feeds into the
+        * leaves of the srcu_node tree.
+        */
+       WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
+                    ARRAY_SIZE(sdp->srcu_unlock_count));
+       for_each_possible_cpu(cpu) {
+               sdp = per_cpu_ptr(ssp->sda, cpu);
+               spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
+               rcu_segcblist_init(&sdp->srcu_cblist);
+               sdp->srcu_cblist_invoking = false;
+               sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq;
+               sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq;
+               sdp->mynode = NULL;
+               sdp->cpu = cpu;
+               INIT_WORK(&sdp->work, srcu_invoke_callbacks);
+               timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
+               sdp->ssp = ssp;
+       }
+}
+
+/* Invalid seq state, used during snp node initialization */
+#define SRCU_SNP_INIT_SEQ              0x2
+
+/*
+ * Check whether the sequence number corresponding to an snp node
+ * is invalid.
+ */
+static inline bool srcu_invl_snp_seq(unsigned long s)
+{
+       return rcu_seq_state(s) == SRCU_SNP_INIT_SEQ;
+}
+
+/*
+ * Allocate and initialize the SRCU combining tree.  Returns @true if
+ * allocation succeeded and @false otherwise.
+ */
+static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags)
 {
        int cpu;
        int i;
@@ -92,6 +173,9 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp)
 
        /* Initialize geometry if it has not already been initialized. */
        rcu_init_geometry();
+       ssp->node = kcalloc(rcu_num_nodes, sizeof(*ssp->node), gfp_flags);
+       if (!ssp->node)
+               return false;
 
        /* Work out the overall tree geometry. */
        ssp->level[0] = &ssp->node[0];
@@ -105,10 +189,10 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp)
                WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
                             ARRAY_SIZE(snp->srcu_data_have_cbs));
                for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
-                       snp->srcu_have_cbs[i] = 0;
+                       snp->srcu_have_cbs[i] = SRCU_SNP_INIT_SEQ;
                        snp->srcu_data_have_cbs[i] = 0;
                }
-               snp->srcu_gp_seq_needed_exp = 0;
+               snp->srcu_gp_seq_needed_exp = SRCU_SNP_INIT_SEQ;
                snp->grplo = -1;
                snp->grphi = -1;
                if (snp == &ssp->node[0]) {
@@ -129,39 +213,31 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp)
         * Initialize the per-CPU srcu_data array, which feeds into the
         * leaves of the srcu_node tree.
         */
-       WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
-                    ARRAY_SIZE(sdp->srcu_unlock_count));
        level = rcu_num_lvls - 1;
        snp_first = ssp->level[level];
        for_each_possible_cpu(cpu) {
                sdp = per_cpu_ptr(ssp->sda, cpu);
-               spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
-               rcu_segcblist_init(&sdp->srcu_cblist);
-               sdp->srcu_cblist_invoking = false;
-               sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq;
-               sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq;
                sdp->mynode = &snp_first[cpu / levelspread[level]];
                for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
                        if (snp->grplo < 0)
                                snp->grplo = cpu;
                        snp->grphi = cpu;
                }
-               sdp->cpu = cpu;
-               INIT_WORK(&sdp->work, srcu_invoke_callbacks);
-               timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
-               sdp->ssp = ssp;
                sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
        }
+       smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_WAIT_BARRIER);
+       return true;
 }
 
 /*
  * Initialize non-compile-time initialized fields, including the
- * associated srcu_node and srcu_data structures.  The is_static
- * parameter is passed through to init_srcu_struct_nodes(), and
- * also tells us that ->sda has already been wired up to srcu_data.
+ * associated srcu_node and srcu_data structures.  The is_static parameter
+ * tells us that ->sda has already been wired up to srcu_data.
  */
 static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
 {
+       ssp->srcu_size_state = SRCU_SIZE_SMALL;
+       ssp->node = NULL;
        mutex_init(&ssp->srcu_cb_mutex);
        mutex_init(&ssp->srcu_gp_mutex);
        ssp->srcu_idx = 0;
@@ -170,13 +246,25 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
        mutex_init(&ssp->srcu_barrier_mutex);
        atomic_set(&ssp->srcu_barrier_cpu_cnt, 0);
        INIT_DELAYED_WORK(&ssp->work, process_srcu);
+       ssp->sda_is_static = is_static;
        if (!is_static)
                ssp->sda = alloc_percpu(struct srcu_data);
        if (!ssp->sda)
                return -ENOMEM;
-       init_srcu_struct_nodes(ssp);
+       init_srcu_struct_data(ssp);
        ssp->srcu_gp_seq_needed_exp = 0;
        ssp->srcu_last_gp_end = ktime_get_mono_fast_ns();
+       if (READ_ONCE(ssp->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) {
+               if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) {
+                       if (!ssp->sda_is_static) {
+                               free_percpu(ssp->sda);
+                               ssp->sda = NULL;
+                               return -ENOMEM;
+                       }
+               } else {
+                       WRITE_ONCE(ssp->srcu_size_state, SRCU_SIZE_BIG);
+               }
+       }
        smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */
        return 0;
 }
@@ -213,6 +301,86 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
 
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+/*
+ * Initiate a transition to SRCU_SIZE_BIG with lock held.
+ */
+static void __srcu_transition_to_big(struct srcu_struct *ssp)
+{
+       lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock));
+       smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_ALLOC);
+}
+
+/*
+ * Initiate an idempotent transition to SRCU_SIZE_BIG.
+ */
+static void srcu_transition_to_big(struct srcu_struct *ssp)
+{
+       unsigned long flags;
+
+       /* Double-checked locking on ->srcu_size_state. */
+       if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL)
+               return;
+       spin_lock_irqsave_rcu_node(ssp, flags);
+       if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL) {
+               spin_unlock_irqrestore_rcu_node(ssp, flags);
+               return;
+       }
+       __srcu_transition_to_big(ssp);
+       spin_unlock_irqrestore_rcu_node(ssp, flags);
+}
+
+/*
+ * Check to see if the just-encountered contention event justifies
+ * a transition to SRCU_SIZE_BIG.
+ */
+static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp)
+{
+       unsigned long j;
+
+       if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_size_state)
+               return;
+       j = jiffies;
+       if (ssp->srcu_size_jiffies != j) {
+               ssp->srcu_size_jiffies = j;
+               ssp->srcu_n_lock_retries = 0;
+       }
+       if (++ssp->srcu_n_lock_retries <= small_contention_lim)
+               return;
+       __srcu_transition_to_big(ssp);
+}
+
+/*
+ * Acquire the specified srcu_data structure's ->lock, but check for
+ * excessive contention, which results in initiation of a transition
+ * to SRCU_SIZE_BIG.  But only if the srcutree.convert_to_big module
+ * parameter permits this.
+ */
+static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags)
+{
+       struct srcu_struct *ssp = sdp->ssp;
+
+       if (spin_trylock_irqsave_rcu_node(sdp, *flags))
+               return;
+       spin_lock_irqsave_rcu_node(ssp, *flags);
+       spin_lock_irqsave_check_contention(ssp);
+       spin_unlock_irqrestore_rcu_node(ssp, *flags);
+       spin_lock_irqsave_rcu_node(sdp, *flags);
+}
+
+/*
+ * Acquire the specified srcu_struct structure's ->lock, but check for
+ * excessive contention, which results in initiation of a transition
+ * to SRCU_SIZE_BIG.  But only if the srcutree.convert_to_big module
+ * parameter permits this.
+ */
+static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags)
+{
+       if (spin_trylock_irqsave_rcu_node(ssp, *flags))
+               return;
+       spin_lock_irqsave_rcu_node(ssp, *flags);
+       spin_lock_irqsave_check_contention(ssp);
+}
+
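
srcu_transition_to_big() above is a textbook double-checked transition:
an unlocked load-acquire filters out the common already-transitioned
case, and the state is re-checked under the lock before the
store-release that publishes the change. A userspace rendering of the
same shape using C11 atomics and a pthread mutex (a toy state machine,
not the SRCU code):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    enum { SIZE_SMALL, SIZE_ALLOC };

    static _Atomic int size_state = SIZE_SMALL;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static void transition_to_big(void)
    {
            /* Cheap unlocked check: most callers bail out here. */
            if (atomic_load_explicit(&size_state, memory_order_acquire) != SIZE_SMALL)
                    return;
            pthread_mutex_lock(&lock);
            /* Re-check under the lock: someone may have beaten us. */
            if (atomic_load_explicit(&size_state, memory_order_acquire) != SIZE_SMALL) {
                    pthread_mutex_unlock(&lock);
                    return;
            }
            atomic_store_explicit(&size_state, SIZE_ALLOC, memory_order_release);
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            transition_to_big();
            transition_to_big();    /* second call takes the fast path */
            printf("state = %d\n", atomic_load(&size_state));
            return 0;
    }
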
 /*
  * First-use initialization of statically allocated srcu_struct
  * structure.  Wiring up the combining tree is more than can be
@@ -343,7 +511,10 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
        return sum;
 }
 
-#define SRCU_INTERVAL          1
+#define SRCU_INTERVAL          1       // Base delay if no expedited GPs pending.
+#define SRCU_MAX_INTERVAL      10      // Maximum incremental delay from slow readers.
+#define SRCU_MAX_NODELAY_PHASE 1       // Maximum per-GP-phase consecutive no-delay instances.
+#define SRCU_MAX_NODELAY       100     // Maximum consecutive no-delay instances.
 
 /*
  * Return grace-period delay, zero if there are expedited grace
@@ -351,10 +522,18 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
  */
 static unsigned long srcu_get_delay(struct srcu_struct *ssp)
 {
-       if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq),
-                        READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
-               return 0;
-       return SRCU_INTERVAL;
+       unsigned long jbase = SRCU_INTERVAL;
+
+       if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+               jbase = 0;
+       if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
+               jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
+       if (!jbase) {
+               WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
+               if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
+                       jbase = 1;
+       }
+       return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
 }
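
The reworked srcu_get_delay() turns what used to be a boolean choice
into a small adaptive value: start from SRCU_INTERVAL, drop to zero
when an expedited grace period is pending, stretch the delay by how
long the current grace period has already been running, and clamp at
SRCU_MAX_INTERVAL. The arithmetic in isolation (toy inputs; the
jiffies plumbing and the no-delay bookkeeping are omitted):

    #include <stdio.h>

    #define SRCU_INTERVAL           1
    #define SRCU_MAX_INTERVAL       10

    /* expedited: an expedited GP is pending.  gp_age: ticks since the
     * current GP started (0 when no GP is in flight). */
    static unsigned long get_delay(int expedited, unsigned long gp_age)
    {
            unsigned long jbase = SRCU_INTERVAL;

            if (expedited)
                    jbase = 0;
            jbase += gp_age;        /* slow readers stretch the delay */
            return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
    }

    int main(void)
    {
            printf("%lu\n", get_delay(0, 0));   /* 1: normal base delay */
            printf("%lu\n", get_delay(1, 0));   /* 0: expedited, no delay */
            printf("%lu\n", get_delay(0, 42));  /* 10: clamped */
            return 0;
    }
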
 
 /**
@@ -382,13 +561,20 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
                        return; /* Forgot srcu_barrier(), so just leak it! */
        }
        if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
+           WARN_ON(rcu_seq_current(&ssp->srcu_gp_seq) != ssp->srcu_gp_seq_needed) ||
            WARN_ON(srcu_readers_active(ssp))) {
-               pr_info("%s: Active srcu_struct %p state: %d\n",
-                       __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)));
+               pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n",
+                       __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)),
+                       rcu_seq_current(&ssp->srcu_gp_seq), ssp->srcu_gp_seq_needed);
                return; /* Caller forgot to stop doing call_srcu()? */
        }
-       free_percpu(ssp->sda);
-       ssp->sda = NULL;
+       if (!ssp->sda_is_static) {
+               free_percpu(ssp->sda);
+               ssp->sda = NULL;
+       }
+       kfree(ssp->node);
+       ssp->node = NULL;
+       ssp->srcu_size_state = SRCU_SIZE_SMALL;
 }
 EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
@@ -434,9 +620,13 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  */
 static void srcu_gp_start(struct srcu_struct *ssp)
 {
-       struct srcu_data *sdp = this_cpu_ptr(ssp->sda);
+       struct srcu_data *sdp;
        int state;
 
+       if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
+               sdp = per_cpu_ptr(ssp->sda, 0);
+       else
+               sdp = this_cpu_ptr(ssp->sda);
        lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock));
        WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
        spin_lock_rcu_node(sdp);  /* Interrupts already disabled. */
@@ -445,6 +635,8 @@ static void srcu_gp_start(struct srcu_struct *ssp)
        (void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
                                       rcu_seq_snap(&ssp->srcu_gp_seq));
        spin_unlock_rcu_node(sdp);  /* Interrupts remain disabled. */
+       WRITE_ONCE(ssp->srcu_gp_start, jiffies);
+       WRITE_ONCE(ssp->srcu_n_exp_nodelay, 0);
        smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
        rcu_seq_start(&ssp->srcu_gp_seq);
        state = rcu_seq_state(ssp->srcu_gp_seq);
@@ -517,7 +709,9 @@ static void srcu_gp_end(struct srcu_struct *ssp)
        int idx;
        unsigned long mask;
        struct srcu_data *sdp;
+       unsigned long sgsne;
        struct srcu_node *snp;
+       int ss_state;
 
        /* Prevent more than one additional grace period. */
        mutex_lock(&ssp->srcu_cb_mutex);
@@ -526,7 +720,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
        spin_lock_irq_rcu_node(ssp);
        idx = rcu_seq_state(ssp->srcu_gp_seq);
        WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
-       cbdelay = srcu_get_delay(ssp);
+       cbdelay = !!srcu_get_delay(ssp);
        WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
        rcu_seq_end(&ssp->srcu_gp_seq);
        gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
@@ -537,38 +731,45 @@ static void srcu_gp_end(struct srcu_struct *ssp)
        /* A new grace period can start at this point.  But only one. */
 
        /* Initiate callback invocation as needed. */
-       idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
-       srcu_for_each_node_breadth_first(ssp, snp) {
-               spin_lock_irq_rcu_node(snp);
-               cbs = false;
-               last_lvl = snp >= ssp->level[rcu_num_lvls - 1];
-               if (last_lvl)
-                       cbs = snp->srcu_have_cbs[idx] == gpseq;
-               snp->srcu_have_cbs[idx] = gpseq;
-               rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
-               if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq))
-                       WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq);
-               mask = snp->srcu_data_have_cbs[idx];
-               snp->srcu_data_have_cbs[idx] = 0;
-               spin_unlock_irq_rcu_node(snp);
-               if (cbs)
-                       srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay);
-
-               /* Occasionally prevent srcu_data counter wrap. */
-               if (!(gpseq & counter_wrap_check) && last_lvl)
-                       for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
-                               sdp = per_cpu_ptr(ssp->sda, cpu);
-                               spin_lock_irqsave_rcu_node(sdp, flags);
-                               if (ULONG_CMP_GE(gpseq,
-                                                sdp->srcu_gp_seq_needed + 100))
-                                       sdp->srcu_gp_seq_needed = gpseq;
-                               if (ULONG_CMP_GE(gpseq,
-                                                sdp->srcu_gp_seq_needed_exp + 100))
-                                       sdp->srcu_gp_seq_needed_exp = gpseq;
-                               spin_unlock_irqrestore_rcu_node(sdp, flags);
-                       }
+       ss_state = smp_load_acquire(&ssp->srcu_size_state);
+       if (ss_state < SRCU_SIZE_WAIT_BARRIER) {
+               srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, 0), cbdelay);
+       } else {
+               idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
+               srcu_for_each_node_breadth_first(ssp, snp) {
+                       spin_lock_irq_rcu_node(snp);
+                       cbs = false;
+                       last_lvl = snp >= ssp->level[rcu_num_lvls - 1];
+                       if (last_lvl)
+                               cbs = ss_state < SRCU_SIZE_BIG || snp->srcu_have_cbs[idx] == gpseq;
+                       snp->srcu_have_cbs[idx] = gpseq;
+                       rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
+                       sgsne = snp->srcu_gp_seq_needed_exp;
+                       if (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, gpseq))
+                               WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq);
+                       if (ss_state < SRCU_SIZE_BIG)
+                               mask = ~0;
+                       else
+                               mask = snp->srcu_data_have_cbs[idx];
+                       snp->srcu_data_have_cbs[idx] = 0;
+                       spin_unlock_irq_rcu_node(snp);
+                       if (cbs)
+                               srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay);
+               }
        }
 
+       /* Occasionally prevent srcu_data counter wrap. */
+       if (!(gpseq & counter_wrap_check))
+               for_each_possible_cpu(cpu) {
+                       sdp = per_cpu_ptr(ssp->sda, cpu);
+                       spin_lock_irqsave_rcu_node(sdp, flags);
+                       if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100))
+                               sdp->srcu_gp_seq_needed = gpseq;
+                       if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100))
+                               sdp->srcu_gp_seq_needed_exp = gpseq;
+                       spin_unlock_irqrestore_rcu_node(sdp, flags);
+               }
+
        /* Callback initiation done, allow grace periods after next. */
        mutex_unlock(&ssp->srcu_cb_mutex);
 
@@ -583,6 +784,14 @@ static void srcu_gp_end(struct srcu_struct *ssp)
        } else {
                spin_unlock_irq_rcu_node(ssp);
        }
+
+       /* Transition to big if needed. */
+       if (ss_state != SRCU_SIZE_SMALL && ss_state != SRCU_SIZE_BIG) {
+               if (ss_state == SRCU_SIZE_ALLOC)
+                       init_srcu_struct_nodes(ssp, GFP_KERNEL);
+               else
+                       smp_store_release(&ssp->srcu_size_state, ss_state + 1);
+       }
 }
 
 /*
@@ -596,20 +805,24 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
                                  unsigned long s)
 {
        unsigned long flags;
+       unsigned long sgsne;
 
-       for (; snp != NULL; snp = snp->srcu_parent) {
-               if (rcu_seq_done(&ssp->srcu_gp_seq, s) ||
-                   ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
-                       return;
-               spin_lock_irqsave_rcu_node(snp, flags);
-               if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
+       if (snp)
+               for (; snp != NULL; snp = snp->srcu_parent) {
+                       sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp);
+                       if (rcu_seq_done(&ssp->srcu_gp_seq, s) ||
+                           (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)))
+                               return;
+                       spin_lock_irqsave_rcu_node(snp, flags);
+                       sgsne = snp->srcu_gp_seq_needed_exp;
+                       if (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)) {
+                               spin_unlock_irqrestore_rcu_node(snp, flags);
+                               return;
+                       }
+                       WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
                        spin_unlock_irqrestore_rcu_node(snp, flags);
-                       return;
                }
-               WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
-               spin_unlock_irqrestore_rcu_node(snp, flags);
-       }
-       spin_lock_irqsave_rcu_node(ssp, flags);
+       spin_lock_irqsave_ssp_contention(ssp, &flags);
        if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
                WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
        spin_unlock_irqrestore_rcu_node(ssp, flags);
@@ -630,39 +843,47 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
 {
        unsigned long flags;
        int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
-       struct srcu_node *snp = sdp->mynode;
+       unsigned long sgsne;
+       struct srcu_node *snp;
+       struct srcu_node *snp_leaf;
        unsigned long snp_seq;
 
-       /* Each pass through the loop does one level of the srcu_node tree. */
-       for (; snp != NULL; snp = snp->srcu_parent) {
-               if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != sdp->mynode)
-                       return; /* GP already done and CBs recorded. */
-               spin_lock_irqsave_rcu_node(snp, flags);
-               if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
+       /* Ensure that the snp node tree is fully initialized before traversing it. */
+       if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
+               snp_leaf = NULL;
+       else
+               snp_leaf = sdp->mynode;
+
+       if (snp_leaf)
+               /* Each pass through the loop does one level of the srcu_node tree. */
+               for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
+                       if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != snp_leaf)
+                               return; /* GP already done and CBs recorded. */
+                       spin_lock_irqsave_rcu_node(snp, flags);
                        snp_seq = snp->srcu_have_cbs[idx];
-                       if (snp == sdp->mynode && snp_seq == s)
-                               snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
-                       spin_unlock_irqrestore_rcu_node(snp, flags);
-                       if (snp == sdp->mynode && snp_seq != s) {
-                               srcu_schedule_cbs_sdp(sdp, do_norm
-                                                          ? SRCU_INTERVAL
-                                                          : 0);
+                       if (!srcu_invl_snp_seq(snp_seq) && ULONG_CMP_GE(snp_seq, s)) {
+                               if (snp == snp_leaf && snp_seq == s)
+                                       snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
+                               spin_unlock_irqrestore_rcu_node(snp, flags);
+                               if (snp == snp_leaf && snp_seq != s) {
+                                       srcu_schedule_cbs_sdp(sdp, do_norm ? SRCU_INTERVAL : 0);
+                                       return;
+                               }
+                               if (!do_norm)
+                                       srcu_funnel_exp_start(ssp, snp, s);
                                return;
                        }
-                       if (!do_norm)
-                               srcu_funnel_exp_start(ssp, snp, s);
-                       return;
+                       snp->srcu_have_cbs[idx] = s;
+                       if (snp == snp_leaf)
+                               snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
+                       sgsne = snp->srcu_gp_seq_needed_exp;
+                       if (!do_norm && (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, s)))
+                               WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
+                       spin_unlock_irqrestore_rcu_node(snp, flags);
                }
-               snp->srcu_have_cbs[idx] = s;
-               if (snp == sdp->mynode)
-                       snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
-               if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
-                       WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
-               spin_unlock_irqrestore_rcu_node(snp, flags);
-       }
 
        /* Top of tree, must ensure the grace period will be started. */
-       spin_lock_irqsave_rcu_node(ssp, flags);
+       spin_lock_irqsave_ssp_contention(ssp, &flags);
        if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) {
                /*
                 * Record need for grace period s.  Pair with load
@@ -678,9 +899,15 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
            rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) {
                WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
                srcu_gp_start(ssp);
+
+               // And how can that list_add() in the "else" clause
+               // possibly be safe for concurrent execution?  Well,
+               // it isn't.  And it does not have to be.  After all, it
+               // can only be executed during early boot when there is only
+               // the one boot CPU running with interrupts still disabled.
                if (likely(srcu_init_done))
                        queue_delayed_work(rcu_gp_wq, &ssp->work,
-                                          srcu_get_delay(ssp));
+                                          !!srcu_get_delay(ssp));
                else if (list_empty(&ssp->work.work.entry))
                        list_add(&ssp->work.work.entry, &srcu_boot_list);
        }
@@ -814,11 +1041,17 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
        bool needgp = false;
        unsigned long s;
        struct srcu_data *sdp;
+       struct srcu_node *sdp_mynode;
+       int ss_state;
 
        check_init_srcu_struct(ssp);
        idx = srcu_read_lock(ssp);
-       sdp = raw_cpu_ptr(ssp->sda);
-       spin_lock_irqsave_rcu_node(sdp, flags);
+       ss_state = smp_load_acquire(&ssp->srcu_size_state);
+       if (ss_state < SRCU_SIZE_WAIT_CALL)
+               sdp = per_cpu_ptr(ssp->sda, 0);
+       else
+               sdp = raw_cpu_ptr(ssp->sda);
+       spin_lock_irqsave_sdp_contention(sdp, &flags);
        if (rhp)
                rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
        rcu_segcblist_advance(&sdp->srcu_cblist,
@@ -834,10 +1067,17 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
                needexp = true;
        }
        spin_unlock_irqrestore_rcu_node(sdp, flags);
+
+       /* Ensure that the snp node tree is fully initialized before traversing it. */
+       if (ss_state < SRCU_SIZE_WAIT_BARRIER)
+               sdp_mynode = NULL;
+       else
+               sdp_mynode = sdp->mynode;
+
        if (needgp)
                srcu_funnel_gp_start(ssp, sdp, s, do_norm);
        else if (needexp)
-               srcu_funnel_exp_start(ssp, sdp->mynode, s);
+               srcu_funnel_exp_start(ssp, sdp_mynode, s);
        srcu_read_unlock(ssp, idx);
        return s;
 }
@@ -1097,6 +1337,28 @@ static void srcu_barrier_cb(struct rcu_head *rhp)
                complete(&ssp->srcu_barrier_completion);
 }
 
+/*
+ * Enqueue an srcu_barrier() callback on the specified srcu_data
+ * structure's ->cblist, but only if that ->cblist already has at least one
+ * callback enqueued.  Note that if a CPU already has callbacks enqueued,
+ * it must have already registered the need for a future grace period,
+ * so all we need do is enqueue a callback that will use the same grace
+ * period as the last callback already in the queue.
+ */
+static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp)
+{
+       spin_lock_irq_rcu_node(sdp);
+       atomic_inc(&ssp->srcu_barrier_cpu_cnt);
+       sdp->srcu_barrier_head.func = srcu_barrier_cb;
+       debug_rcu_head_queue(&sdp->srcu_barrier_head);
+       if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
+                                  &sdp->srcu_barrier_head)) {
+               debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
+               atomic_dec(&ssp->srcu_barrier_cpu_cnt);
+       }
+       spin_unlock_irq_rcu_node(sdp);
+}
+
 /**
  * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
  * @ssp: srcu_struct on which to wait for in-flight callbacks.
@@ -1104,7 +1366,7 @@ static void srcu_barrier_cb(struct rcu_head *rhp)
 void srcu_barrier(struct srcu_struct *ssp)
 {
        int cpu;
-       struct srcu_data *sdp;
+       int idx;
        unsigned long s = rcu_seq_snap(&ssp->srcu_barrier_seq);
 
        check_init_srcu_struct(ssp);
@@ -1120,27 +1382,13 @@ void srcu_barrier(struct srcu_struct *ssp)
        /* Initial count prevents reaching zero until all CBs are posted. */
        atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
 
-       /*
-        * Each pass through this loop enqueues a callback, but only
-        * on CPUs already having callbacks enqueued.  Note that if
-        * a CPU already has callbacks enqueue, it must have already
-        * registered the need for a future grace period, so all we
-        * need do is enqueue a callback that will use the same
-        * grace period as the last callback already in the queue.
-        */
-       for_each_possible_cpu(cpu) {
-               sdp = per_cpu_ptr(ssp->sda, cpu);
-               spin_lock_irq_rcu_node(sdp);
-               atomic_inc(&ssp->srcu_barrier_cpu_cnt);
-               sdp->srcu_barrier_head.func = srcu_barrier_cb;
-               debug_rcu_head_queue(&sdp->srcu_barrier_head);
-               if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
-                                          &sdp->srcu_barrier_head)) {
-                       debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
-                       atomic_dec(&ssp->srcu_barrier_cpu_cnt);
-               }
-               spin_unlock_irq_rcu_node(sdp);
-       }
+       idx = srcu_read_lock(ssp);
+       if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
+               srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
+       else
+               for_each_possible_cpu(cpu)
+                       srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
+       srcu_read_unlock(ssp, idx);
 
        /* Remove the initial count, at which point reaching zero can happen. */
        if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
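
The barrier machinery above leans on the classic "initial count of one"
pattern: ->srcu_barrier_cpu_cnt starts at 1, so it cannot reach zero
while callbacks are still being posted, and only the final decrement
after the posting loop can complete the barrier. A compact userspace
rendering of that pattern (C11 atomics and a fake callback, not the
SRCU code):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int barrier_cnt;
    static int completed;

    static void barrier_cb(void)
    {
            /* The decrement that hits zero completes the barrier. */
            if (atomic_fetch_sub(&barrier_cnt, 1) == 1)
                    completed = 1;
    }

    int main(void)
    {
            int posted = 3;

            /* Initial count prevents reaching zero during posting. */
            atomic_store(&barrier_cnt, 1);
            for (int i = 0; i < posted; i++)
                    atomic_fetch_add(&barrier_cnt, 1);

            for (int i = 0; i < posted; i++)
                    barrier_cb();   /* all CBs run, barrier still open */
            printf("after CBs: completed=%d\n", completed);         /* 0 */

            barrier_cb();           /* remove the initial count */
            printf("after final dec: completed=%d\n", completed);   /* 1 */
            return 0;
    }
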
@@ -1214,6 +1462,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
                srcu_flip(ssp);
                spin_lock_irq_rcu_node(ssp);
                rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2);
+               ssp->srcu_n_exp_nodelay = 0;
                spin_unlock_irq_rcu_node(ssp);
        }
 
@@ -1228,6 +1477,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
                        mutex_unlock(&ssp->srcu_gp_mutex);
                        return; /* readers present, retry later. */
                }
+               ssp->srcu_n_exp_nodelay = 0;
                srcu_gp_end(ssp);  /* Releases ->srcu_gp_mutex. */
        }
 }
@@ -1318,12 +1568,28 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
  */
 static void process_srcu(struct work_struct *work)
 {
+       unsigned long curdelay;
+       unsigned long j;
        struct srcu_struct *ssp;
 
        ssp = container_of(work, struct srcu_struct, work.work);
 
        srcu_advance_state(ssp);
-       srcu_reschedule(ssp, srcu_get_delay(ssp));
+       curdelay = srcu_get_delay(ssp);
+       if (curdelay) {
+               WRITE_ONCE(ssp->reschedule_count, 0);
+       } else {
+               j = jiffies;
+               if (READ_ONCE(ssp->reschedule_jiffies) == j) {
+                       WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1);
+                       if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY)
+                               curdelay = 1;
+               } else {
+                       WRITE_ONCE(ssp->reschedule_count, 1);
+                       WRITE_ONCE(ssp->reschedule_jiffies, j);
+               }
+       }
+       srcu_reschedule(ssp, curdelay);
 }
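
The new tail of process_srcu() is a self-defense rate limiter:
zero-delay requeues are counted per jiffy, and once more than
SRCU_MAX_NODELAY of them land within the same jiffy the delay is forced
to 1 so the workqueue cannot spin indefinitely. The counting scheme in
isolation (a toy tick counter standing in for jiffies):

    #include <stdio.h>

    #define SRCU_MAX_NODELAY 100

    static unsigned long reschedule_jiffies;
    static unsigned long reschedule_count;

    /* Keep returning 0 until too many zero-delay passes pile up
     * within a single tick, then force a delay of 1. */
    static unsigned long limit_nodelay(unsigned long wanted, unsigned long now)
    {
            if (wanted) {
                    reschedule_count = 0;
                    return wanted;
            }
            if (reschedule_jiffies == now) {
                    if (++reschedule_count > SRCU_MAX_NODELAY)
                            return 1;
            } else {
                    reschedule_count = 1;
                    reschedule_jiffies = now;
            }
            return 0;
    }

    int main(void)
    {
            unsigned long d = 0;

            for (int i = 0; i < 150 && !d; i++)
                    d = limit_nodelay(0, 5 /* same tick throughout */);
            printf("forced delay after burst: %lu\n", d);   /* 1 */
            return 0;
    }
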
 
 void srcutorture_get_gp_data(enum rcutorture_type test_type,
@@ -1337,43 +1603,69 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
 }
 EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
 
+static const char * const srcu_size_state_name[] = {
+       "SRCU_SIZE_SMALL",
+       "SRCU_SIZE_ALLOC",
+       "SRCU_SIZE_WAIT_BARRIER",
+       "SRCU_SIZE_WAIT_CALL",
+       "SRCU_SIZE_WAIT_CBS1",
+       "SRCU_SIZE_WAIT_CBS2",
+       "SRCU_SIZE_WAIT_CBS3",
+       "SRCU_SIZE_WAIT_CBS4",
+       "SRCU_SIZE_BIG",
+       "SRCU_SIZE_???",
+};
+
 void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
 {
        int cpu;
        int idx;
        unsigned long s0 = 0, s1 = 0;
+       int ss_state = READ_ONCE(ssp->srcu_size_state);
+       int ss_state_idx = ss_state;
 
        idx = ssp->srcu_idx & 0x1;
-       pr_alert("%s%s Tree SRCU g%ld per-CPU(idx=%d):",
-                tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), idx);
-       for_each_possible_cpu(cpu) {
-               unsigned long l0, l1;
-               unsigned long u0, u1;
-               long c0, c1;
-               struct srcu_data *sdp;
-
-               sdp = per_cpu_ptr(ssp->sda, cpu);
-               u0 = data_race(sdp->srcu_unlock_count[!idx]);
-               u1 = data_race(sdp->srcu_unlock_count[idx]);
-
-               /*
-                * Make sure that a lock is always counted if the corresponding
-                * unlock is counted.
-                */
-               smp_rmb();
-
-               l0 = data_race(sdp->srcu_lock_count[!idx]);
-               l1 = data_race(sdp->srcu_lock_count[idx]);
-
-               c0 = l0 - u0;
-               c1 = l1 - u1;
-               pr_cont(" %d(%ld,%ld %c)",
-                       cpu, c0, c1,
-                       "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]);
-               s0 += c0;
-               s1 += c1;
+       if (ss_state < 0 || ss_state >= ARRAY_SIZE(srcu_size_state_name))
+               ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1;
+       pr_alert("%s%s Tree SRCU g%ld state %d (%s)",
+                tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), ss_state,
+                srcu_size_state_name[ss_state_idx]);
+       if (!ssp->sda) {
+               // Called after cleanup_srcu_struct(), perhaps.
+               pr_cont(" No per-CPU srcu_data structures (->sda == NULL).\n");
+       } else {
+               pr_cont(" per-CPU(idx=%d):", idx);
+               for_each_possible_cpu(cpu) {
+                       unsigned long l0, l1;
+                       unsigned long u0, u1;
+                       long c0, c1;
+                       struct srcu_data *sdp;
+
+                       sdp = per_cpu_ptr(ssp->sda, cpu);
+                       u0 = data_race(sdp->srcu_unlock_count[!idx]);
+                       u1 = data_race(sdp->srcu_unlock_count[idx]);
+
+                       /*
+                        * Make sure that a lock is always counted if the corresponding
+                        * unlock is counted.
+                        */
+                       smp_rmb();
+
+                       l0 = data_race(sdp->srcu_lock_count[!idx]);
+                       l1 = data_race(sdp->srcu_lock_count[idx]);
+
+                       c0 = l0 - u0;
+                       c1 = l1 - u1;
+                       pr_cont(" %d(%ld,%ld %c)",
+                               cpu, c0, c1,
+                               "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]);
+                       s0 += c0;
+                       s1 += c1;
+               }
+               pr_cont(" T(%ld,%ld)\n", s0, s1);
        }
-       pr_cont(" T(%ld,%ld)\n", s0, s1);
+       if (SRCU_SIZING_IS_TORTURE())
+               srcu_transition_to_big(ssp);
 }
 EXPORT_SYMBOL_GPL(srcu_torture_stats_print);
 
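The statistics above lean on SRCU's split per-CPU counters: srcu_read_lock() increments srcu_lock_count[idx] and srcu_read_unlock() increments srcu_unlock_count[idx], so lock count minus unlock count approximates the readers still inside the critical section for that phase. A minimal sketch of the counting discipline (struct and function names hypothetical; the barrier placement mirrors the smp_rmb() above):

    #include <linux/compiler.h>
    #include <asm/barrier.h>

    struct reader_counts {                  /* hypothetical */
            unsigned long lock_count[2];
            unsigned long unlock_count[2];
    };

    /* Readers still inside the phase-@idx critical section: locks - unlocks. */
    static long readers_in_flight(struct reader_counts *rc, int idx)
    {
            unsigned long u = READ_ONCE(rc->unlock_count[idx]);

            /*
             * Read unlocks before locks so that every counted unlock has
             * its lock counted too; otherwise the difference could appear
             * spuriously negative.  Mirrors the smp_rmb() above.
             */
            smp_rmb();
            return (long)(READ_ONCE(rc->lock_count[idx]) - u);
    }
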
@@ -1390,6 +1682,17 @@ void __init srcu_init(void)
 {
        struct srcu_struct *ssp;
 
+       /* Decide on srcu_struct-size strategy. */
+       if (SRCU_SIZING_IS(SRCU_SIZING_AUTO)) {
+               if (nr_cpu_ids >= big_cpu_lim) {
+                       convert_to_big = SRCU_SIZING_INIT; // Don't bother waiting for contention.
+                       pr_info("%s: Setting srcu_struct sizes to big.\n", __func__);
+               } else {
+                       convert_to_big = SRCU_SIZING_NONE | SRCU_SIZING_CONTEND;
+                       pr_info("%s: Setting srcu_struct sizes based on contention.\n", __func__);
+               }
+       }
+
        /*
         * Once that is set, call_srcu() can follow the normal path and
         * queue delayed work. This must follow RCU workqueues creation
@@ -1400,6 +1703,8 @@ void __init srcu_init(void)
                ssp = list_first_entry(&srcu_boot_list, struct srcu_struct,
                                      work.work.entry);
                list_del_init(&ssp->work.work.entry);
+               if (SRCU_SIZING_IS(SRCU_SIZING_INIT) && ssp->srcu_size_state == SRCU_SIZE_SMALL)
+                       ssp->srcu_size_state = SRCU_SIZE_ALLOC;
                queue_work(rcu_gp_wq, &ssp->work.work);
        }
 }
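
For orientation, a sketch of how an auto/init/contend sizing decision like the one above can be encoded (constant and function names hypothetical, modeled on convert_to_big): an INIT strategy means "allocate the srcu_node tree at initialization", while NONE plus the CONTEND modifier means "stay small until contention is observed".

    /* Hypothetical mirror of the sizing-strategy encoding used above. */
    #define SIZING_NONE     0x0     /* Stay small. */
    #define SIZING_INIT     0x1     /* Grow big at initialization. */
    #define SIZING_AUTO     0x2     /* Decide at boot from CPU count. */
    #define SIZING_CONTEND  0x4     /* Modifier: grow only on contention. */

    static int pick_sizing(int requested, unsigned int ncpus, unsigned int big_lim)
    {
            if (requested != SIZING_AUTO)
                    return requested;
            /* Many CPUs: size big immediately rather than wait for contention. */
            return ncpus >= big_lim ? SIZING_INIT : (SIZING_NONE | SIZING_CONTEND);
    }
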
index 33d896d8590233e59fc8b73f626a45f20429e318..5cefc702158fefe6c0d8e1d1dba054501cd89183 100644 (file)
@@ -111,7 +111,7 @@ static void rcu_sync_func(struct rcu_head *rhp)
  * a slowpath during the update.  After this function returns, all
  * subsequent calls to rcu_sync_is_idle() will return false, which
  * tells readers to stay off their fastpaths.  A later call to
- * rcu_sync_exit() re-enables reader slowpaths.
+ * rcu_sync_exit() re-enables reader fastpaths.
  *
  * When called in isolation, rcu_sync_enter() must wait for a grace
  * period, however, closely spaced calls to rcu_sync_enter() can
index 99cf3a13954cfb17828fbbeeb884f11614a526a9..3925e32159b5a588c0afe195fe40be40c7e687d2 100644 (file)
@@ -46,7 +46,7 @@ struct rcu_tasks_percpu {
 
 /**
  * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
- * @cbs_wq: Wait queue allowing new callback to get kthread's attention.
+ * @cbs_wait: RCU wait allowing a new callback to get kthread's attention.
  * @cbs_gbl_lock: Lock protecting callback list.
  * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
  * @gp_func: This flavor's grace-period-wait function.
@@ -77,7 +77,7 @@ struct rcu_tasks_percpu {
  * @kname: This flavor's kthread name.
  */
 struct rcu_tasks {
-       struct wait_queue_head cbs_wq;
+       struct rcuwait cbs_wait;
        raw_spinlock_t cbs_gbl_lock;
        int gp_state;
        int gp_sleep;
@@ -113,11 +113,11 @@ static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp);
 #define DEFINE_RCU_TASKS(rt_name, gp, call, n)                                         \
 static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = {                        \
        .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## __percpu.cbs_pcpu_lock),            \
-       .rtp_irq_work = IRQ_WORK_INIT(call_rcu_tasks_iw_wakeup),                        \
+       .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup),                   \
 };                                                                                     \
 static struct rcu_tasks rt_name =                                                      \
 {                                                                                      \
-       .cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq),                        \
+       .cbs_wait = __RCUWAIT_INITIALIZER(rt_name.wait),                                \
        .cbs_gbl_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_gbl_lock),                 \
        .gp_func = gp,                                                                  \
        .call_func = call,                                                              \
@@ -143,6 +143,11 @@ module_param(rcu_task_ipi_delay, int, 0644);
 #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
 static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
 module_param(rcu_task_stall_timeout, int, 0644);
+#define RCU_TASK_STALL_INFO (HZ * 10)
+static int rcu_task_stall_info __read_mostly = RCU_TASK_STALL_INFO;
+module_param(rcu_task_stall_info, int, 0644);
+static int rcu_task_stall_info_mult __read_mostly = 3;
+module_param(rcu_task_stall_info_mult, int, 0444);
 
 static int rcu_task_enqueue_lim __read_mostly = -1;
 module_param(rcu_task_enqueue_lim, int, 0444);
@@ -261,14 +266,16 @@ static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp)
        struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct rcu_tasks_percpu, rtp_irq_work);
 
        rtp = rtpcp->rtpp;
-       wake_up(&rtp->cbs_wq);
+       rcuwait_wake_up(&rtp->cbs_wait);
 }
 
 // Enqueue a callback for the specified flavor of Tasks RCU.
 static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
                                   struct rcu_tasks *rtp)
 {
+       int chosen_cpu;
        unsigned long flags;
+       int ideal_cpu;
        unsigned long j;
        bool needadjust = false;
        bool needwake;
@@ -278,8 +285,9 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
        rhp->func = func;
        local_irq_save(flags);
        rcu_read_lock();
-       rtpcp = per_cpu_ptr(rtp->rtpcpu,
-                           smp_processor_id() >> READ_ONCE(rtp->percpu_enqueue_shift));
+       ideal_cpu = smp_processor_id() >> READ_ONCE(rtp->percpu_enqueue_shift);
+       chosen_cpu = cpumask_next(ideal_cpu - 1, cpu_possible_mask);
+       rtpcp = per_cpu_ptr(rtp->rtpcpu, chosen_cpu);
        if (!raw_spin_trylock_rcu_node(rtpcp)) { // irqs already disabled.
                raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
                j = jiffies;
@@ -460,7 +468,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
                }
        }
 
-       if (rcu_segcblist_empty(&rtpcp->cblist))
+       if (rcu_segcblist_empty(&rtpcp->cblist) || !cpu_possible(cpu))
                return;
        raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
        rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
@@ -509,7 +517,9 @@ static int __noreturn rcu_tasks_kthread(void *arg)
                set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
 
                /* If there were none, wait a bit and start over. */
-               wait_event_idle(rtp->cbs_wq, (needgpcb = rcu_tasks_need_gpcb(rtp)));
+               rcuwait_wait_event(&rtp->cbs_wait,
+                                  (needgpcb = rcu_tasks_need_gpcb(rtp)),
+                                  TASK_IDLE);
 
                if (needgpcb & 0x2) {
                        // Wait for one grace period.
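
rcuwait is a lighter-weight alternative to a wait-queue head when at most one task ever sleeps on the condition, which holds for the single grace-period kthread here, and rcuwait_wake_up() can be driven from the hard-irq work item set up above. A minimal usage sketch under that single-waiter assumption (all names hypothetical):

    #include <linux/kthread.h>
    #include <linux/rcuwait.h>
    #include <linux/sched.h>

    static struct rcuwait my_wait;          /* hypothetical */
    static bool my_cond;

    static int my_kthread_fn(void *unused)
    {
            for (;;) {
                    /* Sleep in TASK_IDLE until the condition becomes true. */
                    rcuwait_wait_event(&my_wait, READ_ONCE(my_cond), TASK_IDLE);
                    WRITE_ONCE(my_cond, false);
                    /* ... process whatever was queued ... */
            }
            return 0;
    }

    static int __init my_setup(void)
    {
            rcuwait_init(&my_wait);
            /* ... spawn my_kthread_fn via kthread_run() ... */
            return 0;
    }

    static void my_poke(void)       /* e.g. from the hard-irq work above */
    {
            WRITE_ONCE(my_cond, true);
            rcuwait_wake_up(&my_wait);
    }
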
@@ -548,8 +558,15 @@ static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
 static void __init rcu_tasks_bootup_oddness(void)
 {
 #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
+       int rtsimc;
+
        if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
                pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
+       rtsimc = clamp(rcu_task_stall_info_mult, 1, 10);
+       if (rtsimc != rcu_task_stall_info_mult) {
+               pr_info("\tTasks-RCU CPU stall info multiplier clamped to %d (rcu_task_stall_info_mult).\n", rtsimc);
+               rcu_task_stall_info_mult = rtsimc;
+       }
 #endif /* #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) */
 #ifdef CONFIG_TASKS_RCU
        pr_info("\tTrampoline variant of Tasks RCU enabled.\n");
@@ -568,7 +585,17 @@ static void __init rcu_tasks_bootup_oddness(void)
 /* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
 static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
 {
-       struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, 0); // for_each...
+       int cpu;
+       bool havecbs = false;
+
+       for_each_possible_cpu(cpu) {
+               struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
+
+               if (!data_race(rcu_segcblist_empty(&rtpcp->cblist))) {
+                       havecbs = true;
+                       break;
+               }
+       }
        pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
                rtp->kname,
                tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
@@ -576,7 +603,7 @@ static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
                data_race(rcu_seq_current(&rtp->tasks_gp_seq)),
                data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
                ".k"[!!data_race(rtp->kthread_ptr)],
-               ".C"[!data_race(rcu_segcblist_empty(&rtpcp->cblist))],
+               ".C"[havecbs],
                s);
 }
 #endif // #ifndef CONFIG_TINY_RCU
@@ -592,10 +619,15 @@ static void exit_tasks_rcu_finish_trace(struct task_struct *t);
 /* Wait for one RCU-tasks grace period. */
 static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
 {
-       struct task_struct *g, *t;
-       unsigned long lastreport;
-       LIST_HEAD(holdouts);
+       struct task_struct *g;
        int fract;
+       LIST_HEAD(holdouts);
+       unsigned long j;
+       unsigned long lastinfo;
+       unsigned long lastreport;
+       bool reported = false;
+       int rtsi;
+       struct task_struct *t;
 
        set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP);
        rtp->pregp_func();
@@ -621,30 +653,50 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
         * is empty, we are done.
         */
        lastreport = jiffies;
+       lastinfo = lastreport;
+       rtsi = READ_ONCE(rcu_task_stall_info);
 
        // Start off with initial wait and slowly back off to 1 HZ wait.
        fract = rtp->init_fract;
 
        while (!list_empty(&holdouts)) {
+               ktime_t exp;
                bool firstreport;
                bool needreport;
                int rtst;
 
-               /* Slowly back off waiting for holdouts */
+               // Slowly back off waiting for holdouts
                set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS);
-               schedule_timeout_idle(fract);
+               if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+                       schedule_timeout_idle(fract);
+               } else {
+                       exp = jiffies_to_nsecs(fract);
+                       __set_current_state(TASK_IDLE);
+                       schedule_hrtimeout_range(&exp, jiffies_to_nsecs(HZ / 2), HRTIMER_MODE_REL_HARD);
+               }
 
                if (fract < HZ)
                        fract++;
 
                rtst = READ_ONCE(rcu_task_stall_timeout);
                needreport = rtst > 0 && time_after(jiffies, lastreport + rtst);
-               if (needreport)
+               if (needreport) {
                        lastreport = jiffies;
+                       reported = true;
+               }
                firstreport = true;
                WARN_ON(signal_pending(current));
                set_tasks_gp_state(rtp, RTGS_SCAN_HOLDOUTS);
                rtp->holdouts_func(&holdouts, needreport, &firstreport);
+
+               // Print pre-stall informational messages if needed.
+               j = jiffies;
+               if (rtsi > 0 && !reported && time_after(j, lastinfo + rtsi)) {
+                       lastinfo = j;
+                       rtsi = rtsi * rcu_task_stall_info_mult;
+                       pr_info("%s: %s grace period %lu is %lu jiffies old.\n",
+                               __func__, rtp->kname, rtp->tasks_gp_seq, j - rtp->gp_start);
+               }
        }
 
        set_tasks_gp_state(rtp, RTGS_POST_GP);
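
The informational messages above are spaced geometrically: each time one prints, the interval is multiplied by rcu_task_stall_info_mult, so with the defaults (10-second initial interval, 3x multiplier) the second message lands 30 seconds after the first, the third 90 seconds after that, and so on, until a real stall warning fires and `reported` shuts them off. Condensed to a sketch (function names hypothetical):

    #include <linux/jiffies.h>
    #include <linux/printk.h>
    #include <linux/sched.h>

    /* Sketch: geometrically rate-limited "still waiting" messages. */
    static void wait_with_info(bool (*done)(void))
    {
            unsigned long start = jiffies, lastinfo = jiffies;
            unsigned long interval = 10 * HZ;  /* rcu_task_stall_info default */

            while (!done()) {
                    schedule_timeout_idle(HZ / 10);
                    if (time_after(jiffies, lastinfo + interval)) {
                            lastinfo = jiffies;
                            interval *= 3;  /* rcu_task_stall_info_mult default */
                            pr_info("still waiting, %lu jiffies elapsed\n",
                                    jiffies - start);
                    }
            }
    }
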
@@ -950,6 +1002,9 @@ static void rcu_tasks_be_rude(struct work_struct *work)
 // Wait for one rude RCU-tasks grace period.
 static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
 {
+       if (num_online_cpus() <= 1)
+               return; // Fastpath for only one CPU.
+
        rtp->n_ipis += cpumask_weight(cpu_online_mask);
        schedule_on_each_cpu(rcu_tasks_be_rude);
 }
index a4b8189455d5eca9694101d779346a269597994b..c25ba442044a6e9452a43f617a93abd4a8b6ab08 100644 (file)
@@ -1679,6 +1679,8 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
        rdp->gp_seq = rnp->gp_seq;  /* Remember new grace-period state. */
        if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
                WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed);
+       if (IS_ENABLED(CONFIG_PROVE_RCU) && READ_ONCE(rdp->gpwrap))
+               WRITE_ONCE(rdp->last_sched_clock, jiffies);
        WRITE_ONCE(rdp->gpwrap, false);
        rcu_gpnum_ovf(rnp, rdp);
        return ret;
@@ -1705,11 +1707,37 @@ static void note_gp_changes(struct rcu_data *rdp)
                rcu_gp_kthread_wake();
 }
 
+static atomic_t *rcu_gp_slow_suppress;
+
+/* Register a counter to suppress debugging grace-period delays. */
+void rcu_gp_slow_register(atomic_t *rgssp)
+{
+       WARN_ON_ONCE(rcu_gp_slow_suppress);
+
+       WRITE_ONCE(rcu_gp_slow_suppress, rgssp);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_slow_register);
+
+/* Unregister a counter, with NULL for not caring which. */
+void rcu_gp_slow_unregister(atomic_t *rgssp)
+{
+       WARN_ON_ONCE(rgssp && rgssp != rcu_gp_slow_suppress);
+
+       WRITE_ONCE(rcu_gp_slow_suppress, NULL);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_slow_unregister);
+
+static bool rcu_gp_slow_is_suppressed(void)
+{
+       atomic_t *rgssp = READ_ONCE(rcu_gp_slow_suppress);
+
+       return rgssp && atomic_read(rgssp);
+}
+
 static void rcu_gp_slow(int delay)
 {
-       if (delay > 0 &&
-           !(rcu_seq_ctr(rcu_state.gp_seq) %
-             (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
+       if (!rcu_gp_slow_is_suppressed() && delay > 0 &&
+           !(rcu_seq_ctr(rcu_state.gp_seq) % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
                schedule_timeout_idle(delay);
 }
 
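rcu_gp_slow_register()/rcu_gp_slow_unregister() let a single caller (rcutorture, in practice) hand RCU an atomic counter that vetoes the debugging grace-period delays while it is non-zero. A hedged usage sketch (counter and function names hypothetical):

    #include <linux/atomic.h>

    static atomic_t gp_slow_suppress_ctr;   /* hypothetical */

    static int __init my_test_init(void)
    {
            /* Hand RCU a counter it consults before injecting delays. */
            rcu_gp_slow_register(&gp_slow_suppress_ctr);
            return 0;
    }

    static void my_timing_sensitive_phase(void)
    {
            atomic_inc(&gp_slow_suppress_ctr);      /* delays suppressed */
            /* ... work that must not see artificial GP slowdowns ... */
            atomic_dec(&gp_slow_suppress_ctr);      /* delays allowed again */
    }

    static void my_test_exit(void)
    {
            rcu_gp_slow_unregister(&gp_slow_suppress_ctr);  /* or NULL */
    }
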
@@ -2096,14 +2124,29 @@ static noinline void rcu_gp_cleanup(void)
        /* Advance CBs to reduce false positives below. */
        offloaded = rcu_rdp_is_offloaded(rdp);
        if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {
+
+               // We get here if a grace period was needed (“needgp”)
+               // and the above call to rcu_accelerate_cbs() did not set
+               // the RCU_GP_FLAG_INIT bit in ->gp_flags (which records
+               // the need for another grace period).  The purpose
+               // of the “offloaded” check is to avoid invoking
+               // rcu_accelerate_cbs() on an offloaded CPU because we do not
+               // hold the ->nocb_lock needed to safely access an offloaded
+               // ->cblist.  We do not want to acquire that lock because
+               // it can be heavily contended during callback floods.
+
                WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
                WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
-               trace_rcu_grace_period(rcu_state.name,
-                                      rcu_state.gp_seq,
-                                      TPS("newreq"));
+               trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("newreq"));
        } else {
-               WRITE_ONCE(rcu_state.gp_flags,
-                          rcu_state.gp_flags & RCU_GP_FLAG_INIT);
+
+               // We get here either if there is no need for an
+               // additional grace period or if rcu_accelerate_cbs() has
+       // already set the RCU_GP_FLAG_INIT bit in ->gp_flags.
+               // So all we need to do is to clear all of the other
+               // ->gp_flags bits.
+
+               WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags & RCU_GP_FLAG_INIT);
        }
        raw_spin_unlock_irq_rcu_node(rnp);
 
@@ -2609,6 +2652,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
  */
 void rcu_sched_clock_irq(int user)
 {
+       unsigned long j;
+
+       if (IS_ENABLED(CONFIG_PROVE_RCU)) {
+               j = jiffies;
+               WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
+               __this_cpu_write(rcu_data.last_sched_clock, j);
+       }
        trace_rcu_utilization(TPS("Start scheduler-tick"));
        lockdep_assert_irqs_disabled();
        raw_cpu_inc(rcu_data.ticks_this_gp);
@@ -2624,6 +2674,8 @@ void rcu_sched_clock_irq(int user)
        rcu_flavor_sched_clock_irq(user);
        if (rcu_pending(user))
                invoke_rcu_core();
+       if (user)
+               rcu_tasks_classic_qs(current, false);
        lockdep_assert_irqs_disabled();
 
        trace_rcu_utilization(TPS("End scheduler-tick"));
@@ -3717,7 +3769,9 @@ static int rcu_blocking_is_gp(void)
 {
        int ret;
 
-       if (IS_ENABLED(CONFIG_PREEMPTION))
+       // Invoking preempt_model_*() too early gets a splat.
+       if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE ||
+           preempt_model_full() || preempt_model_rt())
                return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
        might_sleep();  /* Check for RCU read-side critical section. */
        preempt_disable();
@@ -4179,6 +4233,7 @@ rcu_boot_init_percpu_data(int cpu)
        rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
        rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
        rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
+       rdp->last_sched_clock = jiffies;
        rdp->cpu = cpu;
        rcu_boot_init_nocb_percpu_data(rdp);
 }
@@ -4471,6 +4526,51 @@ static int rcu_pm_notify(struct notifier_block *self,
        return NOTIFY_OK;
 }
 
+#ifdef CONFIG_RCU_EXP_KTHREAD
+struct kthread_worker *rcu_exp_gp_kworker;
+struct kthread_worker *rcu_exp_par_gp_kworker;
+
+static void __init rcu_start_exp_gp_kworkers(void)
+{
+       const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker";
+       const char *gp_kworker_name = "rcu_exp_gp_kthread_worker";
+       struct sched_param param = { .sched_priority = kthread_prio };
+
+       rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name);
+       if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
+               pr_err("Failed to create %s!\n", gp_kworker_name);
+               return;
+       }
+
+       rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name);
+       if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) {
+               pr_err("Failed to create %s!\n", par_gp_kworker_name);
+               kthread_destroy_worker(rcu_exp_gp_kworker);
+               return;
+       }
+
+       sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param);
+       sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO,
+                                  &param);
+}
+
+static inline void rcu_alloc_par_gp_wq(void)
+{
+}
+#else /* !CONFIG_RCU_EXP_KTHREAD */
+struct workqueue_struct *rcu_par_gp_wq;
+
+static void __init rcu_start_exp_gp_kworkers(void)
+{
+}
+
+static inline void rcu_alloc_par_gp_wq(void)
+{
+       rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
+       WARN_ON(!rcu_par_gp_wq);
+}
+#endif /* CONFIG_RCU_EXP_KTHREAD */
+
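Dedicated kthread workers give expedited grace periods a thread whose scheduling class can be set (SCHED_FIFO above), which shared workqueues do not allow; this matters on PREEMPT_RT. A minimal sketch of the create/queue/flush lifecycle (worker and work names hypothetical):

    #include <linux/err.h>
    #include <linux/errno.h>
    #include <linux/kthread.h>

    static struct kthread_worker *my_worker;        /* hypothetical */
    static struct kthread_work my_work;

    static void my_work_fn(struct kthread_work *work)
    {
            /* ... runs in my_worker's dedicated kthread ... */
    }

    static int __init my_setup(void)
    {
            my_worker = kthread_create_worker(0, "my_worker");
            if (IS_ERR_OR_NULL(my_worker))
                    return -ENOMEM;

            kthread_init_work(&my_work, my_work_fn);
            kthread_queue_work(my_worker, &my_work);
            kthread_flush_work(&my_work);   /* wait for it to finish */
            return 0;
    }
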
 /*
  * Spawn the kthreads that handle RCU's grace periods.
  */
@@ -4480,6 +4580,7 @@ static int __init rcu_spawn_gp_kthread(void)
        struct rcu_node *rnp;
        struct sched_param sp;
        struct task_struct *t;
+       struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
 
        rcu_scheduler_fully_active = 1;
        t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name);
@@ -4497,9 +4598,17 @@ static int __init rcu_spawn_gp_kthread(void)
        smp_store_release(&rcu_state.gp_kthread, t);  /* ^^^ */
        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        wake_up_process(t);
-       rcu_spawn_nocb_kthreads();
-       rcu_spawn_boost_kthreads();
+       /* This is a pre-SMP initcall; we expect a single CPU. */
+       WARN_ON(num_online_cpus() > 1);
+       /*
+        * These kthreads could not be created earlier, in rcu_init() ->
+        * rcutree_prepare_cpu(), because rcu_scheduler_fully_active was
+        * not yet set at that point.
+        */
+       rcu_spawn_cpu_nocb_kthread(smp_processor_id());
+       rcu_spawn_one_boost_kthread(rdp->mynode);
        rcu_spawn_core_kthreads();
+       /* Create kthread worker for expedited GPs */
+       rcu_start_exp_gp_kworkers();
        return 0;
 }
 early_initcall(rcu_spawn_gp_kthread);
@@ -4745,7 +4854,6 @@ static void __init rcu_dump_rcu_node_tree(void)
 }
 
 struct workqueue_struct *rcu_gp_wq;
-struct workqueue_struct *rcu_par_gp_wq;
 
 static void __init kfree_rcu_batch_init(void)
 {
@@ -4782,7 +4890,7 @@ static void __init kfree_rcu_batch_init(void)
 
 void __init rcu_init(void)
 {
-       int cpu;
+       int cpu = smp_processor_id();
 
        rcu_early_boot_tests();
 
@@ -4802,17 +4910,15 @@ void __init rcu_init(void)
         * or the scheduler are operational.
         */
        pm_notifier(rcu_pm_notify, 0);
-       for_each_online_cpu(cpu) {
-               rcutree_prepare_cpu(cpu);
-               rcu_cpu_starting(cpu);
-               rcutree_online_cpu(cpu);
-       }
+       WARN_ON(num_online_cpus() > 1); // Only one CPU this early in boot.
+       rcutree_prepare_cpu(cpu);
+       rcu_cpu_starting(cpu);
+       rcutree_online_cpu(cpu);
 
        /* Create workqueue for Tree SRCU and for expedited GPs. */
        rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
        WARN_ON(!rcu_gp_wq);
-       rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
-       WARN_ON(!rcu_par_gp_wq);
+       rcu_alloc_par_gp_wq();
 
        /* Fill in default value for rcutree.qovld boot parameter. */
        /* -After- the rcu_node ->lock fields are initialized! */
index 926673ebe355f123c1b0a76d6aaed4c211f9dcd6..2ccf5845957df4201a814de0540b8fbabc6e9412 100644 (file)
@@ -10,6 +10,7 @@
  */
 
 #include <linux/cache.h>
+#include <linux/kthread.h>
 #include <linux/spinlock.h>
 #include <linux/rtmutex.h>
 #include <linux/threads.h>
 /* Communicate arguments to a workqueue handler. */
 struct rcu_exp_work {
        unsigned long rew_s;
+#ifdef CONFIG_RCU_EXP_KTHREAD
+       struct kthread_work rew_work;
+#else
        struct work_struct rew_work;
+#endif /* CONFIG_RCU_EXP_KTHREAD */
 };
 
 /* RCU's kthread states for tracing. */
@@ -254,6 +259,7 @@ struct rcu_data {
        unsigned long rcu_onl_gp_seq;   /* ->gp_seq at last online. */
        short rcu_onl_gp_flags;         /* ->gp_flags at last online. */
        unsigned long last_fqs_resched; /* Time of last rcu_resched(). */
+       unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */
 
        int cpu;
 };
@@ -364,6 +370,7 @@ struct rcu_state {
        arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
                                                /* Synchronize offline with */
                                                /*  GP pre-initialization. */
+       int nocb_is_setup;                      /* nocb is setup from boot */
 };
 
 /* Values for rcu_state structure's gp_flags field. */
@@ -421,7 +428,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
 static bool rcu_is_callbacks_kthread(void);
 static void rcu_cpu_kthread_setup(unsigned int cpu);
 static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp);
-static void __init rcu_spawn_boost_kthreads(void);
 static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
 static bool rcu_preempt_need_deferred_qs(struct task_struct *t);
 static void rcu_preempt_deferred_qs(struct task_struct *t);
@@ -439,7 +445,6 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level);
 static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_cpu_nocb_kthread(int cpu);
-static void __init rcu_spawn_nocb_kthreads(void);
 static void show_rcu_nocb_state(struct rcu_data *rdp);
 static void rcu_nocb_lock(struct rcu_data *rdp);
 static void rcu_nocb_unlock(struct rcu_data *rdp);
index 60197ea24ceb9c73a17382b3c031be7fff582557..0f70f62039a909067eb7cc9a7884bb09a6e3e612 100644 (file)
@@ -334,15 +334,13 @@ fastpath:
  * Select the CPUs within the specified rcu_node that the upcoming
  * expedited grace period needs to wait for.
  */
-static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
+static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp)
 {
        int cpu;
        unsigned long flags;
        unsigned long mask_ofl_test;
        unsigned long mask_ofl_ipi;
        int ret;
-       struct rcu_exp_work *rewp =
-               container_of(wp, struct rcu_exp_work, rew_work);
        struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
 
        raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -417,13 +415,119 @@ retry_ipi:
                rcu_report_exp_cpu_mult(rnp, mask_ofl_test, false);
 }
 
+static void rcu_exp_sel_wait_wake(unsigned long s);
+
+#ifdef CONFIG_RCU_EXP_KTHREAD
+static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp)
+{
+       struct rcu_exp_work *rewp =
+               container_of(wp, struct rcu_exp_work, rew_work);
+
+       __sync_rcu_exp_select_node_cpus(rewp);
+}
+
+static inline bool rcu_gp_par_worker_started(void)
+{
+       return !!READ_ONCE(rcu_exp_par_gp_kworker);
+}
+
+static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
+{
+       kthread_init_work(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
+       /*
+        * Use rcu_exp_par_gp_kworker, because flushing a work item from
+        * another work item on the same kthread worker can result in
+        * deadlock.
+        */
+       kthread_queue_work(rcu_exp_par_gp_kworker, &rnp->rew.rew_work);
+}
+
+static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
+{
+       kthread_flush_work(&rnp->rew.rew_work);
+}
+
+/*
+ * Work-queue handler to drive an expedited grace period forward.
+ */
+static void wait_rcu_exp_gp(struct kthread_work *wp)
+{
+       struct rcu_exp_work *rewp;
+
+       rewp = container_of(wp, struct rcu_exp_work, rew_work);
+       rcu_exp_sel_wait_wake(rewp->rew_s);
+}
+
+static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew)
+{
+       kthread_init_work(&rew->rew_work, wait_rcu_exp_gp);
+       kthread_queue_work(rcu_exp_gp_kworker, &rew->rew_work);
+}
+
+static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
+{
+}
+#else /* !CONFIG_RCU_EXP_KTHREAD */
+static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
+{
+       struct rcu_exp_work *rewp =
+               container_of(wp, struct rcu_exp_work, rew_work);
+
+       __sync_rcu_exp_select_node_cpus(rewp);
+}
+
+static inline bool rcu_gp_par_worker_started(void)
+{
+       return !!READ_ONCE(rcu_par_gp_wq);
+}
+
+static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
+{
+       int cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1);
+
+       INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
+       /* If all offline, queue the work on an unbound CPU. */
+       if (unlikely(cpu > rnp->grphi - rnp->grplo))
+               cpu = WORK_CPU_UNBOUND;
+       else
+               cpu += rnp->grplo;
+       queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
+}
+
+static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
+{
+       flush_work(&rnp->rew.rew_work);
+}
+
+/*
+ * Work-queue handler to drive an expedited grace period forward.
+ */
+static void wait_rcu_exp_gp(struct work_struct *wp)
+{
+       struct rcu_exp_work *rewp;
+
+       rewp = container_of(wp, struct rcu_exp_work, rew_work);
+       rcu_exp_sel_wait_wake(rewp->rew_s);
+}
+
+static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew)
+{
+       INIT_WORK_ONSTACK(&rew->rew_work, wait_rcu_exp_gp);
+       queue_work(rcu_gp_wq, &rew->rew_work);
+}
+
+static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
+{
+       destroy_work_on_stack(&rew->rew_work);
+}
+#endif /* CONFIG_RCU_EXP_KTHREAD */
+
 /*
  * Select the nodes that the upcoming expedited grace period needs
  * to wait for.
  */
 static void sync_rcu_exp_select_cpus(void)
 {
-       int cpu;
        struct rcu_node *rnp;
 
        trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("reset"));
@@ -435,28 +539,21 @@ static void sync_rcu_exp_select_cpus(void)
                rnp->exp_need_flush = false;
                if (!READ_ONCE(rnp->expmask))
                        continue; /* Avoid early boot non-existent wq. */
-               if (!READ_ONCE(rcu_par_gp_wq) ||
+               if (!rcu_gp_par_worker_started() ||
                    rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
                    rcu_is_last_leaf_node(rnp)) {
-                       /* No workqueues yet or last leaf, do direct call. */
+                       /* No worker started yet or last leaf, do direct call. */
                        sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
                        continue;
                }
-               INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
-               cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1);
-               /* If all offline, queue the work on an unbound CPU. */
-               if (unlikely(cpu > rnp->grphi - rnp->grplo))
-                       cpu = WORK_CPU_UNBOUND;
-               else
-                       cpu += rnp->grplo;
-               queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
+               sync_rcu_exp_select_cpus_queue_work(rnp);
                rnp->exp_need_flush = true;
        }
 
-       /* Wait for workqueue jobs (if any) to complete. */
+       /* Wait for jobs (if any) to complete. */
        rcu_for_each_leaf_node(rnp)
                if (rnp->exp_need_flush)
-                       flush_work(&rnp->rew.rew_work);
+                       sync_rcu_exp_select_cpus_flush_work(rnp);
 }
 
 /*
@@ -496,7 +593,7 @@ static void synchronize_rcu_expedited_wait(void)
        struct rcu_node *rnp_root = rcu_get_root();
 
        trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
-       jiffies_stall = rcu_jiffies_till_stall_check();
+       jiffies_stall = rcu_exp_jiffies_till_stall_check();
        jiffies_start = jiffies;
        if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
                if (synchronize_rcu_expedited_wait_once(1))
@@ -571,7 +668,7 @@ static void synchronize_rcu_expedited_wait(void)
                                dump_cpu_task(cpu);
                        }
                }
-               jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
+               jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
        }
 }
 
@@ -622,17 +719,6 @@ static void rcu_exp_sel_wait_wake(unsigned long s)
        rcu_exp_wait_wake(s);
 }
 
-/*
- * Work-queue handler to drive an expedited grace period forward.
- */
-static void wait_rcu_exp_gp(struct work_struct *wp)
-{
-       struct rcu_exp_work *rewp;
-
-       rewp = container_of(wp, struct rcu_exp_work, rew_work);
-       rcu_exp_sel_wait_wake(rewp->rew_s);
-}
-
 #ifdef CONFIG_PREEMPT_RCU
 
 /*
@@ -848,20 +934,19 @@ void synchronize_rcu_expedited(void)
        } else {
                /* Marshall arguments & schedule the expedited grace period. */
                rew.rew_s = s;
-               INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
-               queue_work(rcu_gp_wq, &rew.rew_work);
+               synchronize_rcu_expedited_queue_work(&rew);
        }
 
        /* Wait for expedited grace period to complete. */
        rnp = rcu_get_root();
        wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
                   sync_exp_work_done(s));
-       smp_mb(); /* Workqueue actions happen before return. */
+       smp_mb(); /* Work actions happen before return. */
 
        /* Let the next expedited grace period start. */
        mutex_unlock(&rcu_state.exp_mutex);
 
        if (likely(!boottime))
-               destroy_work_on_stack(&rew.rew_work);
+               synchronize_rcu_expedited_destroy_work(&rew);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
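
The workqueue path retains the long-standing on-stack pattern seen above: INIT_WORK_ONSTACK() pairs with destroy_work_on_stack() so that CONFIG_DEBUG_OBJECTS_WORK knows the item lives on the caller's stack and vanishes after the wait. In sketch form (names hypothetical):

    #include <linux/completion.h>
    #include <linux/workqueue.h>

    struct my_req {                         /* hypothetical */
            struct work_struct work;
            struct completion done;
    };

    static void my_req_fn(struct work_struct *w)
    {
            struct my_req *req = container_of(w, struct my_req, work);

            /* ... do the work ... */
            complete(&req->done);
    }

    static void run_on_workqueue_and_wait(void)
    {
            struct my_req req;              /* lives on this stack frame */

            INIT_WORK_ONSTACK(&req.work, my_req_fn);
            init_completion(&req.done);
            schedule_work(&req.work);
            wait_for_completion(&req.done); /* must not return before this */
            destroy_work_on_stack(&req.work);
    }
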
index 636d0546a4e932e57096e225ddd9698d9cd9eab4..46694e13398a3ee44746723c44f49e40312906eb 100644 (file)
@@ -60,9 +60,6 @@ static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
  * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
  * If the list is invalid, a warning is emitted and all CPUs are offloaded.
  */
-
-static bool rcu_nocb_is_setup;
-
 static int __init rcu_nocb_setup(char *str)
 {
        alloc_bootmem_cpumask_var(&rcu_nocb_mask);
@@ -72,7 +69,7 @@ static int __init rcu_nocb_setup(char *str)
                        cpumask_setall(rcu_nocb_mask);
                }
        }
-       rcu_nocb_is_setup = true;
+       rcu_state.nocb_is_setup = true;
        return 1;
 }
 __setup("rcu_nocbs", rcu_nocb_setup);
@@ -215,14 +212,6 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
        init_swait_queue_head(&rnp->nocb_gp_wq[1]);
 }
 
-/* Is the specified CPU a no-CBs CPU? */
-bool rcu_is_nocb_cpu(int cpu)
-{
-       if (cpumask_available(rcu_nocb_mask))
-               return cpumask_test_cpu(cpu, rcu_nocb_mask);
-       return false;
-}
-
 static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
                           struct rcu_data *rdp,
                           bool force, unsigned long flags)
@@ -1180,10 +1169,10 @@ void __init rcu_init_nohz(void)
                                return;
                        }
                }
-               rcu_nocb_is_setup = true;
+               rcu_state.nocb_is_setup = true;
        }
 
-       if (!rcu_nocb_is_setup)
+       if (!rcu_state.nocb_is_setup)
                return;
 
 #if defined(CONFIG_NO_HZ_FULL)
@@ -1241,7 +1230,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
        struct task_struct *t;
        struct sched_param sp;
 
-       if (!rcu_scheduler_fully_active || !rcu_nocb_is_setup)
+       if (!rcu_scheduler_fully_active || !rcu_state.nocb_is_setup)
                return;
 
        /* If there already is an rcuo kthread, then nothing to do. */
@@ -1277,22 +1266,6 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
        WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
 }
 
-/*
- * Once the scheduler is running, spawn rcuo kthreads for all online
- * no-CBs CPUs.  This assumes that the early_initcall()s happen before
- * non-boot CPUs come online -- if this changes, we will need to add
- * some mutual exclusion.
- */
-static void __init rcu_spawn_nocb_kthreads(void)
-{
-       int cpu;
-
-       if (rcu_nocb_is_setup) {
-               for_each_online_cpu(cpu)
-                       rcu_spawn_cpu_nocb_kthread(cpu);
-       }
-}
-
 /* How many CB CPU IDs per GP kthread?  Default of -1 for sqrt(nr_cpu_ids). */
 static int rcu_nocb_gp_stride = -1;
 module_param(rcu_nocb_gp_stride, int, 0444);
@@ -1549,10 +1522,6 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
 {
 }
 
-static void __init rcu_spawn_nocb_kthreads(void)
-{
-}
-
 static void show_rcu_nocb_state(struct rcu_data *rdp)
 {
 }
index 8360d86db1c028b39ecb0fe40cf6462aefd12f41..c8ba0fe17267c1c0b68e438616c08a00877ea409 100644 (file)
@@ -486,6 +486,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
        t->rcu_read_unlock_special.s = 0;
        if (special.b.need_qs) {
                if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+                       rdp->cpu_no_qs.b.norm = false;
                        rcu_report_qs_rdp(rdp);
                        udelay(rcu_unlock_delay);
                } else {
@@ -660,7 +661,13 @@ static void rcu_read_unlock_special(struct task_struct *t)
                            expboost && !rdp->defer_qs_iw_pending && cpu_online(rdp->cpu)) {
                                // Get scheduler to re-evaluate and call hooks.
                                // If !IRQ_WORK, FQS scan will eventually IPI.
-                               init_irq_work(&rdp->defer_qs_iw, rcu_preempt_deferred_qs_handler);
+                               if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) &&
+                                   IS_ENABLED(CONFIG_PREEMPT_RT))
+                                       rdp->defer_qs_iw = IRQ_WORK_INIT_HARD(
+                                                               rcu_preempt_deferred_qs_handler);
+                               else
+                                       init_irq_work(&rdp->defer_qs_iw,
+                                                     rcu_preempt_deferred_qs_handler);
                                rdp->defer_qs_iw_pending = true;
                                irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
                        }
@@ -1124,7 +1131,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
        __releases(rnp->lock)
 {
        raw_lockdep_assert_held_rcu_node(rnp);
-       if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
+       if (!rnp->boost_kthread_task ||
+           (!rcu_preempt_blocked_readers_cgp(rnp) && !rnp->exp_tasks)) {
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                return;
        }
@@ -1226,18 +1234,6 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
        free_cpumask_var(cm);
 }
 
-/*
- * Spawn boost kthreads -- called as soon as the scheduler is running.
- */
-static void __init rcu_spawn_boost_kthreads(void)
-{
-       struct rcu_node *rnp;
-
-       rcu_for_each_leaf_node(rnp)
-               if (rcu_rnp_online_cpus(rnp))
-                       rcu_spawn_one_boost_kthread(rnp);
-}
-
 #else /* #ifdef CONFIG_RCU_BOOST */
 
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
@@ -1263,10 +1259,6 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 {
 }
 
-static void __init rcu_spawn_boost_kthreads(void)
-{
-}
-
 #endif /* #else #ifdef CONFIG_RCU_BOOST */
 
 /*
index 0c5d8516516af5780c266c87b4705b84739eeb16..a001e1e7a99269c9968059a00cff25ea496dbc99 100644 (file)
@@ -25,6 +25,34 @@ int sysctl_max_rcu_stall_to_panic __read_mostly;
 #define RCU_STALL_MIGHT_DIV            8
 #define RCU_STALL_MIGHT_MIN            (2 * HZ)
 
+int rcu_exp_jiffies_till_stall_check(void)
+{
+       int cpu_stall_timeout = READ_ONCE(rcu_exp_cpu_stall_timeout);
+       int exp_stall_delay_delta = 0;
+       int till_stall_check;
+
+       // Zero says to use rcu_cpu_stall_timeout, but in milliseconds.
+       if (!cpu_stall_timeout)
+               cpu_stall_timeout = jiffies_to_msecs(rcu_jiffies_till_stall_check());
+
+       // Limit check must be consistent with the Kconfig limits for
+       // CONFIG_RCU_EXP_CPU_STALL_TIMEOUT, so check the allowed range.
+       // The minimum clamped value is "2UL", because at least one full
+       // tick has to be guaranteed.
+       till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 21UL * HZ);
+
+       if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout)
+               WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check));
+
+#ifdef CONFIG_PROVE_RCU
+       /* Add extra ~25% out of till_stall_check. */
+       exp_stall_delay_delta = ((till_stall_check * 25) / 100) + 1;
+#endif
+
+       return till_stall_check + exp_stall_delay_delta;
+}
+EXPORT_SYMBOL_GPL(rcu_exp_jiffies_till_stall_check);
+
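Worked example, assuming HZ == 1000 and CONFIG_PROVE_RCU=y: a 20 ms rcu_exp_cpu_stall_timeout clamps to 20 jiffies, and the ~25% debug slack adds (20 * 25) / 100 + 1 = 6, yielding 26 jiffies; a zero value instead falls back to the normal-GP timeout expressed in milliseconds.

    #include <linux/jiffies.h>
    #include <linux/minmax.h>

    /* Worked example of the clamping above (HZ == 1000, PROVE_RCU=y). */
    static int example_exp_stall_jiffies(void)
    {
            int timeout_ms = 20;    /* hypothetical rcu_exp_cpu_stall_timeout */
            unsigned long jif = clamp(msecs_to_jiffies(timeout_ms), 2UL, 21UL * HZ);
            int slack = (jif * 25) / 100 + 1;       /* 25% PROVE_RCU margin: 6 */

            return jif + slack;     /* 20 + 6 == 26 jiffies */
    }
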
 /* Limit-check stall timeouts specified at boottime and runtime. */
 int rcu_jiffies_till_stall_check(void)
 {
@@ -565,9 +593,9 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
 
        for_each_possible_cpu(cpu)
                totqlen += rcu_get_n_cbs_cpu(cpu);
-       pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
+       pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu ncpus=%d)\n",
               smp_processor_id(), (long)(jiffies - gps),
-              (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
+              (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
        if (ndetected) {
                rcu_dump_cpu_stacks();
 
@@ -626,9 +654,9 @@ static void print_cpu_stall(unsigned long gps)
        raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
        for_each_possible_cpu(cpu)
                totqlen += rcu_get_n_cbs_cpu(cpu);
-       pr_cont("\t(t=%lu jiffies g=%ld q=%lu)\n",
+       pr_cont("\t(t=%lu jiffies g=%ld q=%lu ncpus=%d)\n",
                jiffies - gps,
-               (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
+               (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
 
        rcu_check_gp_kthread_expired_fqs_timer();
        rcu_check_gp_kthread_starvation();
index 180ff9c41fa87e228e5df9f435b7d4f0a1dfb3e6..fc7fef57560646d5a8e64e757434e4d4ad988840 100644 (file)
@@ -506,6 +506,8 @@ EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
 module_param(rcu_cpu_stall_suppress, int, 0644);
 int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
 module_param(rcu_cpu_stall_timeout, int, 0644);
+int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
+module_param(rcu_exp_cpu_stall_timeout, int, 0644);
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
 
 // Suppress boot-time RCU CPU stall warnings and rcutorture writer stall
index dcb0410950e45cd6eb0d436c185f1dd281631614..5d113aa59e7732ecb285ac746a5eee4fcb111932 100644 (file)
@@ -267,9 +267,10 @@ static void scf_handler(void *scfc_in)
        }
        this_cpu_inc(scf_invoked_count);
        if (longwait <= 0) {
-               if (!(r & 0xffc0))
+               if (!(r & 0xffc0)) {
                        udelay(r & 0x3f);
-               goto out;
+                       goto out;
+               }
        }
        if (r & 0xfff)
                goto out;
index d575b491492593d98a098e2e5486ddafed6cd86c..2a05096559a263e3559fd60e5f4cbc02181f1a34 100644 (file)
@@ -5752,6 +5752,8 @@ static inline struct task_struct *pick_task(struct rq *rq)
 
 extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
 
+static void queue_core_balance(struct rq *rq);
+
 static struct task_struct *
 pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
@@ -5801,7 +5803,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                }
 
                rq->core_pick = NULL;
-               return next;
+               goto out;
        }
 
        put_prev_task_balance(rq, prev, rf);
@@ -5851,7 +5853,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                         */
                        WARN_ON_ONCE(fi_before);
                        task_vruntime_update(rq, next, false);
-                       goto done;
+                       goto out_set_next;
                }
        }
 
@@ -5970,8 +5972,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                resched_curr(rq_i);
        }
 
-done:
+out_set_next:
        set_next_task(rq, next);
+out:
+       if (rq->core->core_forceidle_count && next == rq->idle)
+               queue_core_balance(rq);
+
        return next;
 }
 
@@ -6000,7 +6006,7 @@ static bool try_steal_cookie(int this, int that)
                if (p == src->core_pick || p == src->curr)
                        goto next;
 
-               if (!cpumask_test_cpu(this, &p->cpus_mask))
+               if (!is_cpu_allowed(p, this))
                        goto next;
 
                if (p->core_occupation > dst->idle->core_occupation)
@@ -6066,7 +6072,7 @@ static void sched_core_balance(struct rq *rq)
 
 static DEFINE_PER_CPU(struct callback_head, core_balance_head);
 
-void queue_core_balance(struct rq *rq)
+static void queue_core_balance(struct rq *rq)
 {
        if (!sched_core_enabled(rq))
                return;
@@ -6376,7 +6382,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
                migrate_disable_switch(rq, prev);
                psi_sched_switch(prev, next, !task_on_rq_queued(prev));
 
-               trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next);
+               trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
 
                /* Also unlocks the rq: */
                rq = context_switch(rq, prev, next, &rf);
@@ -8409,6 +8415,18 @@ static void __init preempt_dynamic_init(void)
        }
 }
 
+#define PREEMPT_MODEL_ACCESSOR(mode) \
+       bool preempt_model_##mode(void)                                          \
+       {                                                                        \
+               WARN_ON_ONCE(preempt_dynamic_mode == preempt_dynamic_undefined); \
+               return preempt_dynamic_mode == preempt_dynamic_##mode;           \
+       }                                                                        \
+       EXPORT_SYMBOL_GPL(preempt_model_##mode)
+
+PREEMPT_MODEL_ACCESSOR(none);
+PREEMPT_MODEL_ACCESSOR(voluntary);
+PREEMPT_MODEL_ACCESSOR(full);
+
 #else /* !CONFIG_PREEMPT_DYNAMIC */
 
 static inline void preempt_dynamic_init(void) { }
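
On CONFIG_PREEMPT_DYNAMIC kernels the macro above stamps out one accessor per preemption model; PREEMPT_MODEL_ACCESSOR(full), for instance, expands to roughly:

    bool preempt_model_full(void)
    {
            WARN_ON_ONCE(preempt_dynamic_mode == preempt_dynamic_undefined);
            return preempt_dynamic_mode == preempt_dynamic_full;
    }
    EXPORT_SYMBOL_GPL(preempt_model_full);
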
index d4bd299d67abfe84e56cf99693adc9a12a0ae27d..a68482d66535588d0b8dd9b0738482fda17f2fb6 100644 (file)
@@ -3829,11 +3829,11 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 
        se->avg.runnable_sum = se->avg.runnable_avg * divider;
 
-       se->avg.load_sum = divider;
-       if (se_weight(se)) {
-               se->avg.load_sum =
-                       div_u64(se->avg.load_avg * se->avg.load_sum, se_weight(se));
-       }
+       se->avg.load_sum = se->avg.load_avg * divider;
+       if (se_weight(se) < se->avg.load_sum)
+               se->avg.load_sum = div_u64(se->avg.load_sum, se_weight(se));
+       else
+               se->avg.load_sum = 1;
 
        enqueue_load_avg(cfs_rq, se);
        cfs_rq->avg.util_avg += se->avg.util_avg;
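
The replaced code seeded load_sum with the divider and scaled by load_avg/weight in one step; the new form scales first and divides only when the weight is small enough for the quotient to be at least 1, pinning load_sum to 1 otherwise so a non-zero load_avg is never paired with a zero load_sum. In isolation (a sketch, not the scheduler's exact types):

    #include <linux/math64.h>
    #include <linux/types.h>

    /* Sketch: derive load_sum from load_avg without collapsing to zero. */
    static u64 calc_load_sum(unsigned long load_avg, u32 divider,
                             unsigned long weight)
    {
            u64 load_sum = (u64)load_avg * divider;

            if (weight < load_sum)
                    return div_u64(load_sum, weight);
            return 1;       /* keep load_sum non-zero for non-zero load_avg */
    }
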
index 8f8b5020e76af237e4f15ee5db780c050dd96744..ecb0d705287753f080d347bcd3ae1961a5292f61 100644 (file)
@@ -434,7 +434,6 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir
 {
        update_idle_core(rq);
        schedstat_inc(rq->sched_goidle);
-       queue_core_balance(rq);
 }
 
 #ifdef CONFIG_SMP
index 58263f90c5598b7c658b007285dbce60726bc78a..8dccb34eb1908b07379284d9708c37274cca2cec 100644 (file)
@@ -1232,8 +1232,6 @@ static inline bool sched_group_cookie_match(struct rq *rq,
        return false;
 }
 
-extern void queue_core_balance(struct rq *rq);
-
 static inline bool sched_core_enqueued(struct task_struct *p)
 {
        return !RB_EMPTY_NODE(&p->core_node);
@@ -1267,10 +1265,6 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
        return &rq->__lock;
 }
 
-static inline void queue_core_balance(struct rq *rq)
-{
-}
-
 static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
 {
        return true;
index 01a7c1706a58b1df8724f8afc49b4a3a46919bcd..df9393aeae28701fd6eec775dad70f64da1f042e 100644 (file)
@@ -183,7 +183,9 @@ static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
 static DEFINE_PER_CPU(void *, cur_csd_info);
 static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local);
 
-#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
+static ulong csd_lock_timeout = 5000;  /* CSD lock timeout in milliseconds. */
+module_param(csd_lock_timeout, ulong, 0444);
+
 static atomic_t csd_bug_count = ATOMIC_INIT(0);
 static u64 cfd_seq;
 
@@ -329,6 +331,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
        u64 ts2, ts_delta;
        call_single_data_t *cpu_cur_csd;
        unsigned int flags = READ_ONCE(csd->node.u_flags);
+       unsigned long long csd_lock_timeout_ns = csd_lock_timeout * NSEC_PER_MSEC;
 
        if (!(flags & CSD_FLAG_LOCK)) {
                if (!unlikely(*bug_id))
@@ -341,7 +344,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
 
        ts2 = sched_clock();
        ts_delta = ts2 - *ts1;
-       if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
+       if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0))
                return false;
 
        firsttime = !*bug_id;
@@ -579,7 +582,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 
        /* There shouldn't be any pending callbacks on an offline CPU. */
        if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
-                    !warned && !llist_empty(head))) {
+                    !warned && entry != NULL)) {
                warned = true;
                WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
 
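Because the parameter is 0444 it is boot-time only, set as smp.csd_lock_timeout= on the kernel command line, and a value of 0 disables the diagnostic entirely, as the csd_lock_timeout_ns == 0 escape above shows. The conversion and comparison reduced to a sketch (function name hypothetical):

    #include <linux/time64.h>       /* NSEC_PER_MSEC */
    #include <linux/types.h>

    /*
     * Boot-line usage (values hypothetical):
     *   smp.csd_lock_timeout=10000   ->  complain after 10 seconds
     *   smp.csd_lock_timeout=0       ->  never complain
     */
    static bool csd_waited_too_long(u64 ns_waited, unsigned long timeout_ms)
    {
            u64 timeout_ns = (u64)timeout_ms * NSEC_PER_MSEC;

            return timeout_ns != 0 && ns_waited > timeout_ns;
    }
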
index f6bc0bc8a2aab322c0f6e89f1d53a4302878e438..b9f54544e7499bb068a809436dbc1e1913f1a3ce 100644 (file)
@@ -392,6 +392,13 @@ int cpu_check_up_prepare(int cpu)
                 */
                return -EAGAIN;
 
+       case CPU_UP_PREPARE:
+               /*
+                * Timeout while waiting for the CPU to show up. Allow to try
+                * again later.
+                */
+               return 0;
+
        default:
 
                /* Should not happen.  Famous last words. */
index f2b8baea35d2ef4b75342083dcb44254c738f454..e9c3e69f383792464c01b2aec9e63967829537f9 100644 (file)
@@ -1,549 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/init.h>
 #include <linux/static_call.h>
-#include <linux/bug.h>
-#include <linux/smp.h>
-#include <linux/sort.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <linux/processor.h>
-#include <asm/sections.h>
-
-extern struct static_call_site __start_static_call_sites[],
-                              __stop_static_call_sites[];
-extern struct static_call_tramp_key __start_static_call_tramp_key[],
-                                   __stop_static_call_tramp_key[];
-
-static bool static_call_initialized;
-
-/* mutex to protect key modules/sites */
-static DEFINE_MUTEX(static_call_mutex);
-
-static void static_call_lock(void)
-{
-       mutex_lock(&static_call_mutex);
-}
-
-static void static_call_unlock(void)
-{
-       mutex_unlock(&static_call_mutex);
-}
-
-static inline void *static_call_addr(struct static_call_site *site)
-{
-       return (void *)((long)site->addr + (long)&site->addr);
-}
-
-static inline unsigned long __static_call_key(const struct static_call_site *site)
-{
-       return (long)site->key + (long)&site->key;
-}
-
-static inline struct static_call_key *static_call_key(const struct static_call_site *site)
-{
-       return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS);
-}
-
-/* These assume the key is word-aligned. */
-static inline bool static_call_is_init(struct static_call_site *site)
-{
-       return __static_call_key(site) & STATIC_CALL_SITE_INIT;
-}
-
-static inline bool static_call_is_tail(struct static_call_site *site)
-{
-       return __static_call_key(site) & STATIC_CALL_SITE_TAIL;
-}
-
-static inline void static_call_set_init(struct static_call_site *site)
-{
-       site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) -
-                   (long)&site->key;
-}
-
-static int static_call_site_cmp(const void *_a, const void *_b)
-{
-       const struct static_call_site *a = _a;
-       const struct static_call_site *b = _b;
-       const struct static_call_key *key_a = static_call_key(a);
-       const struct static_call_key *key_b = static_call_key(b);
-
-       if (key_a < key_b)
-               return -1;
-
-       if (key_a > key_b)
-               return 1;
-
-       return 0;
-}
-
-static void static_call_site_swap(void *_a, void *_b, int size)
-{
-       long delta = (unsigned long)_a - (unsigned long)_b;
-       struct static_call_site *a = _a;
-       struct static_call_site *b = _b;
-       struct static_call_site tmp = *a;
-
-       a->addr = b->addr  - delta;
-       a->key  = b->key   - delta;
-
-       b->addr = tmp.addr + delta;
-       b->key  = tmp.key  + delta;
-}
-
-static inline void static_call_sort_entries(struct static_call_site *start,
-                                           struct static_call_site *stop)
-{
-       sort(start, stop - start, sizeof(struct static_call_site),
-            static_call_site_cmp, static_call_site_swap);
-}
-
-static inline bool static_call_key_has_mods(struct static_call_key *key)
-{
-       return !(key->type & 1);
-}
-
-static inline struct static_call_mod *static_call_key_next(struct static_call_key *key)
-{
-       if (!static_call_key_has_mods(key))
-               return NULL;
-
-       return key->mods;
-}
-
-static inline struct static_call_site *static_call_key_sites(struct static_call_key *key)
-{
-       if (static_call_key_has_mods(key))
-               return NULL;
-
-       return (struct static_call_site *)(key->type & ~1);
-}
-
-void __static_call_update(struct static_call_key *key, void *tramp, void *func)
-{
-       struct static_call_site *site, *stop;
-       struct static_call_mod *site_mod, first;
-
-       cpus_read_lock();
-       static_call_lock();
-
-       if (key->func == func)
-               goto done;
-
-       key->func = func;
-
-       arch_static_call_transform(NULL, tramp, func, false);
-
-       /*
-        * If uninitialized, we'll not update the callsites, but they still
-        * point to the trampoline and we just patched that.
-        */
-       if (WARN_ON_ONCE(!static_call_initialized))
-               goto done;
-
-       first = (struct static_call_mod){
-               .next = static_call_key_next(key),
-               .mod = NULL,
-               .sites = static_call_key_sites(key),
-       };
-
-       for (site_mod = &first; site_mod; site_mod = site_mod->next) {
-               bool init = system_state < SYSTEM_RUNNING;
-               struct module *mod = site_mod->mod;
-
-               if (!site_mod->sites) {
-                       /*
-                        * This can happen if the static call key is defined in
-                        * a module which doesn't use it.
-                        *
-                        * It also happens in the has_mods case, where the
-                        * 'first' entry has no sites associated with it.
-                        */
-                       continue;
-               }
-
-               stop = __stop_static_call_sites;
-
-               if (mod) {
-#ifdef CONFIG_MODULES
-                       stop = mod->static_call_sites +
-                              mod->num_static_call_sites;
-                       init = mod->state == MODULE_STATE_COMING;
-#endif
-               }
-
-               for (site = site_mod->sites;
-                    site < stop && static_call_key(site) == key; site++) {
-                       void *site_addr = static_call_addr(site);
-
-                       if (!init && static_call_is_init(site))
-                               continue;
-
-                       if (!kernel_text_address((unsigned long)site_addr)) {
-                               /*
-                                * This skips patching built-in __exit, which
-                                * is part of init_section_contains() but is
-                                * not part of kernel_text_address().
-                                *
-                                * Skipping built-in __exit is fine since it
-                                * will never be executed.
-                                */
-                               WARN_ONCE(!static_call_is_init(site),
-                                         "can't patch static call site at %pS",
-                                         site_addr);
-                               continue;
-                       }
-
-                       arch_static_call_transform(site_addr, NULL, func,
-                                                  static_call_is_tail(site));
-               }
-       }
-
-done:
-       static_call_unlock();
-       cpus_read_unlock();
-}
-EXPORT_SYMBOL_GPL(__static_call_update);
-
-static int __static_call_init(struct module *mod,
-                             struct static_call_site *start,
-                             struct static_call_site *stop)
-{
-       struct static_call_site *site;
-       struct static_call_key *key, *prev_key = NULL;
-       struct static_call_mod *site_mod;
-
-       if (start == stop)
-               return 0;
-
-       static_call_sort_entries(start, stop);
-
-       for (site = start; site < stop; site++) {
-               void *site_addr = static_call_addr(site);
-
-               if ((mod && within_module_init((unsigned long)site_addr, mod)) ||
-                   (!mod && init_section_contains(site_addr, 1)))
-                       static_call_set_init(site);
-
-               key = static_call_key(site);
-               if (key != prev_key) {
-                       prev_key = key;
-
-                       /*
-                        * For vmlinux (!mod) avoid the allocation by storing
-                        * the sites pointer in the key itself. Also see
-                        * __static_call_update()'s @first.
-                        *
-                        * This allows architectures (e.g. x86) to call
-                        * static_call_init() before memory allocation works.
-                        */
-                       if (!mod) {
-                               key->sites = site;
-                               key->type |= 1;
-                               goto do_transform;
-                       }
-
-                       site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
-                       if (!site_mod)
-                               return -ENOMEM;
-
-                       /*
-                        * When the key has a direct sites pointer, extract
-                        * that into an explicit struct static_call_mod, so we
-                        * can have a list of modules.
-                        */
-                       if (static_call_key_sites(key)) {
-                               site_mod->mod = NULL;
-                               site_mod->next = NULL;
-                               site_mod->sites = static_call_key_sites(key);
-
-                               key->mods = site_mod;
-
-                               site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
-                               if (!site_mod)
-                                       return -ENOMEM;
-                       }
-
-                       site_mod->mod = mod;
-                       site_mod->sites = site;
-                       site_mod->next = static_call_key_next(key);
-                       key->mods = site_mod;
-               }
-
-do_transform:
-               arch_static_call_transform(site_addr, NULL, key->func,
-                               static_call_is_tail(site));
-       }
-
-       return 0;
-}
-
-static int addr_conflict(struct static_call_site *site, void *start, void *end)
-{
-       unsigned long addr = (unsigned long)static_call_addr(site);
-
-       if (addr <= (unsigned long)end &&
-           addr + CALL_INSN_SIZE > (unsigned long)start)
-               return 1;
-
-       return 0;
-}
-
-static int __static_call_text_reserved(struct static_call_site *iter_start,
-                                      struct static_call_site *iter_stop,
-                                      void *start, void *end, bool init)
-{
-       struct static_call_site *iter = iter_start;
-
-       while (iter < iter_stop) {
-               if (init || !static_call_is_init(iter)) {
-                       if (addr_conflict(iter, start, end))
-                               return 1;
-               }
-               iter++;
-       }
-
-       return 0;
-}
-
-#ifdef CONFIG_MODULES
-
-static int __static_call_mod_text_reserved(void *start, void *end)
-{
-       struct module *mod;
-       int ret;
-
-       preempt_disable();
-       mod = __module_text_address((unsigned long)start);
-       WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod);
-       if (!try_module_get(mod))
-               mod = NULL;
-       preempt_enable();
-
-       if (!mod)
-               return 0;
-
-       ret = __static_call_text_reserved(mod->static_call_sites,
-                       mod->static_call_sites + mod->num_static_call_sites,
-                       start, end, mod->state == MODULE_STATE_COMING);
-
-       module_put(mod);
-
-       return ret;
-}
-
-static unsigned long tramp_key_lookup(unsigned long addr)
-{
-       struct static_call_tramp_key *start = __start_static_call_tramp_key;
-       struct static_call_tramp_key *stop = __stop_static_call_tramp_key;
-       struct static_call_tramp_key *tramp_key;
-
-       for (tramp_key = start; tramp_key != stop; tramp_key++) {
-               unsigned long tramp;
-
-               tramp = (long)tramp_key->tramp + (long)&tramp_key->tramp;
-               if (tramp == addr)
-                       return (long)tramp_key->key + (long)&tramp_key->key;
-       }
-
-       return 0;
-}
-
-static int static_call_add_module(struct module *mod)
-{
-       struct static_call_site *start = mod->static_call_sites;
-       struct static_call_site *stop = start + mod->num_static_call_sites;
-       struct static_call_site *site;
-
-       for (site = start; site != stop; site++) {
-               unsigned long s_key = __static_call_key(site);
-               unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS;
-               unsigned long key;
-
-               /*
-                * If the key is exported, 'addr' points to the key, which
-                * means modules are allowed to call static_call_update() on
-                * it.
-                *
-                * Otherwise, the key isn't exported, and 'addr' points to the
-                * trampoline, so we need to look up the key.
-                *
-                * We go through this dance to prevent crazy modules from
-                * abusing sensitive static calls.
-                */
-               if (!kernel_text_address(addr))
-                       continue;
-
-               key = tramp_key_lookup(addr);
-               if (!key) {
-                       pr_warn("Failed to fixup __raw_static_call() usage at: %ps\n",
-                               static_call_addr(site));
-                       return -EINVAL;
-               }
-
-               key |= s_key & STATIC_CALL_SITE_FLAGS;
-               site->key = key - (long)&site->key;
-       }
-
-       return __static_call_init(mod, start, stop);
-}
-
-static void static_call_del_module(struct module *mod)
-{
-       struct static_call_site *start = mod->static_call_sites;
-       struct static_call_site *stop = mod->static_call_sites +
-                                       mod->num_static_call_sites;
-       struct static_call_key *key, *prev_key = NULL;
-       struct static_call_mod *site_mod, **prev;
-       struct static_call_site *site;
-
-       for (site = start; site < stop; site++) {
-               key = static_call_key(site);
-               if (key == prev_key)
-                       continue;
-
-               prev_key = key;
-
-               for (prev = &key->mods, site_mod = key->mods;
-                    site_mod && site_mod->mod != mod;
-                    prev = &site_mod->next, site_mod = site_mod->next)
-                       ;
-
-               if (!site_mod)
-                       continue;
-
-               *prev = site_mod->next;
-               kfree(site_mod);
-       }
-}
-
-static int static_call_module_notify(struct notifier_block *nb,
-                                    unsigned long val, void *data)
-{
-       struct module *mod = data;
-       int ret = 0;
-
-       cpus_read_lock();
-       static_call_lock();
-
-       switch (val) {
-       case MODULE_STATE_COMING:
-               ret = static_call_add_module(mod);
-               if (ret) {
-                       WARN(1, "Failed to allocate memory for static calls");
-                       static_call_del_module(mod);
-               }
-               break;
-       case MODULE_STATE_GOING:
-               static_call_del_module(mod);
-               break;
-       }
-
-       static_call_unlock();
-       cpus_read_unlock();
-
-       return notifier_from_errno(ret);
-}
-
-static struct notifier_block static_call_module_nb = {
-       .notifier_call = static_call_module_notify,
-};
-
-#else
-
-static inline int __static_call_mod_text_reserved(void *start, void *end)
-{
-       return 0;
-}
-
-#endif /* CONFIG_MODULES */
-
-int static_call_text_reserved(void *start, void *end)
-{
-       bool init = system_state < SYSTEM_RUNNING;
-       int ret = __static_call_text_reserved(__start_static_call_sites,
-                       __stop_static_call_sites, start, end, init);
-
-       if (ret)
-               return ret;
-
-       return __static_call_mod_text_reserved(start, end);
-}
-
-int __init static_call_init(void)
-{
-       int ret;
-
-       if (static_call_initialized)
-               return 0;
-
-       cpus_read_lock();
-       static_call_lock();
-       ret = __static_call_init(NULL, __start_static_call_sites,
-                                __stop_static_call_sites);
-       static_call_unlock();
-       cpus_read_unlock();
-
-       if (ret) {
-               pr_err("Failed to allocate memory for static_call!\n");
-               BUG();
-       }
-
-       static_call_initialized = true;
-
-#ifdef CONFIG_MODULES
-       register_module_notifier(&static_call_module_nb);
-#endif
-       return 0;
-}
-early_initcall(static_call_init);
 
 long __static_call_return0(void)
 {
        return 0;
 }
 EXPORT_SYMBOL_GPL(__static_call_return0);
-
-#ifdef CONFIG_STATIC_CALL_SELFTEST
-
-static int func_a(int x)
-{
-       return x + 1;
-}
-
-static int func_b(int x)
-{
-       return x + 2;
-}
-
-DEFINE_STATIC_CALL(sc_selftest, func_a);
-
-static struct static_call_data {
-       int (*func)(int);
-       int val;
-       int expect;
-} static_call_data[] __initdata = {
-       { NULL,   2, 3 },
-       { func_b, 2, 4 },
-       { func_a, 2, 3 }
-};
-
-static int __init test_static_call_init(void)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(static_call_data); i++) {
-               struct static_call_data *scd = &static_call_data[i];
-
-               if (scd->func)
-                       static_call_update(sc_selftest, scd->func);
-
-               WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect);
-       }
-
-       return 0;
-}
-early_initcall(test_static_call_init);
-
-#endif /* CONFIG_STATIC_CALL_SELFTEST */
diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c
new file mode 100644 (file)
index 0000000..dc5665b
--- /dev/null
@@ -0,0 +1,543 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/static_call.h>
+#include <linux/bug.h>
+#include <linux/smp.h>
+#include <linux/sort.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/processor.h>
+#include <asm/sections.h>
+
+extern struct static_call_site __start_static_call_sites[],
+                              __stop_static_call_sites[];
+extern struct static_call_tramp_key __start_static_call_tramp_key[],
+                                   __stop_static_call_tramp_key[];
+
+static bool static_call_initialized;
+
+/* mutex to protect key modules/sites */
+static DEFINE_MUTEX(static_call_mutex);
+
+static void static_call_lock(void)
+{
+       mutex_lock(&static_call_mutex);
+}
+
+static void static_call_unlock(void)
+{
+       mutex_unlock(&static_call_mutex);
+}
+
+static inline void *static_call_addr(struct static_call_site *site)
+{
+       return (void *)((long)site->addr + (long)&site->addr);
+}
+
+static inline unsigned long __static_call_key(const struct static_call_site *site)
+{
+       return (long)site->key + (long)&site->key;
+}
+
+static inline struct static_call_key *static_call_key(const struct static_call_site *site)
+{
+       return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS);
+}
+
+/* These assume the key is word-aligned. */
+static inline bool static_call_is_init(struct static_call_site *site)
+{
+       return __static_call_key(site) & STATIC_CALL_SITE_INIT;
+}
+
+static inline bool static_call_is_tail(struct static_call_site *site)
+{
+       return __static_call_key(site) & STATIC_CALL_SITE_TAIL;
+}
+
+static inline void static_call_set_init(struct static_call_site *site)
+{
+       site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) -
+                   (long)&site->key;
+}
+
+static int static_call_site_cmp(const void *_a, const void *_b)
+{
+       const struct static_call_site *a = _a;
+       const struct static_call_site *b = _b;
+       const struct static_call_key *key_a = static_call_key(a);
+       const struct static_call_key *key_b = static_call_key(b);
+
+       if (key_a < key_b)
+               return -1;
+
+       if (key_a > key_b)
+               return 1;
+
+       return 0;
+}
+
+static void static_call_site_swap(void *_a, void *_b, int size)
+{
+       long delta = (unsigned long)_a - (unsigned long)_b;
+       struct static_call_site *a = _a;
+       struct static_call_site *b = _b;
+       struct static_call_site tmp = *a;
+
+       a->addr = b->addr  - delta;
+       a->key  = b->key   - delta;
+
+       b->addr = tmp.addr + delta;
+       b->key  = tmp.key  + delta;
+}
+
+static inline void static_call_sort_entries(struct static_call_site *start,
+                                           struct static_call_site *stop)
+{
+       sort(start, stop - start, sizeof(struct static_call_site),
+            static_call_site_cmp, static_call_site_swap);
+}
+
+static inline bool static_call_key_has_mods(struct static_call_key *key)
+{
+       return !(key->type & 1);
+}
+
+static inline struct static_call_mod *static_call_key_next(struct static_call_key *key)
+{
+       if (!static_call_key_has_mods(key))
+               return NULL;
+
+       return key->mods;
+}
+
+static inline struct static_call_site *static_call_key_sites(struct static_call_key *key)
+{
+       if (static_call_key_has_mods(key))
+               return NULL;
+
+       return (struct static_call_site *)(key->type & ~1);
+}
+
+void __static_call_update(struct static_call_key *key, void *tramp, void *func)
+{
+       struct static_call_site *site, *stop;
+       struct static_call_mod *site_mod, first;
+
+       cpus_read_lock();
+       static_call_lock();
+
+       if (key->func == func)
+               goto done;
+
+       key->func = func;
+
+       arch_static_call_transform(NULL, tramp, func, false);
+
+       /*
+        * If uninitialized, we'll not update the callsites, but they still
+        * point to the trampoline and we just patched that.
+        */
+       if (WARN_ON_ONCE(!static_call_initialized))
+               goto done;
+
+       first = (struct static_call_mod){
+               .next = static_call_key_next(key),
+               .mod = NULL,
+               .sites = static_call_key_sites(key),
+       };
+
+       for (site_mod = &first; site_mod; site_mod = site_mod->next) {
+               bool init = system_state < SYSTEM_RUNNING;
+               struct module *mod = site_mod->mod;
+
+               if (!site_mod->sites) {
+                       /*
+                        * This can happen if the static call key is defined in
+                        * a module which doesn't use it.
+                        *
+                        * It also happens in the has_mods case, where the
+                        * 'first' entry has no sites associated with it.
+                        */
+                       continue;
+               }
+
+               stop = __stop_static_call_sites;
+
+               if (mod) {
+#ifdef CONFIG_MODULES
+                       stop = mod->static_call_sites +
+                              mod->num_static_call_sites;
+                       init = mod->state == MODULE_STATE_COMING;
+#endif
+               }
+
+               for (site = site_mod->sites;
+                    site < stop && static_call_key(site) == key; site++) {
+                       void *site_addr = static_call_addr(site);
+
+                       if (!init && static_call_is_init(site))
+                               continue;
+
+                       if (!kernel_text_address((unsigned long)site_addr)) {
+                               /*
+                                * This skips patching built-in __exit, which
+                                * is part of init_section_contains() but is
+                                * not part of kernel_text_address().
+                                *
+                                * Skipping built-in __exit is fine since it
+                                * will never be executed.
+                                */
+                               WARN_ONCE(!static_call_is_init(site),
+                                         "can't patch static call site at %pS",
+                                         site_addr);
+                               continue;
+                       }
+
+                       arch_static_call_transform(site_addr, NULL, func,
+                                                  static_call_is_tail(site));
+               }
+       }
+
+done:
+       static_call_unlock();
+       cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(__static_call_update);
+
+static int __static_call_init(struct module *mod,
+                             struct static_call_site *start,
+                             struct static_call_site *stop)
+{
+       struct static_call_site *site;
+       struct static_call_key *key, *prev_key = NULL;
+       struct static_call_mod *site_mod;
+
+       if (start == stop)
+               return 0;
+
+       static_call_sort_entries(start, stop);
+
+       for (site = start; site < stop; site++) {
+               void *site_addr = static_call_addr(site);
+
+               if ((mod && within_module_init((unsigned long)site_addr, mod)) ||
+                   (!mod && init_section_contains(site_addr, 1)))
+                       static_call_set_init(site);
+
+               key = static_call_key(site);
+               if (key != prev_key) {
+                       prev_key = key;
+
+                       /*
+                        * For vmlinux (!mod) avoid the allocation by storing
+                        * the sites pointer in the key itself. Also see
+                        * __static_call_update()'s @first.
+                        *
+                        * This allows architectures (e.g. x86) to call
+                        * static_call_init() before memory allocation works.
+                        */
+                       if (!mod) {
+                               key->sites = site;
+                               key->type |= 1;
+                               goto do_transform;
+                       }
+
+                       site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
+                       if (!site_mod)
+                               return -ENOMEM;
+
+                       /*
+                        * When the key has a direct sites pointer, extract
+                        * that into an explicit struct static_call_mod, so we
+                        * can have a list of modules.
+                        */
+                       if (static_call_key_sites(key)) {
+                               site_mod->mod = NULL;
+                               site_mod->next = NULL;
+                               site_mod->sites = static_call_key_sites(key);
+
+                               key->mods = site_mod;
+
+                               site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
+                               if (!site_mod)
+                                       return -ENOMEM;
+                       }
+
+                       site_mod->mod = mod;
+                       site_mod->sites = site;
+                       site_mod->next = static_call_key_next(key);
+                       key->mods = site_mod;
+               }
+
+do_transform:
+               arch_static_call_transform(site_addr, NULL, key->func,
+                               static_call_is_tail(site));
+       }
+
+       return 0;
+}
+
+static int addr_conflict(struct static_call_site *site, void *start, void *end)
+{
+       unsigned long addr = (unsigned long)static_call_addr(site);
+
+       if (addr <= (unsigned long)end &&
+           addr + CALL_INSN_SIZE > (unsigned long)start)
+               return 1;
+
+       return 0;
+}
+
+static int __static_call_text_reserved(struct static_call_site *iter_start,
+                                      struct static_call_site *iter_stop,
+                                      void *start, void *end, bool init)
+{
+       struct static_call_site *iter = iter_start;
+
+       while (iter < iter_stop) {
+               if (init || !static_call_is_init(iter)) {
+                       if (addr_conflict(iter, start, end))
+                               return 1;
+               }
+               iter++;
+       }
+
+       return 0;
+}
+
+#ifdef CONFIG_MODULES
+
+static int __static_call_mod_text_reserved(void *start, void *end)
+{
+       struct module *mod;
+       int ret;
+
+       preempt_disable();
+       mod = __module_text_address((unsigned long)start);
+       WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod);
+       if (!try_module_get(mod))
+               mod = NULL;
+       preempt_enable();
+
+       if (!mod)
+               return 0;
+
+       ret = __static_call_text_reserved(mod->static_call_sites,
+                       mod->static_call_sites + mod->num_static_call_sites,
+                       start, end, mod->state == MODULE_STATE_COMING);
+
+       module_put(mod);
+
+       return ret;
+}
+
+static unsigned long tramp_key_lookup(unsigned long addr)
+{
+       struct static_call_tramp_key *start = __start_static_call_tramp_key;
+       struct static_call_tramp_key *stop = __stop_static_call_tramp_key;
+       struct static_call_tramp_key *tramp_key;
+
+       for (tramp_key = start; tramp_key != stop; tramp_key++) {
+               unsigned long tramp;
+
+               tramp = (long)tramp_key->tramp + (long)&tramp_key->tramp;
+               if (tramp == addr)
+                       return (long)tramp_key->key + (long)&tramp_key->key;
+       }
+
+       return 0;
+}
+
+static int static_call_add_module(struct module *mod)
+{
+       struct static_call_site *start = mod->static_call_sites;
+       struct static_call_site *stop = start + mod->num_static_call_sites;
+       struct static_call_site *site;
+
+       for (site = start; site != stop; site++) {
+               unsigned long s_key = __static_call_key(site);
+               unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS;
+               unsigned long key;
+
+               /*
+                * If the key is exported, 'addr' points to the key, which
+                * means modules are allowed to call static_call_update() on
+                * it.
+                *
+                * Otherwise, the key isn't exported, and 'addr' points to the
+                * trampoline, so we need to look up the key.
+                *
+                * We go through this dance to prevent crazy modules from
+                * abusing sensitive static calls.
+                */
+               if (!kernel_text_address(addr))
+                       continue;
+
+               key = tramp_key_lookup(addr);
+               if (!key) {
+                       pr_warn("Failed to fixup __raw_static_call() usage at: %ps\n",
+                               static_call_addr(site));
+                       return -EINVAL;
+               }
+
+               key |= s_key & STATIC_CALL_SITE_FLAGS;
+               site->key = key - (long)&site->key;
+       }
+
+       return __static_call_init(mod, start, stop);
+}
+
+static void static_call_del_module(struct module *mod)
+{
+       struct static_call_site *start = mod->static_call_sites;
+       struct static_call_site *stop = mod->static_call_sites +
+                                       mod->num_static_call_sites;
+       struct static_call_key *key, *prev_key = NULL;
+       struct static_call_mod *site_mod, **prev;
+       struct static_call_site *site;
+
+       for (site = start; site < stop; site++) {
+               key = static_call_key(site);
+               if (key == prev_key)
+                       continue;
+
+               prev_key = key;
+
+               for (prev = &key->mods, site_mod = key->mods;
+                    site_mod && site_mod->mod != mod;
+                    prev = &site_mod->next, site_mod = site_mod->next)
+                       ;
+
+               if (!site_mod)
+                       continue;
+
+               *prev = site_mod->next;
+               kfree(site_mod);
+       }
+}
+
+static int static_call_module_notify(struct notifier_block *nb,
+                                    unsigned long val, void *data)
+{
+       struct module *mod = data;
+       int ret = 0;
+
+       cpus_read_lock();
+       static_call_lock();
+
+       switch (val) {
+       case MODULE_STATE_COMING:
+               ret = static_call_add_module(mod);
+               if (ret) {
+                       WARN(1, "Failed to allocate memory for static calls");
+                       static_call_del_module(mod);
+               }
+               break;
+       case MODULE_STATE_GOING:
+               static_call_del_module(mod);
+               break;
+       }
+
+       static_call_unlock();
+       cpus_read_unlock();
+
+       return notifier_from_errno(ret);
+}
+
+static struct notifier_block static_call_module_nb = {
+       .notifier_call = static_call_module_notify,
+};
+
+#else
+
+static inline int __static_call_mod_text_reserved(void *start, void *end)
+{
+       return 0;
+}
+
+#endif /* CONFIG_MODULES */
+
+int static_call_text_reserved(void *start, void *end)
+{
+       bool init = system_state < SYSTEM_RUNNING;
+       int ret = __static_call_text_reserved(__start_static_call_sites,
+                       __stop_static_call_sites, start, end, init);
+
+       if (ret)
+               return ret;
+
+       return __static_call_mod_text_reserved(start, end);
+}
+
+int __init static_call_init(void)
+{
+       int ret;
+
+       if (static_call_initialized)
+               return 0;
+
+       cpus_read_lock();
+       static_call_lock();
+       ret = __static_call_init(NULL, __start_static_call_sites,
+                                __stop_static_call_sites);
+       static_call_unlock();
+       cpus_read_unlock();
+
+       if (ret) {
+               pr_err("Failed to allocate memory for static_call!\n");
+               BUG();
+       }
+
+       static_call_initialized = true;
+
+#ifdef CONFIG_MODULES
+       register_module_notifier(&static_call_module_nb);
+#endif
+       return 0;
+}
+early_initcall(static_call_init);
+
+#ifdef CONFIG_STATIC_CALL_SELFTEST
+
+static int func_a(int x)
+{
+       return x + 1;
+}
+
+static int func_b(int x)
+{
+       return x + 2;
+}
+
+DEFINE_STATIC_CALL(sc_selftest, func_a);
+
+static struct static_call_data {
+       int (*func)(int);
+       int val;
+       int expect;
+} static_call_data[] __initdata = {
+       { NULL,   2, 3 },
+       { func_b, 2, 4 },
+       { func_a, 2, 3 }
+};
+
+static int __init test_static_call_init(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(static_call_data); i++) {
+               struct static_call_data *scd = &static_call_data[i];
+
+               if (scd->func)
+                       static_call_update(sc_selftest, scd->func);
+
+               WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect);
+       }
+
+       return 0;
+}
+early_initcall(test_static_call_init);
+
+#endif /* CONFIG_STATIC_CALL_SELFTEST */
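
The key->type/key->mods trick in the file above relies on tagging the low bit of a word-aligned pointer: bit set means the field holds a direct sites pointer, bit clear means it heads a list of struct static_call_mod. A minimal userspace sketch of the same tagging scheme, with invented type names (site, mod_list, tagged) standing in for the kernel's:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct site { int dummy; };
struct mod_list { struct mod_list *next; };

union tagged {
	struct mod_list *mods;	/* low bit 0: list of modules */
	uintptr_t type;		/* low bit 1: direct sites pointer */
};

static int has_mods(union tagged t)
{
	return !(t.type & 1);
}

int main(void)
{
	static struct site s;	/* word-aligned, so the low bit is free */
	union tagged t;

	/* Store a direct sites pointer, tagging the low bit. */
	t.type = (uintptr_t)&s | 1;
	assert(!has_mods(t));

	/* Recover the pointer by masking the tag off again. */
	struct site *p = (struct site *)(t.type & ~(uintptr_t)1);
	assert(p == &s);
	printf("tagged pointer round-trip ok\n");
	return 0;
}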
index 830aaf8ca08ee0cf3ca5f816fc347a0d4189ba3d..5b7b1a82ae6a4eb34644f488ad3bc976c682bfe0 100644 (file)
@@ -2288,17 +2288,6 @@ static struct ctl_table kern_table[] = {
                .extra1         = SYSCTL_ZERO,
                .extra2         = SYSCTL_ONE,
        },
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-       {
-               .procname       = "timer_migration",
-               .data           = &sysctl_timer_migration,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = timer_migration_handler,
-               .extra1         = SYSCTL_ZERO,
-               .extra2         = SYSCTL_ONE,
-       },
-#endif
 #ifdef CONFIG_BPF_SYSCALL
        {
                .procname       = "unprivileged_bpf_disabled",
index c59e1a49bc406c7a2cb0e05a36053dd3ad13a9ba..dff75bcde1514c82fc6580bcf407cd2ff7ae03cb 100644 (file)
@@ -12,12 +12,22 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
  * @notify: how to notify the targeted task
  *
  * Queue @work for task_work_run() below and notify the @task if @notify
- * is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL works like signals, in that the
- * it will interrupt the targeted task and run the task_work. @TWA_RESUME
- * work is run only when the task exits the kernel and returns to user mode,
- * or before entering guest mode. Fails if the @task is exiting/exited and thus
- * it can't process this @work. Otherwise @work->func() will be called when the
- * @task goes through one of the aforementioned transitions, or exits.
+ * is @TWA_RESUME, @TWA_SIGNAL, or @TWA_SIGNAL_NO_IPI.
+ *
+ * @TWA_SIGNAL works like signals, in that it will interrupt the targeted
+ * task and run the task_work, regardless of whether the task is currently
+ * running in the kernel or userspace.
+ * @TWA_SIGNAL_NO_IPI works like @TWA_SIGNAL, except it doesn't send a
+ * reschedule IPI to force the targeted task to reschedule and run task_work.
+ * This can be advantageous if there's no strict requirement that the
+ * task_work be run as soon as possible, just whenever the task enters the
+ * kernel anyway.
+ * @TWA_RESUME work is run only when the task exits the kernel and returns to
+ * user mode, or before entering guest mode.
+ *
+ * Fails if the @task is exiting/exited and thus it can't process this @work.
+ * Otherwise @work->func() will be called when the @task goes through one of
+ * the aforementioned transitions, or exits.
  *
  * If the targeted task is exiting, then an error is returned and the work item
  * is not queued. It's up to the caller to arrange for an alternative mechanism
@@ -53,6 +63,9 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
        case TWA_SIGNAL:
                set_notify_signal(task);
                break;
+       case TWA_SIGNAL_NO_IPI:
+               __set_notify_signal(task);
+               break;
        default:
                WARN_ON_ONCE(1);
                break;
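
The point of the new TWA_SIGNAL_NO_IPI mode documented above is that the flag is set but no reschedule IPI forces the target to act on it immediately; the work simply runs at the task's next kernel entry. A rough userspace analogue with pthreads, using invented names throughout (pending, worker, wakeup_handler); it only illustrates the lazy-versus-eager notification trade-off, not the kernel API:

#include <pthread.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int pending;

static void wakeup_handler(int sig)
{
	(void)sig;	/* exists only to interrupt a blocking sleep */
}

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 3; i++) {
		sleep(1);	/* the "enters the kernel anyway" boundary */
		if (atomic_exchange(&pending, 0))
			puts("worker: ran deferred work");
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	signal(SIGUSR1, wakeup_handler);
	pthread_create(&t, NULL, worker, NULL);

	atomic_store(&pending, 1);	/* lazy: like TWA_SIGNAL_NO_IPI */
	/* The eager TWA_SIGNAL analogue would additionally do
	 * pthread_kill(t, SIGUSR1); to cut the sleep short. */

	pthread_join(t, NULL);
	return 0;
}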
index 003ccf338d2017c26441d2492be331fbad30d283..5d85014d59b5f829e34e257e1cd2b69ae6d5bef8 100644 (file)
@@ -690,7 +690,7 @@ static ssize_t unbind_device_store(struct device *dev,
 {
        char name[CS_NAME_LEN];
        ssize_t ret = sysfs_get_uname(buf, name, count);
-       struct clock_event_device *ce;
+       struct clock_event_device *ce = NULL, *iter;
 
        if (ret < 0)
                return ret;
@@ -698,9 +698,10 @@ static ssize_t unbind_device_store(struct device *dev,
        ret = -ENODEV;
        mutex_lock(&clockevents_mutex);
        raw_spin_lock_irq(&clockevents_lock);
-       list_for_each_entry(ce, &clockevent_devices, list) {
-               if (!strcmp(ce->name, name)) {
-                       ret = __clockevents_try_unbind(ce, dev->id);
+       list_for_each_entry(iter, &clockevent_devices, list) {
+               if (!strcmp(iter->name, name)) {
+                       ret = __clockevents_try_unbind(iter, dev->id);
+                       ce = iter;
                        break;
                }
        }
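
The clockevents change above is the common hardening pattern for list_for_each_entry(): after the loop the iterator no longer points at a real element unless the loop broke early, so the match is published through a separate pointer (ce) while the iterator (iter) stays loop-local. A standalone sketch of the same pattern with made-up types:

#include <stdio.h>
#include <string.h>

struct dev { const char *name; struct dev *next; };

static struct dev *find_dev(struct dev *head, const char *name)
{
	struct dev *found = NULL, *iter;

	for (iter = head; iter; iter = iter->next) {
		if (!strcmp(iter->name, name)) {
			found = iter;	/* publish the match explicitly */
			break;
		}
	}
	return found;	/* NULL when nothing matched, never a junk iterator */
}

int main(void)
{
	struct dev b = { "hpet", NULL }, a = { "lapic", &b };

	printf("%s\n", find_dev(&a, "hpet") ? "found" : "not found");
	return 0;
}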
index 95d7ca35bdf2ce92b0a9929fd20e4356599bb032..cee5da1e54c4121d771bf5fa19c07584e000bf49 100644 (file)
@@ -343,7 +343,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
        cpus_read_lock();
        preempt_disable();
        clocksource_verify_choose_cpus();
-       if (cpumask_weight(&cpus_chosen) == 0) {
+       if (cpumask_empty(&cpus_chosen)) {
                preempt_enable();
                cpus_read_unlock();
                pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
index b1b9b12899f5e43571e9cc39740f545cd6fad795..8464c5acc91338baee78b465933d59daf5740d5d 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/jiffies.h>
 #include <linux/ktime.h>
 #include <linux/kernel.h>
+#include <linux/math.h>
 #include <linux/moduleparam.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
@@ -199,15 +200,13 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 
        r = rate;
        if (r >= 4000000) {
-               r /= 1000000;
+               r = DIV_ROUND_CLOSEST(r, 1000000);
                r_unit = 'M';
+       } else if (r >= 4000) {
+               r = DIV_ROUND_CLOSEST(r, 1000);
+               r_unit = 'k';
        } else {
-               if (r >= 1000) {
-                       r /= 1000;
-                       r_unit = 'k';
-               } else {
-                       r_unit = ' ';
-               }
+               r_unit = ' ';
        }
 
        /* Calculate the ns resolution of this counter */
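
DIV_ROUND_CLOSEST() matters here because plain integer division truncates: a 4,999,999 Hz clock would previously print as 4 MHz. A quick userspace check, re-deriving the macro's arithmetic for non-negative operands (the kernel macro also copes with negative values):

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

int main(void)
{
	unsigned long rate = 4999999;

	printf("truncated: %lu MHz\n", rate / 1000000);			/* 4 */
	printf("rounded:   %lu MHz\n", DIV_ROUND_CLOSEST(rate, 1000000)); /* 5 */
	return 0;
}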
index 2d76c91b85de428aac7bd6627533170f6697ad56..58a11f859ac79d7ea92d52a4e0d235201b0c65ac 100644 (file)
@@ -188,7 +188,7 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
         */
        if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
 #ifdef CONFIG_NO_HZ_FULL
-               WARN_ON(tick_nohz_full_running);
+               WARN_ON_ONCE(tick_nohz_full_running);
 #endif
                tick_do_timer_cpu = cpu;
        }
@@ -928,6 +928,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
        if (unlikely(expires == KTIME_MAX)) {
                if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
                        hrtimer_cancel(&ts->sched_timer);
+               else
+                       tick_program_event(KTIME_MAX, 1);
                return;
        }
 
@@ -1364,9 +1366,15 @@ static void tick_nohz_handler(struct clock_event_device *dev)
        tick_sched_do_timer(ts, now);
        tick_sched_handle(ts, regs);
 
-       /* No need to reprogram if we are running tickless  */
-       if (unlikely(ts->tick_stopped))
+       if (unlikely(ts->tick_stopped)) {
+               /*
+                * The clockevent device is not reprogrammed, so change the
+                * clock event device to ONESHOT_STOPPED to avoid spurious
+                * interrupts on devices which might not be truly one shot.
+                */
+               tick_program_event(KTIME_MAX, 1);
                return;
+       }
 
        hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
        tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
@@ -1538,7 +1546,7 @@ void tick_cancel_sched_timer(int cpu)
 }
 #endif
 
-/**
+/*
  * Async notification about clocksource changes
  */
 void tick_clock_notify(void)
@@ -1559,7 +1567,7 @@ void tick_oneshot_notify(void)
        set_bit(0, &ts->check_clocks);
 }
 
-/**
+/*
  * Check, if a change happened, which makes oneshot possible.
  *
  * Called cyclic from the hrtimer softirq (driven by the timer
index dcdcb85121e40ad6319dc768e0a33f9233c15202..4ab9949772d52f23a9270cef821ddf4e623cd097 100644 (file)
@@ -429,6 +429,14 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr,
        memcpy(base + 1, base, sizeof(*base));
 }
 
+static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr)
+{
+       u64 delta, cycles = tk_clock_read(tkr);
+
+       delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
+       return timekeeping_delta_to_ns(tkr, delta);
+}
+
 static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 {
        struct tk_read_base *tkr;
@@ -439,12 +447,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
                seq = raw_read_seqcount_latch(&tkf->seq);
                tkr = tkf->base + (seq & 0x01);
                now = ktime_to_ns(tkr->base);
-
-               now += timekeeping_delta_to_ns(tkr,
-                               clocksource_delta(
-                                       tk_clock_read(tkr),
-                                       tkr->cycle_last,
-                                       tkr->mask));
+               now += fast_tk_get_delta_ns(tkr);
        } while (read_seqcount_latch_retry(&tkf->seq, seq));
 
        return now;
@@ -482,7 +485,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
  * of the following timestamps. Callers need to be aware of that and
  * deal with it.
  */
-u64 ktime_get_mono_fast_ns(void)
+u64 notrace ktime_get_mono_fast_ns(void)
 {
        return __ktime_get_fast_ns(&tk_fast_mono);
 }
@@ -494,7 +497,7 @@ EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
  * Contrary to ktime_get_mono_fast_ns() this is always correct because the
  * conversion factor is not affected by NTP/PTP correction.
  */
-u64 ktime_get_raw_fast_ns(void)
+u64 notrace ktime_get_raw_fast_ns(void)
 {
        return __ktime_get_fast_ns(&tk_fast_raw);
 }
@@ -528,10 +531,27 @@ u64 notrace ktime_get_boot_fast_ns(void)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
 
-       return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
+       return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot)));
 }
 EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
 
+/**
+ * ktime_get_tai_fast_ns - NMI safe and fast access to the TAI clock.
+ *
+ * The same limitations as described for ktime_get_boot_fast_ns() apply. The
+ * mono time and the TAI offset are not read atomically which may yield wrong
+ * readouts. However, an update of the TAI offset is a rare event, e.g. caused
+ * by settime or adjtimex with an offset. The user of this function has to deal
+ * with the possibility of wrong timestamps in post processing.
+ */
+u64 notrace ktime_get_tai_fast_ns(void)
+{
+       struct timekeeper *tk = &tk_core.timekeeper;
+
+       return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai)));
+}
+EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns);
+
 static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
 {
        struct tk_read_base *tkr;
@@ -543,10 +563,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
                tkr = tkf->base + (seq & 0x01);
                basem = ktime_to_ns(tkr->base);
                baser = ktime_to_ns(tkr->base_real);
-
-               delta = timekeeping_delta_to_ns(tkr,
-                               clocksource_delta(tk_clock_read(tkr),
-                               tkr->cycle_last, tkr->mask));
+               delta = fast_tk_get_delta_ns(tkr);
        } while (read_seqcount_latch_retry(&tkf->seq, seq));
 
        if (mono)
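
Both fast-path readers now share fast_tk_get_delta_ns(), but the surrounding loop is the interesting part: a seqcount latch keeps two copies of the timekeeper base, the low bit of the sequence selects the stable copy, and the read retries if the sequence moved underneath it. A single-threaded illustration of that selection/retry shape only; real concurrent use needs the kernel's raw_read_seqcount_latch() and its memory ordering, and all names below are invented:

#include <stdio.h>

struct base { long long ns; };

static unsigned int seq;
static struct base copies[2];

static long long latch_read(void)
{
	unsigned int s;
	long long v;

	do {
		s = seq;		/* snapshot the sequence */
		v = copies[s & 1].ns;	/* low bit picks the copy */
	} while (s != seq);		/* retry if an update raced */
	return v;
}

static void latch_write(long long ns)
{
	copies[(seq + 1) & 1].ns = ns;	/* update the spare copy */
	seq++;				/* then flip readers to it */
}

int main(void)
{
	latch_write(100);
	latch_write(250);
	printf("read: %lld\n", latch_read());
	return 0;
}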
index 85f1021ad45955026eba845727e53c87693ed2db..a0666d948147684c59d694102eecb0fbb30faffa 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/slab.h>
 #include <linux/compat.h>
 #include <linux/random.h>
+#include <linux/sysctl.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -223,7 +224,7 @@ static void timer_update_keys(struct work_struct *work);
 static DECLARE_WORK(timer_update_work, timer_update_keys);
 
 #ifdef CONFIG_SMP
-unsigned int sysctl_timer_migration = 1;
+static unsigned int sysctl_timer_migration = 1;
 
 DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);
 
@@ -234,7 +235,42 @@ static void timers_update_migration(void)
        else
                static_branch_disable(&timers_migration_enabled);
 }
-#else
+
+#ifdef CONFIG_SYSCTL
+static int timer_migration_handler(struct ctl_table *table, int write,
+                           void *buffer, size_t *lenp, loff_t *ppos)
+{
+       int ret;
+
+       mutex_lock(&timer_keys_mutex);
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       if (!ret && write)
+               timers_update_migration();
+       mutex_unlock(&timer_keys_mutex);
+       return ret;
+}
+
+static struct ctl_table timer_sysctl[] = {
+       {
+               .procname       = "timer_migration",
+               .data           = &sysctl_timer_migration,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = timer_migration_handler,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_ONE,
+       },
+       {}
+};
+
+static int __init timer_sysctl_init(void)
+{
+       register_sysctl("kernel", timer_sysctl);
+       return 0;
+}
+device_initcall(timer_sysctl_init);
+#endif /* CONFIG_SYSCTL */
+#else /* CONFIG_SMP */
 static inline void timers_update_migration(void) { }
 #endif /* !CONFIG_SMP */
 
@@ -251,19 +287,6 @@ void timers_update_nohz(void)
        schedule_work(&timer_update_work);
 }
 
-int timer_migration_handler(struct ctl_table *table, int write,
-                           void *buffer, size_t *lenp, loff_t *ppos)
-{
-       int ret;
-
-       mutex_lock(&timer_keys_mutex);
-       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-       if (!ret && write)
-               timers_update_migration();
-       mutex_unlock(&timer_keys_mutex);
-       return ret;
-}
-
 static inline bool is_timers_nohz_active(void)
 {
        return static_branch_unlikely(&timers_nohz_active);
@@ -502,7 +525,7 @@ static inline unsigned calc_index(unsigned long expires, unsigned lvl,
         *
         * Round up with level granularity to prevent this.
         */
-       expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
+       expires = (expires >> LVL_SHIFT(lvl)) + 1;
        *bucket_expiry = expires << LVL_SHIFT(lvl);
        return LVL_OFFS(lvl) + (expires & LVL_MASK);
 }
@@ -615,9 +638,39 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer
 
 static const struct debug_obj_descr timer_debug_descr;
 
+struct timer_hint {
+       void    (*function)(struct timer_list *t);
+       long    offset;
+};
+
+#define TIMER_HINT(fn, container, timr, hintfn)                        \
+       {                                                       \
+               .function = fn,                                 \
+               .offset   = offsetof(container, hintfn) -       \
+                           offsetof(container, timr)           \
+       }
+
+static const struct timer_hint timer_hints[] = {
+       TIMER_HINT(delayed_work_timer_fn,
+                  struct delayed_work, timer, work.func),
+       TIMER_HINT(kthread_delayed_work_timer_fn,
+                  struct kthread_delayed_work, timer, work.func),
+};
+
 static void *timer_debug_hint(void *addr)
 {
-       return ((struct timer_list *) addr)->function;
+       struct timer_list *timer = addr;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(timer_hints); i++) {
+               if (timer_hints[i].function == timer->function) {
+                       void (**fn)(void) = addr + timer_hints[i].offset;
+
+                       return *fn;
+               }
+       }
+
+       return timer->function;
 }
 
 static bool timer_is_static_object(void *addr)
@@ -1722,11 +1775,14 @@ static inline void __run_timers(struct timer_base *base)
               time_after_eq(jiffies, base->next_expiry)) {
                levels = collect_expired_timers(base, heads);
                /*
-                * The only possible reason for not finding any expired
-                * timer at this clk is that all matching timers have been
-                * dequeued.
+                * The two possible reasons for not finding any expired
+                * timer at this clk are that all matching timers have been
+                * dequeued or no timer has been queued since
+                * base::next_expiry was set to base::clk +
+                * NEXT_TIMER_MAX_DELTA.
                 */
-               WARN_ON_ONCE(!levels && !base->next_expiry_recalc);
+               WARN_ON_ONCE(!levels && !base->next_expiry_recalc
+                            && base->timers_pending);
                base->clk++;
                base->next_expiry = __next_timer_interrupt(base);
 
@@ -1950,6 +2006,7 @@ int timers_prepare_cpu(unsigned int cpu)
                base = per_cpu_ptr(&timer_bases[b], cpu);
                base->clk = jiffies;
                base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+               base->next_expiry_recalc = false;
                base->timers_pending = false;
                base->is_idle = false;
        }
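
The timer_hints table above improves debugobjects output by translating a well-known intermediate callback (e.g. delayed_work_timer_fn) into the function the user actually cares about, using nothing but an offsetof() delta inside the containing structure. A userspace sketch of the offset arithmetic, with simplified stand-in types rather than the kernel's delayed_work:

#include <stddef.h>
#include <stdio.h>

struct work { void (*func)(void); };
struct delayed_work { int timer; struct work work; };

static void real_work(void) { }

int main(void)
{
	struct delayed_work dw = { .work.func = real_work };
	long off = offsetof(struct delayed_work, work.func) -
		   offsetof(struct delayed_work, timer);
	/* Given only &dw.timer, recover the useful function pointer. */
	void (**fn)(void) = (void (**)(void))((char *)&dw.timer + off);

	printf("%s\n", *fn == real_work ? "hint resolved" : "miss");
	return 0;
}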
index 2c43e327a619f77b4e5124851f10f58d643f8f46..bf5da6c4e9992e6575c91475be475dd270026f11 100644 (file)
@@ -144,6 +144,7 @@ config TRACING
        select BINARY_PRINTF
        select EVENT_TRACING
        select TRACE_CLOCK
+       select TASKS_RCU if PREEMPTION
 
 config GENERIC_TRACER
        bool
index 4d5629196d01dcb1592bb2ad483b56cdf8966f9d..10a32b0f2deb6d8e168a75b756837e5750bd13f4 100644 (file)
@@ -145,13 +145,14 @@ static void trace_note_time(struct blk_trace *bt)
        local_irq_restore(flags);
 }
 
-void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
-       const char *fmt, ...)
+void __blk_trace_note_message(struct blk_trace *bt,
+               struct cgroup_subsys_state *css, const char *fmt, ...)
 {
        int n;
        va_list args;
        unsigned long flags;
        char *buf;
+       u64 cgid = 0;
 
        if (unlikely(bt->trace_state != Blktrace_running &&
                     !blk_tracer_enabled))
@@ -170,17 +171,16 @@ void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
        n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
        va_end(args);
 
-       if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
-               blkcg = NULL;
 #ifdef CONFIG_BLK_CGROUP
-       trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n,
-                  blkcg ? cgroup_id(blkcg->css.cgroup) : 1);
-#else
-       trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, 0);
+       if (css && (blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
+               cgid = cgroup_id(css->cgroup);
+       else
+               cgid = 1;
 #endif
+       trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, cgid);
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(__trace_note_message);
+EXPORT_SYMBOL_GPL(__blk_trace_note_message);
 
 static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
                         pid_t pid)
@@ -411,7 +411,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
                return PTR_ERR(msg);
 
        bt = filp->private_data;
-       __trace_note_message(bt, NULL, "%s", msg);
+       __blk_trace_note_message(bt, NULL, "%s", msg);
        kfree(msg);
 
        return count;
@@ -783,6 +783,7 @@ void blk_trace_shutdown(struct request_queue *q)
 #ifdef CONFIG_BLK_CGROUP
 static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
 {
+       struct cgroup_subsys_state *blkcg_css;
        struct blk_trace *bt;
 
        /* We don't use the 'bt' value here except as an optimization... */
@@ -790,9 +791,10 @@ static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
        if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
                return 0;
 
-       if (!bio->bi_blkg)
+       blkcg_css = bio_blkcg_css(bio);
+       if (!blkcg_css)
                return 0;
-       return cgroup_id(bio_blkcg(bio)->css.cgroup);
+       return cgroup_id(blkcg_css->cgroup);
 }
 #else
 static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
index 7fa2ebc07f603e44523a36d73ca888e4eb0c9975..d8553f46caa29797be93acab9c7e8904b898bead 100644 (file)
@@ -2349,11 +2349,11 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip,
 }
 
 static int
-kprobe_multi_resolve_syms(const void *usyms, u32 cnt,
+kprobe_multi_resolve_syms(const void __user *usyms, u32 cnt,
                          unsigned long *addrs)
 {
        unsigned long addr, size;
-       const char **syms;
+       const char __user **syms;
        int err = -ENOMEM;
        unsigned int i;
        char *func;
index 8f4fb328133abffc747f7f3794c82ca653a25e5d..a7e84c8543cb74934a63484d1dd7ddcb27fcc94c 100644 (file)
@@ -404,9 +404,9 @@ free:
 
 static void
 ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
-                               unsigned int prev_state,
                                struct task_struct *prev,
-                               struct task_struct *next)
+                               struct task_struct *next,
+                               unsigned int prev_state)
 {
        unsigned long long timestamp;
        int index;
index 4f1d2f5e7263412f5c5467fef32621e05cb14450..af899b058c8d09b8eb14e2b5614817122f6bb9b9 100644 (file)
@@ -7420,9 +7420,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
 
 static void
 ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
-                                    unsigned int prev_state,
                                     struct task_struct *prev,
-                                    struct task_struct *next)
+                                    struct task_struct *next,
+                                    unsigned int prev_state)
 {
        struct trace_array *tr = data;
        struct trace_pid_list *pid_list;
index ab463a4d2b2359660c8d725287e642e8f8ec1943..b56833700d23fb515a1b0e7a1f9a53e2e66d82c9 100644 (file)
@@ -65,7 +65,7 @@ static void rethook_free_rcu(struct rcu_head *head)
  */
 void rethook_free(struct rethook *rh)
 {
-       rcu_assign_pointer(rh->handler, NULL);
+       WRITE_ONCE(rh->handler, NULL);
 
        call_rcu(&rh->rcu, rethook_free_rcu);
 }
index e11e167b78090a2b3aa2e621fc73e0335eb600a3..f97de82d1342ac3ab280be6b54e008c5621fbff1 100644 (file)
@@ -773,9 +773,9 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable)
 
 static void
 event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
-                                       unsigned int prev_state,
                                        struct task_struct *prev,
-                                       struct task_struct *next)
+                                       struct task_struct *next,
+                                       unsigned int prev_state)
 {
        struct trace_array *tr = data;
        struct trace_pid_list *no_pid_list;
@@ -799,9 +799,9 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
 
 static void
 event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
-                                        unsigned int prev_state,
                                         struct task_struct *prev,
-                                        struct task_struct *next)
+                                        struct task_struct *next,
+                                        unsigned int prev_state)
 {
        struct trace_array *tr = data;
        struct trace_pid_list *no_pid_list;
index e9ae1f33a7f03b04af4470094879744ac99906fa..afb92e2f0aeab5327a727d464e9150815050feba 100644 (file)
@@ -1168,9 +1168,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
  */
 static void
 trace_sched_switch_callback(void *data, bool preempt,
-                           unsigned int prev_state,
                            struct task_struct *p,
-                           struct task_struct *n)
+                           struct task_struct *n,
+                           unsigned int prev_state)
 {
        struct osnoise_variables *osn_var = this_cpu_osn_var();
 
index 45796d8bd4b2dcee74bc131203c1c2f64bc219ee..c9ffdcfe622e14ae9e09168f8442333dddda8f85 100644 (file)
@@ -22,8 +22,8 @@ static DEFINE_MUTEX(sched_register_mutex);
 
 static void
 probe_sched_switch(void *ignore, bool preempt,
-                  unsigned int prev_state,
-                  struct task_struct *prev, struct task_struct *next)
+                  struct task_struct *prev, struct task_struct *next,
+                  unsigned int prev_state)
 {
        int flags;
 
index 46429f9a96fafd176cab549a5bdc49030601d844..330aee1c1a49e6804df4c4ce7389282c126f4922 100644 (file)
@@ -426,8 +426,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 
 static void notrace
 probe_wakeup_sched_switch(void *ignore, bool preempt,
-                         unsigned int prev_state,
-                         struct task_struct *prev, struct task_struct *next)
+                         struct task_struct *prev, struct task_struct *next,
+                         unsigned int prev_state)
 {
        struct trace_array_cpu *data;
        u64 T0, T1, delta;
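
All of the sched_switch probe updates in this series are the same mechanical change: the tracepoint's prototype moved prev_state after next, and since probes are attached as plain function pointers, every probe had to be updated in the same patch or its arguments would be silently misinterpreted rather than rejected at build time. A small sketch of why the typedef'd prototype is the contract, with invented names:

#include <stdio.h>

struct task { const char *comm; };

typedef void (*sched_switch_probe_t)(void *data, int preempt,
				     struct task *prev, struct task *next,
				     unsigned int prev_state);

static void probe(void *data, int preempt, struct task *prev,
		  struct task *next, unsigned int prev_state)
{
	(void)data; (void)preempt;
	printf("%s -> %s (prev_state=%u)\n", prev->comm, next->comm, prev_state);
}

int main(void)
{
	struct task a = { "old" }, b = { "new" };
	sched_switch_probe_t cb = probe;  /* assigns only if prototypes match */

	cb(NULL, 0, &a, &b, 1);
	return 0;
}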
index 45a0584f65417e97e1664644b1a21195fd4b231c..c223a2575b7217f8f61ea422b5a93e642afa9505 100644 (file)
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -6,8 +6,7 @@
 
   CONFIG_BUG - emit BUG traps.  Nothing happens without this.
   CONFIG_GENERIC_BUG - enable this code.
-  CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit pointers relative to
-       the containing struct bug_entry for bug_addr and file.
+  CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit relative pointers for bug_addr and file
   CONFIG_DEBUG_BUGVERBOSE - emit full file+line information for each BUG
 
   CONFIG_BUG and CONFIG_DEBUG_BUGVERBOSE are potentially user-settable
@@ -53,10 +52,10 @@ extern struct bug_entry __start___bug_table[], __stop___bug_table[];
 
 static inline unsigned long bug_addr(const struct bug_entry *bug)
 {
-#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
-       return bug->bug_addr;
+#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+       return (unsigned long)&bug->bug_addr_disp + bug->bug_addr_disp;
 #else
-       return (unsigned long)bug + bug->bug_addr_disp;
+       return bug->bug_addr;
 #endif
 }
 
@@ -131,10 +130,10 @@ void bug_get_file_line(struct bug_entry *bug, const char **file,
                       unsigned int *line)
 {
 #ifdef CONFIG_DEBUG_BUGVERBOSE
-#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
-       *file = bug->file;
+#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+       *file = (const char *)&bug->file_disp + bug->file_disp;
 #else
-       *file = (const char *)bug + bug->file_disp;
+       *file = bug->file;
 #endif
        *line = bug->line;
 #else
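
The bug_addr()/bug_get_file_line() change flips the #ifdef polarity, but the underlying scheme is unchanged: with CONFIG_GENERIC_BUG_RELATIVE_POINTERS, each entry stores a 32-bit displacement and the address is rebuilt as &field + field, which is also how the static_call tramp_key table earlier in this series resolves its pointers. A userspace sketch of that encode/decode; the entry and message here are invented and kept in static storage so the displacement fits in 32 bits:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static const char msg[] = "lib/bug.c";

struct entry { int32_t file_disp; };

static struct entry e;

static void encode(struct entry *ent, const void *target)
{
	/* store the distance from the field to the target */
	ent->file_disp = (int32_t)((intptr_t)target - (intptr_t)&ent->file_disp);
}

static const char *decode(const struct entry *ent)
{
	/* rebuild the pointer as &field + field */
	return (const char *)&ent->file_disp + ent->file_disp;
}

int main(void)
{
	encode(&e, msg);
	assert(decode(&e) == msg);
	printf("decoded: %s\n", decode(&e));
	return 0;
}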
index 6946f8e204e3950614c979f5e9732ac65546debc..337d797a714163460d2e89e55b6cce443d1b52db 100644 (file)
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Generic infrastructure for lifetime debugging of objects.
  *
- * Started by Thomas Gleixner
- *
  * Copyright (C) 2008, Thomas Gleixner <tglx@linutronix.de>
- *
- * For licencing details see kernel-base/COPYING
  */
 
 #define pr_fmt(fmt) "ODEBUG: " fmt
index 06811d866775c0c353a885f119879c7cc992db5d..53f6b9c6e9366200906c8e49a98d2e37161fa51b 100644 (file)
  *        Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES
  */
 #define NET_DIM_PARAMS_NUM_PROFILES 5
-#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
-#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
+#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256
+#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128
 #define NET_DIM_DEF_PROFILE_CQE 1
 #define NET_DIM_DEF_PROFILE_EQE 1
 
 #define NET_DIM_RX_EQE_PROFILES { \
-       {1,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-       {8,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-       {64,  NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-       {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-       {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+       {.usec = 1,   .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+       {.usec = 8,   .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+       {.usec = 64,  .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+       {.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+       {.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}  \
 }
 
 #define NET_DIM_RX_CQE_PROFILES { \
-       {2,  256},             \
-       {8,  128},             \
-       {16, 64},              \
-       {32, 64},              \
-       {64, 64}               \
+       {.usec = 2,  .pkts = 256,},             \
+       {.usec = 8,  .pkts = 128,},             \
+       {.usec = 16, .pkts = 64,},              \
+       {.usec = 32, .pkts = 64,},              \
+       {.usec = 64, .pkts = 64,}               \
 }
 
 #define NET_DIM_TX_EQE_PROFILES { \
-       {1,   NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-       {8,   NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-       {32,  NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-       {64,  NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-       {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}   \
+       {.usec = 1,   .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,},  \
+       {.usec = 8,   .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,},  \
+       {.usec = 32,  .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,},  \
+       {.usec = 64,  .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,},  \
+       {.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}   \
 }
 
 #define NET_DIM_TX_CQE_PROFILES { \
-       {5,  128},  \
-       {8,  64},  \
-       {16, 32},  \
-       {32, 32},  \
-       {64, 32}   \
+       {.usec = 5,  .pkts = 128,},  \
+       {.usec = 8,  .pkts = 64,},  \
+       {.usec = 16, .pkts = 32,},  \
+       {.usec = 32, .pkts = 32,},  \
+       {.usec = 64, .pkts = 32,}   \
 }
 
 static const struct dim_cq_moder
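
The conversion to designated initializers does not change the generated tables; it binds each value to a field name, so the entries stay correct even if the fields of struct dim_cq_moder are ever reordered. A standalone sketch of the idiom (field types assumed here for illustration):

    #include <stdio.h>

    /* Assumed shape, mirroring the usec/pkts pair used above. */
    struct cq_moder {
            unsigned short usec;
            unsigned short pkts;
    };

    /* Positional {1, 256} silently breaks if fields swap; this cannot. */
    static const struct cq_moder rx_profile[] = {
            { .usec = 1,  .pkts = 256 },
            { .usec = 8,  .pkts = 256 },
            { .usec = 64, .pkts = 256 },
    };

    int main(void)
    {
            for (size_t i = 0; i < sizeof(rx_profile) / sizeof(rx_profile[0]); i++)
                    printf("%hu us -> %hu pkts\n", rx_profile[i].usec, rx_profile[i].pkts);
            return 0;
    }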
index 9301578f98e8c6d4acb49df297aa5c47b590ea8b..06833d404398d747b41138fbd58e03ab5e5f688a 100644 (file)
@@ -22,15 +22,33 @@ EXPORT_SYMBOL(hex_asc_upper);
  *
  * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad
  * input.
+ *
+ * This function is used to load cryptographic keys, so it is coded in such a
+ * way that there are no conditions or memory accesses that depend on data.
+ *
+ * Explanation of the logic:
+ * (ch - '9' - 1) is negative if ch <= '9'
+ * ('0' - 1 - ch) is negative if ch >= '0'
+ * we "and" these two values, so the result is negative if ch is in the range
+ *     '0' ... '9'
+ * we are only interested in the sign, so we do a shift ">> 8"; note that right
+ *     shift of a negative value is implementation-defined, so we cast the
+ *     value to (unsigned) before the shift --- we have 0xffffff if ch is in
+ *     the range '0' ... '9', 0 otherwise
+ * we "and" this value with (ch - '0' + 1) --- we have a value 1 ... 10 if ch is
+ *     in the range '0' ... '9', 0 otherwise
+ * we add this value to -1 --- we have a value 0 ... 9 if ch is in the range '0'
+ *     ... '9', -1 otherwise
+ * the next line is similar to the previous one, but we need to decode both
+ *     uppercase and lowercase letters, so we use (ch & 0xdf), which converts
+ *     lowercase to uppercase
  */
-int hex_to_bin(char ch)
+int hex_to_bin(unsigned char ch)
 {
-       if ((ch >= '0') && (ch <= '9'))
-               return ch - '0';
-       ch = tolower(ch);
-       if ((ch >= 'a') && (ch <= 'f'))
-               return ch - 'a' + 10;
-       return -1;
+       unsigned char cu = ch & 0xdf;
+       return -1 +
+               ((ch - '0' +  1) & (unsigned)((ch - '9' - 1) & ('0' - 1 - ch)) >> 8) +
+               ((cu - 'A' + 11) & (unsigned)((cu - 'F' - 1) & ('A' - 1 - cu)) >> 8);
 }
 EXPORT_SYMBOL(hex_to_bin);
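
The comment block can be verified by tracing the expression; the user-space copy below (same arithmetic, with assert-based spot checks) makes it easy to step through a digit, a letter, and a reject. Note that >> binds tighter than &, so each masked term is value & (sign_mask >> 8):

    #include <assert.h>
    #include <stdio.h>

    /* User-space copy of the branchless decoder above. */
    static int hex_to_bin(unsigned char ch)
    {
            unsigned char cu = ch & 0xdf;   /* folds 'a'-'f' onto 'A'-'F' */

            return -1 +
                    ((ch - '0' +  1) & (unsigned)((ch - '9' - 1) & ('0' - 1 - ch)) >> 8) +
                    ((cu - 'A' + 11) & (unsigned)((cu - 'F' - 1) & ('A' - 1 - cu)) >> 8);
    }

    int main(void)
    {
            /*
             * '7': the digit term evaluates to 8 (ch - '0' + 1), the letter
             * term to 0, so -1 + 8 + 0 = 7.  'b': cu = 'B', -1 + 0 + 12 = 11.
             * '$': both masks are 0, so the result is -1.
             */
            assert(hex_to_bin('7') == 7);
            assert(hex_to_bin('b') == 11);
            assert(hex_to_bin('F') == 15);
            assert(hex_to_bin('$') == -1);
            puts("ok");
            return 0;
    }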
 
@@ -45,10 +63,13 @@ EXPORT_SYMBOL(hex_to_bin);
 int hex2bin(u8 *dst, const char *src, size_t count)
 {
        while (count--) {
-               int hi = hex_to_bin(*src++);
-               int lo = hex_to_bin(*src++);
+               int hi, lo;
 
-               if ((hi < 0) || (lo < 0))
+               hi = hex_to_bin(*src++);
+               if (unlikely(hi < 0))
+                       return -EINVAL;
+               lo = hex_to_bin(*src++);
+               if (unlikely(lo < 0))
                        return -EINVAL;
 
                *dst++ = (hi << 4) | lo;
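
With the split checks, hex2bin() fails on the first bad nibble instead of decoding both before testing; the calling convention is unchanged. A hypothetical call site (buffer and literal are illustrative):

    /* Hypothetical caller: 32 hex characters decode into a 16-byte key. */
    u8 key[16];
    int ret;

    ret = hex2bin(key, "00112233445566778899aabbccddeeff", sizeof(key));
    if (ret)
            return ret;     /* -EINVAL on any non-hex character */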
index 2f17b488d58e11b51996565dfe5e24adccdae3b1..2d5329a421058f959ea565a1ea2fed0104aa8968 100644 (file)
@@ -188,14 +188,18 @@ EXPORT_SYMBOL(irq_poll_init);
 static int irq_poll_cpu_dead(unsigned int cpu)
 {
        /*
-        * If a CPU goes away, splice its entries to the current CPU
-        * and trigger a run of the softirq
+        * If a CPU goes away, splice its entries to the current CPU and
+        * set the POLL softirq bit. The local_bh_disable()/enable() pair
+        * ensures that it is handled. Otherwise the current CPU could
+        * reach idle with the POLL softirq pending.
         */
+       local_bh_disable();
        local_irq_disable();
        list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
                         this_cpu_ptr(&blk_cpu_iopoll));
        __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
        local_irq_enable();
+       local_bh_enable();
 
        return 0;
 }
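
The new bracketing is a reusable shape for raising a softirq from task context: __raise_softirq_irqoff() only sets the pending bit, and local_bh_enable() is one of the points where pending softirqs actually run, so the pair guarantees the spliced work is handled before this CPU can reach idle. Schematically (MY_SOFTIRQ is a stand-in for a real softirq number such as IRQ_POLL_SOFTIRQ):

    local_bh_disable();                     /* defer softirq execution */
    local_irq_disable();
    /* ... move per-CPU work onto this CPU's list ... */
    __raise_softirq_irqoff(MY_SOFTIRQ);     /* marks pending, nothing more */
    local_irq_enable();
    local_bh_enable();                      /* pending softirqs execute here */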
index 56fa037501b5606833c727450e623ca50f22b6e9..5f0e71ab292cb07e3650a09665afe977d149c7df 100644 (file)
@@ -54,32 +54,6 @@ void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
                kobj->ktype->get_ownership(kobj, uid, gid);
 }
 
-/*
- * populate_dir - populate directory with attributes.
- * @kobj: object we're working on.
- *
- * Most subsystems have a set of default attributes that are associated
- * with an object that registers with them.  This is a helper called during
- * object registration that loops through the default attributes of the
- * subsystem and creates attributes files for them in sysfs.
- */
-static int populate_dir(struct kobject *kobj)
-{
-       const struct kobj_type *t = get_ktype(kobj);
-       struct attribute *attr;
-       int error = 0;
-       int i;
-
-       if (t && t->default_attrs) {
-               for (i = 0; (attr = t->default_attrs[i]) != NULL; i++) {
-                       error = sysfs_create_file(kobj, attr);
-                       if (error)
-                               break;
-               }
-       }
-       return error;
-}
-
 static int create_dir(struct kobject *kobj)
 {
        const struct kobj_type *ktype = get_ktype(kobj);
@@ -90,12 +64,6 @@ static int create_dir(struct kobject *kobj)
        if (error)
                return error;
 
-       error = populate_dir(kobj);
-       if (error) {
-               sysfs_remove_dir(kobj);
-               return error;
-       }
-
        if (ktype) {
                error = sysfs_create_groups(kobj, ktype->default_groups);
                if (error) {
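
With populate_dir() gone, default_attrs is no longer honored during registration; a kobj_type must publish its attributes through default_groups, which sysfs_create_groups() consumes above. A sketch of the destination form (names are illustrative):

    /* ATTRIBUTE_GROUPS(foo) generates foo_groups from foo_attrs. */
    static struct attribute *foo_attrs[] = {
            &foo_bar_attr.attr,
            &foo_baz_attr.attr,
            NULL,
    };
    ATTRIBUTE_GROUPS(foo);

    static const struct kobj_type foo_ktype = {
            .sysfs_ops      = &kobj_sysfs_ops,
            .default_groups = foo_groups,
    };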
index 926f4823d5eac832a5b6e78c5c08228d400ca5ad..fd1728d94babb28f997bf10c2b2180ae5cdec2c0 100644 (file)
@@ -271,8 +271,12 @@ static FORCE_INLINE int LZ4_decompress_generic(
                        ip += length;
                        op += length;
 
-                       /* Necessarily EOF, due to parsing restrictions */
-                       if (!partialDecoding || (cpy == oend))
+                       /* Necessarily EOF when !partialDecoding.
+                        * When partialDecoding, it is EOF if we've either
+                        * filled the output buffer or
+                        * can't proceed with reading an offset for the following match.
+                        */
+                       if (!partialDecoding || (cpy == oend) || (ip >= (iend - 2)))
                                break;
                } else {
                        /* may overwrite up to WILDCOPYLENGTH beyond cpy */
index af9302141bcf63983b8bac15468174f63ed0edc7..e5c5315da274194e5f8f35b5e9735551c030e5a8 100644 (file)
@@ -76,6 +76,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
        data = kzalloc(sizeof(*ref->data), gfp);
        if (!data) {
                free_percpu((void __percpu *)ref->percpu_count_ptr);
+               ref->percpu_count_ptr = 0;
                return -ENOMEM;
        }
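
Zeroing percpu_count_ptr after freeing it makes the error path safe against later cleanup: percpu_ref_exit() inspects the same pointer and would otherwise free the percpu counter a second time. It is the classic free-and-clear idiom in miniature:

    #include <stdlib.h>

    struct handle {
            void *buf;
    };

    /* Clearing the pointer makes a repeated cleanup call a no-op. */
    static void handle_release(struct handle *h)
    {
            free(h->buf);
            h->buf = NULL;          /* free(NULL) is harmless next time */
    }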
 
index 08fc72d3ed16dbe8ec8f10d6382522882b1fe754..6432b8c3e431ecbbb462326bbfc75e63e16c7c44 100644 (file)
@@ -25,7 +25,7 @@
  * hit it), 'max' is the address space maximum (and we return
  * -EFAULT if we hit it).
  */
-static inline long do_strncpy_from_user(char *dst, const char __user *src,
+static __always_inline long do_strncpy_from_user(char *dst, const char __user *src,
                                        unsigned long count, unsigned long max)
 {
        const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
index bffa0ebf9f8b8273251daa1af910ccd6aebf2424..feeb935a22991174c7146fbeddc9a002b30dcac0 100644 (file)
@@ -20,7 +20,7 @@
  * if it fits in a aligned 'long'. The caller needs to check
  * the return value against "> max".
  */
-static inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max)
+static __always_inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max)
 {
        const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
        unsigned long align, res = 0;
index 4acc88ea7c21744e6b1faa54af6f2029545a1b32..54e646e8e6ee7ab9d46d7c6ed607ff0900eeb366 100644 (file)
@@ -207,6 +207,8 @@ static void *xas_descend(struct xa_state *xas, struct xa_node *node)
        if (xa_is_sibling(entry)) {
                offset = xa_to_sibling(entry);
                entry = xa_entry(xas->xa, node, offset);
+               if (node->shift && xa_is_node(entry))
+                       entry = XA_RETRY_ENTRY;
        }
 
        xas->xa_offset = offset;
index 7176af65b103a4500fda68dc62028fb37794d3a0..ff60bd7d74e0729b81f80ecfad4e9296e1e08079 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
+#include <linux/blkdev.h>
 #include <linux/wait.h>
 #include <linux/rbtree.h>
 #include <linux/kthread.h>
@@ -390,7 +391,6 @@ static void cgwb_release_workfn(struct work_struct *work)
 {
        struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
                                                release_work);
-       struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
        struct backing_dev_info *bdi = wb->bdi;
 
        mutex_lock(&wb->bdi->cgwb_release_mutex);
@@ -401,7 +401,7 @@ static void cgwb_release_workfn(struct work_struct *work)
        mutex_unlock(&wb->bdi->cgwb_release_mutex);
 
        /* triggers blkg destruction if no online users left */
-       blkcg_unpin_online(blkcg);
+       blkcg_unpin_online(wb->blkcg_css);
 
        fprop_local_destroy_percpu(&wb->memcg_completions);
 
@@ -446,7 +446,6 @@ static int cgwb_create(struct backing_dev_info *bdi,
 {
        struct mem_cgroup *memcg;
        struct cgroup_subsys_state *blkcg_css;
-       struct blkcg *blkcg;
        struct list_head *memcg_cgwb_list, *blkcg_cgwb_list;
        struct bdi_writeback *wb;
        unsigned long flags;
@@ -454,9 +453,8 @@ static int cgwb_create(struct backing_dev_info *bdi,
 
        memcg = mem_cgroup_from_css(memcg_css);
        blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
-       blkcg = css_to_blkcg(blkcg_css);
        memcg_cgwb_list = &memcg->cgwb_list;
-       blkcg_cgwb_list = &blkcg->cgwb_list;
+       blkcg_cgwb_list = blkcg_get_cgwb_list(blkcg_css);
 
        /* look up again under lock and discard on blkcg mismatch */
        spin_lock_irqsave(&cgwb_lock, flags);
@@ -511,7 +509,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
                        list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
                        list_add(&wb->memcg_node, memcg_cgwb_list);
                        list_add(&wb->blkcg_node, blkcg_cgwb_list);
-                       blkcg_pin_online(blkcg);
+                       blkcg_pin_online(blkcg_css);
                        css_get(memcg_css);
                        css_get(blkcg_css);
                }
@@ -724,18 +722,19 @@ void wb_memcg_offline(struct mem_cgroup *memcg)
 
 /**
  * wb_blkcg_offline - kill all wb's associated with a blkcg being offlined
- * @blkcg: blkcg being offlined
+ * @css: blkcg being offlined
  *
 * Also prevents creation of any new wb's associated with @css.
  */
-void wb_blkcg_offline(struct blkcg *blkcg)
+void wb_blkcg_offline(struct cgroup_subsys_state *css)
 {
        struct bdi_writeback *wb, *next;
+       struct list_head *list = blkcg_get_cgwb_list(css);
 
        spin_lock_irq(&cgwb_lock);
-       list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node)
+       list_for_each_entry_safe(wb, next, list, blkcg_node)
                cgwb_kill(wb);
-       blkcg->cgwb_list.next = NULL;   /* prevent new wb's */
+       list->next = NULL;      /* prevent new wb's */
        spin_unlock_irq(&cgwb_lock);
 }
 
index c3e37aa9ff9e43336cbab1f3a442d016d1dfa75d..fe915db6149b9c4c74a775b1d018b09b8d0fd68d 100644 (file)
 #include "internal.h"
 
 #ifdef CONFIG_COMPACTION
+/*
+ * Fragmentation score check interval for proactive compaction purposes.
+ */
+#define HPAGE_FRAG_CHECK_INTERVAL_MSEC (500)
+
 static inline void count_compact_event(enum vm_event_item item)
 {
        count_vm_event(item);
@@ -50,11 +55,6 @@ static inline void count_compact_events(enum vm_event_item item, long delta)
 #define pageblock_start_pfn(pfn)       block_start_pfn(pfn, pageblock_order)
 #define pageblock_end_pfn(pfn)         block_end_pfn(pfn, pageblock_order)
 
-/*
- * Fragmentation score check interval for proactive compaction purposes.
- */
-static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500;
-
 /*
  * Page order with-respect-to which proactive compaction
  * calculates external fragmentation, which is used as
index 3a5ffb5587cd055323a49ae63713528f47bb2caf..9a1eef6c5d350e6fadb77be23a48c34818d6310e 100644 (file)
@@ -1063,12 +1063,6 @@ void __init pagecache_init(void)
                init_waitqueue_head(&folio_wait_table[i]);
 
        page_writeback_init();
-
-       /*
-        * tmpfs uses the ZERO_PAGE for reading holes: it is up-to-date,
-        * and splice's page_cache_pipe_buf_confirm() needs to see that.
-        */
-       SetPageUptodate(ZERO_PAGE(0));
 }
 
 /*
index 0cc0c4da7ed9fdb89db9cc93d94c7bceb56b148a..1a692997fac4c9c913279a10d2077d20ddcb0751 100644 (file)
@@ -624,7 +624,7 @@ void __kmap_local_sched_out(void)
 
                /* With debug all even slots are unmapped and act as guard */
                if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) {
-                       WARN_ON_ONCE(!pte_none(pteval));
+                       WARN_ON_ONCE(pte_val(pteval) != 0);
                        continue;
                }
                if (WARN_ON_ONCE(pte_none(pteval)))
@@ -661,7 +661,7 @@ void __kmap_local_sched_in(void)
 
                /* With debug all even slots are unmapped and act as guard */
                if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) {
-                       WARN_ON_ONCE(!pte_none(pteval));
+                       WARN_ON_ONCE(pte_val(pteval) != 0);
                        continue;
                }
                if (WARN_ON_ONCE(pte_none(pteval)))
index 2fe38212e07c665b7a7fe29d575598f371e183c5..910a138e9859e9a6c02e7bf234c5431c8e3cebb5 100644 (file)
@@ -2145,15 +2145,14 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
         * pmd against. Otherwise we can end up replacing wrong folio.
         */
        VM_BUG_ON(freeze && !folio);
-       if (folio) {
-               VM_WARN_ON_ONCE(!folio_test_locked(folio));
-               if (folio != page_folio(pmd_page(*pmd)))
-                       goto out;
-       }
+       VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
 
        if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
-           is_pmd_migration_entry(*pmd))
+           is_pmd_migration_entry(*pmd)) {
+               if (folio && folio != page_folio(pmd_page(*pmd)))
+                       goto out;
                __split_huge_pmd_locked(vma, pmd, range.start, freeze);
+       }
 
 out:
        spin_unlock(ptl);
@@ -2496,11 +2495,16 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        struct address_space *mapping = NULL;
        int extra_pins, ret;
        pgoff_t end;
+       bool is_hzp;
 
-       VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
        VM_BUG_ON_PAGE(!PageLocked(head), head);
        VM_BUG_ON_PAGE(!PageCompound(head), head);
 
+       is_hzp = is_huge_zero_page(head);
+       VM_WARN_ON_ONCE_PAGE(is_hzp, head);
+       if (is_hzp)
+               return -EBUSY;
+
        if (PageWriteback(head))
                return -EBUSY;
 
index b34f50156f7ec29cdfa23006be920afac8954a2b..3fc721789743e6adb95d3005067392c9d7b53dff 100644 (file)
@@ -3475,7 +3475,6 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 {
        int nr_nodes, node;
        struct page *page;
-       int rc = 0;
 
        lockdep_assert_held(&hugetlb_lock);
 
@@ -3486,15 +3485,19 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
        }
 
        for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
-               if (!list_empty(&h->hugepage_freelists[node])) {
-                       page = list_entry(h->hugepage_freelists[node].next,
-                                       struct page, lru);
-                       rc = demote_free_huge_page(h, page);
-                       break;
+               list_for_each_entry(page, &h->hugepage_freelists[node], lru) {
+                       if (PageHWPoison(page))
+                               continue;
+
+                       return demote_free_huge_page(h, page);
                }
        }
 
-       return rc;
+       /*
+        * Only way to get here is if all pages on free lists are poisoned.
+        * Return -EBUSY so that caller will not retry.
+        */
+       return -EBUSY;
 }
 
 #define HSTATE_ATTR_RO(_name) \
@@ -6782,6 +6785,16 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
        return ret;
 }
 
+int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+{
+       int ret;
+
+       spin_lock_irq(&hugetlb_lock);
+       ret = __get_huge_page_for_hwpoison(pfn, flags);
+       spin_unlock_irq(&hugetlb_lock);
+       return ret;
+}
+
 void putback_active_hugepage(struct page *page)
 {
        spin_lock_irq(&hugetlb_lock);
index 07a76c46daa5acf8487ff8bfc1bd9cee317773a0..9e1b6544bfa8e6263fc6a9db58d68b345692e591 100644 (file)
@@ -336,8 +336,6 @@ void __kasan_poison_vmalloc(const void *start, unsigned long size)
 
 #endif
 
-#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
-
 void kasan_enable_tagging(void)
 {
        if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC)
@@ -347,6 +345,9 @@ void kasan_enable_tagging(void)
        else
                hw_enable_tagging_sync();
 }
+
+#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
+
 EXPORT_SYMBOL_GPL(kasan_enable_tagging);
 
 void kasan_force_async_fault(void)
index d79b83d673b144773c8cc9c4eee6eaf7ec45ebde..b01b4bbe040958f3e345306de5cfab5cecb32ab3 100644 (file)
@@ -355,25 +355,27 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
 #define hw_set_mem_tag_range(addr, size, tag, init) \
                        arch_set_mem_tag_range((addr), (size), (tag), (init))
 
+void kasan_enable_tagging(void);
+
 #else /* CONFIG_KASAN_HW_TAGS */
 
 #define hw_enable_tagging_sync()
 #define hw_enable_tagging_async()
 #define hw_enable_tagging_asymm()
 
+static inline void kasan_enable_tagging(void) { }
+
 #endif /* CONFIG_KASAN_HW_TAGS */
 
 #if defined(CONFIG_KASAN_HW_TAGS) && IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
 
-void kasan_enable_tagging(void);
 void kasan_force_async_fault(void);
 
-#else /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */
+#else /* CONFIG_KASAN_HW_TAGS && CONFIG_KASAN_KUNIT_TEST */
 
-static inline void kasan_enable_tagging(void) { }
 static inline void kasan_force_async_fault(void) { }
 
-#endif /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */
+#endif /* CONFIG_KASAN_HW_TAGS && CONFIG_KASAN_KUNIT_TEST */
 
 #ifdef CONFIG_KASAN_SW_TAGS
 u8 kasan_random_tag(void);
index 08291ed33e93af757e436b8e2efbe626d50c1f79..0a9def8ce5e8b6d68873231f418b85ac96b40bfa 100644 (file)
@@ -315,6 +315,13 @@ static void per_cpu_remove_cache(void *arg)
        struct qlist_head *q;
 
        q = this_cpu_ptr(&cpu_quarantine);
+       /*
+        * Ensure the ordering between the writing to q->offline and
+        * per_cpu_remove_cache.  Prevent cpu_quarantine from being corrupted
+        * by an interrupt.
+        */
+       if (READ_ONCE(q->offline))
+               return;
        qlist_move_cache(q, &to_free, cache);
        qlist_free_all(&to_free, cache);
 }
index a203747ad2c06e3de83e2b12f695cae7625d1724..11a954763be9cd04d16707e0817b9e2b24ad1d58 100644 (file)
@@ -231,27 +231,6 @@ static bool kfence_unprotect(unsigned long addr)
        return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), false));
 }
 
-static inline struct kfence_metadata *addr_to_metadata(unsigned long addr)
-{
-       long index;
-
-       /* The checks do not affect performance; only called from slow-paths. */
-
-       if (!is_kfence_address((void *)addr))
-               return NULL;
-
-       /*
-        * May be an invalid index if called with an address at the edge of
-        * __kfence_pool, in which case we would report an "invalid access"
-        * error.
-        */
-       index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1;
-       if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS)
-               return NULL;
-
-       return &kfence_metadata[index];
-}
-
 static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta)
 {
        unsigned long offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2;
@@ -642,6 +621,16 @@ static bool __init kfence_init_pool_early(void)
         * fails for the first page, and therefore expect addr==__kfence_pool in
         * most failure cases.
         */
+       for (char *p = (char *)addr; p < __kfence_pool + KFENCE_POOL_SIZE; p += PAGE_SIZE) {
+               struct slab *slab = virt_to_slab(p);
+
+               if (!slab)
+                       continue;
+#ifdef CONFIG_MEMCG
+               slab->memcg_data = 0;
+#endif
+               __folio_clear_slab(slab_folio(slab));
+       }
        memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
        __kfence_pool = NULL;
        return false;
index 9a6c4b1b12a88dfe8362c4d3031997dc76e4aeff..600f2e2431d6dc542420ce950956abe46f46d5a7 100644 (file)
@@ -96,6 +96,27 @@ struct kfence_metadata {
 
 extern struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
 
+static inline struct kfence_metadata *addr_to_metadata(unsigned long addr)
+{
+       long index;
+
+       /* The checks do not affect performance; only called from slow-paths. */
+
+       if (!is_kfence_address((void *)addr))
+               return NULL;
+
+       /*
+        * May be an invalid index if called with an address at the edge of
+        * __kfence_pool, in which case we would report an "invalid access"
+        * error.
+        */
+       index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1;
+       if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS)
+               return NULL;
+
+       return &kfence_metadata[index];
+}
+
 /* KFENCE error types for report generation. */
 enum kfence_error_type {
        KFENCE_ERROR_OOB,               /* Detected an out-of-bounds access. */
index f93a7b2a338be7d6906f51a22ba2faa10615c6cf..f5a6d8ba3e21fef25cee0ec6b449d9ddd2de646d 100644 (file)
@@ -273,3 +273,50 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
        /* We encountered a memory safety error, taint the kernel! */
        add_taint(TAINT_BAD_PAGE, LOCKDEP_STILL_OK);
 }
+
+#ifdef CONFIG_PRINTK
+static void kfence_to_kp_stack(const struct kfence_track *track, void **kp_stack)
+{
+       int i, j;
+
+       i = get_stack_skipnr(track->stack_entries, track->num_stack_entries, NULL);
+       for (j = 0; i < track->num_stack_entries && j < KS_ADDRS_COUNT; ++i, ++j)
+               kp_stack[j] = (void *)track->stack_entries[i];
+       if (j < KS_ADDRS_COUNT)
+               kp_stack[j] = NULL;
+}
+
+bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
+{
+       struct kfence_metadata *meta = addr_to_metadata((unsigned long)object);
+       unsigned long flags;
+
+       if (!meta)
+               return false;
+
+       /*
+        * If state is UNUSED at least show the pointer requested; the rest
+        * would be garbage data.
+        */
+       kpp->kp_ptr = object;
+
+       /* Requesting info on a never-used object is almost certainly a bug. */
+       if (WARN_ON(meta->state == KFENCE_OBJECT_UNUSED))
+               return true;
+
+       raw_spin_lock_irqsave(&meta->lock, flags);
+
+       kpp->kp_slab = slab;
+       kpp->kp_slab_cache = meta->cache;
+       kpp->kp_objp = (void *)meta->addr;
+       kfence_to_kp_stack(&meta->alloc_track, kpp->kp_stack);
+       if (meta->state == KFENCE_OBJECT_FREED)
+               kfence_to_kp_stack(&meta->free_track, kpp->kp_free_stack);
+       /* get_stack_skipnr() ensures the first entry is outside allocator. */
+       kpp->kp_ret = kpp->kp_stack[0];
+
+       raw_spin_unlock_irqrestore(&meta->lock, flags);
+
+       return true;
+}
+#endif
index acd7cbb82e16048e2727ce89d193e3a81769a111..a182f5ddaf68b3bcae74287c1ad33b2a31ccd60d 100644 (file)
@@ -1132,7 +1132,7 @@ EXPORT_SYMBOL(kmemleak_no_scan);
 void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
                               gfp_t gfp)
 {
-       if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
+       if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
                kmemleak_alloc(__va(phys), size, min_count, gfp);
 }
 EXPORT_SYMBOL(kmemleak_alloc_phys);
@@ -1146,7 +1146,7 @@ EXPORT_SYMBOL(kmemleak_alloc_phys);
  */
 void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size)
 {
-       if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
+       if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
                kmemleak_free_part(__va(phys), size);
 }
 EXPORT_SYMBOL(kmemleak_free_part_phys);
@@ -1158,7 +1158,7 @@ EXPORT_SYMBOL(kmemleak_free_part_phys);
  */
 void __ref kmemleak_not_leak_phys(phys_addr_t phys)
 {
-       if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
+       if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
                kmemleak_not_leak(__va(phys));
 }
 EXPORT_SYMBOL(kmemleak_not_leak_phys);
@@ -1170,7 +1170,7 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys);
  */
 void __ref kmemleak_ignore_phys(phys_addr_t phys)
 {
-       if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
+       if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
                kmemleak_ignore(__va(phys));
 }
 EXPORT_SYMBOL(kmemleak_ignore_phys);
index c669d87001a63450de08d1170f7a097612267f4f..ba76428ceecea18d6eab5d88c457169559daade6 100644 (file)
@@ -394,12 +394,6 @@ static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid,
        int dst_idx = dst_memcg->kmemcg_id;
        struct list_lru_one *src, *dst;
 
-       /*
-        * If there is no lru entry in this nlru, we can skip it immediately.
-        */
-       if (!READ_ONCE(nlru->nr_items))
-               return;
-
        /*
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
index 725f767232207dc5c20c7630556ce087b9db3905..598fece89e2b735183103dcd66c04f060128693b 100644 (file)
@@ -587,6 +587,9 @@ static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
 static DEFINE_SPINLOCK(stats_flush_lock);
 static DEFINE_PER_CPU(unsigned int, stats_updates);
 static atomic_t stats_flush_threshold = ATOMIC_INIT(0);
+static u64 flush_next_time;
+
+#define FLUSH_TIME (2UL*HZ)
 
 /*
  * Accessors to ensure that preemption is disabled on PREEMPT_RT because it can
@@ -637,6 +640,7 @@ static void __mem_cgroup_flush_stats(void)
        if (!spin_trylock_irqsave(&stats_flush_lock, flag))
                return;
 
+       flush_next_time = jiffies_64 + 2*FLUSH_TIME;
        cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
        atomic_set(&stats_flush_threshold, 0);
        spin_unlock_irqrestore(&stats_flush_lock, flag);
@@ -648,10 +652,16 @@ void mem_cgroup_flush_stats(void)
                __mem_cgroup_flush_stats();
 }
 
+void mem_cgroup_flush_stats_delayed(void)
+{
+       if (time_after64(jiffies_64, flush_next_time))
+               mem_cgroup_flush_stats();
+}
+
 static void flush_memcg_stats_dwork(struct work_struct *w)
 {
        __mem_cgroup_flush_stats();
-       queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
+       queue_delayed_work(system_unbound_wq, &stats_flush_dwork, FLUSH_TIME);
 }
 
 /**
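
mem_cgroup_flush_stats_delayed() layers a time-based rate limit over the existing threshold check: callers that tolerate statistics up to roughly 2*FLUSH_TIME old skip the flush entirely unless the deadline set by the last full flush has passed. The pattern, reduced to a runnable user-space sketch (names illustrative):

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    #define FLUSH_PERIOD_MS 2000ULL
    static uint64_t flush_next_ms;

    static uint64_t now_ms(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return (uint64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
    }

    /* Stand-in for the costly stat aggregation; pushes the deadline out. */
    static void expensive_flush(void)
    {
            flush_next_ms = now_ms() + 2 * FLUSH_PERIOD_MS;
            puts("flushed");
    }

    /* Cheap entry point for readers that tolerate slightly stale data. */
    static void maybe_flush(void)
    {
            if (now_ms() > flush_next_ms)
                    expensive_flush();
    }

    int main(void)
    {
            maybe_flush();  /* flushes: deadline starts at 0 */
            maybe_flush();  /* skipped: within 2 * FLUSH_PERIOD_MS */
            return 0;
    }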
index dcb6bb9cf73152f99824cdb5db0c2e2028c3e1b4..d4a4adcca01f3f77721fedd241a80de55e1b91ee 100644 (file)
@@ -1274,7 +1274,7 @@ try_again:
        }
 out:
        if (ret == -EIO)
-               dump_page(p, "hwpoison: unhandlable page");
+               pr_err("Memory failure: %#lx: unhandlable page.\n", page_to_pfn(p));
 
        return ret;
 }
@@ -1498,50 +1498,113 @@ static int try_to_split_thp_page(struct page *page, const char *msg)
        return 0;
 }
 
-static int memory_failure_hugetlb(unsigned long pfn, int flags)
+/*
+ * Called from hugetlb code with hugetlb_lock held.
+ *
+ * Return values:
+ *   0             - free hugepage
+ *   1             - in-use hugepage
+ *   2             - not a hugepage
+ *   -EBUSY        - the hugepage is busy (try to retry)
+ *   -EHWPOISON    - the hugepage is already hwpoisoned
+ */
+int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+{
+       struct page *page = pfn_to_page(pfn);
+       struct page *head = compound_head(page);
+       int ret = 2;    /* fallback to normal page handling */
+       bool count_increased = false;
+
+       if (!PageHeadHuge(head))
+               goto out;
+
+       if (flags & MF_COUNT_INCREASED) {
+               ret = 1;
+               count_increased = true;
+       } else if (HPageFreed(head) || HPageMigratable(head)) {
+               ret = get_page_unless_zero(head);
+               if (ret)
+                       count_increased = true;
+       } else {
+               ret = -EBUSY;
+               goto out;
+       }
+
+       if (TestSetPageHWPoison(head)) {
+               ret = -EHWPOISON;
+               goto out;
+       }
+
+       return ret;
+out:
+       if (count_increased)
+               put_page(head);
+       return ret;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * Taking a refcount on hugetlb pages needs extra care about race conditions
+ * with basic operations like hugepage allocation/free/demotion.
+ * So some of the prechecks for hwpoison (pinning, and testing/setting
+ * PageHWPoison) should be done within a single hugetlb_lock section.
+ */
+static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
 {
-       struct page *p = pfn_to_page(pfn);
-       struct page *head = compound_head(p);
        int res;
+       struct page *p = pfn_to_page(pfn);
+       struct page *head;
        unsigned long page_flags;
+       bool retry = true;
 
-       if (TestSetPageHWPoison(head)) {
-               pr_err("Memory failure: %#lx: already hardware poisoned\n",
-                      pfn);
-               res = -EHWPOISON;
-               if (flags & MF_ACTION_REQUIRED)
+       *hugetlb = 1;
+retry:
+       res = get_huge_page_for_hwpoison(pfn, flags);
+       if (res == 2) { /* fallback to normal page handling */
+               *hugetlb = 0;
+               return 0;
+       } else if (res == -EHWPOISON) {
+               pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn);
+               if (flags & MF_ACTION_REQUIRED) {
+                       head = compound_head(p);
                        res = kill_accessing_process(current, page_to_pfn(head), flags);
+               }
                return res;
+       } else if (res == -EBUSY) {
+               if (retry) {
+                       retry = false;
+                       goto retry;
+               }
+               action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
+               return res;
+       }
+
+       head = compound_head(p);
+       lock_page(head);
+
+       if (hwpoison_filter(p)) {
+               ClearPageHWPoison(head);
+               res = -EOPNOTSUPP;
+               goto out;
        }
 
        num_poisoned_pages_inc();
 
-       if (!(flags & MF_COUNT_INCREASED)) {
-               res = get_hwpoison_page(p, flags);
-               if (!res) {
-                       lock_page(head);
-                       if (hwpoison_filter(p)) {
-                               if (TestClearPageHWPoison(head))
-                                       num_poisoned_pages_dec();
-                               unlock_page(head);
-                               return -EOPNOTSUPP;
-                       }
-                       unlock_page(head);
-                       res = MF_FAILED;
-                       if (__page_handle_poison(p)) {
-                               page_ref_inc(p);
-                               res = MF_RECOVERED;
-                       }
-                       action_result(pfn, MF_MSG_FREE_HUGE, res);
-                       return res == MF_RECOVERED ? 0 : -EBUSY;
-               } else if (res < 0) {
-                       action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
-                       return -EBUSY;
+       /*
+        * Handling a free hugepage.  The possible race with hugepage allocation
+        * or demotion can be prevented by the PageHWPoison flag.
+        */
+       if (res == 0) {
+               unlock_page(head);
+               res = MF_FAILED;
+               if (__page_handle_poison(p)) {
+                       page_ref_inc(p);
+                       res = MF_RECOVERED;
                }
+               action_result(pfn, MF_MSG_FREE_HUGE, res);
+               return res == MF_RECOVERED ? 0 : -EBUSY;
        }
 
-       lock_page(head);
-
        /*
         * The page could have changed compound pages due to race window.
         * If this happens just bail out.
@@ -1554,14 +1617,6 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 
        page_flags = head->flags;
 
-       if (hwpoison_filter(p)) {
-               if (TestClearPageHWPoison(head))
-                       num_poisoned_pages_dec();
-               put_page(p);
-               res = -EOPNOTSUPP;
-               goto out;
-       }
-
        /*
         * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
         * simply disable it. In order to make it work properly, we need
@@ -1588,6 +1643,12 @@ out:
        unlock_page(head);
        return res;
 }
+#else
+static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
+{
+       return 0;
+}
+#endif
 
 static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
                struct dev_pagemap *pgmap)
@@ -1712,6 +1773,7 @@ int memory_failure(unsigned long pfn, int flags)
        int res = 0;
        unsigned long page_flags;
        bool retry = true;
+       int hugetlb = 0;
 
        if (!sysctl_memory_failure_recovery)
                panic("Memory failure on page %lx", pfn);
@@ -1739,10 +1801,9 @@ int memory_failure(unsigned long pfn, int flags)
        }
 
 try_again:
-       if (PageHuge(p)) {
-               res = memory_failure_hugetlb(pfn, flags);
+       res = try_memory_failure_hugetlb(pfn, flags, &hugetlb);
+       if (hugetlb)
                goto unlock_mutex;
-       }
 
        if (TestSetPageHWPoison(p)) {
                pr_err("Memory failure: %#lx: already hardware poisoned\n",
index a2516d31db6ca8b3646d95344d5fdc534bbdcedc..8c74107a2b15e008a7cfa24e95811552f0dbf37e 100644 (file)
@@ -1191,8 +1191,10 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
  */
 static struct page *new_page(struct page *page, unsigned long start)
 {
+       struct folio *dst, *src = page_folio(page);
        struct vm_area_struct *vma;
        unsigned long address;
+       gfp_t gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL;
 
        vma = find_vma(current->mm, start);
        while (vma) {
@@ -1202,24 +1204,19 @@ static struct page *new_page(struct page *page, unsigned long start)
                vma = vma->vm_next;
        }
 
-       if (PageHuge(page)) {
-               return alloc_huge_page_vma(page_hstate(compound_head(page)),
+       if (folio_test_hugetlb(src))
+               return alloc_huge_page_vma(page_hstate(&src->page),
                                vma, address);
-       } else if (PageTransHuge(page)) {
-               struct page *thp;
 
-               thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-                                        HPAGE_PMD_ORDER);
-               if (!thp)
-                       return NULL;
-               prep_transhuge_page(thp);
-               return thp;
-       }
+       if (folio_test_large(src))
+               gfp = GFP_TRANSHUGE;
+
        /*
-        * if !vma, alloc_page_vma() will use task or system default policy
+        * if !vma, vma_alloc_folio() will use task or system default policy
         */
-       return alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL,
-                       vma, address);
+       dst = vma_alloc_folio(gfp, folio_order(src), vma, address,
+                       folio_test_large(src));
+       return &dst->page;
 }
 #else
 
@@ -2227,6 +2224,19 @@ out:
 }
 EXPORT_SYMBOL(alloc_pages_vma);
 
+struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
+               unsigned long addr, bool hugepage)
+{
+       struct folio *folio;
+
+       folio = (struct folio *)alloc_pages_vma(gfp, order, vma, addr,
+                       hugepage);
+       if (folio && order > 1)
+               prep_transhuge_page(&folio->page);
+
+       return folio;
+}
+
 /**
  * alloc_pages - Allocate pages.
  * @gfp: GFP flags.
@@ -2733,6 +2743,7 @@ alloc_new:
        mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL);
        if (!mpol_new)
                goto err_out;
+       atomic_set(&mpol_new->refcnt, 1);
        goto restart;
 }
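
vma_alloc_folio() gives mempolicy-aware callers a single entry point for any order: it routes through alloc_pages_vma() and performs the prep_transhuge_page() step that call sites such as new_page() used to open-code. A hypothetical conversion from alloc_page_vma() (vma and addr come from the caller's context):

    struct folio *folio;

    /* order 0 here; for order > 1 the helper also preps the THP. */
    folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, addr, false);
    if (!folio)
            return NULL;
    return &folio->page;    /* legacy struct page result */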
 
index de175e2fdba5d8c4b91ca68460ecb13cf06a2c66..6c31ee1e1c9b061b70bf99a3463fcf9ea07686ed 100644 (file)
@@ -1520,10 +1520,11 @@ out:
 
 struct page *alloc_migration_target(struct page *page, unsigned long private)
 {
+       struct folio *folio = page_folio(page);
        struct migration_target_control *mtc;
        gfp_t gfp_mask;
        unsigned int order = 0;
-       struct page *new_page = NULL;
+       struct folio *new_folio = NULL;
        int nid;
        int zidx;
 
@@ -1531,34 +1532,31 @@ struct page *alloc_migration_target(struct page *page, unsigned long private)
        gfp_mask = mtc->gfp_mask;
        nid = mtc->nid;
        if (nid == NUMA_NO_NODE)
-               nid = page_to_nid(page);
+               nid = folio_nid(folio);
 
-       if (PageHuge(page)) {
-               struct hstate *h = page_hstate(compound_head(page));
+       if (folio_test_hugetlb(folio)) {
+               struct hstate *h = page_hstate(&folio->page);
 
                gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
                return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask);
        }
 
-       if (PageTransHuge(page)) {
+       if (folio_test_large(folio)) {
                /*
                 * clear __GFP_RECLAIM to make the migration callback
                 * consistent with regular THP allocations.
                 */
                gfp_mask &= ~__GFP_RECLAIM;
                gfp_mask |= GFP_TRANSHUGE;
-               order = HPAGE_PMD_ORDER;
+               order = folio_order(folio);
        }
-       zidx = zone_idx(page_zone(page));
+       zidx = zone_idx(folio_zone(folio));
        if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE)
                gfp_mask |= __GFP_HIGHMEM;
 
-       new_page = __alloc_pages(gfp_mask, order, nid, mtc->nmask);
-
-       if (new_page && PageTransHuge(new_page))
-               prep_transhuge_page(new_page);
+       new_folio = __folio_alloc(gfp_mask, order, nid, mtc->nmask);
 
-       return new_page;
+       return &new_folio->page;
 }
 
 #ifdef CONFIG_NUMA
@@ -1999,32 +1997,20 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
                                           unsigned long data)
 {
        int nid = (int) data;
-       struct page *newpage;
-
-       newpage = __alloc_pages_node(nid,
-                                        (GFP_HIGHUSER_MOVABLE |
-                                         __GFP_THISNODE | __GFP_NOMEMALLOC |
-                                         __GFP_NORETRY | __GFP_NOWARN) &
-                                        ~__GFP_RECLAIM, 0);
-
-       return newpage;
-}
-
-static struct page *alloc_misplaced_dst_page_thp(struct page *page,
-                                                unsigned long data)
-{
-       int nid = (int) data;
-       struct page *newpage;
-
-       newpage = alloc_pages_node(nid, (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
-                                  HPAGE_PMD_ORDER);
-       if (!newpage)
-               goto out;
-
-       prep_transhuge_page(newpage);
+       int order = compound_order(page);
+       gfp_t gfp = __GFP_THISNODE;
+       struct folio *new;
+
+       if (order > 0)
+               gfp |= GFP_TRANSHUGE_LIGHT;
+       else {
+               gfp |= GFP_HIGHUSER_MOVABLE | __GFP_NOMEMALLOC | __GFP_NORETRY |
+                       __GFP_NOWARN;
+               gfp &= ~__GFP_RECLAIM;
+       }
+       new = __folio_alloc_node(gfp, order, nid);
 
-out:
-       return newpage;
+       return &new->page;
 }
 
 static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
@@ -2082,22 +2068,8 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
        int nr_remaining;
        unsigned int nr_succeeded;
        LIST_HEAD(migratepages);
-       new_page_t *new;
-       bool compound;
        int nr_pages = thp_nr_pages(page);
 
-       /*
-        * PTE mapped THP or HugeTLB page can't reach here so the page could
-        * be either base page or THP.  And it must be head page if it is
-        * THP.
-        */
-       compound = PageTransHuge(page);
-
-       if (compound)
-               new = alloc_misplaced_dst_page_thp;
-       else
-               new = alloc_misplaced_dst_page;
-
        /*
         * Don't migrate file pages that are mapped in multiple processes
         * with execute permissions as they are probably shared libraries.
@@ -2118,9 +2090,9 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
                goto out;
 
        list_add(&page->lru, &migratepages);
-       nr_remaining = migrate_pages(&migratepages, *new, NULL, node,
-                                    MIGRATE_ASYNC, MR_NUMA_MISPLACED,
-                                    &nr_succeeded);
+       nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
+                                    NULL, node, MIGRATE_ASYNC,
+                                    MR_NUMA_MISPLACED, &nr_succeeded);
        if (nr_remaining) {
                if (!list_empty(&migratepages)) {
                        list_del(&page->lru);
index 3aa839f81e63dc8645506af8c99c870b33afd6d8..313b57d55a634a3df997a3c60bfb6d8a29df41ed 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2117,14 +2117,6 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
        return addr;
 }
 
-#ifndef arch_get_mmap_end
-#define arch_get_mmap_end(addr)        (TASK_SIZE)
-#endif
-
-#ifndef arch_get_mmap_base
-#define arch_get_mmap_base(addr, base) (base)
-#endif
-
 /* Get an address range which is currently unmapped.
  * For shmat() with addr=0.
  *
index 459d195d2ff64bb71509189277c71e84d9725033..f45ff1b7626a62834073857f9529d5badd51a934 100644 (file)
@@ -1036,6 +1036,18 @@ int mmu_interval_notifier_insert_locked(
 }
 EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert_locked);
 
+static bool
+mmu_interval_seq_released(struct mmu_notifier_subscriptions *subscriptions,
+                         unsigned long seq)
+{
+       bool ret;
+
+       spin_lock(&subscriptions->lock);
+       ret = subscriptions->invalidate_seq != seq;
+       spin_unlock(&subscriptions->lock);
+       return ret;
+}
+
 /**
  * mmu_interval_notifier_remove - Remove a interval notifier
  * @interval_sub: Interval subscription to unregister
@@ -1083,7 +1095,7 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub)
        lock_map_release(&__mmu_notifier_invalidate_range_start_map);
        if (seq)
                wait_event(subscriptions->wq,
-                          READ_ONCE(subscriptions->invalidate_seq) != seq);
+                          mmu_interval_seq_released(subscriptions, seq));
 
        /* pairs with mmgrab in mmu_interval_notifier_insert() */
        mmdrop(mm);
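
Reading invalidate_seq under subscriptions->lock, rather than with READ_ONCE(), matters because the writer updates the sequence and wakes waiters under that same lock; taking the lock in the wait condition guarantees the sleeper observes any update made before the wake_up() and cannot miss it and sleep forever. The general shape of a locked wait_event() condition (a sketch; 'obj' and its fields are illustrative):

    static bool seq_released(struct obj *obj, unsigned long seq)
    {
            bool ret;

            spin_lock(&obj->lock);
            ret = obj->seq != seq;  /* writer updates seq under obj->lock */
            spin_unlock(&obj->lock);
            return ret;
    }

    /* sleeper side */
    wait_event(obj->wq, seq_released(obj, seq));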
index 9d76da79594d9084b5ee3a1153243b0d6561afaa..0b93fac76851102272c697500f6ceca14888753d 100644 (file)
@@ -486,6 +486,9 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
        pmd_t *old_pmd, *new_pmd;
        pud_t *old_pud, *new_pud;
 
+       if (!len)
+               return 0;
+
        old_end = old_addr + len;
        flush_cache_range(vma, old_addr, old_end);
 
@@ -944,7 +947,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
                return -EINTR;
        vma = vma_lookup(mm, addr);
        if (!vma) {
-               ret = EFAULT;
+               ret = -EFAULT;
                goto out;
        }
 
index 55a9e48a7a02681667f33e038bb209c13b1ccb3c..9d7afc2d959e4ca99e305c0439991cccdf5ba194 100644 (file)
@@ -226,6 +226,8 @@ void *vmalloc(unsigned long size)
 }
 EXPORT_SYMBOL(vmalloc);
 
+void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc);
+
 /*
  *     vzalloc - allocate virtually contiguous memory with zero fill
  *
index 7ec38194f8e11c927ab5f9f3b7c89ebab33ef0e7..49d7df39b02d0faa8708e78a5b810f5514c4a7cf 100644 (file)
@@ -632,7 +632,7 @@ done:
         */
        set_bit(MMF_OOM_SKIP, &mm->flags);
 
-       /* Drop a reference taken by wake_oom_reaper */
+       /* Drop a reference taken by queue_oom_reaper */
        put_task_struct(tsk);
 }
 
@@ -644,12 +644,12 @@ static int oom_reaper(void *unused)
                struct task_struct *tsk = NULL;
 
                wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);
-               spin_lock(&oom_reaper_lock);
+               spin_lock_irq(&oom_reaper_lock);
                if (oom_reaper_list != NULL) {
                        tsk = oom_reaper_list;
                        oom_reaper_list = tsk->oom_reaper_list;
                }
-               spin_unlock(&oom_reaper_lock);
+               spin_unlock_irq(&oom_reaper_lock);
 
                if (tsk)
                        oom_reap_task(tsk);
@@ -658,22 +658,48 @@ static int oom_reaper(void *unused)
        return 0;
 }
 
-static void wake_oom_reaper(struct task_struct *tsk)
+static void wake_oom_reaper(struct timer_list *timer)
 {
-       /* mm is already queued? */
-       if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
-               return;
+       struct task_struct *tsk = container_of(timer, struct task_struct,
+                       oom_reaper_timer);
+       struct mm_struct *mm = tsk->signal->oom_mm;
+       unsigned long flags;
 
-       get_task_struct(tsk);
+       /* The victim managed to terminate on its own - see exit_mmap */
+       if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
+               put_task_struct(tsk);
+               return;
+       }
 
-       spin_lock(&oom_reaper_lock);
+       spin_lock_irqsave(&oom_reaper_lock, flags);
        tsk->oom_reaper_list = oom_reaper_list;
        oom_reaper_list = tsk;
-       spin_unlock(&oom_reaper_lock);
+       spin_unlock_irqrestore(&oom_reaper_lock, flags);
        trace_wake_reaper(tsk->pid);
        wake_up(&oom_reaper_wait);
 }
 
+/*
+ * Give the OOM victim time to exit naturally before invoking the OOM reaper.
+ * The timer's timeout is arbitrary... the longer it is, the longer the worst
+ * case scenario for the OOM can take. If it is too small, the oom_reaper can
+ * get in the way and release resources needed by the process exit path.
+ * e.g. the futex robust list can sit in Anon|Private memory that gets reaped
+ * before the exit path is able to wake the futex waiters.
+ */
+#define OOM_REAPER_DELAY (2*HZ)
+static void queue_oom_reaper(struct task_struct *tsk)
+{
+       /* mm is already queued? */
+       if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
+               return;
+
+       get_task_struct(tsk);
+       timer_setup(&tsk->oom_reaper_timer, wake_oom_reaper, 0);
+       tsk->oom_reaper_timer.expires = jiffies + OOM_REAPER_DELAY;
+       add_timer(&tsk->oom_reaper_timer);
+}
+
 static int __init oom_init(void)
 {
        oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
@@ -681,7 +707,7 @@ static int __init oom_init(void)
 }
 subsys_initcall(oom_init)
 #else
-static inline void wake_oom_reaper(struct task_struct *tsk)
+static inline void queue_oom_reaper(struct task_struct *tsk)
 {
 }
 #endif /* CONFIG_MMU */
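
queue_oom_reaper() replaces an immediate wakeup with a one-shot timer whose handler performs the old queueing, a generic way to grant a grace period before background cleanup runs. Reduced to the bare pattern (kernel-style sketch; 'victim' and its fields stand in for the task_struct usage above):

    #define REAP_DELAY (2 * HZ)

    static void reap_timer_fn(struct timer_list *t)
    {
            struct victim *v = container_of(t, struct victim, reap_timer);

            queue_for_reaping(v);   /* the old immediate step runs here */
    }

    static void schedule_reap(struct victim *v)
    {
            timer_setup(&v->reap_timer, reap_timer_fn, 0);
            v->reap_timer.expires = jiffies + REAP_DELAY;
            add_timer(&v->reap_timer);
    }

The MMF_OOM_SKIP test in the handler above is what lets a victim that exited on its own during the delay drop out without ever being queued.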
@@ -932,7 +958,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
        rcu_read_unlock();
 
        if (can_oom_reap)
-               wake_oom_reaper(victim);
+               queue_oom_reaper(victim);
 
        mmdrop(mm);
        put_task_struct(victim);
@@ -968,7 +994,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
        task_lock(victim);
        if (task_will_free_mem(victim)) {
                mark_oom_victim(victim);
-               wake_oom_reaper(victim);
+               queue_oom_reaper(victim);
                task_unlock(victim);
                put_task_struct(victim);
                return;
@@ -1067,7 +1093,7 @@ bool out_of_memory(struct oom_control *oc)
         */
        if (task_will_free_mem(current)) {
                mark_oom_victim(current);
-               wake_oom_reaper(current);
+               queue_oom_reaper(current);
                return true;
        }
 
index 2db95780e003110e3a6ac2009ac2c58e0851cbeb..0e42038382c12503dac6a1fb9f370850bd41f86b 100644 (file)
@@ -128,7 +128,7 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
 struct pagesets {
        local_lock_t lock;
 };
-static DEFINE_PER_CPU(struct pagesets, pagesets) __maybe_unused = {
+static DEFINE_PER_CPU(struct pagesets, pagesets) = {
        .lock = INIT_LOCAL_LOCK(lock),
 };
 
@@ -6131,7 +6131,7 @@ static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs)
        do {
                zone_type--;
                zone = pgdat->node_zones + zone_type;
-               if (managed_zone(zone)) {
+               if (populated_zone(zone)) {
                        zoneref_set_zone(zone, &zonerefs[nr_zones++]);
                        check_highest_zone(zone_type);
                }
@@ -8919,7 +8919,7 @@ void *__init alloc_large_system_hash(const char *tablename,
                                table = memblock_alloc_raw(size,
                                                           SMP_CACHE_BYTES);
                } else if (get_order(size) >= MAX_ORDER || hashdist) {
-                       table = __vmalloc(size, gfp_flags);
+                       table = vmalloc_huge(size, gfp_flags);
                        virt = true;
                        if (table)
                                huge = is_vm_area_hugepages(table);
index b417f000b49eb1aa3b776ba7a030d10828119493..3fbdab6a940e783020057596e42af8a4f836ba92 100644 (file)
@@ -51,54 +51,6 @@ void end_swap_bio_write(struct bio *bio)
        bio_put(bio);
 }
 
-static void swap_slot_free_notify(struct page *page)
-{
-       struct swap_info_struct *sis;
-       struct gendisk *disk;
-       swp_entry_t entry;
-
-       /*
-        * There is no guarantee that the page is in swap cache - the software
-        * suspend code (at least) uses end_swap_bio_read() against a non-
-        * swapcache page.  So we must check PG_swapcache before proceeding with
-        * this optimization.
-        */
-       if (unlikely(!PageSwapCache(page)))
-               return;
-
-       sis = page_swap_info(page);
-       if (data_race(!(sis->flags & SWP_BLKDEV)))
-               return;
-
-       /*
-        * The swap subsystem performs lazy swap slot freeing,
-        * expecting that the page will be swapped out again.
-        * So we can avoid an unnecessary write if the page
-        * isn't redirtied.
-        * This is good for real swap storage because we can
-        * reduce unnecessary I/O and enhance wear-leveling
-        * if an SSD is used as the as swap device.
-        * But if in-memory swap device (eg zram) is used,
-        * this causes a duplicated copy between uncompressed
-        * data in VM-owned memory and compressed data in
-        * zram-owned memory.  So let's free zram-owned memory
-        * and make the VM-owned decompressed page *dirty*,
-        * so the page should be swapped out somewhere again if
-        * we again wish to reclaim it.
-        */
-       disk = sis->bdev->bd_disk;
-       entry.val = page_private(page);
-       if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) {
-               unsigned long offset;
-
-               offset = swp_offset(entry);
-
-               SetPageDirty(page);
-               disk->fops->swap_slot_free_notify(sis->bdev,
-                               offset);
-       }
-}
-
 static void end_swap_bio_read(struct bio *bio)
 {
        struct page *page = bio_first_page_all(bio);
@@ -114,7 +66,6 @@ static void end_swap_bio_read(struct bio *bio)
        }
 
        SetPageUptodate(page);
-       swap_slot_free_notify(page);
 out:
        unlock_page(page);
        WRITE_ONCE(bio->bi_private, NULL);
@@ -394,11 +345,6 @@ int swap_readpage(struct page *page, bool synchronous)
        if (sis->flags & SWP_SYNCHRONOUS_IO) {
                ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
                if (!ret) {
-                       if (trylock_page(page)) {
-                               swap_slot_free_notify(page);
-                               unlock_page(page);
-                       }
-
                        count_vm_event(PSWPIN);
                        goto out;
                }
@@ -414,7 +360,6 @@ int swap_readpage(struct page *page, bool synchronous)
         * attempt to access it in the page fault retry time check.
         */
        if (synchronous) {
-               bio->bi_opf |= REQ_POLLED;
                get_task_struct(current);
                bio->bi_private = current;
        }
@@ -426,8 +371,7 @@ int swap_readpage(struct page *page, bool synchronous)
                if (!READ_ONCE(bio->bi_private))
                        break;
 
-               if (!bio_poll(bio, NULL, 0))
-                       blk_io_schedule();
+               blk_io_schedule();
        }
        __set_current_state(TASK_RUNNING);
        bio_put(bio);
index 1187f9c1ec5b1048a2c6a8cbd2a0e6341f19d400..14a5cda73dee62759d5c3dc52a26f13f6e624e43 100644 (file)
@@ -163,7 +163,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
                return not_found(pvmw);
 
        if (unlikely(is_vm_hugetlb_page(vma))) {
-               unsigned long size = pvmw->nr_pages * PAGE_SIZE;
+               struct hstate *hstate = hstate_vma(vma);
+               unsigned long size = huge_page_size(hstate);
                /* The only possible mapping was handled on last iteration */
                if (pvmw->pte)
                        return not_found(pvmw);
@@ -173,8 +174,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
                if (!pvmw->pte)
                        return false;
 
-               pvmw->ptl = huge_pte_lockptr(size_to_hstate(size), mm,
-                                               pvmw->pte);
+               pvmw->ptl = huge_pte_lockptr(hstate, mm, pvmw->pte);
                spin_lock(pvmw->ptl);
                if (!check_pte(pvmw))
                        return not_found(pvmw);
index 8e3775829513ee7caec5b45e0c51df8ea517e5f9..26bf74a6b2fe6ae67b42ce931eb264a0fca648a3 100644 (file)
  * ->readpage() which may be less efficient.
  */
 
+#include <linux/blkdev.h>
 #include <linux/kernel.h>
 #include <linux/dax.h>
 #include <linux/gfp.h>
@@ -474,7 +475,8 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
 
        if (!folio)
                return -ENOMEM;
-       if (mark - index < (1UL << order))
+       mark = round_up(mark, 1UL << order);
+       if (index == mark)
                folio_set_readahead(folio);
        err = filemap_add_folio(ractl->mapping, folio, index, gfp);
        if (err)
@@ -555,8 +557,9 @@ static void ondemand_readahead(struct readahead_control *ractl,
        struct file_ra_state *ra = ractl->ra;
        unsigned long max_pages = ra->ra_pages;
        unsigned long add_pages;
-       unsigned long index = readahead_index(ractl);
-       pgoff_t prev_index;
+       pgoff_t index = readahead_index(ractl);
+       pgoff_t expected, prev_index;
+       unsigned int order = folio ? folio_order(folio) : 0;
 
        /*
         * If the request exceeds the readahead window, allow the read to
@@ -575,8 +578,9 @@ static void ondemand_readahead(struct readahead_control *ractl,
         * It's the expected callback index, assume sequential access.
         * Ramp up sizes, and push forward the readahead window.
         */
-       if ((index == (ra->start + ra->size - ra->async_size) ||
-            index == (ra->start + ra->size))) {
+       expected = round_up(ra->start + ra->size - ra->async_size,
+                       1UL << order);
+       if (index == expected || index == (ra->start + ra->size)) {
                ra->start += ra->size;
                ra->size = get_next_ra_size(ra, max_pages);
                ra->async_size = ra->size;
@@ -662,7 +666,7 @@ readit:
        }
 
        ractl->_index = ra->start;
-       page_cache_ra_order(ractl, ra, folio ? folio_order(folio) : 0);
+       page_cache_ra_order(ractl, ra, order);
 }
 
 void page_cache_sync_ra(struct readahead_control *ractl,
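
Both readahead hunks align their index comparisons to the folio order: with large folios, accesses and readahead marks land only on folio-aligned indices, so an unaligned trigger point can never match. A worked sketch of the arithmetic, assuming order-2 (4-page) folios:

    /* ra->start = 8, ra->size = 16, ra->async_size = 11, order = 2 */
    expected = ra->start + ra->size - ra->async_size;   /* 13 */
    expected = round_up(expected, 1UL << order);        /* 16 */
    /* An order-2 access arrives at a multiple of 4, e.g. index 16, and
     * now matches; the unrounded value 13 could never be hit. */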
index 098638d3b8a41c08a6818e5bac4acccf75d80df3..3b3cf2892b6ae8a7f5da0ce28030ac45e09b51ca 100644 (file)
@@ -158,6 +158,22 @@ const struct address_space_operations secretmem_aops = {
        .isolate_page   = secretmem_isolate_page,
 };
 
+static int secretmem_setattr(struct user_namespace *mnt_userns,
+                            struct dentry *dentry, struct iattr *iattr)
+{
+       struct inode *inode = d_inode(dentry);
+       unsigned int ia_valid = iattr->ia_valid;
+
+       if ((ia_valid & ATTR_SIZE) && inode->i_size)
+               return -EINVAL;
+
+       return simple_setattr(mnt_userns, dentry, iattr);
+}
+
+static const struct inode_operations secretmem_iops = {
+       .setattr = secretmem_setattr,
+};
+
 static struct vfsmount *secretmem_mnt;
 
 static struct file *secretmem_file_create(unsigned long flags)
@@ -177,6 +193,7 @@ static struct file *secretmem_file_create(unsigned long flags)
        mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
        mapping_set_unevictable(inode->i_mapping);
 
+       inode->i_op = &secretmem_iops;
        inode->i_mapping->a_ops = &secretmem_aops;
 
        /* pretend we are a normal file with zero size */
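
With secretmem_setattr() in place, the size of a secretmem inode can only be set while it is still zero; any later truncation is refused. A hypothetical userspace sketch (assuming __NR_memfd_secret is available on the target architecture):

    #include <unistd.h>
    #include <sys/syscall.h>

    int fd = syscall(__NR_memfd_secret, 0);
    ftruncate(fd, 4096);   /* succeeds: i_size is still zero */
    ftruncate(fd, 8192);   /* fails with EINVAL under the new hook */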
index 529c9ad3e9264340ee94f2792bb24b5c19647aba..4b2fea33158e8a33a87354ccefc574ef74c1e793 100644 (file)
@@ -2513,7 +2513,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
                pgoff_t end_index;
                unsigned long nr, ret;
                loff_t i_size = i_size_read(inode);
-               bool got_page;
 
                end_index = i_size >> PAGE_SHIFT;
                if (index > end_index)
@@ -2570,24 +2569,34 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
                         */
                        if (!offset)
                                mark_page_accessed(page);
-                       got_page = true;
+                       /*
+                        * Ok, we have the page, and it's up-to-date, so
+                        * now we can copy it to user space...
+                        */
+                       ret = copy_page_to_iter(page, offset, nr, to);
+                       put_page(page);
+
+               } else if (iter_is_iovec(to)) {
+                       /*
+                        * Copy to user tends to be so well optimized, but
+                        * clear_user() not so much, that it is noticeably
+                        * faster to copy the zero page instead of clearing.
+                        */
+                       ret = copy_page_to_iter(ZERO_PAGE(0), offset, nr, to);
                } else {
-                       page = ZERO_PAGE(0);
-                       got_page = false;
+                       /*
+                        * But submitting the same page twice in a row to
+                        * splice() - or others? - can result in confusion:
+                        * so don't attempt that optimization on pipes etc.
+                        */
+                       ret = iov_iter_zero(nr, to);
                }
 
-               /*
-                * Ok, we have the page, and it's up-to-date, so
-                * now we can copy it to user space...
-                */
-               ret = copy_page_to_iter(page, offset, nr, to);
                retval += ret;
                offset += ret;
                index += offset >> PAGE_SHIFT;
                offset &= ~PAGE_MASK;
 
-               if (got_page)
-                       put_page(page);
                if (!iov_iter_count(to))
                        break;
                if (ret < nr) {
index b04e40078bdf7d63e540fe20da21049b00779d3a..0edb474edef1839662d4e1a4edeacfceea14bb41 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3665,7 +3665,7 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_PRINTK
-void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
+void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
 {
        struct kmem_cache *cachep;
        unsigned int objnr;
index fd7ae2024897d5ccb0bdabb5f079544e3b5183f1..95eb34174c1bb538e38a9c2432273f4db2b73bb4 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -868,7 +868,7 @@ struct kmem_obj_info {
        void *kp_stack[KS_ADDRS_COUNT];
        void *kp_free_stack[KS_ADDRS_COUNT];
 };
-void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
+void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
 #endif
 
 #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
index 6ee64d6208b395980d346d8bf4518bd77eec3f60..2b3206a2c3b51064f8813ab03412152d4f6fc043 100644 (file)
@@ -555,6 +555,13 @@ bool kmem_valid_obj(void *object)
 }
 EXPORT_SYMBOL_GPL(kmem_valid_obj);
 
+static void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
+{
+       if (__kfence_obj_info(kpp, object, slab))
+               return;
+       __kmem_obj_info(kpp, object, slab);
+}
+
 /**
  * kmem_dump_obj - Print available slab provenance information
  * @object: slab object for which to find provenance information.
@@ -590,6 +597,8 @@ void kmem_dump_obj(void *object)
                pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name);
        else
                pr_cont(" slab%s", cp);
+       if (is_kfence_address(object))
+               pr_cont(" (kfence)");
        if (kp.kp_objp)
                pr_cont(" start %px", kp.kp_objp);
        if (kp.kp_data_offset)
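
After the rename, kmem_obj_info() is a wrapper that consults KFENCE first and falls back to the slab allocator's __kmem_obj_info(), which is why all three allocators' implementations are renamed in the surrounding hunks. A hedged usage sketch of the visible effect:

    void *p = kmalloc(32, GFP_KERNEL);

    if (kmem_valid_obj(p))
            kmem_dump_obj(p);   /* may now report "... (kfence)" provenance */
    kfree(p);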
index dfa6808dff36f531af6140746aec5befcf1d766d..40ea6e2d4ccd3f67c1a558cbdb35475c81bcaaf3 100644 (file)
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -463,7 +463,7 @@ out:
 }
 
 #ifdef CONFIG_PRINTK
-void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
+void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
 {
        kpp->kp_ptr = object;
        kpp->kp_slab = slab;
index 74d92aa4a3a28d744d3d5dfd4b30f5d632172207..ed5c2c03a47aaeac43a2c5534f34cdf7da9fdc38 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4312,7 +4312,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
 }
 
 #ifdef CONFIG_PRINTK
-void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
+void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
 {
        void *base;
        int __maybe_unused i;
index 63c61f8b261188c34d26b276e5e81cd1a07eb878..981a6e85c88e773b9b1a11a27b5265b17819928e 100644 (file)
@@ -6,6 +6,7 @@
  *  Swap reorganised 29.12.95, Stephen Tweedie
  */
 
+#include <linux/blkdev.h>
 #include <linux/mm.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
@@ -179,7 +180,7 @@ static int discard_swap(struct swap_info_struct *si)
        nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
        if (nr_blocks) {
                err = blkdev_issue_discard(si->bdev, start_block,
-                               nr_blocks, GFP_KERNEL, 0);
+                               nr_blocks, GFP_KERNEL);
                if (err)
                        return err;
                cond_resched();
@@ -190,7 +191,7 @@ static int discard_swap(struct swap_info_struct *si)
                nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
 
                err = blkdev_issue_discard(si->bdev, start_block,
-                               nr_blocks, GFP_KERNEL, 0);
+                               nr_blocks, GFP_KERNEL);
                if (err)
                        break;
 
@@ -254,7 +255,7 @@ static void discard_swap_cluster(struct swap_info_struct *si,
                start_block <<= PAGE_SHIFT - 9;
                nr_blocks <<= PAGE_SHIFT - 9;
                if (blkdev_issue_discard(si->bdev, start_block,
-                                       nr_blocks, GFP_NOIO, 0))
+                                       nr_blocks, GFP_NOIO))
                        break;
 
                se = next_se(se);
@@ -2466,7 +2467,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        if (p->flags & SWP_CONTINUED)
                free_swap_count_continuations(p);
 
-       if (!p->bdev || !blk_queue_nonrot(bdev_get_queue(p->bdev)))
+       if (!p->bdev || !bdev_nonrot(p->bdev))
                atomic_dec(&nr_rotate_swap);
 
        mutex_lock(&swapon_mutex);
@@ -2761,7 +2762,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
                 * write only restriction.  Hence zoned block devices are not
                 * suitable for swapping.  Disallow them here.
                 */
-               if (blk_queue_is_zoned(p->bdev->bd_disk->queue))
+               if (bdev_is_zoned(p->bdev))
                        return -EINVAL;
                p->flags |= SWP_BLKDEV;
        } else if (S_ISREG(inode->i_mode)) {
@@ -2957,20 +2958,6 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
        return nr_extents;
 }
 
-/*
- * Helper to sys_swapon determining if a given swap
- * backing device queue supports DISCARD operations.
- */
-static bool swap_discardable(struct swap_info_struct *si)
-{
-       struct request_queue *q = bdev_get_queue(si->bdev);
-
-       if (!blk_queue_discard(q))
-               return false;
-
-       return true;
-}
-
 SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 {
        struct swap_info_struct *p;
@@ -3065,13 +3052,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
                goto bad_swap_unlock_inode;
        }
 
-       if (p->bdev && blk_queue_stable_writes(p->bdev->bd_disk->queue))
+       if (p->bdev && bdev_stable_writes(p->bdev))
                p->flags |= SWP_STABLE_WRITES;
 
        if (p->bdev && p->bdev->bd_disk->fops->rw_page)
                p->flags |= SWP_SYNCHRONOUS_IO;
 
-       if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+       if (p->bdev && bdev_nonrot(p->bdev)) {
                int cpu;
                unsigned long ci, nr_cluster;
 
@@ -3132,7 +3119,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
                                         sizeof(long),
                                         GFP_KERNEL);
 
-       if (p->bdev && (swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
+       if ((swap_flags & SWAP_FLAG_DISCARD) &&
+           p->bdev && bdev_max_discard_sectors(p->bdev)) {
                /*
                 * When discard is enabled for swap with no particular
                 * policy flagged, we set all swap discard flags here in
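
These swapfile hunks track the block-layer API conversion in this series: queue-based predicates become bdev-based ones, and blkdev_issue_discard() loses its flags argument. A sketch of the old and new shapes:

    /* old */
    if (blk_queue_discard(bdev_get_queue(bdev)))
            blkdev_issue_discard(bdev, sector, nr, GFP_KERNEL, 0);

    /* new: a device supports discard iff its max discard size is non-zero */
    if (bdev_max_discard_sectors(bdev))
            blkdev_issue_discard(bdev, sector, nr, GFP_KERNEL);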
index 0cb8e5ef17136743f710f0b54c4199075bb7404c..e9bb6db002aa0bdbebc981b4470ee1194d3a5f16 100644 (file)
@@ -72,12 +72,15 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
        _dst_pte = pte_mkdirty(_dst_pte);
        if (page_in_cache && !vm_shared)
                writable = false;
-       if (writable) {
-               if (wp_copy)
-                       _dst_pte = pte_mkuffd_wp(_dst_pte);
-               else
-                       _dst_pte = pte_mkwrite(_dst_pte);
-       }
+
+       /*
+        * Always mark a PTE as write-protected when needed, regardless of
+        * VM_WRITE, which the user might change.
+        */
+       if (wp_copy)
+               _dst_pte = pte_mkuffd_wp(_dst_pte);
+       else if (writable)
+               _dst_pte = pte_mkwrite(_dst_pte);
 
        dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
 
index 54e5e761a9a90ed383cc18a5ee5f85414ef2a923..3492a9e81aa3a28d73c6008ac601a32c1011aa40 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -592,8 +592,15 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
                return NULL;
        }
 
-       return __vmalloc_node(size, 1, flags, node,
-                       __builtin_return_address(0));
+       /*
+        * kvmalloc() can always use VM_ALLOW_HUGE_VMAP,
+        * since the callers already cannot assume anything
+        * about the resulting pointer, and cannot play
+        * protection games.
+        */
+       return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+                       flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
+                       node, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kvmalloc_node);
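
kvmalloc_node() can opt into VM_ALLOW_HUGE_VMAP precisely because its callers get no guarantees about the backing mapping. Call sites need no change; a minimal sketch:

    void *tbl = kvmalloc(64 * SZ_1M, GFP_KERNEL);   /* may now be PMD-mapped */

    /* ... use tbl ... */
    kvfree(tbl);   /* handles kmalloc- and vmalloc-backed memory alike */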
 
index e163372d396798fdedb45e87964531f1599bb183..cadfbb5155ea574ed3c139d4c9f8c37049da04b1 100644 (file)
@@ -1671,17 +1671,6 @@ static DEFINE_MUTEX(vmap_purge_lock);
 /* for per-CPU blocks */
 static void purge_fragmented_blocks_allcpus(void);
 
-#ifdef CONFIG_X86_64
-/*
- * called before a call to iounmap() if the caller wants vm_area_struct's
- * immediately freed.
- */
-void set_iounmap_nonlazy(void)
-{
-       atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
-}
-#endif /* CONFIG_X86_64 */
-
 /*
  * Purges all lazily-freed vmap areas.
  */
@@ -2664,15 +2653,18 @@ static void __vunmap(const void *addr, int deallocate_pages)
        vm_remove_mappings(area, deallocate_pages);
 
        if (deallocate_pages) {
-               unsigned int page_order = vm_area_page_order(area);
-               int i, step = 1U << page_order;
+               int i;
 
-               for (i = 0; i < area->nr_pages; i += step) {
+               for (i = 0; i < area->nr_pages; i++) {
                        struct page *page = area->pages[i];
 
                        BUG_ON(!page);
-                       mod_memcg_page_state(page, MEMCG_VMALLOC, -step);
-                       __free_pages(page, page_order);
+                       mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
+                       /*
+                        * High-order allocs for huge vmallocs are split, so they
+                        * can be freed as an array of order-0 allocations.
+                        */
+                       __free_pages(page, 0);
                        cond_resched();
                }
                atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
@@ -2925,12 +2917,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
                        if (nr != nr_pages_request)
                                break;
                }
-       } else
-               /*
-                * Compound pages required for remap_vmalloc_page if
-                * high-order pages.
-                */
-               gfp |= __GFP_COMP;
+       }
 
        /* High-order pages or fallback path if "bulk" fails. */
 
@@ -2944,6 +2931,15 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
                        page = alloc_pages_node(nid, gfp, order);
                if (unlikely(!page))
                        break;
+               /*
+                * Higher order allocations must be able to be treated as
+                * indepdenent small pages by callers (as they can with
+                * small-page vmallocs). Some drivers do their own refcounting
+                * on vmalloc_to_page() pages, some use page->mapping,
+                * page->lru, etc.
+                */
+               if (order)
+                       split_page(page, order);
 
                /*
                 * Careful, we allocate and map page-order pages, but
@@ -3003,11 +2999,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
        atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
        if (gfp_mask & __GFP_ACCOUNT) {
-               int i, step = 1U << page_order;
+               int i;
 
-               for (i = 0; i < area->nr_pages; i += step)
-                       mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC,
-                                            step);
+               for (i = 0; i < area->nr_pages; i++)
+                       mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
        }
 
        /*
@@ -3106,7 +3101,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
                return NULL;
        }
 
-       if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP)) {
+       if (vmap_allow_huge && (vm_flags & VM_ALLOW_HUGE_VMAP)) {
                unsigned long size_per_node;
 
                /*
@@ -3273,21 +3268,24 @@ void *vmalloc(unsigned long size)
 EXPORT_SYMBOL(vmalloc);
 
 /**
- * vmalloc_no_huge - allocate virtually contiguous memory using small pages
- * @size:    allocation size
+ * vmalloc_huge - allocate virtually contiguous memory, allow huge pages
+ * @size:      allocation size
+ * @gfp_mask:  flags for the page level allocator
  *
- * Allocate enough non-huge pages to cover @size from the page level
+ * Allocate enough pages to cover @size from the page level
  * allocator and map them into contiguous kernel virtual space.
+ * If @size is greater than or equal to PMD_SIZE, allow using
+ * huge pages for the memory.
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *vmalloc_no_huge(unsigned long size)
+void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
 {
        return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
-                                   GFP_KERNEL, PAGE_KERNEL, VM_NO_HUGE_VMAP,
+                                   gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
                                    NUMA_NO_NODE, __builtin_return_address(0));
 }
-EXPORT_SYMBOL(vmalloc_no_huge);
+EXPORT_SYMBOL_GPL(vmalloc_huge);
 
 /**
  * vzalloc - allocate virtually contiguous memory with zero fill
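
The opt-out vmalloc_no_huge() becomes the opt-in vmalloc_huge(), exported GPL-only. Since vm_area_alloc_pages() now split_page()s higher-order allocations, the result behaves as an array of order-0 pages for callers and for vfree(). A minimal sketch, assuming a size large enough for PMD mappings to pay off:

    void *buf = vmalloc_huge(16 * PMD_SIZE, GFP_KERNEL);

    if (buf)
            vfree(buf);   /* order-0 frees; the split happened at allocation */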
index 8a3828acc0bfd9256bc10d3c8b5b939786fdac0b..592569a8974c4d41bda81095b1e51c32f6bc7444 100644 (file)
@@ -355,7 +355,7 @@ void workingset_refault(struct folio *folio, void *shadow)
 
        mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
 
-       mem_cgroup_flush_stats();
+       mem_cgroup_flush_stats_delayed();
        /*
         * Compare the distance to the existing workingset size. We
         * don't activate pages that couldn't stay resident even if
index 0899a729a23f474c313e2a9e6f98f1ba875a6ada..c120c7c6d25fc13fe8e2a3724ffda4539b72d5c3 100644 (file)
@@ -475,6 +475,17 @@ int batadv_frag_send_packet(struct sk_buff *skb,
                goto free_skb;
        }
 
+       /* GRO might have added fragments to the fragment list instead of
+        * frags[]. But this is not handled by skb_split and must be
+        * linearized to avoid incorrect length information after all
+        * batman-adv fragments were created and submitted to the
+        * hard-interface.
+        */
+       if (skb_has_frag_list(skb) && __skb_linearize(skb)) {
+               ret = -ENOMEM;
+               goto free_skb;
+       }
+
        /* Create one header to be copied to all fragments */
        frag_header.packet_type = BATADV_UNICAST_FRAG;
        frag_header.version = BATADV_COMPAT_VERSION;
index 84312c83654937989c5cc1dbc8cd100ed86b52ca..fe803bee419a9d5814d7f013aafbb3b41c492303 100644 (file)
@@ -670,7 +670,7 @@ static void le_conn_timeout(struct work_struct *work)
                /* Disable LE Advertising */
                le_disable_advertising(hdev);
                hci_dev_lock(hdev);
-               hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
+               hci_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
                hci_dev_unlock(hdev);
                return;
        }
@@ -873,7 +873,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
 EXPORT_SYMBOL(hci_get_route);
 
 /* This function requires the caller holds hdev->lock */
-void hci_le_conn_failed(struct hci_conn *conn, u8 status)
+static void hci_le_conn_failed(struct hci_conn *conn, u8 status)
 {
        struct hci_dev *hdev = conn->hdev;
        struct hci_conn_params *params;
@@ -886,8 +886,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
                params->conn = NULL;
        }
 
-       conn->state = BT_CLOSED;
-
        /* If the status indicates successful cancellation of
         * the attempt (i.e. Unknown Connection Id) there's no point of
         * notifying failure since we'll go back to keep trying to
@@ -899,10 +897,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
                mgmt_connect_failed(hdev, &conn->dst, conn->type,
                                    conn->dst_type, status);
 
-       hci_connect_cfm(conn, status);
-
-       hci_conn_del(conn);
-
        /* Since we may have temporarily stopped the background scanning in
         * favor of connection establishment, we should restart it.
         */
@@ -914,6 +908,28 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
        hci_enable_advertising(hdev);
 }
 
+/* This function requires the caller holds hdev->lock */
+void hci_conn_failed(struct hci_conn *conn, u8 status)
+{
+       struct hci_dev *hdev = conn->hdev;
+
+       bt_dev_dbg(hdev, "status 0x%2.2x", status);
+
+       switch (conn->type) {
+       case LE_LINK:
+               hci_le_conn_failed(conn, status);
+               break;
+       case ACL_LINK:
+               mgmt_connect_failed(hdev, &conn->dst, conn->type,
+                                   conn->dst_type, status);
+               break;
+       }
+
+       conn->state = BT_CLOSED;
+       hci_connect_cfm(conn, status);
+       hci_conn_del(conn);
+}
+
 static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
 {
        struct hci_conn *conn = data;
index b4782a6c1025d6cf907d6290ecd8ee584e454926..45c2dd2e15905fef2695391ec80414c1be5bbf90 100644 (file)
@@ -2555,10 +2555,10 @@ int hci_register_dev(struct hci_dev *hdev)
         */
        switch (hdev->dev_type) {
        case HCI_PRIMARY:
-               id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL);
+               id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL);
                break;
        case HCI_AMP:
-               id = ida_simple_get(&hci_index_ida, 1, 0, GFP_KERNEL);
+               id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL);
                break;
        default:
                return -EINVAL;
@@ -2567,7 +2567,7 @@ int hci_register_dev(struct hci_dev *hdev)
        if (id < 0)
                return id;
 
-       sprintf(hdev->name, "hci%d", id);
+       snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
        hdev->id = id;
 
        BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
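
ida_simple_get() allocates from the half-open range [start, end), so passing HCI_MAX_ID as the bound confines the ids that "hci%d" can ever produce, and the switch to snprintf() guarantees truncation instead of overflow of hdev->name. A sketch of the semantics:

    /* ids come from [0, HCI_MAX_ID), i.e. 0 .. HCI_MAX_ID - 1 */
    id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL);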
index abaabfae19cc96274e466e74dabee9c1f8cdbc67..66451661283c2b2f04dacab4ea4612f9fa0787ea 100644 (file)
@@ -2834,7 +2834,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status)
        bt_dev_dbg(hdev, "status 0x%2.2x", status);
 
        /* All connection failure handling is taken care of by the
-        * hci_le_conn_failed function which is triggered by the HCI
+        * hci_conn_failed function which is triggered by the HCI
         * request completion callbacks used for connecting.
         */
        if (status)
@@ -2859,7 +2859,7 @@ static void hci_cs_le_ext_create_conn(struct hci_dev *hdev, u8 status)
        bt_dev_dbg(hdev, "status 0x%2.2x", status);
 
        /* All connection failure handling is taken care of by the
-        * hci_le_conn_failed function which is triggered by the HCI
+        * hci_conn_failed function which is triggered by the HCI
         * request completion callbacks used for connecting.
         */
        if (status)
@@ -3067,18 +3067,20 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
 {
        struct hci_ev_conn_complete *ev = data;
        struct hci_conn *conn;
+       u8 status = ev->status;
 
-       if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) {
-               bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for invalid handle");
-               return;
-       }
-
-       bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+       bt_dev_dbg(hdev, "status 0x%2.2x", status);
 
        hci_dev_lock(hdev);
 
        conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
        if (!conn) {
+               /* If the status indicates an error and no connection is pending,
+                * just unlock as there is nothing to clean up.
+                */
+               if (ev->status)
+                       goto unlock;
+
                /* Connection may not exist if auto-connected. Check the bredr
                 * allowlist to see if this device is allowed to auto connect.
                 * If link is an ACL type, create a connection class
@@ -3122,8 +3124,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
                goto unlock;
        }
 
-       if (!ev->status) {
+       if (!status) {
                conn->handle = __le16_to_cpu(ev->handle);
+               if (conn->handle > HCI_CONN_HANDLE_MAX) {
+                       bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+                                  conn->handle, HCI_CONN_HANDLE_MAX);
+                       status = HCI_ERROR_INVALID_PARAMETERS;
+                       goto done;
+               }
 
                if (conn->type == ACL_LINK) {
                        conn->state = BT_CONFIG;
@@ -3164,19 +3172,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
                        hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp),
                                     &cp);
                }
-       } else {
-               conn->state = BT_CLOSED;
-               if (conn->type == ACL_LINK)
-                       mgmt_connect_failed(hdev, &conn->dst, conn->type,
-                                           conn->dst_type, ev->status);
        }
 
        if (conn->type == ACL_LINK)
                hci_sco_setup(conn, ev->status);
 
-       if (ev->status) {
-               hci_connect_cfm(conn, ev->status);
-               hci_conn_del(conn);
+done:
+       if (status) {
+               hci_conn_failed(conn, status);
        } else if (ev->link_type == SCO_LINK) {
                switch (conn->setting & SCO_AIRMODE_MASK) {
                case SCO_AIRMODE_CVSD:
@@ -3185,7 +3188,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
                        break;
                }
 
-               hci_connect_cfm(conn, ev->status);
+               hci_connect_cfm(conn, status);
        }
 
 unlock:
@@ -4676,6 +4679,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
 {
        struct hci_ev_sync_conn_complete *ev = data;
        struct hci_conn *conn;
+       u8 status = ev->status;
 
        switch (ev->link_type) {
        case SCO_LINK:
@@ -4690,12 +4694,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
                return;
        }
 
-       if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) {
-               bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete for invalid handle");
-               return;
-       }
-
-       bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+       bt_dev_dbg(hdev, "status 0x%2.2x", status);
 
        hci_dev_lock(hdev);
 
@@ -4729,9 +4728,17 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
                goto unlock;
        }
 
-       switch (ev->status) {
+       switch (status) {
        case 0x00:
                conn->handle = __le16_to_cpu(ev->handle);
+               if (conn->handle > HCI_CONN_HANDLE_MAX) {
+                       bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+                                  conn->handle, HCI_CONN_HANDLE_MAX);
+                       status = HCI_ERROR_INVALID_PARAMETERS;
+                       conn->state = BT_CLOSED;
+                       break;
+               }
+
                conn->state  = BT_CONNECTED;
                conn->type   = ev->link_type;
 
@@ -4775,8 +4782,8 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
                }
        }
 
-       hci_connect_cfm(conn, ev->status);
-       if (ev->status)
+       hci_connect_cfm(conn, status);
+       if (status)
                hci_conn_del(conn);
 
 unlock:
@@ -5527,11 +5534,6 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
        struct smp_irk *irk;
        u8 addr_type;
 
-       if (handle > HCI_CONN_HANDLE_MAX) {
-               bt_dev_err(hdev, "Ignoring HCI_LE_Connection_Complete for invalid handle");
-               return;
-       }
-
        hci_dev_lock(hdev);
 
        /* All controllers implicitly stop advertising in the event of a
@@ -5541,6 +5543,12 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
 
        conn = hci_lookup_le_connect(hdev);
        if (!conn) {
+               /* If the status indicates an error and no connection is pending,
+                * just unlock as there is nothing to clean up.
+                */
+               if (status)
+                       goto unlock;
+
                conn = hci_conn_add(hdev, LE_LINK, bdaddr, role);
                if (!conn) {
                        bt_dev_err(hdev, "no memory for new connection");
@@ -5603,8 +5611,14 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
 
        conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL);
 
+       if (handle > HCI_CONN_HANDLE_MAX) {
+               bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", handle,
+                          HCI_CONN_HANDLE_MAX);
+               status = HCI_ERROR_INVALID_PARAMETERS;
+       }
+
        if (status) {
-               hci_le_conn_failed(conn, status);
+               hci_conn_failed(conn, status);
                goto unlock;
        }
 
index 8f4c5698913d7fb19e45d2cfd9a5641fd447ccf0..13600bf120b0268bc0cef30412394029fde9a93f 100644 (file)
@@ -4408,12 +4408,21 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
 static int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
                               u8 reason)
 {
+       int err;
+
        switch (conn->state) {
        case BT_CONNECTED:
        case BT_CONFIG:
                return hci_disconnect_sync(hdev, conn, reason);
        case BT_CONNECT:
-               return hci_connect_cancel_sync(hdev, conn);
+               err = hci_connect_cancel_sync(hdev, conn);
+               /* Clean up the hci_conn object if it cannot be cancelled, as it
+                * likely means the controller and host stack are out of sync.
+                */
+               if (err)
+                       hci_conn_failed(conn, err);
+
+               return err;
        case BT_CONNECT2:
                return hci_reject_conn_sync(hdev, conn, reason);
        default:
index e7b9c2636d1095533aac4256bfab349fa3bb8df0..af709c182674ed2c8bb40788047e81f5377ddd53 100644 (file)
@@ -108,6 +108,7 @@ struct xdp_test_data {
        struct page_pool *pp;
        struct xdp_frame **frames;
        struct sk_buff **skbs;
+       struct xdp_mem_info mem;
        u32 batch_size;
        u32 frame_cnt;
 };
@@ -147,7 +148,6 @@ static void xdp_test_run_init_page(struct page *page, void *arg)
 
 static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_ctx)
 {
-       struct xdp_mem_info mem = {};
        struct page_pool *pp;
        int err = -ENOMEM;
        struct page_pool_params pp_params = {
@@ -174,7 +174,7 @@ static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_c
        }
 
        /* will copy 'mem.id' into pp->xdp_mem_id */
-       err = xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pp);
+       err = xdp_reg_mem_model(&xdp->mem, MEM_TYPE_PAGE_POOL, pp);
        if (err)
                goto err_mmodel;
 
@@ -202,6 +202,7 @@ err_skbs:
 
 static void xdp_test_run_teardown(struct xdp_test_data *xdp)
 {
+       xdp_unreg_mem_model(&xdp->mem);
        page_pool_destroy(xdp->pp);
        kfree(xdp->frames);
        kfree(xdp->skbs);
index 196417859c4a97168c2f87b8af476b0794c3b2b4..68b3e850bcb9dba2121f22f17e810ed19c28482f 100644 (file)
@@ -39,6 +39,13 @@ static int br_pass_frame_up(struct sk_buff *skb)
        dev_sw_netstats_rx_add(brdev, skb->len);
 
        vg = br_vlan_group_rcu(br);
+
+       /* Reset the offload_fwd_mark because there could be a stacked
+        * bridge above, and it should not think this bridge is doing
+        * that bridge's work forwarding out its ports.
+        */
+       br_switchdev_frame_unmark(skb);
+
        /* Bridge is just like any other port.  Make sure the
         * packet is allowed except in promisc mode when someone
         * may be running packet capture.
index 8cc44c36723171e77f2695c22d9bfd70bcdfb6cc..18affda2b522ac952effa064ac87adf1dee6c3b2 100644 (file)
@@ -353,6 +353,8 @@ static int br_switchdev_vlan_attr_replay(struct net_device *br_dev,
        attr.orig_dev = br_dev;
 
        vg = br_vlan_group(br);
+       if (!vg)
+               return 0;
 
        list_for_each_entry(v, &vg->vlan_list, vlist) {
                if (v->msti) {
index bafb0fb5f0e0ece5f8130676096381fdafc76792..1e7c6a460ef9a39d2c313c8cbb964b73585f2fbf 100644 (file)
@@ -906,6 +906,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
        struct canfd_frame *cf;
        int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
        int wait_tx_done = (so->opt.flags & CAN_ISOTP_WAIT_TX_DONE) ? 1 : 0;
+       s64 hrtimer_sec = 0;
        int off;
        int err;
 
@@ -1004,7 +1005,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
                isotp_create_fframe(cf, so, ae);
 
                /* start timeout for FC */
-               hrtimer_start(&so->txtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
+               hrtimer_sec = 1;
+               hrtimer_start(&so->txtimer, ktime_set(hrtimer_sec, 0),
+                             HRTIMER_MODE_REL_SOFT);
        }
 
        /* send the first or only CAN frame */
@@ -1017,6 +1020,11 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
        if (err) {
                pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
                               __func__, ERR_PTR(err));
+
+               /* no transmission -> no timeout monitoring */
+               if (hrtimer_sec)
+                       hrtimer_cancel(&so->txtimer);
+
                goto err_out_drop;
        }
 
@@ -1181,6 +1189,11 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 
        lock_sock(sk);
 
+       if (so->bound) {
+               err = -EINVAL;
+               goto out;
+       }
+
        /* do not register frame reception for functional addressing */
        if (so->opt.flags & CAN_ISOTP_SF_BROADCAST)
                do_rx_reg = 0;
@@ -1191,10 +1204,6 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
                goto out;
        }
 
-       if (so->bound && addr->can_ifindex == so->ifindex &&
-           rx_id == so->rxid && tx_id == so->txid)
-               goto out;
-
        dev = dev_get_by_index(net, addr->can_ifindex);
        if (!dev) {
                err = -ENODEV;
@@ -1229,22 +1238,6 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 
        dev_put(dev);
 
-       if (so->bound && do_rx_reg) {
-               /* unregister old filter */
-               if (so->ifindex) {
-                       dev = dev_get_by_index(net, so->ifindex);
-                       if (dev) {
-                               can_rx_unregister(net, dev, so->rxid,
-                                                 SINGLE_MASK(so->rxid),
-                                                 isotp_rcv, sk);
-                               can_rx_unregister(net, dev, so->txid,
-                                                 SINGLE_MASK(so->txid),
-                                                 isotp_rcv_echo, sk);
-                               dev_put(dev);
-                       }
-               }
-       }
-
        /* switch to new settings */
        so->ifindex = ifindex;
        so->rxid = rx_id;
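
With the early so->bound check, a bound ISO-TP socket can no longer be rebound, and the old filter re-registration path is removed outright. A hypothetical userspace sketch, where s is an already-created CAN_ISOTP socket:

    struct sockaddr_can addr = {
            .can_family  = AF_CAN,
            .can_ifindex = ifindex,   /* assumed valid */
    };

    addr.can_addr.tp.tx_id = 0x7e0;
    addr.can_addr.tp.rx_id = 0x7e8;
    bind(s, (struct sockaddr *)&addr, sizeof(addr));   /* first bind: ok */
    bind(s, (struct sockaddr *)&addr, sizeof(addr));   /* now fails with EINVAL */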
index 1c5815530e0dd08695b672be71fb04b69501d1ee..9d82bb42e958f4709b1e734377984a940d8016bb 100644 (file)
@@ -537,43 +537,6 @@ static void request_init(struct ceph_osd_request *req)
        target_init(&req->r_t);
 }
 
-/*
- * This is ugly, but it allows us to reuse linger registration and ping
- * requests, keeping the structure of the code around send_linger{_ping}()
- * reasonable.  Setting up a min_nr=2 mempool for each linger request
- * and dealing with copying ops (this blasts req only, watch op remains
- * intact) isn't any better.
- */
-static void request_reinit(struct ceph_osd_request *req)
-{
-       struct ceph_osd_client *osdc = req->r_osdc;
-       bool mempool = req->r_mempool;
-       unsigned int num_ops = req->r_num_ops;
-       u64 snapid = req->r_snapid;
-       struct ceph_snap_context *snapc = req->r_snapc;
-       bool linger = req->r_linger;
-       struct ceph_msg *request_msg = req->r_request;
-       struct ceph_msg *reply_msg = req->r_reply;
-
-       dout("%s req %p\n", __func__, req);
-       WARN_ON(kref_read(&req->r_kref) != 1);
-       request_release_checks(req);
-
-       WARN_ON(kref_read(&request_msg->kref) != 1);
-       WARN_ON(kref_read(&reply_msg->kref) != 1);
-       target_destroy(&req->r_t);
-
-       request_init(req);
-       req->r_osdc = osdc;
-       req->r_mempool = mempool;
-       req->r_num_ops = num_ops;
-       req->r_snapid = snapid;
-       req->r_snapc = snapc;
-       req->r_linger = linger;
-       req->r_request = request_msg;
-       req->r_reply = reply_msg;
-}
-
 struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
                                               struct ceph_snap_context *snapc,
                                               unsigned int num_ops,
@@ -918,14 +881,30 @@ EXPORT_SYMBOL(osd_req_op_xattr_init);
  * @watch_opcode: CEPH_OSD_WATCH_OP_*
  */
 static void osd_req_op_watch_init(struct ceph_osd_request *req, int which,
-                                 u64 cookie, u8 watch_opcode)
+                                 u8 watch_opcode, u64 cookie, u32 gen)
 {
        struct ceph_osd_req_op *op;
 
        op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0);
        op->watch.cookie = cookie;
        op->watch.op = watch_opcode;
-       op->watch.gen = 0;
+       op->watch.gen = gen;
+}
+
+/*
+ * prot_ver, timeout and notify payload (may be empty) should already be
+ * encoded in @request_pl
+ */
+static void osd_req_op_notify_init(struct ceph_osd_request *req, int which,
+                                  u64 cookie, struct ceph_pagelist *request_pl)
+{
+       struct ceph_osd_req_op *op;
+
+       op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
+       op->notify.cookie = cookie;
+
+       ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl);
+       op->indata_len = request_pl->length;
 }
 
 /*
@@ -2385,7 +2364,11 @@ again:
                if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) {
                        err = -ENOSPC;
                } else {
-                       pr_warn_ratelimited("FULL or reached pool quota\n");
+                       if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL))
+                               pr_warn_ratelimited("cluster is full (osdmap FULL)\n");
+                       else
+                               pr_warn_ratelimited("pool %lld is full or reached quota\n",
+                                                   req->r_t.base_oloc.pool);
                        req->r_t.paused = true;
                        maybe_request_map(osdc);
                }
@@ -2727,10 +2710,13 @@ static void linger_release(struct kref *kref)
        WARN_ON(!list_empty(&lreq->pending_lworks));
        WARN_ON(lreq->osd);
 
-       if (lreq->reg_req)
-               ceph_osdc_put_request(lreq->reg_req);
-       if (lreq->ping_req)
-               ceph_osdc_put_request(lreq->ping_req);
+       if (lreq->request_pl)
+               ceph_pagelist_release(lreq->request_pl);
+       if (lreq->notify_id_pages)
+               ceph_release_page_vector(lreq->notify_id_pages, 1);
+
+       ceph_osdc_put_request(lreq->reg_req);
+       ceph_osdc_put_request(lreq->ping_req);
        target_destroy(&lreq->t);
        kfree(lreq);
 }
@@ -2999,6 +2985,12 @@ static void linger_commit_cb(struct ceph_osd_request *req)
        struct ceph_osd_linger_request *lreq = req->r_priv;
 
        mutex_lock(&lreq->lock);
+       if (req != lreq->reg_req) {
+               dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+                    __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+               goto out;
+       }
+
        dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq,
             lreq->linger_id, req->r_result);
        linger_reg_commit_complete(lreq, req->r_result);
@@ -3022,6 +3014,7 @@ static void linger_commit_cb(struct ceph_osd_request *req)
                }
        }
 
+out:
        mutex_unlock(&lreq->lock);
        linger_put(lreq);
 }
@@ -3044,6 +3037,12 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
        struct ceph_osd_linger_request *lreq = req->r_priv;
 
        mutex_lock(&lreq->lock);
+       if (req != lreq->reg_req) {
+               dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+                    __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+               goto out;
+       }
+
        dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__,
             lreq, lreq->linger_id, req->r_result, lreq->last_error);
        if (req->r_result < 0) {
@@ -3053,46 +3052,64 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
                }
        }
 
+out:
        mutex_unlock(&lreq->lock);
        linger_put(lreq);
 }
 
 static void send_linger(struct ceph_osd_linger_request *lreq)
 {
-       struct ceph_osd_request *req = lreq->reg_req;
-       struct ceph_osd_req_op *op = &req->r_ops[0];
+       struct ceph_osd_client *osdc = lreq->osdc;
+       struct ceph_osd_request *req;
+       int ret;
 
-       verify_osdc_wrlocked(req->r_osdc);
+       verify_osdc_wrlocked(osdc);
+       mutex_lock(&lreq->lock);
        dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
 
-       if (req->r_osd)
-               cancel_linger_request(req);
+       if (lreq->reg_req) {
+               if (lreq->reg_req->r_osd)
+                       cancel_linger_request(lreq->reg_req);
+               ceph_osdc_put_request(lreq->reg_req);
+       }
+
+       req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+       BUG_ON(!req);
 
-       request_reinit(req);
        target_copy(&req->r_t, &lreq->t);
        req->r_mtime = lreq->mtime;
 
-       mutex_lock(&lreq->lock);
        if (lreq->is_watch && lreq->committed) {
-               WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
-                       op->watch.cookie != lreq->linger_id);
-               op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT;
-               op->watch.gen = ++lreq->register_gen;
+               osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT,
+                                     lreq->linger_id, ++lreq->register_gen);
                dout("lreq %p reconnect register_gen %u\n", lreq,
-                    op->watch.gen);
+                    req->r_ops[0].watch.gen);
                req->r_callback = linger_reconnect_cb;
        } else {
-               if (!lreq->is_watch)
+               if (lreq->is_watch) {
+                       osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH,
+                                             lreq->linger_id, 0);
+               } else {
                        lreq->notify_id = 0;
-               else
-                       WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH);
+
+                       refcount_inc(&lreq->request_pl->refcnt);
+                       osd_req_op_notify_init(req, 0, lreq->linger_id,
+                                              lreq->request_pl);
+                       ceph_osd_data_pages_init(
+                           osd_req_op_data(req, 0, notify, response_data),
+                           lreq->notify_id_pages, PAGE_SIZE, 0, false, false);
+               }
                dout("lreq %p register\n", lreq);
                req->r_callback = linger_commit_cb;
        }
-       mutex_unlock(&lreq->lock);
+
+       ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+       BUG_ON(ret);
 
        req->r_priv = linger_get(lreq);
        req->r_linger = true;
+       lreq->reg_req = req;
+       mutex_unlock(&lreq->lock);
 
        submit_request(req, true);
 }
@@ -3102,6 +3119,12 @@ static void linger_ping_cb(struct ceph_osd_request *req)
        struct ceph_osd_linger_request *lreq = req->r_priv;
 
        mutex_lock(&lreq->lock);
+       if (req != lreq->ping_req) {
+               dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+                    __func__, lreq, lreq->linger_id, req, lreq->ping_req);
+               goto out;
+       }
+
        dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n",
             __func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent,
             lreq->last_error);
@@ -3117,6 +3140,7 @@ static void linger_ping_cb(struct ceph_osd_request *req)
                     lreq->register_gen, req->r_ops[0].watch.gen);
        }
 
+out:
        mutex_unlock(&lreq->lock);
        linger_put(lreq);
 }
@@ -3124,8 +3148,8 @@ static void linger_ping_cb(struct ceph_osd_request *req)
 static void send_linger_ping(struct ceph_osd_linger_request *lreq)
 {
        struct ceph_osd_client *osdc = lreq->osdc;
-       struct ceph_osd_request *req = lreq->ping_req;
-       struct ceph_osd_req_op *op = &req->r_ops[0];
+       struct ceph_osd_request *req;
+       int ret;
 
        if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) {
                dout("%s PAUSERD\n", __func__);
@@ -3137,19 +3161,26 @@ static void send_linger_ping(struct ceph_osd_linger_request *lreq)
             __func__, lreq, lreq->linger_id, lreq->ping_sent,
             lreq->register_gen);
 
-       if (req->r_osd)
-               cancel_linger_request(req);
+       if (lreq->ping_req) {
+               if (lreq->ping_req->r_osd)
+                       cancel_linger_request(lreq->ping_req);
+               ceph_osdc_put_request(lreq->ping_req);
+       }
 
-       request_reinit(req);
-       target_copy(&req->r_t, &lreq->t);
+       req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+       BUG_ON(!req);
 
-       WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
-               op->watch.cookie != lreq->linger_id ||
-               op->watch.op != CEPH_OSD_WATCH_OP_PING);
-       op->watch.gen = lreq->register_gen;
+       target_copy(&req->r_t, &lreq->t);
+       osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id,
+                             lreq->register_gen);
        req->r_callback = linger_ping_cb;
+
+       ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+       BUG_ON(ret);
+
        req->r_priv = linger_get(lreq);
        req->r_linger = true;
+       lreq->ping_req = req;
 
        ceph_osdc_get_request(req);
        account_request(req);
@@ -3165,12 +3196,6 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
 
        down_write(&osdc->lock);
        linger_register(lreq);
-       if (lreq->is_watch) {
-               lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id;
-               lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id;
-       } else {
-               lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
-       }
 
        calc_target(osdc, &lreq->t, false);
        osd = lookup_create_osd(osdc, lreq->t.osd, true);
@@ -3202,9 +3227,9 @@ static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq)
  */
 static void __linger_cancel(struct ceph_osd_linger_request *lreq)
 {
-       if (lreq->is_watch && lreq->ping_req->r_osd)
+       if (lreq->ping_req && lreq->ping_req->r_osd)
                cancel_linger_request(lreq->ping_req);
-       if (lreq->reg_req->r_osd)
+       if (lreq->reg_req && lreq->reg_req->r_osd)
                cancel_linger_request(lreq->reg_req);
        cancel_linger_map_check(lreq);
        unlink_linger(lreq->osd, lreq);
@@ -4566,8 +4591,13 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 EXPORT_SYMBOL(ceph_osdc_start_request);
 
 /*
- * Unregister a registered request.  The request is not completed:
- * ->r_result isn't set and __complete_request() isn't called.
+ * Unregister request.  If @req was registered, it isn't completed:
+ * r_result isn't set and __complete_request() isn't invoked.
+ *
+ * If @req wasn't registered, this call may have raced with
+ * handle_reply(), in which case r_result would already be set and
+ * __complete_request() would be getting invoked, possibly even
+ * concurrently with this call.
  */
 void ceph_osdc_cancel_request(struct ceph_osd_request *req)
 {
@@ -4653,43 +4683,6 @@ again:
 }
 EXPORT_SYMBOL(ceph_osdc_sync);
 
-static struct ceph_osd_request *
-alloc_linger_request(struct ceph_osd_linger_request *lreq)
-{
-       struct ceph_osd_request *req;
-
-       req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO);
-       if (!req)
-               return NULL;
-
-       ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
-       ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
-       return req;
-}
-
-static struct ceph_osd_request *
-alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode)
-{
-       struct ceph_osd_request *req;
-
-       req = alloc_linger_request(lreq);
-       if (!req)
-               return NULL;
-
-       /*
-        * Pass 0 for cookie because we don't know it yet, it will be
-        * filled in by linger_submit().
-        */
-       osd_req_op_watch_init(req, 0, 0, watch_opcode);
-
-       if (ceph_osdc_alloc_messages(req, GFP_NOIO)) {
-               ceph_osdc_put_request(req);
-               return NULL;
-       }
-
-       return req;
-}
-
 /*
  * Returns a handle, caller owns a ref.
  */
@@ -4719,18 +4712,6 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
        lreq->t.flags = CEPH_OSD_FLAG_WRITE;
        ktime_get_real_ts64(&lreq->mtime);
 
-       lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH);
-       if (!lreq->reg_req) {
-               ret = -ENOMEM;
-               goto err_put_lreq;
-       }
-
-       lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING);
-       if (!lreq->ping_req) {
-               ret = -ENOMEM;
-               goto err_put_lreq;
-       }
-
        linger_submit(lreq);
        ret = linger_reg_commit_wait(lreq);
        if (ret) {
@@ -4768,8 +4749,8 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
        ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
        req->r_flags = CEPH_OSD_FLAG_WRITE;
        ktime_get_real_ts64(&req->r_mtime);
-       osd_req_op_watch_init(req, 0, lreq->linger_id,
-                             CEPH_OSD_WATCH_OP_UNWATCH);
+       osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH,
+                             lreq->linger_id, 0);
 
        ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
        if (ret)
@@ -4855,35 +4836,6 @@ out_put_req:
 }
 EXPORT_SYMBOL(ceph_osdc_notify_ack);
 
-static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
-                                 u64 cookie, u32 prot_ver, u32 timeout,
-                                 void *payload, u32 payload_len)
-{
-       struct ceph_osd_req_op *op;
-       struct ceph_pagelist *pl;
-       int ret;
-
-       op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
-       op->notify.cookie = cookie;
-
-       pl = ceph_pagelist_alloc(GFP_NOIO);
-       if (!pl)
-               return -ENOMEM;
-
-       ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */
-       ret |= ceph_pagelist_encode_32(pl, timeout);
-       ret |= ceph_pagelist_encode_32(pl, payload_len);
-       ret |= ceph_pagelist_append(pl, payload, payload_len);
-       if (ret) {
-               ceph_pagelist_release(pl);
-               return -ENOMEM;
-       }
-
-       ceph_osd_data_pagelist_init(&op->notify.request_data, pl);
-       op->indata_len = pl->length;
-       return 0;
-}
-
 /*
  * @timeout: in seconds
  *
@@ -4902,7 +4854,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
                     size_t *preply_len)
 {
        struct ceph_osd_linger_request *lreq;
-       struct page **pages;
        int ret;
 
        WARN_ON(!timeout);
@@ -4915,41 +4866,35 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
        if (!lreq)
                return -ENOMEM;
 
-       lreq->preply_pages = preply_pages;
-       lreq->preply_len = preply_len;
-
-       ceph_oid_copy(&lreq->t.base_oid, oid);
-       ceph_oloc_copy(&lreq->t.base_oloc, oloc);
-       lreq->t.flags = CEPH_OSD_FLAG_READ;
-
-       lreq->reg_req = alloc_linger_request(lreq);
-       if (!lreq->reg_req) {
+       lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO);
+       if (!lreq->request_pl) {
                ret = -ENOMEM;
                goto out_put_lreq;
        }
 
-       /*
-        * Pass 0 for cookie because we don't know it yet, it will be
-        * filled in by linger_submit().
-        */
-       ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout,
-                                    payload, payload_len);
-       if (ret)
+       ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */
+       ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout);
+       ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len);
+       ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len);
+       if (ret) {
+               ret = -ENOMEM;
                goto out_put_lreq;
+       }
 
        /* for notify_id */
-       pages = ceph_alloc_page_vector(1, GFP_NOIO);
-       if (IS_ERR(pages)) {
-               ret = PTR_ERR(pages);
+       lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO);
+       if (IS_ERR(lreq->notify_id_pages)) {
+               ret = PTR_ERR(lreq->notify_id_pages);
+               lreq->notify_id_pages = NULL;
                goto out_put_lreq;
        }
-       ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify,
-                                                response_data),
-                                pages, PAGE_SIZE, 0, false, true);
 
-       ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO);
-       if (ret)
-               goto out_put_lreq;
+       lreq->preply_pages = preply_pages;
+       lreq->preply_len = preply_len;
+
+       ceph_oid_copy(&lreq->t.base_oid, oid);
+       ceph_oloc_copy(&lreq->t.base_oloc, oloc);
+       lreq->t.flags = CEPH_OSD_FLAG_READ;
 
        linger_submit(lreq);
        ret = linger_reg_commit_wait(lreq);
index 8c6c08446556a23baae724d263bb9bd63eb39c7e..2771fd22dc6aedd0cd82bad91983f6d2381b451d 100644 (file)
@@ -681,11 +681,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
        const struct net_device *last_dev;
        struct net_device_path_ctx ctx = {
                .dev    = dev,
-               .daddr  = daddr,
        };
        struct net_device_path *path;
        int ret = 0;
 
+       memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
        stack->num_paths = 0;
        while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
                last_dev = ctx.dev;
@@ -10304,7 +10304,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
 }
 EXPORT_SYMBOL(netdev_stats_to_stats64);
 
-struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev)
+struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev)
 {
        struct net_device_core_stats __percpu *p;
 
@@ -10315,11 +10315,7 @@ struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev)
                free_percpu(p);
 
        /* This READ_ONCE() pairs with the cmpxchg() above */
-       p = READ_ONCE(dev->core_stats);
-       if (!p)
-               return NULL;
-
-       return this_cpu_ptr(p);
+       return READ_ONCE(dev->core_stats);
 }
 EXPORT_SYMBOL(netdev_core_stats_alloc);
 
@@ -10356,9 +10352,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 
                for_each_possible_cpu(i) {
                        core_stats = per_cpu_ptr(p, i);
-                       storage->rx_dropped += local_read(&core_stats->rx_dropped);
-                       storage->tx_dropped += local_read(&core_stats->tx_dropped);
-                       storage->rx_nohandler += local_read(&core_stats->rx_nohandler);
+                       storage->rx_dropped += READ_ONCE(core_stats->rx_dropped);
+                       storage->tx_dropped += READ_ONCE(core_stats->tx_dropped);
+                       storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler);
                }
        }
        return storage;
index a7044e98765ec5e5b55724527aa61068ccaec20a..64470a727ef77d25fe25fd38c861cac65fd3e95f 100644 (file)
@@ -7016,24 +7016,33 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
        if (!th->ack || th->rst || th->syn)
                return -ENOENT;
 
+       if (unlikely(iph_len < sizeof(struct iphdr)))
+               return -EINVAL;
+
        if (tcp_synq_no_recent_overflow(sk))
                return -ENOENT;
 
        cookie = ntohl(th->ack_seq) - 1;
 
-       switch (sk->sk_family) {
-       case AF_INET:
-               if (unlikely(iph_len < sizeof(struct iphdr)))
+       /* Both struct iphdr and struct ipv6hdr have the version field at the
+        * same offset so we can cast to the shorter header (struct iphdr).
+        */
+       switch (((struct iphdr *)iph)->version) {
+       case 4:
+               if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
                        return -EINVAL;
 
                ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
                break;
 
 #if IS_BUILTIN(CONFIG_IPV6)
-       case AF_INET6:
+       case 6:
                if (unlikely(iph_len < sizeof(struct ipv6hdr)))
                        return -EINVAL;
 
+               if (sk->sk_family != AF_INET6)
+                       return -EINVAL;
+
                ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
                break;
 #endif /* CONFIG_IPV6 */
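
The bpf_tcp_check_syncookie() rework relies on struct iphdr and struct ipv6hdr both carrying the version number in the top four bits of their first byte, so the header type can be decided before its full length is validated. A self-contained sketch of that dispatch on raw bytes (no kernel structs; the test values are chosen purely for illustration):

    #include <stdint.h>
    #include <stdio.h>

    /* The version lives in the top nibble of byte 0 for both IPv4 (0x4x)
     * and IPv6 (0x6x), so one byte is enough to pick a parser.
     */
    static int ip_header_version(const uint8_t *hdr, unsigned int len)
    {
            if (len < 1)
                    return -1;
            return hdr[0] >> 4;
    }

    int main(void)
    {
            uint8_t v4[20] = { 0x45 };      /* version 4, IHL 5 */
            uint8_t v6[40] = { 0x60 };      /* version 6 */

            printf("%d %d\n", ip_header_version(v4, sizeof(v4)),
                   ip_header_version(v6, sizeof(v6)));
            return 0;
    }
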
index 03b6e649c4288a4a8014cf6c200bd0bec0819b7b..6f7ec72016dcd5e0caa90ea631d06317e667d8d7 100644 (file)
@@ -1032,7 +1032,7 @@ bool __skb_flow_dissect(const struct net *net,
                key_eth_addrs = skb_flow_dissector_target(flow_dissector,
                                                          FLOW_DISSECTOR_KEY_ETH_ADDRS,
                                                          target_container);
-               memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
+               memcpy(key_eth_addrs, eth, sizeof(*key_eth_addrs));
        }
 
 proto_again:
@@ -1183,6 +1183,7 @@ proto_again:
                                         VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
                        }
                        key_vlan->vlan_tpid = saved_vlan_tpid;
+                       key_vlan->vlan_eth_type = proto;
                }
 
                fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
index 349480ef68a51e82435b5e201b57601b8446de01..8b6b5e72b217947d4f623962584ad3de75ff9f12 100644 (file)
@@ -159,10 +159,8 @@ static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
        return dst->lwtstate->orig_output(net, sk, skb);
 }
 
-static int xmit_check_hhlen(struct sk_buff *skb)
+static int xmit_check_hhlen(struct sk_buff *skb, int hh_len)
 {
-       int hh_len = skb_dst(skb)->dev->hard_header_len;
-
        if (skb_headroom(skb) < hh_len) {
                int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
 
@@ -274,6 +272,7 @@ static int bpf_xmit(struct sk_buff *skb)
 
        bpf = bpf_lwt_lwtunnel(dst->lwtstate);
        if (bpf->xmit.prog) {
+               int hh_len = dst->dev->hard_header_len;
                __be16 proto = skb->protocol;
                int ret;
 
@@ -291,7 +290,7 @@ static int bpf_xmit(struct sk_buff *skb)
                        /* If the header was expanded, headroom might be too
                         * small for L2 header to come, expand as needed.
                         */
-                       ret = xmit_check_hhlen(skb);
+                       ret = xmit_check_hhlen(skb, hh_len);
                        if (unlikely(ret))
                                return ret;
 
index 159c9c61e6af353cadbdbb03281d2271d36f7e71..d1381ea6d52e092ca3684f122118d6db9ce87cee 100644 (file)
@@ -5242,6 +5242,8 @@ static int rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev,
                *prividx = attr_id_l3_stats;
 
                size_l3 = rtnl_offload_xstats_get_size_stats(dev, t_l3);
+               if (!size_l3)
+                       goto skip_l3_stats;
                attr = nla_reserve_64bit(skb, attr_id_l3_stats, size_l3,
                                         IFLA_OFFLOAD_XSTATS_UNSPEC);
                if (!attr)
@@ -5253,6 +5255,7 @@ static int rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev,
                        return err;
 
                have_data = true;
+skip_l3_stats:
                *prividx = 0;
        }
 
index 9b8443774449f5bb8cd2c62ca34eedf139eeb3ed..5f85e01d4093bb01dc52348204626aa29d616fcf 100644 (file)
@@ -22,6 +22,8 @@
 static siphash_aligned_key_t net_secret;
 static siphash_aligned_key_t ts_secret;
 
+#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)
+
 static __always_inline void net_secret_init(void)
 {
        net_get_random_once(&net_secret, sizeof(net_secret));
@@ -94,17 +96,19 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
 }
 EXPORT_SYMBOL(secure_tcpv6_seq);
 
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
                               __be16 dport)
 {
        const struct {
                struct in6_addr saddr;
                struct in6_addr daddr;
+               unsigned int timeseed;
                __be16 dport;
        } __aligned(SIPHASH_ALIGNMENT) combined = {
                .saddr = *(struct in6_addr *)saddr,
                .daddr = *(struct in6_addr *)daddr,
-               .dport = dport
+               .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+               .dport = dport,
        };
        net_secret_init();
        return siphash(&combined, offsetofend(typeof(combined), dport),
@@ -142,11 +146,13 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
 }
 EXPORT_SYMBOL_GPL(secure_tcp_seq);
 
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 {
        net_secret_init();
-       return siphash_3u32((__force u32)saddr, (__force u32)daddr,
-                           (__force u16)dport, &net_secret);
+       return siphash_4u32((__force u32)saddr, (__force u32)daddr,
+                           (__force u16)dport,
+                           jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+                           &net_secret);
 }
 EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
 #endif
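
The secure_seq.c change folds a coarse time value (jiffies divided by a 10-second period) into the keyed hash, so the per-destination port offset stays stable within one period and reshuffles in the next. A userspace sketch of that structure follows; a toy mixing function stands in for siphash and time(NULL) stands in for jiffies — the kernel uses siphash with a boot-time secret, and everything below is illustrative only.

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    #define SHUFFLE_PERIOD 10       /* seconds; the kernel uses 10 * HZ */

    /* Toy keyed mixer; only the structure matters, not the function. */
    static uint64_t keyed_mix(uint64_t h, uint64_t v)
    {
            h ^= v + 0x9e3779b97f4a7c15ULL;
            h *= 0xff51afd7ed558ccdULL;
            return h ^ (h >> 33);
    }

    /* offset = H(secret, saddr, daddr, dport, timeseed): stable for one
     * period, reshuffled in the next.
     */
    static uint64_t port_offset(uint64_t secret, uint32_t saddr,
                                uint32_t daddr, uint16_t dport)
    {
            uint64_t timeseed = (uint64_t)time(NULL) / SHUFFLE_PERIOD;
            uint64_t h = keyed_mix(secret, saddr);

            h = keyed_mix(h, daddr);
            h = keyed_mix(h, dport);
            return keyed_mix(h, timeseed);
    }

    int main(void)
    {
            printf("offset: %llu\n",
                   (unsigned long long)port_offset(0x1234, 0x0a000001,
                                                   0x0a000002, 443));
            return 0;
    }
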
index 10bde7c6db445a876e6ca06e8fc8a33a0008c934..c90c74de90d5abd40460e1ca39e20903f533dccc 100644 (file)
@@ -3897,7 +3897,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
        unsigned int delta_len = 0;
        struct sk_buff *tail = NULL;
        struct sk_buff *nskb, *tmp;
-       int err;
+       int len_diff, err;
 
        skb_push(skb, -skb_network_offset(skb) + offset);
 
@@ -3937,9 +3937,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
                skb_push(nskb, -skb_network_offset(nskb) + offset);
 
                skb_release_head_state(nskb);
+               len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb);
                __copy_skb_header(nskb, skb);
 
                skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
+               nskb->transport_header += len_diff;
                skb_copy_from_linear_data_offset(skb, -tnl_hlen,
                                                 nskb->data - tnl_hlen,
                                                 offset + tnl_hlen);
@@ -5276,11 +5278,18 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
        if (skb_cloned(to))
                return false;
 
-       /* The page pool signature of struct page will eventually figure out
-        * which pages can be recycled or not but for now let's prohibit slab
-        * allocated and page_pool allocated SKBs from being coalesced.
+       /* In general, avoid mixing slab allocated and page_pool allocated
+        * pages within the same SKB. However when @to is not pp_recycle and
+        * @from is cloned, we can transition frag pages from page_pool to
+        * reference counted.
+        *
+        * On the other hand, don't allow coalescing two pp_recycle SKBs if
+        * @from is cloned, in case the SKB is using page_pool fragment
+        * references (PP_FLAG_PAGE_FRAG). Since we only take full page
+        * references for cloned SKBs at the moment that would result in
+        * inconsistent reference counts.
         */
-       if (to->pp_recycle != from->pp_recycle)
+       if (to->pp_recycle != (from->pp_recycle && !skb_cloned(from)))
                return false;
 
        if (len <= skb_tailroom(to)) {
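
The skb_try_coalesce() predicate above encodes the rule from the comment: a cloned page-pool source is treated as non-recyclable, because its frag pages can be converted to plain refcounting, while two pp_recycle skbs must not merge when the source is cloned. A small stand-alone restatement of that boolean, with illustrative fields standing in for the skb flags:

    #include <stdbool.h>
    #include <stdio.h>

    struct buf {
            bool pp_recycle;        /* frags owned by a page pool  */
            bool cloned;            /* frags shared with a clone   */
    };

    /* Same shape as the hunk: a cloned page-pool source is demoted to
     * "not recyclable" (its pages can move to plain refcounting), and
     * two pp_recycle buffers only merge when the source is unshared.
     */
    static bool can_coalesce(const struct buf *to, const struct buf *from)
    {
            return to->pp_recycle == (from->pp_recycle && !from->cloned);
    }

    int main(void)
    {
            struct buf to   = { .pp_recycle = false };
            struct buf from = { .pp_recycle = true, .cloned = true };

            printf("coalesce: %d\n", can_coalesce(&to, &from)); /* 1 */
            return 0;
    }
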
index ae662567a6cb6a440c79a9805a2cd6d146ac5a29..0ea29270d7e53730d14ec43654be8f956f891552 100644 (file)
@@ -1030,9 +1030,15 @@ static void __net_exit dccp_v4_exit_net(struct net *net)
        inet_ctl_sock_destroy(pn->v4_ctl_sk);
 }
 
+static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
+{
+       inet_twsk_purge(&dccp_hashinfo, AF_INET);
+}
+
 static struct pernet_operations dccp_v4_ops = {
        .init   = dccp_v4_init_net,
        .exit   = dccp_v4_exit_net,
+       .exit_batch = dccp_v4_exit_batch,
        .id     = &dccp_v4_pernet_id,
        .size   = sizeof(struct dccp_v4_pernet),
 };
index eab3bd1ee9a0a0064c04ff97fd8363e60daa0079..fa663518fa0e465458b7486ad0cd0672425f08b0 100644 (file)
@@ -1115,9 +1115,15 @@ static void __net_exit dccp_v6_exit_net(struct net *net)
        inet_ctl_sock_destroy(pn->v6_ctl_sk);
 }
 
+static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
+{
+       inet_twsk_purge(&dccp_hashinfo, AF_INET6);
+}
+
 static struct pernet_operations dccp_v6_ops = {
        .init   = dccp_v6_init_net,
        .exit   = dccp_v6_exit_net,
+       .exit_batch = dccp_v6_exit_batch,
        .id     = &dccp_v6_pernet_id,
        .size   = sizeof(struct dccp_v6_pernet),
 };
index 0ee7d4c0c95545542d850cd2061cc3cddde38306..a09ba642b5e76abdbfd0d844d12be9572c001abc 100644 (file)
@@ -854,7 +854,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
        memcpy(msg->neighbor, dn_hiord, ETH_ALEN);
 
        if (dn_db->router) {
-               struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
+               struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n);
                dn_dn2eth(msg->neighbor, dn->addr);
        }
 
@@ -902,7 +902,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 {
        int n;
        struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-       struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
+       struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n);
        struct sk_buff *skb;
        size_t size;
        unsigned char *ptr;
index 94b306f6d5511b8fcd615c690c6e0dbf20a44f14..fbd98ac853ea0554f7ffbd003d676be4bedfdeee 100644 (file)
@@ -426,7 +426,8 @@ int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
                        if (!dn_db->router) {
                                dn_db->router = neigh_clone(neigh);
                        } else {
-                               if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority)
+                               if (msg->priority > container_of(dn_db->router,
+                                                                struct dn_neigh, n)->priority)
                                        neigh_release(xchg(&dn_db->router, neigh_clone(neigh)));
                        }
                }
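
The decnet hunks here and in the route code below replace open-coded casts like (struct dn_neigh *)neigh with container_of(), which subtracts the member's offset instead of assuming the embedded struct sits at offset zero. A sketch of the difference, using a hypothetical layout where the embedded member is deliberately not first:

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct neigh { int refcnt; };

    /* Hypothetical layout: the embedded member is NOT first, so a plain
     * (struct dn_neigh_like *)ptr cast would read garbage; container_of()
     * still lands on the right address.
     */
    struct dn_neigh_like {
            int priority;
            struct neigh n;
            unsigned int addr;
    };

    int main(void)
    {
            struct dn_neigh_like dn = { .priority = 7, .addr = 42 };
            struct neigh *np = &dn.n;

            printf("addr %u\n",
                   container_of(np, struct dn_neigh_like, n)->addr);
            return 0;
    }
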
index 7e85f2a1ae2541b093d58e1a4f9c3b224ab5476d..d1d78a463a06bf091a799001edbfa530a34ff7df 100644 (file)
@@ -1120,7 +1120,7 @@ source_ok:
                /* Ok then, we assume its directly connected and move on */
 select_source:
                if (neigh)
-                       gateway = ((struct dn_neigh *)neigh)->addr;
+                       gateway = container_of(neigh, struct dn_neigh, n)->addr;
                if (gateway == 0)
                        gateway = fld.daddr;
                if (fld.saddr == 0) {
@@ -1429,7 +1429,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
                /* Use the default router if there is one */
                neigh = neigh_clone(dn_db->router);
                if (neigh) {
-                       gateway = ((struct dn_neigh *)neigh)->addr;
+                       gateway = container_of(neigh, struct dn_neigh, n)->addr;
                        goto make_route;
                }
 
index ca6af86964bcef49192a139d2722ea6f19174a27..cf933225df32492c084567215a2333ed55ada802 100644 (file)
@@ -562,7 +562,6 @@ static void dsa_port_teardown(struct dsa_port *dp)
 {
        struct devlink_port *dlp = &dp->devlink_port;
        struct dsa_switch *ds = dp->ds;
-       struct net_device *slave;
 
        if (!dp->setup)
                return;
@@ -584,11 +583,9 @@ static void dsa_port_teardown(struct dsa_port *dp)
                dsa_port_link_unregister_of(dp);
                break;
        case DSA_PORT_TYPE_USER:
-               slave = dp->slave;
-
-               if (slave) {
+               if (dp->slave) {
+                       dsa_slave_destroy(dp->slave);
                        dp->slave = NULL;
-                       dsa_slave_destroy(slave);
                }
                break;
        }
@@ -1147,17 +1144,17 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
        if (err)
                goto teardown_cpu_ports;
 
-       err = dsa_tree_setup_master(dst);
+       err = dsa_tree_setup_ports(dst);
        if (err)
                goto teardown_switches;
 
-       err = dsa_tree_setup_ports(dst);
+       err = dsa_tree_setup_master(dst);
        if (err)
-               goto teardown_master;
+               goto teardown_ports;
 
        err = dsa_tree_setup_lags(dst);
        if (err)
-               goto teardown_ports;
+               goto teardown_master;
 
        dst->setup = true;
 
@@ -1165,10 +1162,10 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
 
        return 0;
 
-teardown_ports:
-       dsa_tree_teardown_ports(dst);
 teardown_master:
        dsa_tree_teardown_master(dst);
+teardown_ports:
+       dsa_tree_teardown_ports(dst);
 teardown_switches:
        dsa_tree_teardown_switches(dst);
 teardown_cpu_ports:
@@ -1186,10 +1183,10 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
 
        dsa_tree_teardown_lags(dst);
 
-       dsa_tree_teardown_ports(dst);
-
        dsa_tree_teardown_master(dst);
 
+       dsa_tree_teardown_ports(dst);
+
        dsa_tree_teardown_switches(dst);
 
        dsa_tree_teardown_cpu_ports(dst);
index 991c2930d631a21d6c4e117ebba12abac1db70c2..2851e44c4cf0dc5860f7e60545f5c346c7ecf6bd 100644 (file)
@@ -335,11 +335,24 @@ static const struct attribute_group dsa_group = {
        .attrs  = dsa_slave_attrs,
 };
 
+static void dsa_master_reset_mtu(struct net_device *dev)
+{
+       int err;
+
+       err = dev_set_mtu(dev, ETH_DATA_LEN);
+       if (err)
+               netdev_dbg(dev,
+                          "Unable to reset MTU to exclude DSA overheads\n");
+}
+
 int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
 {
+       const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops;
        struct dsa_switch *ds = cpu_dp->ds;
        struct device_link *consumer_link;
-       int ret;
+       int mtu, ret;
+
+       mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops);
 
        /* The DSA master must use SET_NETDEV_DEV for this to work. */
        consumer_link = device_link_add(ds->dev, dev->dev.parent,
@@ -349,6 +362,15 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
                           "Failed to create a device link to DSA switch %s\n",
                           dev_name(ds->dev));
 
+       /* The switch driver may not implement ->port_change_mtu(), in which
+        * case dsa_slave_change_mtu() will not update the master MTU either,
+        * so we need to do that here.
+        */
+       ret = dev_set_mtu(dev, mtu);
+       if (ret)
+               netdev_warn(dev, "error %d setting MTU to %d to include DSA overhead\n",
+                           ret, mtu);
+
        /* If we use a tagging format that doesn't have an ethertype
         * field, make sure that all packets from this point on get
         * sent to the tag format's receive function.
@@ -384,6 +406,7 @@ void dsa_master_teardown(struct net_device *dev)
        sysfs_remove_group(&dev->dev.kobj, &dsa_group);
        dsa_netdev_ops_set(dev, NULL);
        dsa_master_ethtool_teardown(dev);
+       dsa_master_reset_mtu(dev);
        dsa_master_set_promiscuity(dev, -1);
 
        dev->dsa_ptr = NULL;
index 32d472a8224194b1049205919676980d64970088..bdccb613285dbf69b6f9ec7ef625df87aeedd3c3 100644 (file)
@@ -451,6 +451,7 @@ out_rollback_unoffload:
        switchdev_bridge_port_unoffload(brport_dev, dp,
                                        &dsa_slave_switchdev_notifier,
                                        &dsa_slave_switchdev_blocking_notifier);
+       dsa_flush_workqueue();
 out_rollback_unbridge:
        dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
 out_rollback:
@@ -1620,8 +1621,10 @@ int dsa_port_link_register_of(struct dsa_port *dp)
                        if (ds->ops->phylink_mac_link_down)
                                ds->ops->phylink_mac_link_down(ds, port,
                                        MLO_AN_FIXED, PHY_INTERFACE_MODE_NA);
+                       of_node_put(phy_np);
                        return dsa_port_phylink_register(dp);
                }
+               of_node_put(phy_np);
                return 0;
        }
 
index 41c69a6e7854ac2fef309010d57ff99aa8c9cfbd..8022d50584db72618647a15a305560126a25cf07 100644 (file)
@@ -285,7 +285,7 @@ static void dsa_port_manage_cpu_flood(struct dsa_port *dp)
                if (other_dp->slave->flags & IFF_ALLMULTI)
                        flags.val |= BR_MCAST_FLOOD;
                if (other_dp->slave->flags & IFF_PROMISC)
-                       flags.val |= BR_FLOOD;
+                       flags.val |= BR_FLOOD | BR_MCAST_FLOOD;
        }
 
        err = dsa_port_pre_bridge_flags(dp, flags, NULL);
index f64b805303cd798dd48c92f68c960e19bcbe25a2..eb204ad36eeec083f9f4bf8f9ff89baf06eea72f 100644 (file)
@@ -21,6 +21,14 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb,
        struct dsa_port *dp = dsa_slave_to_port(dev);
        u8 *tag;
 
+       /* Calculate checksums (if required) before adding the trailer tag to
+        * avoid including it in calculations. That would lead to wrong
+        * checksums after the switch strips the tag.
+        */
+       if (skb->ip_summed == CHECKSUM_PARTIAL &&
+           skb_checksum_help(skb))
+               return NULL;
+
        /* Tag encoding */
        tag  = skb_put(skb, HELLCREEK_TAG_LEN);
        *tag = BIT(dp->index);
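
The hellcreek comment spells out the ordering: any pending partial checksum must be resolved before the trailer tag is appended, otherwise the tag bytes would be folded into the sum and the frame would verify as corrupt once the switch strips them. A userspace sketch of the same ordering, with an RFC 1071-style ones'-complement sum standing in for skb_checksum_help():

    #include <stdint.h>
    #include <stdio.h>

    /* RFC 1071-style ones'-complement sum, standing in for the stack
     * resolving CHECKSUM_PARTIAL in software.
     */
    static uint16_t csum16(const uint8_t *data, unsigned int len)
    {
            uint32_t sum = 0;

            for (unsigned int i = 0; i + 1 < len; i += 2)
                    sum += (uint32_t)data[i] << 8 | data[i + 1];
            if (len & 1)
                    sum += (uint32_t)data[len - 1] << 8;
            while (sum >> 16)
                    sum = (sum & 0xffff) + (sum >> 16);
            return (uint16_t)~sum;
    }

    int main(void)
    {
            uint8_t frame[64] = "payload";
            unsigned int len = 7;

            /* Checksum first, over the payload only... */
            uint16_t ck = csum16(frame, len);

            /* ...then append the trailer tag, which the switch strips
             * before the peer ever verifies the checksum.
             */
            frame[len++] = 0x01;    /* port bit, as in the hunk above */

            printf("csum=0x%04x len=%u\n", ck, len);
            return 0;
    }
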
index 70e6c87fbe3df15c5ca6adc68b685e3a2afa21a4..d747166bb291ccf95ae8ac8d6863177cda6649e3 100644 (file)
@@ -446,7 +446,6 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
        struct page *page;
        struct sk_buff *trailer;
        int tailen = esp->tailen;
-       unsigned int allocsz;
 
        /* this is non-NULL only with TCP/UDP Encapsulation */
        if (x->encap) {
@@ -456,8 +455,8 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
                        return err;
        }
 
-       allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
-       if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+       if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
+           ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
                goto cow;
 
        if (!skb_cloned(skb)) {
index cc8e84ef2ae46246216c12bbddc3e7a6dde0832c..ccb62038f6a4a61a9f02fba91a673b4dc2ffca5b 100644 (file)
@@ -889,8 +889,13 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
        }
 
        if (cfg->fc_oif || cfg->fc_gw_family) {
-               struct fib_nh *nh = fib_info_nh(fi, 0);
+               struct fib_nh *nh;
+
+               /* cannot match on nexthop object attributes */
+               if (fi->nh)
+                       return 1;
 
+               nh = fib_info_nh(fi, 0);
                if (cfg->fc_encap) {
                        if (fib_encap_match(net, cfg->fc_encap_type,
                                            cfg->fc_encap, nh, cfg, extack))
index 2ad3c7b42d6d271baf941e7f74feaf981dff8036..1d9e6d5e9a76c5c22d78d7da5b0efbf61d8feb88 100644 (file)
@@ -2403,9 +2403,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
                        /* decrease mem now to avoid the memleak warning */
                        atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
                                   &sk->sk_omem_alloc);
-                       kfree_rcu(psl, rcu);
                }
                rcu_assign_pointer(pmc->sflist, newpsl);
+               if (psl)
+                       kfree_rcu(psl, rcu);
                psl = newpsl;
        }
        rv = 1; /* > 0 for insert logic below if sl_count is 0 */
@@ -2507,11 +2508,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
                /* decrease mem now to avoid the memleak warning */
                atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
                           &sk->sk_omem_alloc);
-               kfree_rcu(psl, rcu);
-       } else
+       } else {
                (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
                        0, NULL, 0);
+       }
        rcu_assign_pointer(pmc->sflist, newpsl);
+       if (psl)
+               kfree_rcu(psl, rcu);
        pmc->sfmode = msf->imsf_fmode;
        err = 0;
 done:
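
Both igmp.c hunks reorder the teardown so the new filter list is published with rcu_assign_pointer() before the old one is handed to kfree_rcu(), closing the window in which a reader could follow a pointer to memory already queued for freeing. A sketch of that publish-then-reclaim ordering, with a C11 release store standing in for rcu_assign_pointer() and an immediate free standing in for the grace-period-deferred kfree_rcu() (safe here only because this sketch has no concurrent readers):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct filter { int count; };

    static _Atomic(struct filter *) sflist;

    /* Publish the replacement first (rcu_assign_pointer-like release
     * store), then reclaim the old object. The kernel defers the free
     * past a grace period with kfree_rcu().
     */
    static void replace_filter(struct filter *newpsl)
    {
            struct filter *old = atomic_load_explicit(&sflist,
                                                      memory_order_relaxed);

            atomic_store_explicit(&sflist, newpsl, memory_order_release);
            free(old);
    }

    int main(void)
    {
            replace_filter(calloc(1, sizeof(struct filter)));
            replace_filter(NULL);
            printf("done\n");
            return 0;
    }
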
index 17440840a7914d76c6de91baea5ba94622a051f8..a5d57fa679caa47ec31ea4b1de3c45f93be4cd13 100644 (file)
@@ -504,7 +504,7 @@ not_unique:
        return -EADDRNOTAVAIL;
 }
 
-static u32 inet_sk_port_offset(const struct sock *sk)
+static u64 inet_sk_port_offset(const struct sock *sk)
 {
        const struct inet_sock *inet = inet_sk(sk);
 
@@ -726,15 +726,17 @@ EXPORT_SYMBOL_GPL(inet_unhash);
  * Note that we use 32bit integers (vs RFC 'short integers')
  * because 2^16 is not a multiple of num_ephemeral and this
  * property might be used by a clever attacker.
- * RFC claims using TABLE_LENGTH=10 buckets gives an improvement,
- * we use 256 instead to really give more isolation and
- * privacy, this only consumes 1 KB of kernel memory.
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though
+ * attacks have since been demonstrated, so we use 65536 instead to really
+ * give more isolation and privacy, at the expense of 256kB of kernel
+ * memory.
  */
-#define INET_TABLE_PERTURB_SHIFT 8
-static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT];
+#define INET_TABLE_PERTURB_SHIFT 16
+#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT)
+static u32 *table_perturb;
 
 int __inet_hash_connect(struct inet_timewait_death_row *death_row,
-               struct sock *sk, u32 port_offset,
+               struct sock *sk, u64 port_offset,
                int (*check_established)(struct inet_timewait_death_row *,
                        struct sock *, __u16, struct inet_timewait_sock **))
 {
@@ -774,10 +776,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
        if (likely(remaining > 1))
                remaining &= ~1U;
 
-       net_get_random_once(table_perturb, sizeof(table_perturb));
-       index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT);
+       net_get_random_once(table_perturb,
+                           INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
+       index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);
+
+       offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
+       offset %= remaining;
 
-       offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining;
        /* In first pass we try ports of @low parity.
         * inet_csk_get_port() does the opposite choice.
         */
@@ -831,11 +836,12 @@ next_port:
        return -EADDRNOTAVAIL;
 
 ok:
-       /* If our first attempt found a candidate, skip next candidate
-        * in 1/16 of cases to add some noise.
+       /* Here we want to add a little bit of randomness to the next source
+        * port that will be chosen. We use a max() with a random value here
+        * so that on low contention the randomness is maximal and on high
+        * contention it may be nonexistent.
         */
-       if (!i && !(prandom_u32() % 16))
-               i = 2;
+       i = max_t(int, i, (prandom_u32() & 7) * 2);
        WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
 
        /* Head lock still held and bh's disabled */
@@ -859,7 +865,7 @@ ok:
 int inet_hash_connect(struct inet_timewait_death_row *death_row,
                      struct sock *sk)
 {
-       u32 port_offset = 0;
+       u64 port_offset = 0;
 
        if (!inet_sk(sk)->inet_num)
                port_offset = inet_sk_port_offset(sk);
@@ -909,6 +915,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
                                            low_limit,
                                            high_limit);
        init_hashinfo_lhash2(h);
+
+       /* this one is used for source ports of outgoing connections */
+       table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE,
+                                     sizeof(*table_perturb), GFP_KERNEL);
+       if (!table_perturb)
+               panic("TCP: failed to alloc table_perturb");
 }
 
 int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
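
Putting the pieces of the inet_hashtables.c change together: the low 16 bits of the 64-bit port_offset select one of 65536 perturbation slots, the slot value plus the high 32 bits select the starting point within the ephemeral range, and after a successful connect the slot is advanced by a noisy max()-based step. A compressed userspace sketch of that selection — kernel locking, the parity-alternating port walk, and range handling are all omitted, and the names are illustrative:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define PERTURB_SHIFT 16
    #define PERTURB_SIZE  (1u << PERTURB_SHIFT)

    static uint32_t table_perturb[PERTURB_SIZE];    /* 256 KB, as above */

    static uint16_t pick_port(uint64_t port_offset, uint16_t range_low,
                              uint32_t remaining, uint32_t attempts_used)
    {
            /* Low bits choose the slot; slot + high bits choose the
             * starting point inside the ephemeral range.
             */
            uint32_t index = port_offset & (PERTURB_SIZE - 1);
            uint32_t offset = (table_perturb[index] +
                               (uint32_t)(port_offset >> 32)) % remaining;

            /* Noisy advance: max(i, random) keeps randomness high under
             * low contention and lets it vanish under high contention.
             */
            uint32_t noise = (uint32_t)(rand() & 7) * 2;
            uint32_t i = attempts_used > noise ? attempts_used : noise;

            table_perturb[index] += i + 2;
            return (uint16_t)(range_low + offset);
    }

    int main(void)
    {
            printf("port: %u\n", pick_port(0x1234567890abcdefULL,
                                           32768, 28232, 0));
            return 0;
    }
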
index 9e0bbd02656013e6e8be5765a7b86fc16e6bf831..0ec501845cb3bb51082f8091b4e0ebb32f83bf33 100644 (file)
@@ -52,7 +52,8 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
        spin_unlock(lock);
 
        /* Disassociate with bind bucket. */
-       bhead = &hashinfo->bhash[tw->tw_bslot];
+       bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
+                       hashinfo->bhash_size)];
 
        spin_lock(&bhead->lock);
        inet_twsk_bind_unhash(tw, hashinfo);
@@ -111,12 +112,8 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
           Note that any socket with inet->num != 0 MUST be bound in
           binding cache, even if it is closed.
         */
-       /* Cache inet_bhashfn(), because 'struct net' might be no longer
-        * available later in inet_twsk_kill().
-        */
-       tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
-                                   hashinfo->bhash_size);
-       bhead = &hashinfo->bhash[tw->tw_bslot];
+       bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
+                       hashinfo->bhash_size)];
        spin_lock(&bhead->lock);
        tw->tw_tb = icsk->icsk_bind_hash;
        WARN_ON(!icsk->icsk_bind_hash);
@@ -257,3 +254,50 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
        }
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
+
+void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
+{
+       struct inet_timewait_sock *tw;
+       struct sock *sk;
+       struct hlist_nulls_node *node;
+       unsigned int slot;
+
+       for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
+               struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+restart_rcu:
+               cond_resched();
+               rcu_read_lock();
+restart:
+               sk_nulls_for_each_rcu(sk, node, &head->chain) {
+                       if (sk->sk_state != TCP_TIME_WAIT)
+                               continue;
+                       tw = inet_twsk(sk);
+                       if ((tw->tw_family != family) ||
+                               refcount_read(&twsk_net(tw)->ns.count))
+                               continue;
+
+                       if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
+                               continue;
+
+                       if (unlikely((tw->tw_family != family) ||
+                                    refcount_read(&twsk_net(tw)->ns.count))) {
+                               inet_twsk_put(tw);
+                               goto restart;
+                       }
+
+                       rcu_read_unlock();
+                       local_bh_disable();
+                       inet_twsk_deschedule_put(tw);
+                       local_bh_enable();
+                       goto restart_rcu;
+               }
+               /* If the nulls value we got at the end of this lookup is
+                * not the expected one, we must restart lookup.
+                * We probably met an item that was moved to another chain.
+                */
+               if (get_nulls_value(node) != slot)
+                       goto restart;
+               rcu_read_unlock();
+       }
+}
+EXPORT_SYMBOL_GPL(inet_twsk_purge);
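
inet_twsk_purge() above is a standard nulls-list walk: each hash chain is terminated not by NULL but by an odd "nulls" value encoding the bucket number, so a lockless traverser that got migrated onto another chain notices the mismatch at the end and restarts. A sketch of just that termination check — in the kernel the bucket head is re-read under RCU before retrying, whereas the static chain here always matches on the first pass:

    #include <stdint.h>
    #include <stdio.h>

    struct node { struct node *next; int value; };

    /* A chain ends in an odd pointer encoding its bucket number. */
    #define NULLS_MARK(slot) ((struct node *)(((uintptr_t)(slot) << 1) | 1))
    #define IS_NULLS(p)      ((uintptr_t)(p) & 1)
    #define NULLS_SLOT(p)    ((unsigned int)((uintptr_t)(p) >> 1))

    static void walk_bucket(struct node *head, unsigned int slot)
    {
    restart:
            for (struct node *p = head; ; p = p->next) {
                    if (IS_NULLS(p)) {
                            /* Ended on another chain's marker: restart
                             * (the kernel re-reads the head under RCU).
                             */
                            if (NULLS_SLOT(p) != slot)
                                    goto restart;
                            return;
                    }
                    printf("value %d\n", p->value);
            }
    }

    int main(void)
    {
            struct node b = { NULLS_MARK(3), 2 };
            struct node a = { &b, 1 };

            walk_bucket(&a, 3);
            return 0;
    }
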
index 99db2e41ed10f1fd6472b16278307ee97eee0c57..aacee9dd771b4ae1317b53b9128d1051f7450fc8 100644 (file)
@@ -459,14 +459,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
                       __be16 proto)
 {
        struct ip_tunnel *tunnel = netdev_priv(dev);
-
-       if (tunnel->parms.o_flags & TUNNEL_SEQ)
-               tunnel->o_seqno++;
+       __be16 flags = tunnel->parms.o_flags;
 
        /* Push GRE header. */
        gre_build_header(skb, tunnel->tun_hlen,
-                        tunnel->parms.o_flags, proto, tunnel->parms.o_key,
-                        htonl(tunnel->o_seqno));
+                        flags, proto, tunnel->parms.o_key,
+                        (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
 
        ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
 }
@@ -504,7 +502,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
                (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
        gre_build_header(skb, tunnel_hlen, flags, proto,
                         tunnel_id_to_key32(tun_info->key.tun_id),
-                        (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+                        (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
 
        ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
 
@@ -581,7 +579,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
        }
 
        gre_build_header(skb, 8, TUNNEL_SEQ,
-                        proto, 0, htonl(tunnel->o_seqno++));
+                        proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
 
        ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
 
@@ -605,8 +603,8 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
        key = &info->key;
        ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
                            tunnel_id_to_key32(key->tun_id),
-                           key->tos & ~INET_ECN_MASK, 0, skb->mark,
-                           skb_get_hash(skb));
+                           key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
+                           skb->mark, skb_get_hash(skb));
        rt = ip_route_output_key(dev_net(dev), &fl4);
        if (IS_ERR(rt))
                return PTR_ERR(rt);
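
The repeated o_seqno changes in this file and in the ip6_gre.c hunks below exist because tunnel transmit is lockless: two CPUs executing a plain tunnel->o_seqno++ can hand out duplicate sequence numbers, while atomic_fetch_inc() gives each sender a unique value. A minimal sketch with C11 atomics standing in for the kernel's atomic_t:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_uint o_seqno;

    /* Each concurrent sender gets a unique value; a plain o_seqno++
     * could hand the same number to two CPUs.
     */
    static unsigned int next_seqno(void)
    {
            return atomic_fetch_add_explicit(&o_seqno, 1,
                                             memory_order_relaxed);
    }

    int main(void)
    {
            unsigned int a = next_seqno();
            unsigned int b = next_seqno();

            printf("%u %u\n", a, b);        /* 0 1 */
            return 0;
    }
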
index 5a473319d3a5c8924d1101f95ea83244f5dbb833..94017a8c39945612443ba32a936f4e85eb6533ad 100644 (file)
@@ -294,8 +294,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
 
                ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
                                    iph->saddr, tunnel->parms.o_key,
-                                   RT_TOS(iph->tos), tunnel->parms.link,
-                                   tunnel->fwmark, 0);
+                                   RT_TOS(iph->tos), dev_net(dev),
+                                   tunnel->parms.link, tunnel->fwmark, 0);
                rt = ip_route_output_key(tunnel->net, &fl4);
 
                if (!IS_ERR(rt)) {
@@ -570,7 +570,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
        }
        ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
                            tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
-                           0, skb->mark, skb_get_hash(skb));
+                           dev_net(dev), 0, skb->mark, skb_get_hash(skb));
        if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
                goto tx_error;
 
@@ -726,7 +726,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
        }
 
        ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
-                           tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+                           tunnel->parms.o_key, RT_TOS(tos),
+                           dev_net(dev), tunnel->parms.link,
                            tunnel->fwmark, skb_get_hash(skb));
 
        if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
deleted file mode 100644 (file)
index e69de29..0000000
index 3ee947557b88358e31afce995b3f157b0c41c0f8..aa9a11b20d18e9a11dd36199217ff670227a92f9 100644 (file)
@@ -305,6 +305,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
        struct net *net = sock_net(sk);
        if (sk->sk_family == AF_INET) {
                struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+               u32 tb_id = RT_TABLE_LOCAL;
                int chk_addr_ret;
 
                if (addr_len < sizeof(*addr))
@@ -318,7 +319,8 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
                pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
                         sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
 
-               chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
+               tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
+               chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
 
                if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk),
                                                 addr->sin_addr.s_addr,
@@ -355,6 +357,14 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
                                return -ENODEV;
                        }
                }
+
+               if (!dev && sk->sk_bound_dev_if) {
+                       dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+                       if (!dev) {
+                               rcu_read_unlock();
+                               return -ENODEV;
+                       }
+               }
                has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev,
                                                    scoped);
                rcu_read_unlock();
index 98c6f3429593150af72cdd6cb25efc5792fe23ef..ed01063d8f3033561ad77e4f646ddbc9e8f93354 100644 (file)
@@ -1726,6 +1726,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        struct in_device *in_dev = __in_dev_get_rcu(dev);
        unsigned int flags = RTCF_MULTICAST;
        struct rtable *rth;
+       bool no_policy;
        u32 itag = 0;
        int err;
 
@@ -1736,8 +1737,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        if (our)
                flags |= RTCF_LOCAL;
 
+       no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+       if (no_policy)
+               IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
        rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
-                          IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+                          no_policy, false);
        if (!rth)
                return -ENOBUFS;
 
@@ -1753,6 +1758,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 #endif
        RT_CACHE_STAT_INC(in_slow_mc);
 
+       skb_dst_drop(skb);
        skb_dst_set(skb, &rth->dst);
        return 0;
 }
@@ -1795,7 +1801,7 @@ static int __mkroute_input(struct sk_buff *skb,
        struct rtable *rth;
        int err;
        struct in_device *out_dev;
-       bool do_cache;
+       bool do_cache, no_policy;
        u32 itag = 0;
 
        /* get a working reference to the output device */
@@ -1840,6 +1846,10 @@ static int __mkroute_input(struct sk_buff *skb,
                }
        }
 
+       no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+       if (no_policy)
+               IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
        fnhe = find_exception(nhc, daddr);
        if (do_cache) {
                if (fnhe)
@@ -1852,8 +1862,7 @@ static int __mkroute_input(struct sk_buff *skb,
                }
        }
 
-       rth = rt_dst_alloc(out_dev->dev, 0, res->type,
-                          IN_DEV_ORCONF(in_dev, NOPOLICY),
+       rth = rt_dst_alloc(out_dev->dev, 0, res->type, no_policy,
                           IN_DEV_ORCONF(out_dev, NOXFRM));
        if (!rth) {
                err = -ENOBUFS;
@@ -2228,6 +2237,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        struct rtable   *rth;
        struct flowi4   fl4;
        bool do_cache = true;
+       bool no_policy;
 
        /* IP on this device is disabled. */
 
@@ -2346,6 +2356,10 @@ brd_input:
        RT_CACHE_STAT_INC(in_brd);
 
 local_input:
+       no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+       if (no_policy)
+               IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
        do_cache &= res->fi && !itag;
        if (do_cache) {
                struct fib_nh_common *nhc = FIB_RES_NHC(*res);
@@ -2360,7 +2374,7 @@ local_input:
 
        rth = rt_dst_alloc(ip_rt_get_dev(net, res),
                           flags | RTCF_LOCAL, res->type,
-                          IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+                          no_policy, false);
        if (!rth)
                goto e_nobufs;
 
index 2cb3b852d14861231ac47f0b3e4daeb57682ffd2..f33c31dd7366c06a642bdc5954856efa9d8da0ac 100644 (file)
@@ -281,6 +281,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
 EXPORT_SYMBOL(cookie_ecn_ok);
 
 struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+                                           const struct tcp_request_sock_ops *af_ops,
                                            struct sock *sk,
                                            struct sk_buff *skb)
 {
@@ -297,6 +298,10 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
                return NULL;
 
        treq = tcp_rsk(req);
+
+       /* treq->af_specific might be used to perform TCP_MD5 lookup */
+       treq->af_specific = af_ops;
+
        treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
 #if IS_ENABLED(CONFIG_MPTCP)
        treq->is_mptcp = sk_is_mptcp(sk);
@@ -364,7 +369,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
                goto out;
 
        ret = NULL;
-       req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb);
+       req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops,
+                                    &tcp_request_sock_ipv4_ops, sk, skb);
        if (!req)
                goto out;
 
index cf18fbcbf123a864608a9603bfe215def9e4b70e..bb7ef45408e1aee88ee043b48d07dcecc4089711 100644 (file)
@@ -2335,8 +2335,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
        if (sk->sk_state == TCP_LISTEN)
                goto out;
 
-       if (tp->recvmsg_inq)
+       if (tp->recvmsg_inq) {
                *cmsg_flags = TCP_CMSG_INQ;
+               msg->msg_get_inq = 1;
+       }
        timeo = sock_rcvtimeo(sk, nonblock);
 
        /* Urgent data needs to be handled specially. */
@@ -2559,7 +2561,7 @@ recv_sndq:
 int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
                int flags, int *addr_len)
 {
-       int cmsg_flags = 0, ret, inq;
+       int cmsg_flags = 0, ret;
        struct scm_timestamping_internal tss;
 
        if (unlikely(flags & MSG_ERRQUEUE))
@@ -2576,12 +2578,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
        release_sock(sk);
        sk_defer_free_flush(sk);
 
-       if (cmsg_flags && ret >= 0) {
+       if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
                if (cmsg_flags & TCP_CMSG_TS)
                        tcp_recv_timestamp(msg, sk, &tss);
-               if (cmsg_flags & TCP_CMSG_INQ) {
-                       inq = tcp_inq_hint(sk);
-                       put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
+               if (msg->msg_get_inq) {
+                       msg->msg_inq = tcp_inq_hint(sk);
+                       if (cmsg_flags & TCP_CMSG_INQ)
+                               put_cmsg(msg, SOL_TCP, TCP_CM_INQ,
+                                        sizeof(msg->msg_inq), &msg->msg_inq);
                }
        }
        return ret;
index 2088f93fa37b5fb9110e7933242a27bd4009990e..60f99e9fb6d1226ab027f044f0748058a91ca784 100644 (file)
@@ -3867,7 +3867,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
                tcp_process_tlp_ack(sk, ack, flag);
 
        if (tcp_ack_is_dubious(sk, flag)) {
-               if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
+               if (!(flag & (FLAG_SND_UNA_ADVANCED |
+                             FLAG_NOT_DUP | FLAG_DSACKING_ACK))) {
                        num_dupack = 1;
                        /* Consider if pure acks were aggregated in tcp_add_backlog() */
                        if (!(flag & FLAG_DATA))
@@ -5454,7 +5455,17 @@ static void tcp_new_space(struct sock *sk)
        INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
 }
 
-static void tcp_check_space(struct sock *sk)
+/* Caller made space either from:
+ * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced)
+ * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt)
+ *
+ * We might be able to generate EPOLLOUT to the application if:
+ * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2
+ * 2) notsent amount (tp->write_seq - tp->snd_nxt) became
+ *    small enough that tcp_stream_memory_free() decides it
+ *    is time to generate EPOLLOUT.
+ */
+void tcp_check_space(struct sock *sk)
 {
        /* pairs with tcp_poll() */
        smp_mb();
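
The new tcp_check_space() comment spells out when the stack may raise EPOLLOUT: space consumed in the output/rtx queues has dropped below half the send buffer, and the unsent backlog (write_seq - snd_nxt) is small enough. A stand-alone restatement of that predicate follows; the field names mirror the comment, but the struct is purely illustrative and not the kernel's:

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative fields only; names mirror the comment above. */
    struct snd_state {
            unsigned int sndbuf;            /* sk->sk_sndbuf          */
            unsigned int queued;            /* output + rtx queues    */
            unsigned int write_seq;         /* tp->write_seq          */
            unsigned int snd_nxt;           /* tp->snd_nxt            */
            unsigned int notsent_lowat;     /* wakeup threshold       */
    };

    static bool should_wake_writer(const struct snd_state *s)
    {
            unsigned int notsent = s->write_seq - s->snd_nxt;

            return s->queued < s->sndbuf / 2 &&
                   notsent <= s->notsent_lowat;
    }

    int main(void)
    {
            struct snd_state s = {
                    .sndbuf = 65536, .queued = 1000,
                    .write_seq = 5000, .snd_nxt = 4500,
                    .notsent_lowat = 16384,
            };

            printf("wake: %d\n", should_wake_writer(&s));   /* 1 */
            return 0;
    }
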
index f9cec624068dfa1d218357d7e88c89459d7d54f4..457f5b5d5d4a95c06eca82db1dbe7822cb4d040c 100644 (file)
@@ -3173,6 +3173,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 {
        struct net *net;
 
+       inet_twsk_purge(&tcp_hashinfo, AF_INET);
+
        list_for_each_entry(net, net_exit_list, exit_list)
                tcp_fastopen_ctx_destroy(net);
 }
index 6366df7aaf2a6d655162a73ae1b19205a5ca0a23..6854bb1fb32b265ef4c0838267bf272c57f7601e 100644 (file)
@@ -531,7 +531,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
        newtp->tsoffset = treq->ts_off;
 #ifdef CONFIG_TCP_MD5SIG
        newtp->md5sig_info = NULL;      /*XXX*/
-       if (newtp->af_specific->md5_lookup(sk, newsk))
+       if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req)))
                newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
 #endif
        if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
index 9ede847f4199844c5884e3f62ea450562072a0a7..1ca2f28c9981018e6cfaee3435d711467af6048d 100644 (file)
@@ -82,6 +82,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 
        NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
                      tcp_skb_pcount(skb));
+       tcp_check_space(sk);
 }
 
 /* SND.NXT, if window was not shrunk or the amount of shrunk was less than one
index fbab921670cc91a121f2ab8cd7aa6ecfd3748535..9a8e014d9b5b99e3a8b970a2c6b1e545f542bee5 100644 (file)
@@ -74,27 +74,32 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
  *
  * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is
  * called multiple times. We favor the information from the most recently
- * sent skb, i.e., the skb with the highest prior_delivered count.
+ * sent skb, i.e., the skb with the latest send timestamp and the highest
+ * sequence number.
  */
 void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
                            struct rate_sample *rs)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+       u64 tx_tstamp;
 
        if (!scb->tx.delivered_mstamp)
                return;
 
+       tx_tstamp = tcp_skb_timestamp_us(skb);
        if (!rs->prior_delivered ||
-           after(scb->tx.delivered, rs->prior_delivered)) {
+           tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
+                              scb->end_seq, rs->last_end_seq)) {
                rs->prior_delivered_ce  = scb->tx.delivered_ce;
                rs->prior_delivered  = scb->tx.delivered;
                rs->prior_mstamp     = scb->tx.delivered_mstamp;
                rs->is_app_limited   = scb->tx.is_app_limited;
                rs->is_retrans       = scb->sacked & TCPCB_RETRANS;
+               rs->last_end_seq     = scb->end_seq;
 
                /* Record send time of most recently ACKed packet: */
-               tp->first_tx_mstamp  = tcp_skb_timestamp_us(skb);
+               tp->first_tx_mstamp  = tx_tstamp;
                /* Find the duration of the "send phase" of this window: */
                rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
                                                     scb->tx.first_tx_mstamp);
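
tcp_rate_skb_delivered() now orders delivery samples by send timestamp with the sequence number as a tie-breaker, since pacing can stamp several skbs with the same send time. A sketch mirroring the tcp_skb_sent_after() semantics the hunk relies on, using a signed difference for the wrap-safe sequence comparison:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Later send time wins; on equal timestamps the higher (wrap-safe)
     * sequence number wins.
     */
    static bool sent_after(uint64_t t1, uint64_t t2,
                           uint32_t seq1, uint32_t seq2)
    {
            return t1 > t2 ||
                   (t1 == t2 && (int32_t)(seq1 - seq2) > 0);
    }

    int main(void)
    {
            printf("%d\n", sent_after(100, 100, 2000, 1000));  /* 1 */
            return 0;
    }
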
index 55d604c9b3b3ea9898fa660cd46da14cb849d5f8..f2120e92caf15d43252102fba31c7c0153c3a446 100644 (file)
@@ -482,7 +482,6 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
        struct page *page;
        struct sk_buff *trailer;
        int tailen = esp->tailen;
-       unsigned int allocsz;
 
        if (x->encap) {
                int err = esp6_output_encap(x, skb, esp);
@@ -491,8 +490,8 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
                        return err;
        }
 
-       allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
-       if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+       if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
+           ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
                goto cow;
 
        if (!skb_cloned(skb)) {
index 4740afecf7c6291360d969569771235c8af8b1d1..32ccac10bd625bce956f3ec6d636910722f28656 100644 (file)
@@ -308,7 +308,7 @@ not_unique:
        return -EADDRNOTAVAIL;
 }
 
-static u32 inet6_sk_port_offset(const struct sock *sk)
+static u64 inet6_sk_port_offset(const struct sock *sk)
 {
        const struct inet_sock *inet = inet_sk(sk);
 
@@ -320,7 +320,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk)
 int inet6_hash_connect(struct inet_timewait_death_row *death_row,
                       struct sock *sk)
 {
-       u32 port_offset = 0;
+       u64 port_offset = 0;
 
        if (!inet_sk(sk)->inet_num)
                port_offset = inet6_sk_port_offset(sk);
index 8753e9cec326433a43f1ca357a45384e3ad00c52..5136959b3dc5d64b3bbf2005f52ea1c67d342877 100644 (file)
@@ -724,6 +724,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 {
        struct ip6_tnl *tunnel = netdev_priv(dev);
        __be16 protocol;
+       __be16 flags;
 
        if (dev->type == ARPHRD_ETHER)
                IPCB(skb)->flags = 0;
@@ -733,16 +734,13 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
        else
                fl6->daddr = tunnel->parms.raddr;
 
-       if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
-               return -ENOMEM;
-
        /* Push GRE header. */
        protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
 
        if (tunnel->parms.collect_md) {
                struct ip_tunnel_info *tun_info;
                const struct ip_tunnel_key *key;
-               __be16 flags;
+               int tun_hlen;
 
                tun_info = skb_tunnel_info_txcheck(skb);
                if (IS_ERR(tun_info) ||
@@ -760,21 +758,27 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
                dsfield = key->tos;
                flags = key->tun_flags &
                        (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
-               tunnel->tun_hlen = gre_calc_hlen(flags);
+               tun_hlen = gre_calc_hlen(flags);
 
-               gre_build_header(skb, tunnel->tun_hlen,
+               if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen))
+                       return -ENOMEM;
+
+               gre_build_header(skb, tun_hlen,
                                 flags, protocol,
                                 tunnel_id_to_key32(tun_info->key.tun_id),
-                                (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+                                (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
                                                      : 0);
 
        } else {
-               if (tunnel->parms.o_flags & TUNNEL_SEQ)
-                       tunnel->o_seqno++;
+               if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
+                       return -ENOMEM;
+
+               flags = tunnel->parms.o_flags;
 
-               gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+               gre_build_header(skb, tunnel->tun_hlen, flags,
                                 protocol, tunnel->parms.o_key,
-                                htonl(tunnel->o_seqno));
+                                (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
+                                                     : 0);
        }
 
        return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
@@ -1052,7 +1056,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
        /* Push GRE header. */
        proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
                                           : htons(ETH_P_ERSPAN2);
-       gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
+       gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
 
        /* TooBig packet may have updated dst->dev's mtu */
        if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
index e23f058166af582af3adf9ce1267286cf334ff7b..fa63ef2bd99cca97aff455d4b39d267f69365b35 100644 (file)
@@ -485,7 +485,7 @@ int ip6_forward(struct sk_buff *skb)
                goto drop;
 
        if (!net->ipv6.devconf_all->disable_policy &&
-           !idev->cnf.disable_policy &&
+           (!idev || !idev->cnf.disable_policy) &&
            !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
index a9775c830194dc884746e4eaa246e205bfff8f6a..4e74bc61a3db8aba4a10d236d24587fa01b516f4 100644 (file)
@@ -1653,7 +1653,6 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
        mifi_t mifi;
        struct net *net = sock_net(sk);
        struct mr_table *mrt;
-       bool do_wrmifwhole;
 
        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1761,6 +1760,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
 #ifdef CONFIG_IPV6_PIMSM_V2
        case MRT6_PIM:
        {
+               bool do_wrmifwhole;
                int v;
 
                if (optlen != sizeof(v))
index 909f937befd71fce194517d44cb9a4c5e2876360..7f695c39d9a8c4410e619b88add23e39f2beabae 100644 (file)
@@ -460,10 +460,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
                                newpsl->sl_addr[i] = psl->sl_addr[i];
                        atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
                                   &sk->sk_omem_alloc);
-                       kfree_rcu(psl, rcu);
                }
+               rcu_assign_pointer(pmc->sflist, newpsl);
+               kfree_rcu(psl, rcu);
                psl = newpsl;
-               rcu_assign_pointer(pmc->sflist, psl);
        }
        rv = 1; /* > 0 for insert logic below if sl_count is 0 */
        for (i = 0; i < psl->sl_count; i++) {
@@ -565,12 +565,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
                               psl->sl_count, psl->sl_addr, 0);
                atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
                           &sk->sk_omem_alloc);
-               kfree_rcu(psl, rcu);
        } else {
                ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
        }
-       mutex_unlock(&idev->mc_lock);
        rcu_assign_pointer(pmc->sflist, newpsl);
+       mutex_unlock(&idev->mc_lock);
+       kfree_rcu(psl, rcu);
        pmc->sfmode = gsf->gf_fmode;
        err = 0;
 done:
index 1da332450d98eb7f6e6ca37c260c6562f72c2cbe..8ce60ab89015df2f85d3b09d643acfd43ef628b4 100644 (file)
@@ -24,14 +24,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
 {
        const struct ipv6hdr *iph = ipv6_hdr(skb);
        struct sock *sk = sk_to_full_sk(sk_partial);
+       struct net_device *dev = skb_dst(skb)->dev;
        struct flow_keys flkeys;
        unsigned int hh_len;
        struct dst_entry *dst;
        int strict = (ipv6_addr_type(&iph->daddr) &
                      (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
        struct flowi6 fl6 = {
-               .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
-                       strict ? skb_dst(skb)->dev->ifindex : 0,
                .flowi6_mark = skb->mark,
                .flowi6_uid = sock_net_uid(net, sk),
                .daddr = iph->daddr,
@@ -39,6 +38,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
        };
        int err;
 
+       if (sk && sk->sk_bound_dev_if)
+               fl6.flowi6_oif = sk->sk_bound_dev_if;
+       else if (strict)
+               fl6.flowi6_oif = dev->ifindex;
+       else
+               fl6.flowi6_oif = l3mdev_master_ifindex(dev);
+
        fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys);
        dst = ip6_route_output(net, sk, &fl6);
        err = dst->error;
index 2fa10e60cccd0a5e416fcf22430fc0c8e259491a..c4b6ce017d5e3bf63c66a53df3d46c08370aed23 100644 (file)
@@ -3292,6 +3292,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
        int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
        int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
        unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
+       unsigned int val;
        int entries;
 
        entries = dst_entries_get_fast(ops);
@@ -3302,13 +3303,13 @@ static int ip6_dst_gc(struct dst_ops *ops)
            entries <= rt_max_size)
                goto out;
 
-       net->ipv6.ip6_rt_gc_expire++;
-       fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
+       fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
        entries = dst_entries_get_slow(ops);
        if (entries < ops->gc_thresh)
-               net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
+               atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1);
 out:
-       net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
+       val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
+       atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
        return entries > rt_max_size;
 }
 
@@ -4484,7 +4485,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
        struct inet6_dev *idev;
        int type;
 
-       if (netif_is_l3_master(skb->dev) &&
+       if (netif_is_l3_master(skb->dev) ||
            dst->dev == net->loopback_dev)
                idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
        else
@@ -6509,7 +6510,7 @@ static int __net_init ip6_route_net_init(struct net *net)
        net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
        net->ipv6.sysctl.skip_notify_on_dev_down = 0;
 
-       net->ipv6.ip6_rt_gc_expire = 30*HZ;
+       atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);
 
        ret = 0;
 out:
index d1b61d00368e1f58725e9997f74a0b144901277e..9cc123f000fbcfbeff7728bfee5339d6dd6470f9 100644 (file)
@@ -170,7 +170,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
                goto out;
 
        ret = NULL;
-       req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb);
+       req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops,
+                                    &tcp_request_sock_ipv6_ops, sk, skb);
        if (!req)
                goto out;
 
index 13678d3908fac9990e5b0c0df87fa4cca685baaf..faaddaf43c90b96e7a2bc9fbad7941ae5ada1b3c 100644 (file)
@@ -2207,9 +2207,15 @@ static void __net_exit tcpv6_net_exit(struct net *net)
        inet_ctl_sock_destroy(net->ipv6.tcp_sk);
 }
 
+static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
+{
+       inet_twsk_purge(&tcp_hashinfo, AF_INET6);
+}
+
 static struct pernet_operations tcpv6_net_ops = {
        .init       = tcpv6_net_init,
        .exit       = tcpv6_net_exit,
+       .exit_batch = tcpv6_net_exit_batch,
 };
 
 int __init tcpv6_init(void)
index fd51db3be91c4064b3f7b5a6f8297e2a9c0843de..339d95df19d324349f7f2791dd431631ad3493a5 100644 (file)
@@ -2826,8 +2826,10 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb
        void *ext_hdrs[SADB_EXT_MAX];
        int err;
 
-       pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
-                       BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
+       err = pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
+                             BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
+       if (err)
+               return err;
 
        memset(ext_hdrs, 0, sizeof(ext_hdrs));
        err = parse_exthdrs(skb, hdr, ext_hdrs);
@@ -2898,7 +2900,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t)
                        break;
                if (!aalg->pfkey_supported)
                        continue;
-               if (aalg_tmpl_set(t, aalg))
+               if (aalg_tmpl_set(t, aalg) && aalg->available)
                        sz += sizeof(struct sadb_comb);
        }
        return sz + sizeof(struct sadb_prop);
@@ -2916,7 +2918,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
                if (!ealg->pfkey_supported)
                        continue;
 
-               if (!(ealg_tmpl_set(t, ealg)))
+               if (!(ealg_tmpl_set(t, ealg) && ealg->available))
                        continue;
 
                for (k = 1; ; k++) {
@@ -2927,7 +2929,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
                        if (!aalg->pfkey_supported)
                                continue;
 
-                       if (aalg_tmpl_set(t, aalg))
+                       if (aalg_tmpl_set(t, aalg) && aalg->available)
                                sz += sizeof(struct sadb_comb);
                }
        }
index 4eb8892fb2ffebfff81919520786971276e9870f..ca10916340b098fb1f7e8c577bcca1eec4f4814d 100644 (file)
@@ -147,7 +147,7 @@ int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
 
        dev = dev_get_by_index_rcu(net, ifindex);
        while (dev && !netif_is_l3_master(dev))
-               dev = netdev_master_upper_dev_get(dev);
+               dev = netdev_master_upper_dev_get_rcu(dev);
 
        return dev ? dev->ifindex : 0;
 }
index 9479f2787ea79a1c466dddcea98de53d6b0a722e..88d9cc945a216c4f8b412b26965d37dbebd093fa 100644 (file)
@@ -441,7 +441,7 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
 #define PRINT_HT_CAP(_cond, _str) \
        do { \
        if (_cond) \
-                       p += scnprintf(p, sizeof(buf)+buf-p, "\t" _str "\n"); \
+                       p += scnprintf(p, bufsz + buf - p, "\t" _str "\n"); \
        } while (0)
        char *buf, *p;
        int i;
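Reviewer note: the sta_ht_capa_read() fix above is a textbook sizeof-on-a-pointer bug. buf is heap-allocated, so sizeof(buf) is the pointer size rather than the buffer size, and the scnprintf() bound collapsed to a few bytes. A standalone reproduction (variable names are illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            size_t bufsz = 512;
            char *buf = malloc(bufsz);
            char *p = buf;

            /* sizeof(buf) is the pointer size, not the allocation size */
            printf("sizeof(buf) = %zu, bufsz = %zu\n", sizeof(buf), bufsz);

            /* wrong bound: sizeof(buf) + buf - p is 8 once p == buf */
            printf("wrong bound: %zu\n", (size_t)(sizeof(buf) + buf - p));
            /* correct bound: remaining room in the allocation */
            printf("right bound: %zu\n", (size_t)(bufsz + buf - p));

            free(buf);
            return 0;
    }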
index 1b30c724ca8d1b56a659b20d20f48bfcb676ac2b..dc8aec1a5d3dd6e78bef08094301a90aa7369bab 100644 (file)
@@ -3657,6 +3657,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
                                cbss->transmitted_bss->bssid);
                bss_conf->bssid_indicator = cbss->max_bssid_indicator;
                bss_conf->bssid_index = cbss->bssid_index;
+       } else {
+               bss_conf->nontransmitted = false;
+               memset(bss_conf->transmitter_bssid, 0,
+                      sizeof(bss_conf->transmitter_bssid));
+               bss_conf->bssid_indicator = 0;
+               bss_conf->bssid_index = 0;
        }
 
        /*
index beb6b92eb7804759dddcc3cef2efb68a4338dc9d..88d797fa82ff64c0d1aad14a45bf6ff05ea1c619 100644 (file)
@@ -1405,8 +1405,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
                goto dont_reorder;
 
        /* not part of a BA session */
-       if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK &&
-           ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
+       if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK)
                goto dont_reorder;
 
        /* new, potentially un-ordered, ampdu frame - process it */
index f0702d920d8d57c2361bd379e3030a7f2c26d18c..e22b0cbb2f35385c7111a63c935b643ace11b6e4 100644 (file)
@@ -93,13 +93,13 @@ out_release:
 static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
        DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
-       const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr);
        int rc, addrlen = msg->msg_namelen;
        struct sock *sk = sock->sk;
        struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
        struct mctp_skb_cb *cb;
        struct mctp_route *rt;
-       struct sk_buff *skb;
+       struct sk_buff *skb = NULL;
+       int hlen;
 
        if (addr) {
                const u8 tagbits = MCTP_TAG_MASK | MCTP_TAG_OWNER |
@@ -129,6 +129,34 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        if (addr->smctp_network == MCTP_NET_ANY)
                addr->smctp_network = mctp_default_net(sock_net(sk));
 
+       /* direct addressing */
+       if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) {
+               DECLARE_SOCKADDR(struct sockaddr_mctp_ext *,
+                                extaddr, msg->msg_name);
+               struct net_device *dev;
+
+               rc = -EINVAL;
+               rcu_read_lock();
+               dev = dev_get_by_index_rcu(sock_net(sk), extaddr->smctp_ifindex);
+               /* check for correct halen */
+               if (dev && extaddr->smctp_halen == dev->addr_len) {
+                       hlen = LL_RESERVED_SPACE(dev) + sizeof(struct mctp_hdr);
+                       rc = 0;
+               }
+               rcu_read_unlock();
+               if (rc)
+                       goto err_free;
+               rt = NULL;
+       } else {
+               rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
+                                      addr->smctp_addr.s_addr);
+               if (!rt) {
+                       rc = -EHOSTUNREACH;
+                       goto err_free;
+               }
+               hlen = LL_RESERVED_SPACE(rt->dev->dev) + sizeof(struct mctp_hdr);
+       }
+
        skb = sock_alloc_send_skb(sk, hlen + 1 + len,
                                  msg->msg_flags & MSG_DONTWAIT, &rc);
        if (!skb)
@@ -147,8 +175,8 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        cb = __mctp_cb(skb);
        cb->net = addr->smctp_network;
 
-       /* direct addressing */
-       if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) {
+       if (!rt) {
+               /* fill extended address in cb */
                DECLARE_SOCKADDR(struct sockaddr_mctp_ext *,
                                 extaddr, msg->msg_name);
 
@@ -159,17 +187,9 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
                }
 
                cb->ifindex = extaddr->smctp_ifindex;
+               /* smctp_halen is checked above */
                cb->halen = extaddr->smctp_halen;
                memcpy(cb->haddr, extaddr->smctp_haddr, cb->halen);
-
-               rt = NULL;
-       } else {
-               rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
-                                      addr->smctp_addr.s_addr);
-               if (!rt) {
-                       rc = -EHOSTUNREACH;
-                       goto err_free;
-               }
        }
 
        rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr,
index f49be882e98e2c814aeeb66e2537fbfff99c16ab..99a3bda8852f83a73b4fa4310a8cf85233b219d8 100644 (file)
@@ -313,6 +313,7 @@ void mctp_dev_hold(struct mctp_dev *mdev)
 void mctp_dev_put(struct mctp_dev *mdev)
 {
        if (mdev && refcount_dec_and_test(&mdev->refs)) {
+               kfree(mdev->addrs);
                dev_put(mdev->dev);
                kfree_rcu(mdev, rcu);
        }
@@ -441,7 +442,6 @@ static void mctp_unregister(struct net_device *dev)
 
        mctp_route_remove_dev(mdev);
        mctp_neigh_remove_dev(mdev);
-       kfree(mdev->addrs);
 
        mctp_dev_put(mdev);
 }
index d5e7db83fe9d30f8912164b3d0745413c438e981..3b24b8d18b5b55d3f23b8f8af6cb7edb563dc171 100644 (file)
@@ -503,6 +503,11 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
 
        if (cb->ifindex) {
                /* direct route; use the hwaddr we stashed in sendmsg */
+               if (cb->halen != skb->dev->addr_len) {
+                       /* sanity check, sendmsg should have already caught this */
+                       kfree_skb(skb);
+                       return -EMSGSIZE;
+               }
                daddr = cb->haddr;
        } else {
                /* If lookup fails let the device handle daddr==NULL */
@@ -512,7 +517,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
 
        rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
                             daddr, skb->dev->dev_addr, skb->len);
-       if (rc) {
+       if (rc < 0) {
                kfree_skb(skb);
                return -EHOSTUNREACH;
        }
@@ -756,7 +761,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
 {
        const unsigned int hlen = sizeof(struct mctp_hdr);
        struct mctp_hdr *hdr, *hdr2;
-       unsigned int pos, size;
+       unsigned int pos, size, headroom;
        struct sk_buff *skb2;
        int rc;
        u8 seq;
@@ -770,6 +775,9 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
                return -EMSGSIZE;
        }
 
+       /* keep same headroom as the original skb */
+       headroom = skb_headroom(skb);
+
        /* we've got the header */
        skb_pull(skb, hlen);
 
@@ -777,7 +785,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
                /* size of message payload */
                size = min(mtu - hlen, skb->len - pos);
 
-               skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
+               skb2 = alloc_skb(headroom + hlen + size, GFP_KERNEL);
                if (!skb2) {
                        rc = -ENOMEM;
                        break;
@@ -793,7 +801,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
                        skb_set_owner_w(skb2, skb->sk);
 
                /* establish packet */
-               skb_reserve(skb2, MCTP_HEADER_MAXLEN);
+               skb_reserve(skb2, headroom);
                skb_reset_network_header(skb2);
                skb_put(skb2, hlen + size);
                skb2->transport_header = skb2->network_header + hlen;
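Reviewer note on the MCTP fragmentation hunks: fragments are now allocated with the headroom of the original skb, which sendmsg computes per device via LL_RESERVED_SPACE(), instead of the fixed MCTP_HEADER_MAXLEN. The per-fragment sizing is simple arithmetic; a userspace sketch under assumed constants (a 4-byte MCTP header and an invented 22-byte link-layer reserve):

    #include <stdio.h>

    #define min(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
            unsigned int hlen = 4;        /* assumed sizeof(struct mctp_hdr) */
            unsigned int headroom = 22;   /* assumed link-layer reserve */
            unsigned int mtu = 68, payload = 200, pos = 0;
            int frag = 0;

            while (pos < payload) {
                    unsigned int size = min(mtu - hlen, payload - pos);

                    printf("frag %d: alloc %u (headroom %u + hdr %u + data %u)\n",
                           frag++, headroom + hlen + size, headroom, hlen, size);
                    pos += size;
            }
            return 0;
    }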
index 325383646f5c0061f049603aa4a965ac40651f7d..b548cec86c9d87f0c063255461d93c891a09f476 100644 (file)
@@ -107,7 +107,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                        ptr += 2;
                }
                if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
-                       mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+                       mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
                        mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
                        ptr += 2;
                }
@@ -221,7 +221,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 
                        if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
                                mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
-                               mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+                               mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
                                ptr += 2;
                        }
 
@@ -1240,7 +1240,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
                WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
 }
 
-u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
+__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
 {
        struct csum_pseudo_header header;
        __wsum csum;
@@ -1256,15 +1256,25 @@ u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
        header.csum = 0;
 
        csum = csum_partial(&header, sizeof(header), sum);
-       return (__force u16)csum_fold(csum);
+       return csum_fold(csum);
 }
 
-static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+static __sum16 mptcp_make_csum(const struct mptcp_ext *mpext)
 {
        return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len,
                                 ~csum_unfold(mpext->csum));
 }
 
+static void put_len_csum(u16 len, __sum16 csum, void *data)
+{
+       __sum16 *sumptr = data + 2;
+       __be16 *ptr = data;
+
+       put_unaligned_be16(len, ptr);
+
+       put_unaligned(csum, sumptr);
+}
+
 void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
                         struct mptcp_out_options *opts)
 {
@@ -1340,8 +1350,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
                        put_unaligned_be32(mpext->subflow_seq, ptr);
                        ptr += 1;
                        if (opts->csum_reqd) {
-                               put_unaligned_be32(mpext->data_len << 16 |
-                                                  mptcp_make_csum(mpext), ptr);
+                               put_len_csum(mpext->data_len,
+                                            mptcp_make_csum(mpext),
+                                            ptr);
                        } else {
                                put_unaligned_be32(mpext->data_len << 16 |
                                                   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
@@ -1392,11 +1403,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
                        goto mp_capable_done;
 
                if (opts->csum_reqd) {
-                       put_unaligned_be32(opts->data_len << 16 |
-                                          __mptcp_make_csum(opts->data_seq,
-                                                            opts->subflow_seq,
-                                                            opts->data_len,
-                                                            ~csum_unfold(opts->csum)), ptr);
+                       put_len_csum(opts->data_len,
+                                    __mptcp_make_csum(opts->data_seq,
+                                                      opts->subflow_seq,
+                                                      opts->data_len,
+                                                      ~csum_unfold(opts->csum)),
+                                    ptr);
                } else {
                        put_unaligned_be32(opts->data_len << 16 |
                                           TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
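Reviewer note on the checksum byte-order hunks: csum_fold() yields a __sum16 that is already a big-endian byte sequence in memory, so reading or writing it through the *_be16 accessors swaps it a second time on little-endian hosts. The raw get_unaligned()/put_unaligned() copies preserve it. A standalone demonstration:

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    int main(void)
    {
            /* Pretend csum_fold() produced this 16-bit value; it is the
             * same byte sequence in memory on every architecture.
             */
            uint8_t wire[2] = { 0x12, 0x34 };
            uint16_t raw, swapped;

            memcpy(&raw, wire, 2);                         /* get_unaligned() style */
            swapped = (uint16_t)(wire[0] << 8 | wire[1]);  /* get_unaligned_be16() style */

            printf("raw copy: 0x%04x, be16 read: 0x%04x%s\n", raw, swapped,
                   raw == swapped ? "" : "  <- differ on little-endian");
            return 0;
    }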
index 01809eef29b4bb50f25bb33238a331e942c4e646..aa51b100e03353d0dc2b8f170662bfbe9ad51370 100644 (file)
@@ -178,14 +178,13 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
        struct mptcp_pm_data *pm = &msk->pm;
        bool update_subflows;
 
-       update_subflows = (ssk->sk_state == TCP_CLOSE) &&
-                         (subflow->request_join || subflow->mp_join);
+       update_subflows = subflow->request_join || subflow->mp_join;
        if (!READ_ONCE(pm->work_pending) && !update_subflows)
                return;
 
        spin_lock_bh(&pm->lock);
        if (update_subflows)
-               pm->subflows--;
+               __mptcp_pm_close_subflow(msk);
 
        /* Even if this subflow is not really established, tell the PM to try
         * to pick the next ones, if possible.
index 3c1a3036550f8410ba31d4d2c960d27c29c90186..5655a63aa6a8b216e77fca208e99fb42b33ad666 100644 (file)
@@ -443,7 +443,8 @@ struct mptcp_subflow_context {
                can_ack : 1,        /* only after processing the remote key */
                disposable : 1,     /* ctx can be free at ulp release time */
                stale : 1,          /* unable to snd/rcv data, do not use for xmit */
-               local_id_valid : 1; /* local_id is correctly initialized */
+               local_id_valid : 1, /* local_id is correctly initialized */
+               valid_csum_seen : 1;        /* at least one csum validated */
        enum mptcp_data_avail data_avail;
        u32     remote_nonce;
        u64     thmac;
@@ -723,7 +724,7 @@ void mptcp_token_destroy(struct mptcp_sock *msk);
 void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
 
 void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
-u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
+__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
 
 void __init mptcp_pm_init(void);
 void mptcp_pm_data_init(struct mptcp_sock *msk);
@@ -833,6 +834,20 @@ unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk);
 
+/* called under PM lock */
+static inline void __mptcp_pm_close_subflow(struct mptcp_sock *msk)
+{
+       if (--msk->pm.subflows < mptcp_pm_get_subflows_max(msk))
+               WRITE_ONCE(msk->pm.accept_subflow, true);
+}
+
+static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk)
+{
+       spin_lock_bh(&msk->pm.lock);
+       __mptcp_pm_close_subflow(msk);
+       spin_unlock_bh(&msk->pm.lock);
+}
+
 void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
 void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
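Reviewer note: the new __mptcp_pm_close_subflow() helper pairs the counter decrement with reopening the accept window, which the old bare pm->subflows-- in pm.c did not do. A tiny sketch of the invariant (struct and field names abbreviated from the kernel ones):

    #include <stdio.h>
    #include <stdbool.h>

    struct pm {
            unsigned int subflows;
            unsigned int subflows_max;
            bool accept_subflow;
    };

    /* mirror of __mptcp_pm_close_subflow(): decrement, then re-enable
     * accepting joins once we are back under the configured limit
     */
    static void close_subflow(struct pm *pm)
    {
            if (--pm->subflows < pm->subflows_max)
                    pm->accept_subflow = true;
    }

    int main(void)
    {
            struct pm pm = { .subflows = 2, .subflows_max = 2 };

            close_subflow(&pm);
            printf("subflows=%u accept=%d\n", pm.subflows, pm.accept_subflow);
            return 0;
    }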
 
index aba260f547daa1a10f2f5d32294b54b0a1f2e10b..be76ada89d9692278f0474828108489f3afd80fd 100644 (file)
@@ -888,7 +888,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        u32 offset, seq, delta;
-       u16 csum;
+       __sum16 csum;
        int len;
 
        if (!csum_reqd)
@@ -955,11 +955,14 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
                                 subflow->map_data_csum);
        if (unlikely(csum)) {
                MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
-               subflow->send_mp_fail = 1;
-               MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
+               if (subflow->mp_join || subflow->valid_csum_seen) {
+                       subflow->send_mp_fail = 1;
+                       MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
+               }
                return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
        }
 
+       subflow->valid_csum_seen = 1;
        return MAPPING_OK;
 }
 
@@ -1141,6 +1144,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
        }
 }
 
+static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
+{
+       struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+       if (subflow->mp_join)
+               return false;
+       else if (READ_ONCE(msk->csum_enabled))
+               return !subflow->valid_csum_seen;
+       else
+               return !subflow->fully_established;
+}
+
 static bool subflow_check_data_avail(struct sock *ssk)
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -1218,7 +1233,7 @@ fallback:
                return true;
        }
 
-       if (subflow->mp_join || subflow->fully_established) {
+       if (!subflow_can_fallback(subflow)) {
                /* fatal protocol error, close the socket.
                 * subflow_error_report() will introduce the appropriate barriers
                 */
@@ -1422,20 +1437,20 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
        struct sockaddr_storage addr;
        int remote_id = remote->id;
        int local_id = loc->id;
+       int err = -ENOTCONN;
        struct socket *sf;
        struct sock *ssk;
        u32 remote_token;
        int addrlen;
        int ifindex;
        u8 flags;
-       int err;
 
        if (!mptcp_is_fully_established(sk))
-               return -ENOTCONN;
+               goto err_out;
 
        err = mptcp_subflow_create_socket(sk, &sf);
        if (err)
-               return err;
+               goto err_out;
 
        ssk = sf->sk;
        subflow = mptcp_subflow_ctx(ssk);
@@ -1492,6 +1507,12 @@ failed_unlink:
 failed:
        subflow->disposable = 1;
        sock_release(sf);
+
+err_out:
+       /* we account subflows before the creation, and these failures will not
+        * be caught by sk_state_change()
+        */
+       mptcp_pm_close_subflow(msk);
        return err;
 }
 
index 2c467c422dc6343a296ccfff9097069440fd66cd..fb67f1ca2495b3e5e157d72608d0b6916a49bc61 100644 (file)
@@ -1495,7 +1495,7 @@ int __init ip_vs_conn_init(void)
        pr_info("Connection hash table configured "
                "(size=%d, memory=%ldKbytes)\n",
                ip_vs_conn_tab_size,
-               (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024);
+               (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024);
        IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n",
                  sizeof(struct ip_vs_conn));
 
index 8ec55cd72572e0cca076631e2cc1c11f0c2b86f6..204a5cdff5b114a5dd1e6ab791785e4443733ab1 100644 (file)
@@ -556,24 +556,14 @@ static bool tcp_in_window(struct nf_conn *ct,
                        }
 
                }
-       } else if (((state->state == TCP_CONNTRACK_SYN_SENT
-                    && dir == IP_CT_DIR_ORIGINAL)
-                  || (state->state == TCP_CONNTRACK_SYN_RECV
-                    && dir == IP_CT_DIR_REPLY))
-                  && after(end, sender->td_end)) {
+       } else if (tcph->syn &&
+                  after(end, sender->td_end) &&
+                  (state->state == TCP_CONNTRACK_SYN_SENT ||
+                   state->state == TCP_CONNTRACK_SYN_RECV)) {
                /*
                 * RFC 793: "if a TCP is reinitialized ... then it need
                 * not wait at all; it must only be sure to use sequence
                 * numbers larger than those recently used."
-                */
-               sender->td_end =
-               sender->td_maxend = end;
-               sender->td_maxwin = (win == 0 ? 1 : win);
-
-               tcp_options(skb, dataoff, tcph, sender);
-       } else if (tcph->syn && dir == IP_CT_DIR_REPLY &&
-                  state->state == TCP_CONNTRACK_SYN_SENT) {
-               /* Retransmitted syn-ack, or syn (simultaneous open).
                 *
                 * Re-init state for this direction, just like for the first
                 * syn(-ack) reply, it might differ in seq, ack or tcp options.
@@ -581,7 +571,8 @@ static bool tcp_in_window(struct nf_conn *ct,
                tcp_init_sender(sender, receiver,
                                skb, dataoff, tcph,
                                end, win);
-               if (!tcph->ack)
+
+               if (dir == IP_CT_DIR_REPLY && !tcph->ack)
                        return true;
        }
 
index 3e1afd10a9b60d21e3cb6b8f4a73bf0ac43e3ebc..55aa55b252b2097cd6fad2eb7285626238b272b0 100644 (file)
@@ -823,7 +823,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
-#if IS_ENABLED(CONFIG_NFT_FLOW_OFFLOAD)
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
        [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
                .procname       = "nf_flowtable_udp_timeout",
                .maxlen         = sizeof(unsigned int),
index 3db256da919bad534a5f598b6befda933edab15e..f2def06d10709a4fc7a55729e7fc147ad5bc5e97 100644 (file)
@@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init);
 
 static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
 {
-       tcp->state = TCP_CONNTRACK_ESTABLISHED;
        tcp->seen[0].td_maxwin = 0;
        tcp->seen[1].td_maxwin = 0;
 }
 
-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+static void flow_offload_fixup_ct(struct nf_conn *ct)
 {
        struct net *net = nf_ct_net(ct);
        int l4num = nf_ct_protonum(ct);
@@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
        if (l4num == IPPROTO_TCP) {
                struct nf_tcp_net *tn = nf_tcp_pernet(net);
 
-               timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+               flow_offload_fixup_tcp(&ct->proto.tcp);
+
+               timeout = tn->timeouts[ct->proto.tcp.state];
                timeout -= tn->offload_timeout;
        } else if (l4num == IPPROTO_UDP) {
                struct nf_udp_net *tn = nf_udp_pernet(net);
@@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
                WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
 }
 
-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
-{
-       if (nf_ct_protonum(ct) == IPPROTO_TCP)
-               flow_offload_fixup_tcp(&ct->proto.tcp);
-}
-
-static void flow_offload_fixup_ct(struct nf_conn *ct)
-{
-       flow_offload_fixup_ct_state(ct);
-       flow_offload_fixup_ct_timeout(ct);
-}
-
 static void flow_offload_route_release(struct flow_offload *flow)
 {
        nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
@@ -335,8 +324,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
        u32 timeout;
 
        timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
-       if (READ_ONCE(flow->timeout) != timeout)
+       if (timeout - READ_ONCE(flow->timeout) > HZ)
                WRITE_ONCE(flow->timeout, timeout);
+       else
+               return;
 
        if (likely(!nf_flowtable_hw_offload(flow_table)))
                return;
@@ -359,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
        rhashtable_remove_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
                               nf_flow_offload_rhash_params);
-
-       clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
-
-       if (nf_flow_has_expired(flow))
-               flow_offload_fixup_ct(flow->ct);
-       else
-               flow_offload_fixup_ct_timeout(flow->ct);
-
        flow_offload_free(flow);
 }
 
 void flow_offload_teardown(struct flow_offload *flow)
 {
+       clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
        set_bit(NF_FLOW_TEARDOWN, &flow->flags);
-
-       flow_offload_fixup_ct_state(flow->ct);
+       flow_offload_fixup_ct(flow->ct);
 }
 EXPORT_SYMBOL_GPL(flow_offload_teardown);
 
@@ -438,33 +421,12 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
        return err;
 }
 
-static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
-{
-       struct dst_entry *dst;
-
-       if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
-           tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
-               dst = tuple->dst_cache;
-               if (!dst_check(dst, tuple->dst_cookie))
-                       return true;
-       }
-
-       return false;
-}
-
-static bool nf_flow_has_stale_dst(struct flow_offload *flow)
-{
-       return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
-              flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
-}
-
 static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
                                    struct flow_offload *flow, void *data)
 {
        if (nf_flow_has_expired(flow) ||
-           nf_ct_is_dying(flow->ct) ||
-           nf_flow_has_stale_dst(flow))
-               set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+           nf_ct_is_dying(flow->ct))
+               flow_offload_teardown(flow);
 
        if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
                if (test_bit(NF_FLOW_HW, &flow->flags)) {
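Reviewer note on flow_offload_refresh(): the rewritten guard only touches flow->timeout (and only then considers a hardware refresh) when the new stamp is more than one second ahead, and the unsigned subtraction keeps the comparison correct across jiffies wraparound. A standalone sketch:

    #include <stdio.h>
    #include <stdint.h>

    #define HZ 100u   /* assumed tick rate */

    /* refresh only when the timeout would move forward by more than
     * one second; unsigned subtraction handles counter wraparound
     */
    static int should_refresh(uint32_t cur, uint32_t next)
    {
            return (uint32_t)(next - cur) > HZ;
    }

    int main(void)
    {
            printf("%d\n", should_refresh(1000, 1050));       /* 0: too soon */
            printf("%d\n", should_refresh(1000, 1201));       /* 1: > 1s ahead */
            printf("%d\n", should_refresh(0xffffffb0u, 100)); /* 1: across wrap */
            return 0;
    }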
index 32c0eb1b482122a7eca4171c17473a052bf4d4fb..b350fe9d00b0b921e36c98446a28a277bdb0dc76 100644 (file)
@@ -248,6 +248,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
        return true;
 }
 
+static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
+{
+       if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
+           tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
+               return true;
+
+       return dst_check(tuple->dst_cache, tuple->dst_cookie);
+}
+
 static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
                                      const struct nf_hook_state *state,
                                      struct dst_entry *dst)
@@ -367,6 +376,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
        if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
                return NF_ACCEPT;
 
+       if (!nf_flow_dst_check(&tuplehash->tuple)) {
+               flow_offload_teardown(flow);
+               return NF_ACCEPT;
+       }
+
        if (skb_try_make_writable(skb, thoff + hdrsize))
                return NF_DROP;
 
@@ -624,6 +638,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
        if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
                return NF_ACCEPT;
 
+       if (!nf_flow_dst_check(&tuplehash->tuple)) {
+               flow_offload_teardown(flow);
+               return NF_ACCEPT;
+       }
+
        if (skb_try_make_writable(skb, thoff + hdrsize))
                return NF_DROP;
 
index 5ddfdb2adaf1ebe68d56c8ccc2e5d14bffef8b59..a096b9fbbbdfffdbec2742087a82a243c323fa09 100644 (file)
@@ -5526,7 +5526,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
        int err, i, k;
 
        for (i = 0; i < set->num_exprs; i++) {
-               expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL);
+               expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL_ACCOUNT);
                if (!expr)
                        goto err_expr;
 
@@ -8342,16 +8342,7 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
 static bool nft_expr_reduce(struct nft_regs_track *track,
                            const struct nft_expr *expr)
 {
-       if (!expr->ops->reduce) {
-               pr_warn_once("missing reduce for expression %s ",
-                            expr->ops->type->name);
-               return false;
-       }
-
-       if (nft_reduce_is_readonly(expr))
-               return false;
-
-       return expr->ops->reduce(track, expr);
+       return false;
 }
 
 static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
@@ -9363,7 +9354,7 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
 }
 EXPORT_SYMBOL_GPL(nft_parse_u32_check);
 
-static unsigned int nft_parse_register(const struct nlattr *attr, u32 *preg)
+static int nft_parse_register(const struct nlattr *attr, u32 *preg)
 {
        unsigned int reg;
 
index 38caa66632b4e4cc331392f20d0ece18bb16e627..f590ee1c8a1be46e59c20ca3ab0d9faa23b7dc5b 100644 (file)
@@ -290,7 +290,7 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track,
        if (!track->regs[priv->sreg].selector)
                return false;
 
-       bitwise = nft_expr_priv(expr);
+       bitwise = nft_expr_priv(track->regs[priv->dreg].selector);
        if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector &&
            track->regs[priv->sreg].num_reg == 0 &&
            track->regs[priv->dreg].bitwise &&
@@ -442,7 +442,7 @@ static bool nft_bitwise_fast_reduce(struct nft_regs_track *track,
        if (!track->regs[priv->sreg].selector)
                return false;
 
-       bitwise = nft_expr_priv(expr);
+       bitwise = nft_expr_priv(track->regs[priv->dreg].selector);
        if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector &&
            track->regs[priv->dreg].bitwise &&
            track->regs[priv->dreg].bitwise->ops == expr->ops &&
index 9de1462e4ac4fd803f925ace0f45a3a52ce7997a..d657f999a11b6fd68d07a790259b3b4e8f92d9d1 100644 (file)
@@ -77,7 +77,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
                        invert = true;
        }
 
-       priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL);
+       priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL_ACCOUNT);
        if (!priv->list)
                return -ENOMEM;
 
index da9083605a61ad1107612e05f5c528cc3f945177..f4d3573e8782d2a69a63cc9438bfb1a39c105432 100644 (file)
@@ -62,7 +62,7 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
        struct nft_counter __percpu *cpu_stats;
        struct nft_counter *this_cpu;
 
-       cpu_stats = alloc_percpu(struct nft_counter);
+       cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT);
        if (cpu_stats == NULL)
                return -ENOMEM;
 
index 900d48c810a12654e011f3693252c0b04f493d5b..6f0b07fe648d04e8b3ac3a6560ea037759796237 100644 (file)
@@ -36,6 +36,15 @@ static void nft_default_forward_path(struct nf_flow_route *route,
        route->tuple[dir].xmit_type     = nft_xmit_type(dst_cache);
 }
 
+static bool nft_is_valid_ether_device(const struct net_device *dev)
+{
+       if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+           dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
+               return false;
+
+       return true;
+}
+
 static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
                                     const struct dst_entry *dst_cache,
                                     const struct nf_conn *ct,
@@ -47,6 +56,9 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
        struct neighbour *n;
        u8 nud_state;
 
+       if (!nft_is_valid_ether_device(dev))
+               goto out;
+
        n = dst_neigh_lookup(dst_cache, daddr);
        if (!n)
                return -1;
@@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
        if (!(nud_state & NUD_VALID))
                return -1;
 
+out:
        return dev_fill_forward_path(dev, ha, stack);
 }
 
@@ -78,15 +91,6 @@ struct nft_forward_info {
        enum flow_offload_xmit_type xmit_type;
 };
 
-static bool nft_is_valid_ether_device(const struct net_device *dev)
-{
-       if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
-           dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
-               return false;
-
-       return true;
-}
-
 static void nft_dev_path_info(const struct net_device_path_stack *stack,
                              struct nft_forward_info *info,
                              unsigned char *ha, struct nf_flowtable *flowtable)
@@ -119,7 +123,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
                                info->indev = NULL;
                                break;
                        }
-                       info->outdev = path->dev;
+                       if (!info->outdev)
+                               info->outdev = path->dev;
                        info->encap[info->num_encaps].id = path->encap.id;
                        info->encap[info->num_encaps].proto = path->encap.proto;
                        info->num_encaps++;
@@ -293,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
        case IPPROTO_TCP:
                tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
                                          sizeof(_tcph), &_tcph);
-               if (unlikely(!tcph || tcph->fin || tcph->rst))
+               if (unlikely(!tcph || tcph->fin || tcph->rst ||
+                            !nf_conntrack_tcp_established(ct)))
                        goto out;
                break;
        case IPPROTO_UDP:
index 43d0d4aadb1f65fd908dc38b16aae256a8a86778..bb15a55dad5c018a0b18876e72c11d1d12b9e80c 100644 (file)
@@ -30,7 +30,7 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
        u64 last_jiffies;
        int err;
 
-       last = kzalloc(sizeof(*last), GFP_KERNEL);
+       last = kzalloc(sizeof(*last), GFP_KERNEL_ACCOUNT);
        if (!last)
                return -ENOMEM;
 
index d4a6cf3cd697895ce82f6aaf89de8095782bfdd8..04ea8b9bf202855159b01e17c7033ed49047d8d1 100644 (file)
@@ -90,7 +90,7 @@ static int nft_limit_init(struct nft_limit_priv *priv,
                                 priv->rate);
        }
 
-       priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL);
+       priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT);
        if (!priv->limit)
                return -ENOMEM;
 
index d7db57ed3bc10f2b4388d6399458e9e09a5494db..e6b0df68feeaf85640969c64cf8d80d84082a4ef 100644 (file)
@@ -90,7 +90,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
                        return -EOPNOTSUPP;
        }
 
-       priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL);
+       priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL_ACCOUNT);
        if (!priv->consumed)
                return -ENOMEM;
 
index d600a566da324ae7198635d81757ce78f7205489..7325bee7d14425232f953e630b7453cd490b5ce9 100644 (file)
@@ -349,7 +349,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
                                *ext = &rbe->ext;
                                return -EEXIST;
                        } else {
-                               p = &parent->rb_left;
+                               overlap = false;
+                               if (nft_rbtree_interval_end(rbe))
+                                       p = &parent->rb_left;
+                               else
+                                       p = &parent->rb_right;
                        }
                }
 
index bd3792f080ed0673a6be9f29a0c51aeade966dfb..05ae5a338b6f00a76853e48424068aae6c3ac1bd 100644 (file)
@@ -37,12 +37,11 @@ static void nft_socket_wildcard(const struct nft_pktinfo *pkt,
 
 #ifdef CONFIG_SOCK_CGROUP_DATA
 static noinline bool
-nft_sock_get_eval_cgroupv2(u32 *dest, const struct nft_pktinfo *pkt, u32 level)
+nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level)
 {
-       struct sock *sk = skb_to_full_sk(pkt->skb);
        struct cgroup *cgrp;
 
-       if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk)))
+       if (!sk_fullsock(sk))
                return false;
 
        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
@@ -55,6 +54,32 @@ nft_sock_get_eval_cgroupv2(u32 *dest, const struct nft_pktinfo *pkt, u32 level)
 }
 #endif
 
+static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
+{
+       const struct net_device *indev = nft_in(pkt);
+       const struct sk_buff *skb = pkt->skb;
+       struct sock *sk = NULL;
+
+       if (!indev)
+               return NULL;
+
+       switch (nft_pf(pkt)) {
+       case NFPROTO_IPV4:
+               sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev);
+               break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+       case NFPROTO_IPV6:
+               sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev);
+               break;
+#endif
+       default:
+               WARN_ON_ONCE(1);
+               break;
+       }
+
+       return sk;
+}
+
 static void nft_socket_eval(const struct nft_expr *expr,
                            struct nft_regs *regs,
                            const struct nft_pktinfo *pkt)
@@ -68,20 +93,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
                sk = NULL;
 
        if (!sk)
-               switch(nft_pf(pkt)) {
-               case NFPROTO_IPV4:
-                       sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
-                       break;
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
-               case NFPROTO_IPV6:
-                       sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
-                       break;
-#endif
-               default:
-                       WARN_ON_ONCE(1);
-                       regs->verdict.code = NFT_BREAK;
-                       return;
-               }
+               sk = nft_socket_do_lookup(pkt);
 
        if (!sk) {
                regs->verdict.code = NFT_BREAK;
@@ -109,7 +121,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
                break;
 #ifdef CONFIG_SOCK_CGROUP_DATA
        case NFT_SOCKET_CGROUPV2:
-               if (!nft_sock_get_eval_cgroupv2(dest, pkt, priv->level)) {
+               if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) {
                        regs->verdict.code = NFT_BREAK;
                        return;
                }
@@ -225,6 +237,16 @@ static bool nft_socket_reduce(struct nft_regs_track *track,
        return nft_expr_reduce_bitwise(track, expr);
 }
 
+static int nft_socket_validate(const struct nft_ctx *ctx,
+                              const struct nft_expr *expr,
+                              const struct nft_data **data)
+{
+       return nft_chain_validate_hooks(ctx->chain,
+                                       (1 << NF_INET_PRE_ROUTING) |
+                                       (1 << NF_INET_LOCAL_IN) |
+                                       (1 << NF_INET_LOCAL_OUT));
+}
+
 static struct nft_expr_type nft_socket_type;
 static const struct nft_expr_ops nft_socket_ops = {
        .type           = &nft_socket_type,
@@ -232,6 +254,7 @@ static const struct nft_expr_ops nft_socket_ops = {
        .eval           = nft_socket_eval,
        .init           = nft_socket_init,
        .dump           = nft_socket_dump,
+       .validate       = nft_socket_validate,
        .reduce         = nft_socket_reduce,
 };
 
index 47a876ccd28816a6065d1c4b7de8cfc97e887a69..73e9c0a9c187674cced15dbec079734489c3329f 100644 (file)
@@ -1975,7 +1975,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                copied = len;
        }
 
-       skb_reset_transport_header(data_skb);
        err = skb_copy_datagram_msg(data_skb, 0, msg, copied);
 
        if (msg->msg_name) {
@@ -2263,6 +2262,13 @@ static int netlink_dump(struct sock *sk)
         * single netdev. The outcome is MSG_TRUNC error.
         */
        skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+
+       /* Make sure malicious BPF programs cannot read uninitialized memory
+        * from skb->head -> skb->data
+        */
+       skb_reset_network_header(skb);
+       skb_reset_mac_header(skb);
+
        netlink_skb_set_owner_r(skb, sk);
 
        if (nlk->dump_done_errno > 0) {
index dc7a2404efdf9c7c17e1cf25a39df0b5d3afc76c..5b286e1e0a6ff134c34c615c4a0a0ef8ea978ac3 100644 (file)
@@ -38,7 +38,7 @@ int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name)
 
        device_lock(&dev->dev);
 
-       if (!device_is_registered(&dev->dev)) {
+       if (dev->shutting_down) {
                rc = -ENODEV;
                goto error;
        }
@@ -94,7 +94,7 @@ int nfc_dev_up(struct nfc_dev *dev)
 
        device_lock(&dev->dev);
 
-       if (!device_is_registered(&dev->dev)) {
+       if (dev->shutting_down) {
                rc = -ENODEV;
                goto error;
        }
@@ -142,7 +142,7 @@ int nfc_dev_down(struct nfc_dev *dev)
 
        device_lock(&dev->dev);
 
-       if (!device_is_registered(&dev->dev)) {
+       if (dev->shutting_down) {
                rc = -ENODEV;
                goto error;
        }
@@ -207,7 +207,7 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols)
 
        device_lock(&dev->dev);
 
-       if (!device_is_registered(&dev->dev)) {
+       if (dev->shutting_down) {
                rc = -ENODEV;
                goto error;
        }
@@ -246,7 +246,7 @@ int nfc_stop_poll(struct nfc_dev *dev)
 
        device_lock(&dev->dev);
 
-       if (!device_is_registered(&dev->dev)) {
+       if (dev->shutting_down) {
                rc = -ENODEV;
                goto error;
        }
@@ -291,7 +291,7 @@ int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode)
 
        device_lock(&dev->dev);
 
-       if (!device_is_registered(&dev->dev)) {
+       if (dev->shutting_down) {
                rc = -ENODEV;
                goto error;
        }
@@ -335,7 +335,7 @@ int nfc_dep_link_down(struct nfc_dev *dev)
 
        device_lock(&dev->dev);
 
-       if (!device_is_registered(&dev->dev)) {
+       if (dev->shutting_down) {
                rc = -ENODEV;
                goto error;
        }
@@ -401,7 +401,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol)
 
        device_lock(&dev->dev);
 
-       if (!device_is_registered(&dev->dev)) {
+       if (dev->shutting_down) {
                rc = -ENODEV;
                goto error;
        }
@@ -448,7 +448,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode)
 
        device_lock(&dev->dev);
 
-       if (!device_is_registered(&dev->dev)) {
+       if (dev->shutting_down) {
                rc = -ENODEV;
                goto error;
        }
@@ -495,7 +495,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb,
 
        device_lock(&dev->dev);
 
-       if (!device_is_registered(&dev->dev)) {
+       if (dev->shutting_down) {
                rc = -ENODEV;
                kfree_skb(skb);
                goto error;
@@ -552,7 +552,7 @@ int nfc_enable_se(struct nfc_dev *dev, u32 se_idx)
 
        device_lock(&dev->dev);
 
-       if (!device_is_registered(&dev->dev)) {
+       if (dev->shutting_down) {
                rc = -ENODEV;
                goto error;
        }
@@ -601,7 +601,7 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx)
 
        device_lock(&dev->dev);
 
-       if (!device_is_registered(&dev->dev)) {
+       if (dev->shutting_down) {
                rc = -ENODEV;
                goto error;
        }
@@ -1134,6 +1134,7 @@ int nfc_register_device(struct nfc_dev *dev)
                        dev->rfkill = NULL;
                }
        }
+       dev->shutting_down = false;
        device_unlock(&dev->dev);
 
        rc = nfc_genl_device_added(dev);
@@ -1166,12 +1167,10 @@ void nfc_unregister_device(struct nfc_dev *dev)
                rfkill_unregister(dev->rfkill);
                rfkill_destroy(dev->rfkill);
        }
+       dev->shutting_down = true;
        device_unlock(&dev->dev);
 
        if (dev->ops->check_presence) {
-               device_lock(&dev->dev);
-               dev->shutting_down = true;
-               device_unlock(&dev->dev);
                del_timer_sync(&dev->check_pres_timer);
                cancel_work_sync(&dev->check_pres_work);
        }
index d2537383a3e89d181f8281f1d9c3508da8bdc39f..6a193cce2a754e18950555ea6ce9f2853c348a83 100644 (file)
@@ -560,6 +560,10 @@ static int nci_close_device(struct nci_dev *ndev)
        mutex_lock(&ndev->req_lock);
 
        if (!test_and_clear_bit(NCI_UP, &ndev->flags)) {
+               /* Need to flush the cmd wq in case
+                * there is a queued/running cmd_work
+                */
+               flush_workqueue(ndev->cmd_wq);
                del_timer_sync(&ndev->cmd_timer);
                del_timer_sync(&ndev->data_timer);
                mutex_unlock(&ndev->req_lock);
index 6055dc9a82aa0de28211ddc85b2291fb59bd7483..aa5e712adf0782f69827290911e6d9a8a9efe67f 100644 (file)
@@ -118,7 +118,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev,
 
                skb_frag = nci_skb_alloc(ndev,
                                         (NCI_DATA_HDR_SIZE + frag_len),
-                                        GFP_KERNEL);
+                                        GFP_ATOMIC);
                if (skb_frag == NULL) {
                        rc = -ENOMEM;
                        goto free_exit;
index 19703a649b5a685c5e56294a928e5f7ff46d3eb8..78c4b6addf15aa79add61b4bd9e920c3a7e1e7de 100644 (file)
@@ -153,7 +153,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
 
        i = 0;
        skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len +
-                           NCI_DATA_HDR_SIZE, GFP_KERNEL);
+                           NCI_DATA_HDR_SIZE, GFP_ATOMIC);
        if (!skb)
                return -ENOMEM;
 
@@ -184,7 +184,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
                if (i < data_len) {
                        skb = nci_skb_alloc(ndev,
                                            conn_info->max_pkt_payload_len +
-                                           NCI_DATA_HDR_SIZE, GFP_KERNEL);
+                                           NCI_DATA_HDR_SIZE, GFP_ATOMIC);
                        if (!skb)
                                return -ENOMEM;
 
index f184b0db79d4026418ffa4fbf80bd6b38bdd3d0a..7c62417ccfd78a76937fa7cf7491b5556d5a1603 100644 (file)
@@ -1244,7 +1244,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
        struct sk_buff *msg;
        void *hdr;
 
-       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
        if (!msg)
                return -ENOMEM;
 
@@ -1260,7 +1260,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
 
        genlmsg_end(msg, hdr);
 
-       genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
+       genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
 
        return 0;
 
index 7056cb1b8ba0f2f2bd930500eb1e0c934663c17a..1b5d73079dc9be13b85150246792ef7aee3455e2 100644 (file)
@@ -1051,7 +1051,7 @@ static int clone(struct datapath *dp, struct sk_buff *skb,
        int rem = nla_len(attr);
        bool dont_clone_flow_key;
 
-       /* The first action is always 'OVS_CLONE_ATTR_ARG'. */
+       /* The first action is always 'OVS_CLONE_ATTR_EXEC'. */
        clone_arg = nla_data(attr);
        dont_clone_flow_key = nla_get_u32(clone_arg);
        actions = nla_next(clone_arg, &rem);
index cc282a58b75b9406caeb36c7807368cd5ea7c5f0..4c09cf8a0ab2dcbfe9a14cc2b41d02edad1cfef0 100644 (file)
@@ -2317,6 +2317,62 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size)
        return sfa;
 }
 
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len);
+
+static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action)
+{
+       const struct nlattr *a;
+       int rem;
+
+       nla_for_each_nested(a, action, rem) {
+               switch (nla_type(a)) {
+               case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL:
+               case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER:
+                       ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
+                       break;
+               }
+       }
+}
+
+static void ovs_nla_free_clone_action(const struct nlattr *action)
+{
+       const struct nlattr *a = nla_data(action);
+       int rem = nla_len(action);
+
+       switch (nla_type(a)) {
+       case OVS_CLONE_ATTR_EXEC:
+               /* The real list of actions follows this attribute. */
+               a = nla_next(a, &rem);
+               ovs_nla_free_nested_actions(a, rem);
+               break;
+       }
+}
+
+static void ovs_nla_free_dec_ttl_action(const struct nlattr *action)
+{
+       const struct nlattr *a = nla_data(action);
+
+       switch (nla_type(a)) {
+       case OVS_DEC_TTL_ATTR_ACTION:
+               ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
+               break;
+       }
+}
+
+static void ovs_nla_free_sample_action(const struct nlattr *action)
+{
+       const struct nlattr *a = nla_data(action);
+       int rem = nla_len(action);
+
+       switch (nla_type(a)) {
+       case OVS_SAMPLE_ATTR_ARG:
+               /* The real list of actions follows this attribute. */
+               a = nla_next(a, &rem);
+               ovs_nla_free_nested_actions(a, rem);
+               break;
+       }
+}
+
 static void ovs_nla_free_set_action(const struct nlattr *a)
 {
        const struct nlattr *ovs_key = nla_data(a);
@@ -2330,25 +2386,54 @@ static void ovs_nla_free_set_action(const struct nlattr *a)
        }
 }
 
-void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
 {
        const struct nlattr *a;
        int rem;
 
-       if (!sf_acts)
+       /* Whenever new actions are added, the need to update this
+        * function should be considered.
+        */
+       BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
+
+       if (!actions)
                return;
 
-       nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
+       nla_for_each_attr(a, actions, len, rem) {
                switch (nla_type(a)) {
-               case OVS_ACTION_ATTR_SET:
-                       ovs_nla_free_set_action(a);
+               case OVS_ACTION_ATTR_CHECK_PKT_LEN:
+                       ovs_nla_free_check_pkt_len_action(a);
+                       break;
+
+               case OVS_ACTION_ATTR_CLONE:
+                       ovs_nla_free_clone_action(a);
                        break;
+
                case OVS_ACTION_ATTR_CT:
                        ovs_ct_free_action(a);
                        break;
+
+               case OVS_ACTION_ATTR_DEC_TTL:
+                       ovs_nla_free_dec_ttl_action(a);
+                       break;
+
+               case OVS_ACTION_ATTR_SAMPLE:
+                       ovs_nla_free_sample_action(a);
+                       break;
+
+               case OVS_ACTION_ATTR_SET:
+                       ovs_nla_free_set_action(a);
+                       break;
                }
        }
+}
+
+void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+{
+       if (!sf_acts)
+               return;
 
+       ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len);
        kfree(sf_acts);
 }
 
@@ -2380,7 +2465,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
        new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
 
        if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
-               if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
+               if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
                        OVS_NLERR(log, "Flow action size exceeds max %u",
                                  MAX_ACTIONS_BUFSIZE);
                        return ERR_PTR(-EMSGSIZE);
@@ -3458,7 +3543,9 @@ static int clone_action_to_attr(const struct nlattr *attr,
        if (!start)
                return -EMSGSIZE;
 
-       err = ovs_nla_put_actions(nla_data(attr), rem, skb);
+       /* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. */
+       attr = nla_next(nla_data(attr), &rem);
+       err = ovs_nla_put_actions(attr, rem, skb);
 
        if (err)
                nla_nest_cancel(skb, start);
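Reviewer note on reserve_sfa_size(): the two comparisons are algebraically equal only while next_offset stays below the cap; once it exceeds MAX_ACTIONS_BUFSIZE, the unsigned subtraction in the old form wraps to a huge value and the guard never fires. A standalone demonstration, assuming the 32 KiB cap:

    #include <stdio.h>

    #define MAX_BUF 32768u   /* assumed MAX_ACTIONS_BUFSIZE */

    int main(void)
    {
            unsigned int next_offset = 33000;   /* already past the cap */
            unsigned int req_size = 64;

            /* old check: MAX_BUF - next_offset underflows to a huge
             * unsigned value, so the "too big" branch is never taken
             */
            printf("old check trips: %d\n", (MAX_BUF - next_offset) < req_size);
            /* new check: compare the sum against the cap directly */
            printf("new check trips: %d\n", (next_offset + req_size) > MAX_BUF);
            return 0;
    }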
index c39c09899fd0e1c5e7572eda554752e76c481aff..002d2b9c69dd1f95f234fe38f87dbf3a9481b0da 100644 (file)
@@ -2858,8 +2858,9 @@ tpacket_error:
 
                status = TP_STATUS_SEND_REQUEST;
                err = po->xmit(skb);
-               if (unlikely(err > 0)) {
-                       err = net_xmit_errno(err);
+               if (unlikely(err != 0)) {
+                       if (err > 0)
+                               err = net_xmit_errno(err);
                        if (err && __packet_get_status(po, ph) ==
                                   TP_STATUS_AVAILABLE) {
                                /* skb was destructed already */
@@ -3060,8 +3061,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
                skb->no_fcs = 1;
 
        err = po->xmit(skb);
-       if (err > 0 && (err = net_xmit_errno(err)) != 0)
-               goto out_unlock;
+       if (unlikely(err != 0)) {
+               if (err > 0)
+                       err = net_xmit_errno(err);
+               if (err)
+                       goto out_unlock;
+       }
 
        dev_put(dev);
 
index 5327d130c4b5691e788bbbcb990a349d714ad8d4..73ee2771093d60253d3872cdd5379fac9ba8197e 100644 (file)
@@ -487,14 +487,27 @@ struct rds_tcp_net {
 /* All module specific customizations to the RDS-TCP socket should be done in
  * rds_tcp_tune() and applied after socket creation.
  */
-void rds_tcp_tune(struct socket *sock)
+bool rds_tcp_tune(struct socket *sock)
 {
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
-       struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+       struct rds_tcp_net *rtn;
 
        tcp_sock_set_nodelay(sock->sk);
        lock_sock(sk);
+       /* TCP timer functions might access the net namespace even after
+        * the process that created it has terminated.
+        */
+       if (!sk->sk_net_refcnt) {
+               if (!maybe_get_net(net)) {
+                       release_sock(sk);
+                       return false;
+               }
+               sk->sk_net_refcnt = 1;
+               netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
+               sock_inuse_add(net, 1);
+       }
+       rtn = net_generic(net, rds_tcp_netid);
        if (rtn->sndbuf_size > 0) {
                sk->sk_sndbuf = rtn->sndbuf_size;
                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
@@ -504,6 +517,7 @@ void rds_tcp_tune(struct socket *sock)
                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
        }
        release_sock(sk);
+       return true;
 }
 
 static void rds_tcp_accept_worker(struct work_struct *work)
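
A standalone, non-atomic model of the pattern the hunk introduces (the real maybe_get_net() is an atomic increment-unless-zero; all names below are illustrative): a kernel socket that holds no namespace reference must take one before arming timers that touch the namespace, and fail gracefully if the namespace is already going away.

#include <stdbool.h>

struct netns_model { int count; };

static bool maybe_get_model(struct netns_model *net)
{
	if (net->count == 0)		/* namespace already being torn down */
		return false;
	net->count++;
	return true;
}

static bool tune_model(struct netns_model *net, bool sk_holds_ref)
{
	if (!sk_holds_ref && !maybe_get_model(net))
		return false;		/* caller must fail the connection */
	/* ... apply tuning; timers may now safely touch the namespace ... */
	return true;
}
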
index dc8d745d68575f019ca96c706efc77125552a5d2..f8b5930d7b34369e6e7febde2b5cebd3dbc9bf62 100644 (file)
@@ -49,7 +49,7 @@ struct rds_tcp_statistics {
 };
 
 /* tcp.c */
-void rds_tcp_tune(struct socket *sock);
+bool rds_tcp_tune(struct socket *sock);
 void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
 void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
 void rds_tcp_restore_callbacks(struct socket *sock,
index 5461d77fff4f43995a97ace920fb8ddd2005adb6..f0c477c5d1db4e355afc370b563652bad4b52905 100644 (file)
@@ -124,7 +124,10 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
        if (ret < 0)
                goto out;
 
-       rds_tcp_tune(sock);
+       if (!rds_tcp_tune(sock)) {
+               ret = -EINVAL;
+               goto out;
+       }
 
        if (isv6) {
                sin6.sin6_family = AF_INET6;
index 09cadd556d1e188fde086f356718ab149d567632..7edf2e69d3fed61bc4ae410cd9be33532262f940 100644 (file)
@@ -133,7 +133,10 @@ int rds_tcp_accept_one(struct socket *sock)
        __module_get(new_sock->ops->owner);
 
        rds_tcp_keepalive(new_sock);
-       rds_tcp_tune(new_sock);
+       if (!rds_tcp_tune(new_sock)) {
+               ret = -EINVAL;
+               goto out;
+       }
 
        inet = inet_sk(new_sock->sk);
 
index a4111408ffd0c7e3964a5cebc31899eca6537b68..6a1611b0e3037425e7a92b4055d7f764602b9fc3 100644 (file)
@@ -117,6 +117,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
               local, srx->transport_type, srx->transport.family);
 
        udp_conf.family = srx->transport.family;
+       udp_conf.use_udp_checksums = true;
        if (udp_conf.family == AF_INET) {
                udp_conf.local_ip = srx->transport.sin.sin_addr;
                udp_conf.local_udp_port = srx->transport.sin.sin_port;
@@ -124,6 +125,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
        } else {
                udp_conf.local_ip6 = srx->transport.sin6.sin6_addr;
                udp_conf.local_udp_port = srx->transport.sin6.sin6_port;
+               udp_conf.use_udp6_tx_checksums = true;
+               udp_conf.use_udp6_rx_checksums = true;
 #endif
        }
        ret = udp_sock_create(net, &udp_conf, &local->socket);
index 25bbc4cc8b1359f7b895f181dad227de088ed31d..cc7e30733feb0d3f381269dfc4b9bcd5532b42ec 100644 (file)
@@ -115,6 +115,8 @@ static __net_exit void rxrpc_exit_net(struct net *net)
        rxnet->live = false;
        del_timer_sync(&rxnet->peer_keepalive_timer);
        cancel_work_sync(&rxnet->peer_keepalive_work);
+       /* Remove the timer again as the worker may have restarted it. */
+       del_timer_sync(&rxnet->peer_keepalive_timer);
        rxrpc_destroy_all_calls(rxnet);
        rxrpc_destroy_all_connections(rxnet);
        rxrpc_destroy_all_peers(rxnet);
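
The teardown ordering above, as a standalone sketch (stubs stand in for del_timer_sync()/cancel_work_sync(); the point is purely the sequence): the keepalive worker can re-arm the timer between the first deletion and the work cancellation, so the timer must be deleted once more after the worker is known to be idle.

struct timer_model { int armed; };
struct work_model  { int queued; };

static void del_timer_sync_stub(struct timer_model *t)  { t->armed = 0; }
static void cancel_work_sync_stub(struct work_model *w) { w->queued = 0; }

static void exit_net_model(struct timer_model *timer, struct work_model *work)
{
	del_timer_sync_stub(timer);	/* 1: stop a pending expiry */
	cancel_work_sync_stub(work);	/* 2: the worker may have re-armed it */
	del_timer_sync_stub(timer);	/* 3: delete again; nothing can re-arm now */
}
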
index 31fcd279c17767a57f6190f623f3cef25f33cf99..211c757bfc3c44e713155585f37d458a6f569f14 100644 (file)
@@ -149,7 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
        struct nlattr *pattr;
        struct tcf_pedit *p;
        int ret = 0, err;
-       int ksize;
+       int i, ksize;
        u32 index;
 
        if (!nla) {
@@ -228,6 +228,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
                p->tcfp_nkeys = parm->nkeys;
        }
        memcpy(p->tcfp_keys, parm->keys, ksize);
+       p->tcfp_off_max_hint = 0;
+       for (i = 0; i < p->tcfp_nkeys; ++i) {
+               u32 cur = p->tcfp_keys[i].off;
+
+               /* sanitize the shift value for any later use */
+               p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
+                                             p->tcfp_keys[i].shift);
+
+                       /* The AT option can read a single byte, so we can bound
+                        * the actual value with the uchar max.
+                        */
+               cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
+
+               /* Each key touches 4 bytes starting from the computed offset */
+               p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
+       }
 
        p->tcfp_flags = parm->flags;
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
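
A runnable model of the tcfp_off_max_hint computation added above (struct and function names are illustrative): the AT option reads one packet byte, so its contribution to the offset is bounded by (0xff & offmask) >> shift, and each key then touches 4 bytes from the resulting offset.

#include <stdint.h>
#include <stdio.h>

struct pedit_key_model {
	uint32_t off;		/* static offset */
	uint32_t offmask;	/* mask applied to the byte read by AT */
	uint8_t  shift;		/* right shift applied after masking */
};

static uint32_t off_max_hint(const struct pedit_key_model *k, int nkeys)
{
	uint32_t hint = 0;

	for (int i = 0; i < nkeys; i++) {
		uint32_t cur = k[i].off + ((0xffu & k[i].offmask) >> k[i].shift);

		if (cur + 4 > hint)
			hint = cur + 4;
	}
	return hint;
}

int main(void)
{
	struct pedit_key_model keys[] = { { 12, 0xff, 0 }, { 40, 0, 0 } };

	printf("%u\n", off_max_hint(keys, 2));	/* 271: key 0 dominates */
	return 0;
}
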
@@ -308,13 +324,18 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a,
                         struct tcf_result *res)
 {
        struct tcf_pedit *p = to_pedit(a);
+       u32 max_offset;
        int i;
 
-       if (skb_unclone(skb, GFP_ATOMIC))
-               return p->tcf_action;
-
        spin_lock(&p->tcf_lock);
 
+       max_offset = (skb_transport_header_was_set(skb) ?
+                     skb_transport_offset(skb) :
+                     skb_network_offset(skb)) +
+                    p->tcfp_off_max_hint;
+       if (skb_ensure_writable(skb, min(skb->len, max_offset)))
+               goto unlock;
+
        tcf_lastuse_update(&p->tcf_tm);
 
        if (p->tcfp_nkeys > 0) {
@@ -403,6 +424,7 @@ bad:
        p->tcf_qstats.overlimits++;
 done:
        bstats_update(&p->tcf_bstats, skb);
+unlock:
        spin_unlock(&p->tcf_lock);
        return p->tcf_action;
 }
index 2957f8f5cea759315463d5e61fa1db745746e6f7..f0699f39afdb082067e581a5ff1ce217351c4a19 100644 (file)
@@ -1672,10 +1672,10 @@ static int tcf_chain_tp_insert(struct tcf_chain *chain,
        if (chain->flushing)
                return -EAGAIN;
 
+       RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
        if (*chain_info->pprev == chain->filter_chain)
                tcf_chain0_head_change(chain, tp);
        tcf_proto_get(tp);
-       RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
        rcu_assign_pointer(*chain_info->pprev, tp);
 
        return 0;
index c80fc49c0da1c7c39093a5b81c5027919d73fdd4..ed5e6f08e74a86af659fcd7aa64d92a23584cae1 100644 (file)
@@ -1013,6 +1013,7 @@ static int fl_set_key_mpls(struct nlattr **tb,
 static void fl_set_key_vlan(struct nlattr **tb,
                            __be16 ethertype,
                            int vlan_id_key, int vlan_prio_key,
+                           int vlan_next_eth_type_key,
                            struct flow_dissector_key_vlan *key_val,
                            struct flow_dissector_key_vlan *key_mask)
 {
@@ -1031,6 +1032,11 @@ static void fl_set_key_vlan(struct nlattr **tb,
        }
        key_val->vlan_tpid = ethertype;
        key_mask->vlan_tpid = cpu_to_be16(~0);
+       if (tb[vlan_next_eth_type_key]) {
+               key_val->vlan_eth_type =
+                       nla_get_be16(tb[vlan_next_eth_type_key]);
+               key_mask->vlan_eth_type = cpu_to_be16(~0);
+       }
 }
 
 static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
@@ -1602,8 +1608,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 
                if (eth_type_vlan(ethertype)) {
                        fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
-                                       TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan,
-                                       &mask->vlan);
+                                       TCA_FLOWER_KEY_VLAN_PRIO,
+                                       TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+                                       &key->vlan, &mask->vlan);
 
                        if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
                                ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
@@ -1611,6 +1618,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
                                        fl_set_key_vlan(tb, ethertype,
                                                        TCA_FLOWER_KEY_CVLAN_ID,
                                                        TCA_FLOWER_KEY_CVLAN_PRIO,
+                                                       TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
                                                        &key->cvlan, &mask->cvlan);
                                        fl_set_key_val(tb, &key->basic.n_proto,
                                                       TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
@@ -3002,13 +3010,13 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
                goto nla_put_failure;
 
        if (mask->basic.n_proto) {
-               if (mask->cvlan.vlan_tpid) {
+               if (mask->cvlan.vlan_eth_type) {
                        if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
                                         key->basic.n_proto))
                                goto nla_put_failure;
-               } else if (mask->vlan.vlan_tpid) {
+               } else if (mask->vlan.vlan_eth_type) {
                        if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
-                                        key->basic.n_proto))
+                                        key->vlan.vlan_eth_type))
                                goto nla_put_failure;
                }
        }
index cf5649292ee00941e5c4a4d5b11b1c3dc98cce3f..4d27300c287c46d11bf9d44f8c66eded9e734581 100644 (file)
@@ -386,14 +386,19 @@ static int u32_init(struct tcf_proto *tp)
        return 0;
 }
 
-static int u32_destroy_key(struct tc_u_knode *n, bool free_pf)
+static void __u32_destroy_key(struct tc_u_knode *n)
 {
        struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
 
        tcf_exts_destroy(&n->exts);
-       tcf_exts_put_net(&n->exts);
        if (ht && --ht->refcnt == 0)
                kfree(ht);
+       kfree(n);
+}
+
+static void u32_destroy_key(struct tc_u_knode *n, bool free_pf)
+{
+       tcf_exts_put_net(&n->exts);
 #ifdef CONFIG_CLS_U32_PERF
        if (free_pf)
                free_percpu(n->pf);
@@ -402,8 +407,7 @@ static int u32_destroy_key(struct tc_u_knode *n, bool free_pf)
        if (free_pf)
                free_percpu(n->pcpu_success);
 #endif
-       kfree(n);
-       return 0;
+       __u32_destroy_key(n);
 }
 
 /* u32_delete_key_rcu should be called when free'ing a copied
@@ -811,10 +815,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
        new->flags = n->flags;
        RCU_INIT_POINTER(new->ht_down, ht);
 
-       /* bump reference count as long as we hold pointer to structure */
-       if (ht)
-               ht->refcnt++;
-
 #ifdef CONFIG_CLS_U32_PERF
        /* Statistics may be incremented by readers during update
         * so we must keep them intact. When the node is later destroyed
@@ -836,6 +836,10 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
                return NULL;
        }
 
+       /* bump reference count as long as we hold pointer to structure */
+       if (ht)
+               ht->refcnt++;
+
        return new;
 }
 
@@ -900,13 +904,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                                    extack);
 
                if (err) {
-                       u32_destroy_key(new, false);
+                       __u32_destroy_key(new);
                        return err;
                }
 
                err = u32_replace_hw_knode(tp, new, flags, extack);
                if (err) {
-                       u32_destroy_key(new, false);
+                       __u32_destroy_key(new);
                        return err;
                }
 
index 377f896bdedc4575316c3bab1d661d9c88142ec0..b9c71a304d399db0a31efa62ce64622163eb23a1 100644 (file)
@@ -417,7 +417,8 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
 {
        struct taprio_sched *q = qdisc_priv(sch);
 
-       if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) {
+       /* sk_flags are only safe to use on full sockets. */
+       if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
                if (!is_valid_interval(skb, sch))
                        return qdisc_drop(skb, sch, to_free);
        } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
index a18609f608fb786b2532a4febbd72a9737ab906c..e213aaf45d67c61edbd22abc8be6cd4a197a9ed8 100644 (file)
@@ -914,6 +914,7 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx)
                                ctx->asoc->base.sk->sk_err = -error;
                                return;
                        }
+                       ctx->asoc->stats.octrlchunks++;
                        break;
 
                case SCTP_CID_ABORT:
@@ -938,7 +939,10 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx)
 
                case SCTP_CID_HEARTBEAT:
                        if (chunk->pmtu_probe) {
-                               sctp_packet_singleton(ctx->transport, chunk, ctx->gfp);
+                               error = sctp_packet_singleton(ctx->transport,
+                                                             chunk, ctx->gfp);
+                               if (!error)
+                                       ctx->asoc->stats.octrlchunks++;
                                break;
                        }
                        fallthrough;
index b3815b568e8e5cfbf51a20d7358566462b0867bd..463c4a58d2c36d2a975c9e771ead62bd8460a376 100644 (file)
@@ -458,6 +458,10 @@ void sctp_generate_reconf_event(struct timer_list *t)
                goto out_unlock;
        }
 
+       /* This happens when the response arrives after the timer is triggered. */
+       if (!asoc->strreset_chunk)
+               goto out_unlock;
+
        error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
                           SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF),
                           asoc->state, asoc->ep, asoc,
index 7f342bc127358d1d6bb02a6751568afad3d7be35..52edee1322fc36b0400569f8e6de2133adf6cd74 100644 (file)
@@ -781,7 +781,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
                }
        }
 
-       if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
+       if (security_sctp_assoc_request(new_asoc, chunk->head_skb ?: chunk->skb)) {
                sctp_association_free(new_asoc);
                return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
        }
@@ -932,7 +932,7 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
 
        /* Set peer label for connection. */
        if (security_sctp_assoc_established((struct sctp_association *)asoc,
-                                           chunk->skb))
+                                           chunk->head_skb ?: chunk->skb))
                return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
        /* Verify that the chunk length for the COOKIE-ACK is OK.
@@ -2262,7 +2262,7 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
        }
 
        /* Update socket peer label if first association. */
-       if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
+       if (security_sctp_assoc_request(new_asoc, chunk->head_skb ?: chunk->skb)) {
                sctp_association_free(new_asoc);
                return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
        }
index 3e1a9600be5e1a6284ebef1df39d41fe2b3bf6b4..7b0427658056d480dcafd7f0d040a267225f51c8 100644 (file)
@@ -5636,7 +5636,7 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
         * Set the daddr and initialize id to something more random and also
         * copy over any ip options.
         */
-       sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk);
+       sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk);
        sp->pf->copy_ip_options(sk, sock->sk);
 
        /* Populate the fields of the newsk from the oldsk and migrate the
index f0d118e9f155178e80f78d3e16d36126873d16dd..fce16b9d6e1a4bfaa0ee4ad23ada9a3d62bcce1a 100644 (file)
@@ -121,6 +121,7 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk,
                                          bool *own_req)
 {
        struct smc_sock *smc;
+       struct sock *child;
 
        smc = smc_clcsock_user_data(sk);
 
@@ -134,8 +135,17 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk,
        }
 
        /* passthrough to original syn recv sock fct */
-       return smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash,
-                                             own_req);
+       child = smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash,
+                                              own_req);
+       /* child must not inherit smc or its ops */
+       if (child) {
+               rcu_assign_sk_user_data(child, NULL);
+
+               /* v4-mapped sockets don't inherit parent ops. Don't restore. */
+               if (inet_csk(child)->icsk_af_ops == inet_csk(sk)->icsk_af_ops)
+                       inet_csk(child)->icsk_af_ops = smc->ori_af_ops;
+       }
+       return child;
 
 drop:
        dst_release(dst);
@@ -233,11 +243,27 @@ struct proto smc_proto6 = {
 };
 EXPORT_SYMBOL_GPL(smc_proto6);
 
+static void smc_fback_restore_callbacks(struct smc_sock *smc)
+{
+       struct sock *clcsk = smc->clcsock->sk;
+
+       write_lock_bh(&clcsk->sk_callback_lock);
+       clcsk->sk_user_data = NULL;
+
+       smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change);
+       smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready);
+       smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space);
+       smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report);
+
+       write_unlock_bh(&clcsk->sk_callback_lock);
+}
+
 static void smc_restore_fallback_changes(struct smc_sock *smc)
 {
        if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
                smc->clcsock->file->private_data = smc->sk.sk_socket;
                smc->clcsock->file = NULL;
+               smc_fback_restore_callbacks(smc);
        }
 }
 
@@ -363,6 +389,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
        sk->sk_prot->hash(sk);
        sk_refcnt_debug_inc(sk);
        mutex_init(&smc->clcsock_release_lock);
+       smc_init_saved_callbacks(smc);
 
        return sk;
 }
@@ -734,47 +761,73 @@ out:
 
 static void smc_fback_state_change(struct sock *clcsk)
 {
-       struct smc_sock *smc =
-               smc_clcsock_user_data(clcsk);
+       struct smc_sock *smc;
 
-       if (!smc)
-               return;
-       smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change);
+       read_lock_bh(&clcsk->sk_callback_lock);
+       smc = smc_clcsock_user_data(clcsk);
+       if (smc)
+               smc_fback_forward_wakeup(smc, clcsk,
+                                        smc->clcsk_state_change);
+       read_unlock_bh(&clcsk->sk_callback_lock);
 }
 
 static void smc_fback_data_ready(struct sock *clcsk)
 {
-       struct smc_sock *smc =
-               smc_clcsock_user_data(clcsk);
+       struct smc_sock *smc;
 
-       if (!smc)
-               return;
-       smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready);
+       read_lock_bh(&clcsk->sk_callback_lock);
+       smc = smc_clcsock_user_data(clcsk);
+       if (smc)
+               smc_fback_forward_wakeup(smc, clcsk,
+                                        smc->clcsk_data_ready);
+       read_unlock_bh(&clcsk->sk_callback_lock);
 }
 
 static void smc_fback_write_space(struct sock *clcsk)
 {
-       struct smc_sock *smc =
-               smc_clcsock_user_data(clcsk);
+       struct smc_sock *smc;
 
-       if (!smc)
-               return;
-       smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space);
+       read_lock_bh(&clcsk->sk_callback_lock);
+       smc = smc_clcsock_user_data(clcsk);
+       if (smc)
+               smc_fback_forward_wakeup(smc, clcsk,
+                                        smc->clcsk_write_space);
+       read_unlock_bh(&clcsk->sk_callback_lock);
 }
 
 static void smc_fback_error_report(struct sock *clcsk)
 {
-       struct smc_sock *smc =
-               smc_clcsock_user_data(clcsk);
+       struct smc_sock *smc;
 
-       if (!smc)
-               return;
-       smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report);
+       read_lock_bh(&clcsk->sk_callback_lock);
+       smc = smc_clcsock_user_data(clcsk);
+       if (smc)
+               smc_fback_forward_wakeup(smc, clcsk,
+                                        smc->clcsk_error_report);
+       read_unlock_bh(&clcsk->sk_callback_lock);
+}
+
+static void smc_fback_replace_callbacks(struct smc_sock *smc)
+{
+       struct sock *clcsk = smc->clcsock->sk;
+
+       write_lock_bh(&clcsk->sk_callback_lock);
+       clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+
+       smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change,
+                              &smc->clcsk_state_change);
+       smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready,
+                              &smc->clcsk_data_ready);
+       smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space,
+                              &smc->clcsk_write_space);
+       smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report,
+                              &smc->clcsk_error_report);
+
+       write_unlock_bh(&clcsk->sk_callback_lock);
 }
 
 static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
 {
-       struct sock *clcsk;
        int rc = 0;
 
        mutex_lock(&smc->clcsock_release_lock);
@@ -782,10 +835,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
                rc = -EBADF;
                goto out;
        }
-       clcsk = smc->clcsock->sk;
 
-       if (smc->use_fallback)
-               goto out;
        smc->use_fallback = true;
        smc->fallback_rsn = reason_code;
        smc_stat_fallback(smc);
@@ -800,18 +850,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
                 * in smc sk->sk_wq and they should be woken up
                 * as clcsock's wait queue is woken up.
                 */
-               smc->clcsk_state_change = clcsk->sk_state_change;
-               smc->clcsk_data_ready = clcsk->sk_data_ready;
-               smc->clcsk_write_space = clcsk->sk_write_space;
-               smc->clcsk_error_report = clcsk->sk_error_report;
-
-               clcsk->sk_state_change = smc_fback_state_change;
-               clcsk->sk_data_ready = smc_fback_data_ready;
-               clcsk->sk_write_space = smc_fback_write_space;
-               clcsk->sk_error_report = smc_fback_error_report;
-
-               smc->clcsock->sk->sk_user_data =
-                       (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+               smc_fback_replace_callbacks(smc);
        }
 out:
        mutex_unlock(&smc->clcsock_release_lock);
@@ -1465,6 +1504,8 @@ static void smc_connect_work(struct work_struct *work)
                smc->sk.sk_state = SMC_CLOSED;
                if (rc == -EPIPE || rc == -EAGAIN)
                        smc->sk.sk_err = EPIPE;
+               else if (rc == -ECONNREFUSED)
+                       smc->sk.sk_err = ECONNREFUSED;
                else if (signal_pending(current))
                        smc->sk.sk_err = -sock_intr_errno(timeo);
                sock_put(&smc->sk); /* passive closing */
@@ -1584,6 +1625,19 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
         * function; switch it back to the original sk_data_ready function
         */
        new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
+
+       /* if the new clcsock has also inherited the fallback-specific callback
+        * functions, switch them back to the original ones.
+        */
+       if (lsmc->use_fallback) {
+               if (lsmc->clcsk_state_change)
+                       new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change;
+               if (lsmc->clcsk_write_space)
+                       new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space;
+               if (lsmc->clcsk_error_report)
+                       new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report;
+       }
+
        (*new_smc)->clcsock = new_clcsock;
 out:
        return rc;
@@ -2343,17 +2397,20 @@ out:
 
 static void smc_clcsock_data_ready(struct sock *listen_clcsock)
 {
-       struct smc_sock *lsmc =
-               smc_clcsock_user_data(listen_clcsock);
+       struct smc_sock *lsmc;
 
+       read_lock_bh(&listen_clcsock->sk_callback_lock);
+       lsmc = smc_clcsock_user_data(listen_clcsock);
        if (!lsmc)
-               return;
+               goto out;
        lsmc->clcsk_data_ready(listen_clcsock);
        if (lsmc->sk.sk_state == SMC_LISTEN) {
                sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
                if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work))
                        sock_put(&lsmc->sk);
        }
+out:
+       read_unlock_bh(&listen_clcsock->sk_callback_lock);
 }
 
 static int smc_listen(struct socket *sock, int backlog)
@@ -2385,10 +2442,12 @@ static int smc_listen(struct socket *sock, int backlog)
        /* save original sk_data_ready function and establish
         * smc-specific sk_data_ready function
         */
-       smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready;
-       smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready;
+       write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
        smc->clcsock->sk->sk_user_data =
                (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+       smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready,
+                              smc_clcsock_data_ready, &smc->clcsk_data_ready);
+       write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
 
        /* save original ops */
        smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops;
@@ -2403,7 +2462,11 @@ static int smc_listen(struct socket *sock, int backlog)
 
        rc = kernel_listen(smc->clcsock, backlog);
        if (rc) {
-               smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
+               write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
+               smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
+                                      &smc->clcsk_data_ready);
+               smc->clcsock->sk->sk_user_data = NULL;
+               write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
                goto out;
        }
        sk->sk_max_ack_backlog = backlog;
@@ -2664,8 +2727,10 @@ static int smc_shutdown(struct socket *sock, int how)
        if (smc->use_fallback) {
                rc = kernel_sock_shutdown(smc->clcsock, how);
                sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
-               if (sk->sk_shutdown == SHUTDOWN_MASK)
+               if (sk->sk_shutdown == SHUTDOWN_MASK) {
                        sk->sk_state = SMC_CLOSED;
+                       sock_put(sk);
+               }
                goto out;
        }
        switch (how) {
index ea0620529ebea4b5e5fccf4fdc1da60059894f9d..5ed765ea0c731a7f0095cd6a99a0e42d227eaca9 100644 (file)
@@ -288,12 +288,41 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
        return (struct smc_sock *)sk;
 }
 
+static inline void smc_init_saved_callbacks(struct smc_sock *smc)
+{
+       smc->clcsk_state_change = NULL;
+       smc->clcsk_data_ready   = NULL;
+       smc->clcsk_write_space  = NULL;
+       smc->clcsk_error_report = NULL;
+}
+
 static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk)
 {
        return (struct smc_sock *)
               ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY);
 }
 
+/* save target_cb in saved_cb, and replace target_cb with new_cb */
+static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *),
+                                         void (*new_cb)(struct sock *),
+                                         void (**saved_cb)(struct sock *))
+{
+       /* only save once */
+       if (!*saved_cb)
+               *saved_cb = *target_cb;
+       *target_cb = new_cb;
+}
+
+/* restore target_cb to saved_cb, and reset saved_cb to NULL */
+static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *),
+                                         void (**saved_cb)(struct sock *))
+{
+       if (!*saved_cb)
+               return;
+       *target_cb = *saved_cb;
+       *saved_cb = NULL;
+}
+
 extern struct workqueue_struct *smc_hs_wq;     /* wq for handshake work */
 extern struct workqueue_struct *smc_close_wq;  /* wq for close work */
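
A userspace demonstration of the save/replace/restore discipline these helpers encode (cb_t and the function names are illustrative): the original callback is saved at most once, so repeated replacements cannot clobber it, and restore is idempotent.

#include <stdio.h>

typedef void (*cb_t)(void);

static void replace_cb(cb_t *target, cb_t new_cb, cb_t *saved)
{
	if (!*saved)		/* only save the original once */
		*saved = *target;
	*target = new_cb;
}

static void restore_cb(cb_t *target, cb_t *saved)
{
	if (!*saved)
		return;
	*target = *saved;
	*saved = NULL;
}

static void orig_cb(void)    { puts("original"); }
static void wrapper_cb(void) { puts("wrapper"); }

int main(void)
{
	cb_t cur = orig_cb, saved = NULL;

	replace_cb(&cur, wrapper_cb, &saved);
	cur();				/* prints "wrapper" */
	restore_cb(&cur, &saved);
	cur();				/* prints "original" */
	return 0;
}
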
 
index ce27399b38b1ec2dc240a746af93fe86c18dbedf..f9f3f59c79de21e81c9d75ddc49e29af2bf3b52d 100644 (file)
@@ -191,7 +191,8 @@ static int smc_nl_ueid_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq,
                          flags, SMC_NETLINK_DUMP_UEID);
        if (!hdr)
                return -ENOMEM;
-       snprintf(ueid_str, sizeof(ueid_str), "%s", ueid);
+       memcpy(ueid_str, ueid, SMC_MAX_EID_LEN);
+       ueid_str[SMC_MAX_EID_LEN] = 0;
        if (nla_put_string(skb, SMC_NLA_EID_TABLE_ENTRY, ueid_str)) {
                genlmsg_cancel(skb, hdr);
                return -EMSGSIZE;
@@ -252,7 +253,8 @@ int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb)
                goto end;
 
        smc_ism_get_system_eid(&seid);
-       snprintf(seid_str, sizeof(seid_str), "%s", seid);
+       memcpy(seid_str, seid, SMC_MAX_EID_LEN);
+       seid_str[SMC_MAX_EID_LEN] = 0;
        if (nla_put_string(skb, SMC_NLA_SEID_ENTRY, seid_str))
                goto err;
        read_lock(&smc_clc_eid_table.lock);
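
Why both hunks switch from snprintf("%s") to memcpy(): the EID is a fixed-width field with no NUL terminator, so a "%s" format may read past its end. A hedged sketch of the safe equivalent (EID_LEN stands in for SMC_MAX_EID_LEN and its value is assumed):

#include <string.h>

#define EID_LEN	32	/* assumed to match SMC_MAX_EID_LEN */

static void eid_to_cstr(const char *eid, char out[EID_LEN + 1])
{
	memcpy(out, eid, EID_LEN);	/* copy the fixed width only */
	out[EID_LEN] = '\0';		/* terminate explicitly */
}
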
index 676cb2333d3c4b98f539eac4e4d69d71b4faf773..31db7438857c9f81fd336bfce74c5c92b7d275c6 100644 (file)
@@ -214,8 +214,11 @@ again:
                sk->sk_state = SMC_CLOSED;
                sk->sk_state_change(sk); /* wake up accept */
                if (smc->clcsock && smc->clcsock->sk) {
-                       smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
+                       write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
+                       smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
+                                              &smc->clcsk_data_ready);
                        smc->clcsock->sk->sk_user_data = NULL;
+                       write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
                        rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
                }
                smc_close_cleanup_listen(sk);
index 7984f88834720c7b26fb350579c55bc6bfc367c6..7055ed10e316271b665abddecc660d3b8ebdf8ad 100644 (file)
@@ -311,8 +311,9 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
        list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
                if (!strncmp(ibdev->ibdev->name, ib_name,
                             sizeof(ibdev->ibdev->name)) ||
-                   !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
-                            IB_DEVICE_NAME_MAX - 1)) {
+                   (ibdev->ibdev->dev.parent &&
+                    !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
+                            IB_DEVICE_NAME_MAX - 1))) {
                        goto out;
                }
        }
index 51e8eb2933ff47210172cbc3d607be0f32f61a3b..338b9ef806e8202c7b41a3294cf5e8ce72e4f9f8 100644 (file)
@@ -355,12 +355,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
                                }
                                break;
                        }
+                       if (!timeo)
+                               return -EAGAIN;
                        if (signal_pending(current)) {
                                read_done = sock_intr_errno(timeo);
                                break;
                        }
-                       if (!timeo)
-                               return -EAGAIN;
                }
 
                if (!smc_rx_data_available(conn)) {
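
The reorder matters for non-blocking reads; a standalone model of the corrected wait step (wait_step is illustrative): with a zero timeout, -EAGAIN must win over a pending signal, otherwise a non-blocking caller that happens to have a signal pending sees a spurious interruption error.

#include <errno.h>
#include <stdbool.h>

static int wait_step(long timeo, bool signal_is_pending)
{
	if (!timeo)
		return -EAGAIN;		/* non-blocking: "try again" wins */
	if (signal_is_pending)
		return -EINTR;
	return 0;			/* keep waiting */
}
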
index 6887840682bb7eee52cd59c02fb01dd94c2899a8..bb6a1a12fbde1824f90abbb8152e7d604c90eb9c 100644 (file)
@@ -504,7 +504,7 @@ static int sock_map_fd(struct socket *sock, int flags)
 struct socket *sock_from_file(struct file *file)
 {
        if (file->f_op == &socket_file_ops)
-               return file->private_data;      /* set in sock_map_fd */
+               return file->private_data;      /* set in sock_alloc_file */
 
        return NULL;
 }
@@ -1538,11 +1538,10 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct
 }
 EXPORT_SYMBOL(sock_create_kern);
 
-int __sys_socket(int family, int type, int protocol)
+static struct socket *__sys_socket_create(int family, int type, int protocol)
 {
-       int retval;
        struct socket *sock;
-       int flags;
+       int retval;
 
        /* Check the SOCK_* constants for consistency.  */
        BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
@@ -1550,17 +1549,50 @@ int __sys_socket(int family, int type, int protocol)
        BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
        BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
 
-       flags = type & ~SOCK_TYPE_MASK;
-       if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
-               return -EINVAL;
+       if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+               return ERR_PTR(-EINVAL);
        type &= SOCK_TYPE_MASK;
 
+       retval = sock_create(family, type, protocol, &sock);
+       if (retval < 0)
+               return ERR_PTR(retval);
+
+       return sock;
+}
+
+struct file *__sys_socket_file(int family, int type, int protocol)
+{
+       struct socket *sock;
+       struct file *file;
+       int flags;
+
+       sock = __sys_socket_create(family, type, protocol);
+       if (IS_ERR(sock))
+               return ERR_CAST(sock);
+
+       flags = type & ~SOCK_TYPE_MASK;
        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
 
-       retval = sock_create(family, type, protocol, &sock);
-       if (retval < 0)
-               return retval;
+       file = sock_alloc_file(sock, flags, NULL);
+       if (IS_ERR(file))
+               sock_release(sock);
+
+       return file;
+}
+
+int __sys_socket(int family, int type, int protocol)
+{
+       struct socket *sock;
+       int flags;
+
+       sock = __sys_socket_create(family, type, protocol);
+       if (IS_ERR(sock))
+               return PTR_ERR(sock);
+
+       flags = type & ~SOCK_TYPE_MASK;
+       if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+               flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
 
        return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
 }
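
__sys_socket_create() reports failure through the kernel's ERR_PTR convention; a self-contained userspace model of that convention (MAX_ERRNO is assumed to mirror the kernel's 4095; socket_model and friends are illustrative):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095

/* One pointer return carries either a valid object or a small negative
 * errno encoded at the top of the address space. */
static inline void *ERR_PTR(long err)		{ return (void *)err; }
static inline long PTR_ERR(const void *p)	{ return (long)p; }
static inline int IS_ERR(const void *p)
{
	return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

struct socket_model { int type; };

static struct socket_model *socket_create_model(int type)
{
	static struct socket_model s;

	if (type < 0)
		return ERR_PTR(-EINVAL);	/* encode the errno */
	s.type = type;
	return &s;
}

int main(void)
{
	struct socket_model *sock = socket_create_model(-1);

	if (IS_ERR(sock))
		printf("error: %ld\n", PTR_ERR(sock));	/* -22 (EINVAL) */
	return 0;
}
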
index 61c276bddaf2531c20afa9eb83261dc25c89690a..f549e4c05defcbc3bfc536aaac3d1e1c23580eaa 100644 (file)
@@ -98,6 +98,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
                 * done without the correct namespace:
                 */
                .flags          = RPC_CLNT_CREATE_NOPING |
+                                 RPC_CLNT_CREATE_CONNECTED |
                                  RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
        };
        struct rpc_clnt *clnt;
index 8bf2af8546d2fb886808d149d69bc90a95cd9b06..e2c6eca0271b364c05c4136a9e9ea4eb1e23602b 100644 (file)
@@ -76,6 +76,7 @@ static int    rpc_encode_header(struct rpc_task *task,
 static int     rpc_decode_header(struct rpc_task *task,
                                  struct xdr_stream *xdr);
 static int     rpc_ping(struct rpc_clnt *clnt);
+static int     rpc_ping_noreply(struct rpc_clnt *clnt);
 static void    rpc_check_timeout(struct rpc_task *task);
 
 static void rpc_register_client(struct rpc_clnt *clnt)
@@ -483,6 +484,12 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
                        rpc_shutdown_client(clnt);
                        return ERR_PTR(err);
                }
+       } else if (args->flags & RPC_CLNT_CREATE_CONNECTED) {
+               int err = rpc_ping_noreply(clnt);
+               if (err != 0) {
+                       rpc_shutdown_client(clnt);
+                       return ERR_PTR(err);
+               }
        }
 
        clnt->cl_softrtry = 1;
@@ -1065,10 +1072,13 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt)
 static
 void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
 {
-       if (task->tk_xprt &&
-                       !(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) &&
-                        (task->tk_flags & RPC_TASK_MOVEABLE)))
-               return;
+       if (task->tk_xprt) {
+               if (!(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) &&
+                     (task->tk_flags & RPC_TASK_MOVEABLE)))
+                       return;
+               xprt_release(task);
+               xprt_put(task->tk_xprt);
+       }
        if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
                task->tk_xprt = rpc_task_get_first_xprt(clnt);
        else
@@ -1127,6 +1137,8 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
        struct rpc_task *task;
 
        task = rpc_new_task(task_setup_data);
+       if (IS_ERR(task))
+               return task;
 
        if (!RPC_IS_ASYNC(task))
                task->tk_flags |= RPC_TASK_CRED_NOREF;
@@ -1227,6 +1239,11 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
         * Create an rpc_task to send the data
         */
        task = rpc_new_task(&task_setup_data);
+       if (IS_ERR(task)) {
+               xprt_free_bc_request(req);
+               return task;
+       }
+
        xprt_init_bc_request(req, task);
 
        task->tk_action = call_bc_encode;
@@ -1858,6 +1875,9 @@ call_encode(struct rpc_task *task)
        xprt_request_dequeue_xprt(task);
        /* Encode here so that rpcsec_gss can use correct sequence number. */
        rpc_xdr_encode(task);
+       /* Add task to reply queue before transmission to avoid races */
+       if (task->tk_status == 0 && rpc_reply_expected(task))
+               task->tk_status = xprt_request_enqueue_receive(task);
        /* Did the encode result in an error condition? */
        if (task->tk_status != 0) {
                /* Was the error nonfatal? */
@@ -1881,9 +1901,6 @@ call_encode(struct rpc_task *task)
                return;
        }
 
-       /* Add task to reply queue before transmission to avoid races */
-       if (rpc_reply_expected(task))
-               xprt_request_enqueue_receive(task);
        xprt_request_enqueue_transmit(task);
 out:
        task->tk_action = call_transmit;
@@ -2200,6 +2217,7 @@ call_transmit_status(struct rpc_task *task)
                 * socket just returned a connection error,
                 * then hold onto the transport lock.
                 */
+       case -ENOMEM:
        case -ENOBUFS:
                rpc_delay(task, HZ>>2);
                fallthrough;
@@ -2283,6 +2301,7 @@ call_bc_transmit_status(struct rpc_task *task)
        case -ENOTCONN:
        case -EPIPE:
                break;
+       case -ENOMEM:
        case -ENOBUFS:
                rpc_delay(task, HZ>>2);
                fallthrough;
@@ -2365,6 +2384,11 @@ call_status(struct rpc_task *task)
        case -EPIPE:
        case -EAGAIN:
                break;
+       case -ENFILE:
+       case -ENOBUFS:
+       case -ENOMEM:
+               rpc_delay(task, HZ>>2);
+               break;
        case -EIO:
                /* shutdown or soft timeout */
                goto out_exit;
@@ -2692,6 +2716,10 @@ static const struct rpc_procinfo rpcproc_null = {
        .p_decode = rpcproc_decode_null,
 };
 
+static const struct rpc_procinfo rpcproc_null_noreply = {
+       .p_encode = rpcproc_encode_null,
+};
+
 static void
 rpc_null_call_prepare(struct rpc_task *task, void *data)
 {
@@ -2745,6 +2773,28 @@ static int rpc_ping(struct rpc_clnt *clnt)
        return status;
 }
 
+static int rpc_ping_noreply(struct rpc_clnt *clnt)
+{
+       struct rpc_message msg = {
+               .rpc_proc = &rpcproc_null_noreply,
+       };
+       struct rpc_task_setup task_setup_data = {
+               .rpc_client = clnt,
+               .rpc_message = &msg,
+               .callback_ops = &rpc_null_ops,
+               .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
+       };
+       struct rpc_task *task;
+       int status;
+
+       task = rpc_run_task(&task_setup_data);
+       if (IS_ERR(task))
+               return PTR_ERR(task);
+       status = task->tk_status;
+       rpc_put_task(task);
+       return status;
+}
+
 struct rpc_cb_add_xprt_calldata {
        struct rpc_xprt_switch *xps;
        struct rpc_xprt *xprt;
index b258b87a3ec22c54bc2f815e7db27b8d7eacf5d6..7f70c1e608b7ce4f07f3651e70a764050c292dc5 100644 (file)
@@ -1128,6 +1128,11 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
 
        if (task == NULL) {
                task = rpc_alloc_task();
+               if (task == NULL) {
+                       rpc_release_calldata(setup_data->callback_ops,
+                                            setup_data->callback_data);
+                       return ERR_PTR(-ENOMEM);
+               }
                flags = RPC_TASK_DYNAMIC;
        }
 
index 05b38bf68316a5aec2045fafb77b338e36f0c4ff..71ba4cf513bcef3c50220db640b4ab35e365925f 100644 (file)
@@ -221,12 +221,6 @@ static int xprt_send_kvec(struct socket *sock, struct msghdr *msg,
 static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg,
                              struct xdr_buf *xdr, size_t base)
 {
-       int err;
-
-       err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask());
-       if (err < 0)
-               return err;
-
        iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, xdr_buf_pagecount(xdr),
                      xdr->page_len + xdr->page_base);
        return xprt_sendmsg(sock, msg, base + xdr->page_base);
index 297c498550383f8d67d634921595f40ab9ec78c1..5b59e2103526ebbb15eb59cd0c1b41948d27a70f 100644 (file)
@@ -1231,6 +1231,8 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
                dr->daddr = rqstp->rq_daddr;
                dr->argslen = rqstp->rq_arg.len >> 2;
                dr->xprt_hlen = rqstp->rq_xprt_hlen;
+               dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
+               rqstp->rq_xprt_ctxt = NULL;
 
                /* back up head to the start of the buffer and copy */
                skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
@@ -1269,6 +1271,7 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
        rqstp->rq_xprt_hlen   = dr->xprt_hlen;
        rqstp->rq_daddr       = dr->daddr;
        rqstp->rq_respages    = rqstp->rq_pages;
+       rqstp->rq_xprt_ctxt   = dr->xprt_ctxt;
        svc_xprt_received(rqstp->rq_xprt);
        return (dr->argslen<<2) - dr->xprt_hlen;
 }
index 478f857cdaed4548e7c181b3a6cc41ecf5a1425b..cc35ec4334006749f1bb09290a6528e1b8aabf0f 100644 (file)
@@ -579,15 +579,18 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
        if (svc_xprt_is_dead(xprt))
                goto out_notconn;
 
+       err = xdr_alloc_bvec(xdr, GFP_KERNEL);
+       if (err < 0)
+               goto out_unlock;
+
        err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
-       xdr_free_bvec(xdr);
        if (err == -ECONNREFUSED) {
                /* ICMP error on earlier request. */
                err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
-               xdr_free_bvec(xdr);
        }
+       xdr_free_bvec(xdr);
        trace_svcsock_udp_send(xprt, err);
-
+out_unlock:
        mutex_unlock(&xprt->xpt_mutex);
        if (err < 0)
                return err;
@@ -1096,7 +1099,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
        int ret;
 
        *sentp = 0;
-       xdr_alloc_bvec(xdr, GFP_KERNEL);
+       ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
+       if (ret < 0)
+               return ret;
 
        ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
        if (ret < 0)
index 515501f79290984bfd6ea9f659000d3264860145..86d62cffba0dd178d9c9c2529197d2f171c192de 100644 (file)
 /*
  * Local functions
  */
-static void     xprt_init(struct rpc_xprt *xprt, struct net *net);
+static void    xprt_init(struct rpc_xprt *xprt, struct net *net);
 static __be32  xprt_alloc_xid(struct rpc_xprt *xprt);
-static void     xprt_destroy(struct rpc_xprt *xprt);
-static void     xprt_request_init(struct rpc_task *task);
+static void    xprt_destroy(struct rpc_xprt *xprt);
+static void    xprt_request_init(struct rpc_task *task);
+static int     xprt_request_prepare(struct rpc_rqst *req);
 
 static DEFINE_SPINLOCK(xprt_list_lock);
 static LIST_HEAD(xprt_list);
@@ -929,12 +930,7 @@ void xprt_connect(struct rpc_task *task)
        if (!xprt_lock_write(xprt, task))
                return;
 
-       if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
-               trace_xprt_disconnect_cleanup(xprt);
-               xprt->ops->close(xprt);
-       }
-
-       if (!xprt_connected(xprt)) {
+       if (!xprt_connected(xprt) && !test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
                task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
                rpc_sleep_on_timeout(&xprt->pending, task, NULL,
                                xprt_request_timeout(task->tk_rqstp));
@@ -1143,16 +1139,19 @@ xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req)
  * @task: RPC task
  *
  */
-void
+int
 xprt_request_enqueue_receive(struct rpc_task *task)
 {
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_xprt *xprt = req->rq_xprt;
+       int ret;
 
        if (!xprt_request_need_enqueue_receive(task, req))
-               return;
+               return 0;
 
-       xprt_request_prepare(task->tk_rqstp);
+       ret = xprt_request_prepare(task->tk_rqstp);
+       if (ret)
+               return ret;
        spin_lock(&xprt->queue_lock);
 
        /* Update the softirq receive buffer */
@@ -1166,6 +1165,7 @@ xprt_request_enqueue_receive(struct rpc_task *task)
 
        /* Turn off autodisconnect */
        del_singleshot_timer_sync(&xprt->timer);
+       return 0;
 }
 
 /**
@@ -1452,14 +1452,16 @@ xprt_request_dequeue_xprt(struct rpc_task *task)
  *
  * Calls into the transport layer to do whatever is needed to prepare
  * the request for transmission or receive.
+ * Returns zero on success, or a negative error.
  */
-void
+static int
 xprt_request_prepare(struct rpc_rqst *req)
 {
        struct rpc_xprt *xprt = req->rq_xprt;
 
        if (xprt->ops->prepare_request)
-               xprt->ops->prepare_request(req);
+               return xprt->ops->prepare_request(req);
+       return 0;
 }
 
 /**
index cf76a6ad127b26603d0c07ea1a8845dd80527577..864131a9fc6e38e99758e84c579f4d0e87bf7b74 100644 (file)
@@ -831,7 +831,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
                goto out_err;
        if (ret == 0)
                goto out_drop;
-       rqstp->rq_xprt_hlen = ret;
+       rqstp->rq_xprt_hlen = 0;
 
        if (svc_rdma_is_reverse_direction_reply(xprt, ctxt))
                goto out_backchannel;
index 78af7518f263ddcd9c6726698d802367aafa34d6..650102a9c86a9043a3eee8547e0031dcb1f363cb 100644 (file)
@@ -822,12 +822,17 @@ static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait)
        return ret;
 }
 
-static void
+static int
 xs_stream_prepare_request(struct rpc_rqst *req)
 {
+       gfp_t gfp = rpc_task_gfp_mask();
+       int ret;
+
+       ret = xdr_alloc_bvec(&req->rq_snd_buf, gfp);
+       if (ret < 0)
+               return ret;
        xdr_free_bvec(&req->rq_rcv_buf);
-       req->rq_task->tk_status = xdr_alloc_bvec(
-               &req->rq_rcv_buf, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+       return xdr_alloc_bvec(&req->rq_rcv_buf, gfp);
 }
 
 /*
@@ -879,7 +884,7 @@ static int xs_local_send_request(struct rpc_rqst *req)
 
        /* Close the stream if the previous transmission was incomplete */
        if (xs_send_request_was_aborted(transport, req)) {
-               xs_close(xprt);
+               xprt_force_disconnect(xprt);
                return -ENOTCONN;
        }
 
@@ -915,7 +920,7 @@ static int xs_local_send_request(struct rpc_rqst *req)
                        -status);
                fallthrough;
        case -EPIPE:
-               xs_close(xprt);
+               xprt_force_disconnect(xprt);
                status = -ENOTCONN;
        }
 
@@ -956,6 +961,9 @@ static int xs_udp_send_request(struct rpc_rqst *req)
        if (!xprt_request_get_cong(xprt, req))
                return -EBADSLT;
 
+       status = xdr_alloc_bvec(xdr, rpc_task_gfp_mask());
+       if (status < 0)
+               return status;
        req->rq_xtime = ktime_get();
        status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent);
 
@@ -1185,6 +1193,16 @@ static void xs_reset_transport(struct sock_xprt *transport)
 
        if (sk == NULL)
                return;
+       /*
+        * Make sure we're calling this in a context from which it is safe
+        * to call __fput_sync(). In practice that means rpciod and the
+        * system workqueue.
+        */
+       if (!(current->flags & PF_WQ_WORKER)) {
+               WARN_ON_ONCE(1);
+               set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+               return;
+       }
 
        if (atomic_read(&transport->xprt.swapper))
                sk_clear_memalloc(sk);
@@ -1208,7 +1226,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
        mutex_unlock(&transport->recv_mutex);
 
        trace_rpc_socket_close(xprt, sock);
-       fput(filp);
+       __fput_sync(filp);
 
        xprt_disconnect_done(xprt);
 }
@@ -1400,6 +1418,26 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
 }
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
+/**
+ * xs_local_state_change - callback to handle AF_LOCAL socket state changes
+ * @sk: socket whose state has changed
+ *
+ */
+static void xs_local_state_change(struct sock *sk)
+{
+       struct rpc_xprt *xprt;
+       struct sock_xprt *transport;
+
+       if (!(xprt = xprt_from_sock(sk)))
+               return;
+       transport = container_of(xprt, struct sock_xprt, xprt);
+       if (sk->sk_shutdown & SHUTDOWN_MASK) {
+               clear_bit(XPRT_CONNECTED, &xprt->state);
+               /* Trigger the socket release */
+               xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
+       }
+}
+
 /**
  * xs_tcp_state_change - callback to handle TCP socket state changes
  * @sk: socket whose state has changed
@@ -1848,6 +1886,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
                sk->sk_user_data = xprt;
                sk->sk_data_ready = xs_data_ready;
                sk->sk_write_space = xs_udp_write_space;
+               sk->sk_state_change = xs_local_state_change;
                sk->sk_error_report = xs_error_report;
 
                xprt_clear_connected(xprt);
@@ -1932,6 +1971,9 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        int ret;
 
+       if (transport->file)
+               goto force_disconnect;
+
        if (RPC_IS_ASYNC(task)) {
                /*
                 * We want the AF_LOCAL connect to be resolved in the
@@ -1944,11 +1986,17 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
                 */
                task->tk_rpc_status = -ENOTCONN;
                rpc_exit(task, -ENOTCONN);
-               return;
+               goto out_wake;
        }
        ret = xs_local_setup_socket(transport);
        if (ret && !RPC_IS_SOFTCONN(task))
                msleep_interruptible(15000);
+       return;
+force_disconnect:
+       xprt_force_disconnect(xprt);
+out_wake:
+       xprt_clear_connecting(xprt);
+       xprt_wake_pending_tasks(xprt, -ENOTCONN);
 }
 
 #if IS_ENABLED(CONFIG_SUNRPC_SWAP)
@@ -2544,6 +2592,9 @@ static int bc_sendto(struct rpc_rqst *req)
        int err;
 
        req->rq_xtime = ktime_get();
+       err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask());
+       if (err < 0)
+               return err;
        err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent);
        xdr_free_bvec(xdr);
        if (err < 0 || sent != (xdr->len + sizeof(marker)))
@@ -2824,9 +2875,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
                }
                xprt_set_bound(xprt);
                xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
-               ret = ERR_PTR(xs_local_setup_socket(transport));
-               if (ret)
-                       goto out_err;
                break;
        default:
                ret = ERR_PTR(-EAFNOSUPPORT);
index 12f7b56771d9a5a6cf7700fbc37195b2374ef796..3919fe2c58c5c22926a3f4fe31b1438590cc505c 100644 (file)
@@ -483,11 +483,13 @@ handle_error:
                copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
                copy = min_t(size_t, copy, (max_open_record_len - record->len));
 
-               rc = tls_device_copy_data(page_address(pfrag->page) +
-                                         pfrag->offset, copy, msg_iter);
-               if (rc)
-                       goto handle_error;
-               tls_append_frag(record, pfrag, copy);
+               if (copy) {
+                       rc = tls_device_copy_data(page_address(pfrag->page) +
+                                                 pfrag->offset, copy, msg_iter);
+                       if (rc)
+                               goto handle_error;
+                       tls_append_frag(record, pfrag, copy);
+               }
 
                size -= copy;
                if (!size) {
@@ -1345,7 +1347,10 @@ static int tls_device_down(struct net_device *netdev)
 
                /* Device contexts for RX and TX will be freed on sk_destruct
                 * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
+                * Now release the ref taken above.
                 */
+               if (refcount_dec_and_test(&ctx->refcount))
+                       tls_device_free_ctx(ctx);
        }
 
        up_write(&device_offload_lock);
index 0024a692f0f8e25f51c24e1899267f49e5a734ff..a8976ef95528e19e088d7ce516cfb2d9a6db748f 100644 (file)
@@ -1496,7 +1496,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
        if (prot->version == TLS_1_3_VERSION ||
            prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305)
                memcpy(iv + iv_offset, tls_ctx->rx.iv,
-                      crypto_aead_ivsize(ctx->aead_recv));
+                      prot->iv_size + prot->salt_size);
        else
                memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size);
 
index ee1c2b6b69711b97206a34aae86cb47acea93e54..1a3551b6d18bb6ceec45c75ade76e1b7a51d9277 100644 (file)
@@ -528,7 +528,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
                                   .len = IEEE80211_MAX_MESH_ID_LEN },
        [NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT,
 
-       [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 },
+       /* allow 3 for NUL-termination; we used to declare this NLA_STRING */
+       [NL80211_ATTR_REG_ALPHA2] = NLA_POLICY_RANGE(NLA_BINARY, 2, 3),
        [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED },
 
        [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 },
@@ -3172,6 +3173,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
        } else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) {
                chandef->width =
                        nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]);
+               if (chandef->chan->band == NL80211_BAND_S1GHZ) {
+                       /* User input error: channel width doesn't match the channel */
+                       if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) {
+                               NL_SET_ERR_MSG_ATTR(extack,
+                                                   attrs[NL80211_ATTR_CHANNEL_WIDTH],
+                                                   "bad channel width");
+                               return -EINVAL;
+                       }
+               }
                if (attrs[NL80211_ATTR_CENTER_FREQ1]) {
                        chandef->center_freq1 =
                                nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]);
@@ -11656,18 +11666,23 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
        struct cfg80211_bitrate_mask mask;
        struct cfg80211_registered_device *rdev = info->user_ptr[0];
        struct net_device *dev = info->user_ptr[1];
+       struct wireless_dev *wdev = dev->ieee80211_ptr;
        int err;
 
        if (!rdev->ops->set_bitrate_mask)
                return -EOPNOTSUPP;
 
+       wdev_lock(wdev);
        err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
                                            NL80211_ATTR_TX_RATES, &mask,
                                            dev, true);
        if (err)
-               return err;
+               goto out;
 
-       return rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+       err = rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+out:
+       wdev_unlock(wdev);
+       return err;
 }
 
 static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
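
The alpha2 policy change above widens the accepted attribute length from exactly 2 to 2-or-3 bytes, because some userspace sends the ISO country code with a trailing NUL. The policy itself only validates length; the content check sketched below is an assumption about what a consumer would still have to do:

	#include <stdbool.h>
	#include <stdio.h>

	static bool alpha2_ok(const char *buf, int len)
	{
		if (len != 2 && len != 3)
			return false;
		if (len == 3 && buf[2] != '\0')
			return false;	/* a third byte may only be the NUL */
		return true;
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       alpha2_ok("US", 2),	/* 1: bare two-letter payload */
		       alpha2_ok("US", 3),	/* 1: NUL-terminated payload */
		       alpha2_ok("USA", 3));	/* 0: third byte is not a NUL */
		return 0;
	}
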
index b2fdac96bab07fd60a6aeacc9574b34ab582fc71..6d82bd9eaf8c7375f36d2fc5ada36e307d38de43 100644 (file)
@@ -1829,7 +1829,7 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
                if (tmp && tmp->datalen >= sizeof(struct ieee80211_s1g_oper_ie)) {
                        struct ieee80211_s1g_oper_ie *s1gop = (void *)tmp->data;
 
-                       return s1gop->primary_ch;
+                       return s1gop->oper_ch;
                }
        } else {
                tmp = cfg80211_find_elem(WLAN_EID_DS_PARAMS, ie, ielen);
@@ -2018,11 +2018,13 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
                /* this is a nontransmitting bss, we need to add it to
                 * transmitting bss' list if it is not there
                 */
+               spin_lock_bh(&rdev->bss_lock);
                if (cfg80211_add_nontrans_list(non_tx_data->tx_bss,
                                               &res->pub)) {
                        if (__cfg80211_unlink_bss(rdev, res))
                                rdev->bss_generation++;
                }
+               spin_unlock_bh(&rdev->bss_lock);
        }
 
        trace_cfg80211_return_bss(&res->pub);
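
The scan.c hunk wraps the check-and-link of a nontransmitting BSS and the conditional unlink in one bss_lock critical section, so no other updater can slip in between the failed add and the cleanup. The shape of that locking, as a runnable userspace sketch (a pthread mutex stands in for the spinlock; the helpers are placeholders):

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	static pthread_mutex_t bss_lock = PTHREAD_MUTEX_INITIALIZER;

	static bool add_nontrans_list(void) { return true; /* pretend the add failed */ }
	static void unlink_bss(void)        { puts("unlinked under the same lock"); }

	static void inform_single_bss(void)
	{
		pthread_mutex_lock(&bss_lock);
		if (add_nontrans_list())	/* nonzero means the link failed */
			unlink_bss();		/* unlink while still holding the lock */
		pthread_mutex_unlock(&bss_lock);
	}

	int main(void)
	{
		inform_single_bss();
		return 0;
	}
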
index 2c34caee0fd115b099a5de5f5c43537214fa73bb..3a9348030e207263f664734911140157bca91086 100644 (file)
@@ -639,7 +639,7 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len
        if (sk_can_busy_loop(sk))
                sk_busy_loop(sk, 1); /* only support non-blocking sockets */
 
-       if (xsk_no_wakeup(sk))
+       if (xs->zc && xsk_no_wakeup(sk))
                return 0;
 
        pool = xs->pool;
@@ -967,6 +967,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 
                        xp_get_pool(umem_xs->pool);
                        xs->pool = umem_xs->pool;
+
+                       /* If the underlying shared umem was created without
+                        * a Tx ring, allocate the Tx descs array that the
+                        * Tx batching API uses.
+                        */
+                       if (xs->tx && !xs->pool->tx_descs) {
+                               err = xp_alloc_tx_descs(xs->pool, xs);
+                               if (err) {
+                                       xp_put_pool(xs->pool);
+                                       sockfd_put(sock);
+                                       goto out_unlock;
+                               }
+                       }
                }
 
                xdp_get_umem(umem_xs->umem);
index af040ffa14ff3b24aac47024eb17834d92e10864..87bdd71c7bb66cb3ba0cb6c7fa91b5cd9e52dcca 100644 (file)
@@ -42,6 +42,16 @@ void xp_destroy(struct xsk_buff_pool *pool)
        kvfree(pool);
 }
 
+int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs)
+{
+       pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs),
+                                 GFP_KERNEL);
+       if (!pool->tx_descs)
+               return -ENOMEM;
+
+       return 0;
+}
+
 struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
                                                struct xdp_umem *umem)
 {
@@ -59,11 +69,9 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
        if (!pool->heads)
                goto out;
 
-       if (xs->tx) {
-               pool->tx_descs = kcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), GFP_KERNEL);
-               if (!pool->tx_descs)
+       if (xs->tx)
+               if (xp_alloc_tx_descs(pool, xs))
                        goto out;
-       }
 
        pool->chunk_mask = ~((u64)umem->chunk_size - 1);
        pool->addrs_cnt = umem->size;
index 19aa994f5d2c2bffed1d306b2c395b45c7df25a3..f1876ea61fdce29d15be13c9635b0d6cf7c90587 100644 (file)
@@ -2593,12 +2593,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 
                if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
                        __u32 mark = 0;
+                       int oif;
 
                        if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
                                mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
 
                        family = xfrm[i]->props.family;
-                       dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
+                       oif = fl->flowi_oif ? : fl->flowi_l3mdev;
+                       dst = xfrm_dst_lookup(xfrm[i], tos, oif,
                                              &saddr, &daddr, family, mark);
                        err = PTR_ERR(dst);
                        if (IS_ERR(dst))
@@ -3742,7 +3744,7 @@ static int stale_bundle(struct dst_entry *dst)
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
 {
        while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) {
-               dst->dev = dev_net(dev)->loopback_dev;
+               dst->dev = blackhole_netdev;
                dev_hold(dst->dev);
                dev_put(dev);
        }
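
The oif fallback uses GCC's binary ?: extension, which evaluates its left operand once and substitutes the right one only when the left is zero; here that means "prefer flowi_oif, else fall back to the L3 master device". An equivalent spelled-out form for comparison (values made up):

	#include <stdio.h>

	int main(void)
	{
		int flowi_oif = 0, flowi_l3mdev = 42;

		int oif  = flowi_oif ? : flowi_l3mdev;			/* GNU extension */
		int oif2 = flowi_oif ? flowi_oif : flowi_l3mdev;	/* portable form */

		printf("%d %d\n", oif, oif2);				/* prints: 42 42 */
		return 0;
	}
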
index 9fdd8e7c2a458b3cac9224ee3c4c33e48d30e057..951388334a3fa01d8c2ac23b0a4a2c69d5712dfb 100644 (file)
@@ -25,11 +25,11 @@ TRACE_CUSTOM_EVENT(sched_switch,
         * that the custom event is using.
         */
        TP_PROTO(bool preempt,
-                unsigned int prev_state,
                 struct task_struct *prev,
-                struct task_struct *next),
+                struct task_struct *next,
+                unsigned int prev_state),
 
-       TP_ARGS(preempt, prev_state, prev, next),
+       TP_ARGS(preempt, prev, next, prev_state),
 
        /*
         * The next fields are where the customization happens.
index 9717e6f6fb31498aec1075220e346c769648decf..3911bfc217026491eafe6d62b3161f58b270e189 100644 (file)
@@ -231,9 +231,9 @@ objtool_args =                                                              \
        $(if $(part-of-module), --module)                               \
        $(if $(CONFIG_X86_KERNEL_IBT), --lto --ibt)                     \
        $(if $(CONFIG_FRAME_POINTER),, --no-fp)                         \
-       $(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\
+       $(if $(CONFIG_GCOV_KERNEL), --no-unreachable)                   \
        $(if $(CONFIG_RETPOLINE), --retpoline)                          \
-       $(if $(CONFIG_X86_SMAP), --uaccess)                             \
+       --uaccess                                                       \
        $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount)             \
        $(if $(CONFIG_SLS), --sls)
 
index 589454bce93013e5fb0802873f0589b00721b20b..8425da41de0dab49c33c2f93e4eb7f1ccf095d79 100644 (file)
@@ -86,25 +86,31 @@ static struct plugin_info latent_entropy_plugin_info = {
        .help           = "disable\tturn off latent entropy instrumentation\n",
 };
 
-static unsigned HOST_WIDE_INT seed;
-/*
- * get_random_seed() (this is a GCC function) generates the seed.
- * This is a simple random generator without any cryptographic security because
- * the entropy doesn't come from here.
- */
+static unsigned HOST_WIDE_INT deterministic_seed;
+static unsigned HOST_WIDE_INT rnd_buf[32];
+static size_t rnd_idx = ARRAY_SIZE(rnd_buf);
+static int urandom_fd = -1;
+
 static unsigned HOST_WIDE_INT get_random_const(void)
 {
-       unsigned int i;
-       unsigned HOST_WIDE_INT ret = 0;
-
-       for (i = 0; i < 8 * sizeof(ret); i++) {
-               ret = (ret << 1) | (seed & 1);
-               seed >>= 1;
-               if (ret & 1)
-                       seed ^= 0xD800000000000000ULL;
+       if (deterministic_seed) {
+               unsigned HOST_WIDE_INT w = deterministic_seed;
+               w ^= w << 13;
+               w ^= w >> 7;
+               w ^= w << 17;
+               deterministic_seed = w;
+               return deterministic_seed;
        }
 
-       return ret;
+       if (urandom_fd < 0) {
+               urandom_fd = open("/dev/urandom", O_RDONLY);
+               gcc_assert(urandom_fd >= 0);
+       }
+       if (rnd_idx >= ARRAY_SIZE(rnd_buf)) {
+               gcc_assert(read(urandom_fd, rnd_buf, sizeof(rnd_buf)) == sizeof(rnd_buf));
+               rnd_idx = 0;
+       }
+       return rnd_buf[rnd_idx++];
 }
 
 static tree tree_get_random_const(tree type)
@@ -537,8 +543,6 @@ static void latent_entropy_start_unit(void *gcc_data __unused,
        tree type, id;
        int quals;
 
-       seed = get_random_seed(false);
-
        if (in_lto_p)
                return;
 
@@ -573,6 +577,12 @@ __visible int plugin_init(struct plugin_name_args *plugin_info,
        const struct plugin_argument * const argv = plugin_info->argv;
        int i;
 
+       /*
+        * Call get_random_seed() with noinit=true, so that this returns
+        * 0 in the case where no seed has been passed via -frandom-seed.
+        */
+       deterministic_seed = get_random_seed(true);
+
        static const struct ggc_root_tab gt_ggc_r_gt_latent_entropy[] = {
                {
                        .base = &latent_entropy_decl,
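
With this change the plugin draws its constants from /dev/urandom, except when the builder passes -frandom-seed: then get_random_seed(true) returns a nonzero value and the constants come from a xorshift64 stream, keeping builds reproducible. A userspace sketch of that deterministic branch (the seed value is made up; note that a zero seed would stay zero forever, which is exactly why zero selects the /dev/urandom path instead):

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t seed = 0x123456789abcdef0ULL;	/* stand-in for -frandom-seed */

	static uint64_t xorshift64(void)
	{
		uint64_t w = seed;

		w ^= w << 13;
		w ^= w >> 7;
		w ^= w << 17;
		seed = w;
		return w;
	}

	int main(void)
	{
		for (int i = 0; i < 3; i++)
			printf("%016llx\n", (unsigned long long)xorshift64());
		return 0;	/* same seed -> same three constants, every build */
	}
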
index 20f44504a644b900eafebe483c2d2a6f14dbe46f..e036b0bbb42cf9f4127edeb8d04ce6001bc98986 100755 (executable)
@@ -140,15 +140,15 @@ objtool_link()
                if ! is_enabled CONFIG_FRAME_POINTER; then
                        objtoolopt="${objtoolopt} --no-fp"
                fi
-               if is_enabled CONFIG_GCOV_KERNEL || is_enabled CONFIG_LTO_CLANG; then
+               if is_enabled CONFIG_GCOV_KERNEL; then
                        objtoolopt="${objtoolopt} --no-unreachable"
                fi
                if is_enabled CONFIG_RETPOLINE; then
                        objtoolopt="${objtoolopt} --retpoline"
                fi
-               if is_enabled CONFIG_X86_SMAP; then
-                       objtoolopt="${objtoolopt} --uaccess"
-               fi
+
+               objtoolopt="${objtoolopt} --uaccess"
+
                if is_enabled CONFIG_SLS; then
                        objtoolopt="${objtoolopt} --sls"
                fi
index 0ae4e4e57a401ebb36f09ba59a4e7b60b9c67846..3fb8f9026e9be80dfa448212052f18d7b9f67f1c 100644 (file)
@@ -179,7 +179,8 @@ int hashtab_duplicate(struct hashtab *new, struct hashtab *orig,
                        kmem_cache_free(hashtab_node_cachep, cur);
                }
        }
-       kmem_cache_free(hashtab_node_cachep, new);
+       kfree(new->htable);
+       memset(new, 0, sizeof(*new));
        return -ENOMEM;
 }
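
The old error path freed new with kmem_cache_free(), but new is the caller's embedded struct, not a cache object this function allocated; the fix frees only the bucket array it did allocate and zeroes the struct so the caller sees a sane empty table. The same shape in a self-contained sketch (simplified stand-in types, plain calloc/free):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct tab {
		void **htable;
		int size;
	};

	static int tab_duplicate(struct tab *new, int size, int fail)
	{
		new->htable = calloc(size, sizeof(*new->htable));
		if (!new->htable)
			goto err;
		new->size = size;
		if (fail)			/* simulate a mid-copy allocation failure */
			goto err;
		return 0;
	err:
		free(new->htable);		/* free internals only, never "new" itself */
		memset(new, 0, sizeof(*new));	/* leave the caller's struct sane */
		return -1;
	}

	int main(void)
	{
		struct tab t;
		int err = tab_duplicate(&t, 16, 1);

		printf("err=%d htable=%p size=%d\n", err, (void *)t.htable, t.size);
		return 0;
	}
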
 
index 31ba7024e3addfd5e7ffdac9f792a2bab27e1828..726a8353201f834048f5a847c03fe95806441459 100644 (file)
@@ -209,6 +209,12 @@ static void __snd_card_release(struct device *dev, void *data)
  * snd_card_register(), the very first devres action to call snd_card_free()
  * is added automatically.  In that way, the resource disconnection is assured
  * at first, then released in the expected order.
+ *
+ * If an error happens at the probe before snd_card_register() is called and
+ * there have been other devres resources, you'd need to free the card manually
+ * via a snd_card_free() call in the error path; otherwise it may lead to a
+ * UAF due to the devres call order.  You can use the snd_card_free_on_error()
+ * helper to handle this more easily.
  */
 int snd_devm_card_new(struct device *parent, int idx, const char *xid,
                      struct module *module, size_t extra_size,
@@ -235,6 +241,28 @@ int snd_devm_card_new(struct device *parent, int idx, const char *xid,
 }
 EXPORT_SYMBOL_GPL(snd_devm_card_new);
 
+/**
+ * snd_card_free_on_error - a small helper for handling devm probe errors
+ * @dev: the managed device object
+ * @ret: the return code from the probe callback
+ *
+ * This function handles the explicit snd_card_free() call on an error from
+ * the probe callback.  It's just a small helper for simplifying the error
+ * handling with the managed devices.
+ */
+int snd_card_free_on_error(struct device *dev, int ret)
+{
+       struct snd_card *card;
+
+       if (!ret)
+               return 0;
+       card = devres_find(dev, __snd_card_release, NULL, NULL);
+       if (card)
+               snd_card_free(card);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(snd_card_free_on_error);
+
 static int snd_card_init(struct snd_card *card, struct device *parent,
                         int idx, const char *xid, struct module *module,
                         size_t extra_size)
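
Many of the driver hunks later in this pull apply the helper mechanically: the real probe body is renamed to a __-prefixed function and a thin wrapper feeds its return value through snd_card_free_on_error(). A stripped-down userspace analogue of that wrapper shape (only the snd_* naming convention is real; everything else is illustrative):

	#include <stdio.h>

	struct card { int dummy; };

	static void card_free(struct card *c) { printf("card %p freed\n", (void *)c); }

	/* analogue of snd_card_free_on_error(): pass errors through, freeing first */
	static int free_on_error(struct card *c, int ret)
	{
		if (!ret)
			return 0;
		card_free(c);
		return ret;
	}

	static struct card the_card;

	static int __probe(void)
	{
		/* ... setup that may fail after partial allocation ... */
		return -12;	/* simulate -ENOMEM */
	}

	static int probe(void)
	{
		return free_on_error(&the_card, __probe());
	}

	int main(void)
	{
		printf("probe -> %d\n", probe());
		return 0;
	}
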
index 6fd763d4d15b19457707d7b12b45579e87ac77fe..15dc7160ba34e8433b3de4ef5467555e64be4516 100644 (file)
@@ -499,6 +499,10 @@ static const struct snd_malloc_ops snd_dma_wc_ops = {
 };
 #endif /* CONFIG_X86 */
 
+#ifdef CONFIG_SND_DMA_SGBUF
+static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size);
+#endif
+
 /*
  * Non-contiguous pages allocator
  */
@@ -509,8 +513,18 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size)
 
        sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir,
                                      DEFAULT_GFP, 0);
-       if (!sgt)
+       if (!sgt) {
+#ifdef CONFIG_SND_DMA_SGBUF
+               if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
+                       dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
+               else
+                       dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK;
+               return snd_dma_sg_fallback_alloc(dmab, size);
+#else
                return NULL;
+#endif
+       }
+
        dmab->dev.need_sync = dma_need_sync(dmab->dev.dev,
                                            sg_dma_address(sgt->sgl));
        p = dma_vmap_noncontiguous(dmab->dev.dev, size, sgt);
@@ -633,6 +647,8 @@ static void *snd_dma_sg_wc_alloc(struct snd_dma_buffer *dmab, size_t size)
 
        if (!p)
                return NULL;
+       if (dmab->dev.type != SNDRV_DMA_TYPE_DEV_WC_SG)
+               return p;
        for_each_sgtable_page(sgt, &iter, 0)
                set_memory_wc(sg_wc_address(&iter), 1);
        return p;
@@ -665,6 +681,95 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = {
        .get_page = snd_dma_noncontig_get_page,
        .get_chunk_size = snd_dma_noncontig_get_chunk_size,
 };
+
+/* Fallback SG-buffer allocations for x86 */
+struct snd_dma_sg_fallback {
+       size_t count;
+       struct page **pages;
+       dma_addr_t *addrs;
+};
+
+static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab,
+                                      struct snd_dma_sg_fallback *sgbuf)
+{
+       size_t i;
+
+       if (sgbuf->count && dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK)
+               set_pages_array_wb(sgbuf->pages, sgbuf->count);
+       for (i = 0; i < sgbuf->count && sgbuf->pages[i]; i++)
+               dma_free_coherent(dmab->dev.dev, PAGE_SIZE,
+                                 page_address(sgbuf->pages[i]),
+                                 sgbuf->addrs[i]);
+       kvfree(sgbuf->pages);
+       kvfree(sgbuf->addrs);
+       kfree(sgbuf);
+}
+
+static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size)
+{
+       struct snd_dma_sg_fallback *sgbuf;
+       struct page **pages;
+       size_t i, count;
+       void *p;
+
+       sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL);
+       if (!sgbuf)
+               return NULL;
+       count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       pages = kvcalloc(count, sizeof(*pages), GFP_KERNEL);
+       if (!pages)
+               goto error;
+       sgbuf->pages = pages;
+       sgbuf->addrs = kvcalloc(count, sizeof(*sgbuf->addrs), GFP_KERNEL);
+       if (!sgbuf->addrs)
+               goto error;
+
+       for (i = 0; i < count; sgbuf->count++, i++) {
+               p = dma_alloc_coherent(dmab->dev.dev, PAGE_SIZE,
+                                      &sgbuf->addrs[i], DEFAULT_GFP);
+               if (!p)
+                       goto error;
+               sgbuf->pages[i] = virt_to_page(p);
+       }
+
+       if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK)
+               set_pages_array_wc(pages, count);
+       p = vmap(pages, count, VM_MAP, PAGE_KERNEL);
+       if (!p)
+               goto error;
+       dmab->private_data = sgbuf;
+       return p;
+
+ error:
+       __snd_dma_sg_fallback_free(dmab, sgbuf);
+       return NULL;
+}
+
+static void snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab)
+{
+       vunmap(dmab->area);
+       __snd_dma_sg_fallback_free(dmab, dmab->private_data);
+}
+
+static int snd_dma_sg_fallback_mmap(struct snd_dma_buffer *dmab,
+                                   struct vm_area_struct *area)
+{
+       struct snd_dma_sg_fallback *sgbuf = dmab->private_data;
+
+       if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK)
+               area->vm_page_prot = pgprot_writecombine(area->vm_page_prot);
+       return vm_map_pages(area, sgbuf->pages, sgbuf->count);
+}
+
+static const struct snd_malloc_ops snd_dma_sg_fallback_ops = {
+       .alloc = snd_dma_sg_fallback_alloc,
+       .free = snd_dma_sg_fallback_free,
+       .mmap = snd_dma_sg_fallback_mmap,
+       /* reuse vmalloc helpers */
+       .get_addr = snd_dma_vmalloc_get_addr,
+       .get_page = snd_dma_vmalloc_get_page,
+       .get_chunk_size = snd_dma_vmalloc_get_chunk_size,
+};
 #endif /* CONFIG_SND_DMA_SGBUF */
 
 /*
@@ -736,6 +841,10 @@ static const struct snd_malloc_ops *dma_ops[] = {
 #ifdef CONFIG_GENERIC_ALLOCATOR
        [SNDRV_DMA_TYPE_DEV_IRAM] = &snd_dma_iram_ops,
 #endif /* CONFIG_GENERIC_ALLOCATOR */
+#ifdef CONFIG_SND_DMA_SGBUF
+       [SNDRV_DMA_TYPE_DEV_SG_FALLBACK] = &snd_dma_sg_fallback_ops,
+       [SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK] = &snd_dma_sg_fallback_ops,
+#endif
 #endif /* CONFIG_HAS_DMA */
 };
 
index 4866aed97aacc074b8a88e5ff4218ca59d5b7881..5588b6a1ee8bd0c7fe57388c755899066b312943 100644 (file)
@@ -433,7 +433,7 @@ int snd_pcm_format_set_silence(snd_pcm_format_t format, void *data, unsigned int
                return 0;
        width = pcm_formats[(INT)format].phys; /* physical width */
        pat = pcm_formats[(INT)format].silence;
-       if (! width)
+       if (!width || !pat)
                return -EINVAL;
        /* signed or 1 byte data */
        if (pcm_formats[(INT)format].signd == 1 || width <= 8) {
index 11235baaf6fa520f188d3ffcab470e7c020bb5b0..f212f233ea618efffb6ebe1d0395aaef80bb651e 100644 (file)
@@ -693,8 +693,6 @@ static int snd_mtpav_probe(struct platform_device *dev)
        mtp_card->outmidihwport = 0xffffffff;
        timer_setup(&mtp_card->timer, snd_mtpav_output_timer, 0);
 
-       card->private_free = snd_mtpav_free;
-
        err = snd_mtpav_get_RAWMIDI(mtp_card);
        if (err < 0)
                return err;
@@ -716,6 +714,8 @@ static int snd_mtpav_probe(struct platform_device *dev)
        if (err < 0)
                return err;
 
+       card->private_free = snd_mtpav_free;
+
        platform_set_drvdata(dev, card);
        printk(KERN_INFO "Motu MidiTimePiece on parallel port irq: %d ioport: 0x%lx\n", irq, port);
        return 0;
index 626c0c34b0b668f9c0bc6b8b8ea635e8f57f86f1..3a53914277d357362d06a72cdf26b1d7589705ea 100644 (file)
@@ -34,6 +34,7 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained,
        type = SNDRV_FIREWIRE_EVENT_EFW_RESPONSE;
        if (copy_to_user(buf, &type, sizeof(type)))
                return -EFAULT;
+       count += sizeof(type);
        remained -= sizeof(type);
        buf += sizeof(type);
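
The added count += sizeof(type) fixes a read-handler accounting bug: every chunk copied out must be added to the running byte count, or the caller is told it received fewer bytes than were actually written. The discipline in a small sketch (memcpy standing in for copy_to_user):

	#include <stdio.h>
	#include <string.h>

	/* copy one field into the output buffer, tracking every byte consumed */
	static int emit(char **buf, long *remained, const void *src, long n, long *count)
	{
		if (*remained < n)
			return -1;
		memcpy(*buf, src, n);
		*count += n;		/* the accounting the fix adds for "type" */
		*remained -= n;
		*buf += n;
		return 0;
	}

	int main(void)
	{
		char out[64], *p = out;
		long remained = sizeof(out), count = 0;
		int type = 4;		/* stand-in for the event-type word */

		emit(&p, &remained, &type, sizeof(type), &count);
		printf("count=%ld\n", count);	/* 4, not 0 */
		return 0;
	}
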
 
index efe810af28c53ae819b4e46494c444061e1e9f61..3f35972e1cf750fe8226e6093f5ec91303759b97 100644 (file)
@@ -116,16 +116,24 @@ static int i915_component_master_match(struct device *dev, int subcomponent,
        return 0;
 }
 
-/* check whether intel graphics is present */
-static bool i915_gfx_present(void)
+/* check whether Intel graphics is present and reachable */
+static int i915_gfx_present(struct pci_dev *hdac_pci)
 {
-       static const struct pci_device_id ids[] = {
-               { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_ANY_ID),
-                 .class = PCI_BASE_CLASS_DISPLAY << 16,
-                 .class_mask = 0xff << 16 },
-               {}
-       };
-       return pci_dev_present(ids);
+       unsigned int class = PCI_BASE_CLASS_DISPLAY << 16;
+       struct pci_dev *display_dev = NULL;
+       bool match = false;
+
+       do {
+               display_dev = pci_get_class(class, display_dev);
+
+               if (display_dev && display_dev->vendor == PCI_VENDOR_ID_INTEL &&
+                   connectivity_check(display_dev, hdac_pci)) {
+                       pci_dev_put(display_dev);
+                       match = true;
+               }
+       } while (!match && display_dev);
+
+       return match;
 }
 
 /**
@@ -145,7 +153,7 @@ int snd_hdac_i915_init(struct hdac_bus *bus)
        struct drm_audio_component *acomp;
        int err;
 
-       if (!i915_gfx_present())
+       if (!i915_gfx_present(to_pci_dev(bus->dev)))
                return -ENODEV;
 
        err = snd_hdac_acomp_init(bus, NULL,
index 70fd8b13938eddd0c588ea0615e7708f936a5ad7..a8fe01764b254bd0ff15390ce2cc1b3fa21c064b 100644 (file)
@@ -390,26 +390,49 @@ static const struct config_entry config_table[] = {
 
 /* Alder Lake */
 #if IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE)
+       /* Alderlake-S */
        {
                .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
                .device = 0x7ad0,
        },
+       /* RaptorLake-S */
        {
                .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
-               .device = 0x51c8,
+               .device = 0x7a50,
        },
+       /* Alderlake-P */
        {
                .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
-               .device = 0x51cc,
+               .device = 0x51c8,
        },
        {
                .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
                .device = 0x51cd,
        },
+       /* Alderlake-PS */
+       {
+               .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
+               .device = 0x51c9,
+       },
+       /* Alderlake-M */
+       {
+               .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
+               .device = 0x51cc,
+       },
+       /* Alderlake-N */
        {
                .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
                .device = 0x54c8,
        },
+       /* RaptorLake-P */
+       {
+               .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
+               .device = 0x51ca,
+       },
+       {
+               .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
+               .device = 0x51cb,
+       },
 #endif
 
 };
index ea001c80149ddd3cf3245878d97ac72af87eb86c..3164eb8510fa4c03121061e762b863e46629e3d1 100644 (file)
@@ -478,7 +478,7 @@ static void snd_galaxy_free(struct snd_card *card)
                galaxy_set_config(galaxy, galaxy->config);
 }
 
-static int snd_galaxy_probe(struct device *dev, unsigned int n)
+static int __snd_galaxy_probe(struct device *dev, unsigned int n)
 {
        struct snd_galaxy *galaxy;
        struct snd_wss *chip;
@@ -598,6 +598,11 @@ static int snd_galaxy_probe(struct device *dev, unsigned int n)
        return 0;
 }
 
+static int snd_galaxy_probe(struct device *dev, unsigned int n)
+{
+       return snd_card_free_on_error(dev, __snd_galaxy_probe(dev, n));
+}
+
 static struct isa_driver snd_galaxy_driver = {
        .match          = snd_galaxy_match,
        .probe          = snd_galaxy_probe,
index 26ab7ff8076845ee703b0cb202262777e71f0606..60398fced046b05f47aba4ad45538ccf4687a7e8 100644 (file)
@@ -537,7 +537,7 @@ static void snd_sc6000_free(struct snd_card *card)
                sc6000_setup_board(vport, 0);
 }
 
-static int snd_sc6000_probe(struct device *devptr, unsigned int dev)
+static int __snd_sc6000_probe(struct device *devptr, unsigned int dev)
 {
        static const int possible_irqs[] = { 5, 7, 9, 10, 11, -1 };
        static const int possible_dmas[] = { 1, 3, 0, -1 };
@@ -662,6 +662,11 @@ static int snd_sc6000_probe(struct device *devptr, unsigned int dev)
        return 0;
 }
 
+static int snd_sc6000_probe(struct device *devptr, unsigned int dev)
+{
+       return snd_card_free_on_error(devptr, __snd_sc6000_probe(devptr, dev));
+}
+
 static struct isa_driver snd_sc6000_driver = {
        .match          = snd_sc6000_match,
        .probe          = snd_sc6000_probe,
index 69cbc79fbb716a902961aedd7e383004be18985e..2aaaa68071744ec9c7e25062788dbb821255cdb6 100644 (file)
@@ -1094,7 +1094,8 @@ wavefront_send_sample (snd_wavefront_t *dev,
 
                        if (dataptr < data_end) {
                
-                               __get_user (sample_short, dataptr);
+                               if (get_user(sample_short, dataptr))
+                                       return -EFAULT;
                                dataptr += skip;
                
                                if (data_is_unsigned) { /* GUS ? */
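
__get_user() performs the user copy without checking access and its result was being ignored here; the fix switches to get_user() and bails out with -EFAULT on failure. The same discipline in a toy sketch: a fallible user copy must have its result checked and propagated (a NULL-checking fetch stands in for get_user):

	#include <stdio.h>

	#define EFAULT 14

	/* stand-in for get_user(): may fail, and the failure must be propagated */
	static int fetch(short *dst, const short *src)
	{
		if (!src)
			return -EFAULT;
		*dst = *src;
		return 0;
	}

	static int send_sample(const short *dataptr)
	{
		short sample;

		if (fetch(&sample, dataptr))	/* previously the result was dropped */
			return -EFAULT;
		return sample;
	}

	int main(void)
	{
		short v = 7;

		printf("%d\n", send_sample(&v));	/* 7 */
		printf("%d\n", send_sample(NULL));	/* -14 */
		return 0;
	}
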
index c1c52b479da264904353d7a7532008885c7d4b12..ad8ce6a1c25c7db3439817d57a7cb55758fa8ab2 100644 (file)
@@ -88,11 +88,7 @@ static inline int ioctl_return(int __user *addr, int value)
      */
 
 extern int dmasound_init(void);
-#ifdef MODULE
 extern void dmasound_deinit(void);
-#else
-#define dmasound_deinit()      do { } while (0)
-#endif
 
 /* description of the set-up applies to either hard or soft settings */
 
@@ -114,9 +110,7 @@ typedef struct {
     void *(*dma_alloc)(unsigned int, gfp_t);
     void (*dma_free)(void *, unsigned int);
     int (*irqinit)(void);
-#ifdef MODULE
     void (*irqcleanup)(void);
-#endif
     void (*init)(void);
     void (*silence)(void);
     int (*setFormat)(int);
index 0c95828ac0b18fff309c3b611cfbc2bc81672f9d..164335d3c200928042f46c5d20c04614b6c61c5b 100644 (file)
@@ -206,12 +206,10 @@ module_param(writeBufSize, int, 0);
 
 MODULE_LICENSE("GPL");
 
-#ifdef MODULE
 static int sq_unit = -1;
 static int mixer_unit = -1;
 static int state_unit = -1;
 static int irq_installed;
-#endif /* MODULE */
 
 /* control over who can modify resources shared between play/record */
 static fmode_t shared_resource_owner;
@@ -391,9 +389,6 @@ static const struct file_operations mixer_fops =
 
 static void mixer_init(void)
 {
-#ifndef MODULE
-       int mixer_unit;
-#endif
        mixer_unit = register_sound_mixer(&mixer_fops, -1);
        if (mixer_unit < 0)
                return;
@@ -1171,9 +1166,6 @@ static const struct file_operations sq_fops =
 static int sq_init(void)
 {
        const struct file_operations *fops = &sq_fops;
-#ifndef MODULE
-       int sq_unit;
-#endif
 
        sq_unit = register_sound_dsp(fops, -1);
        if (sq_unit < 0) {
@@ -1366,9 +1358,6 @@ static const struct file_operations state_fops = {
 
 static int state_init(void)
 {
-#ifndef MODULE
-       int state_unit;
-#endif
        state_unit = register_sound_special(&state_fops, SND_DEV_STATUS);
        if (state_unit < 0)
                return state_unit ;
@@ -1386,10 +1375,9 @@ static int state_init(void)
 int dmasound_init(void)
 {
        int res ;
-#ifdef MODULE
+
        if (irq_installed)
                return -EBUSY;
-#endif
 
        /* Set up sound queue, /dev/audio and /dev/dsp. */
 
@@ -1408,9 +1396,7 @@ int dmasound_init(void)
                printk(KERN_ERR "DMA sound driver: Interrupt initialization failed\n");
                return -ENODEV;
        }
-#ifdef MODULE
        irq_installed = 1;
-#endif
 
        printk(KERN_INFO "%s DMA sound driver rev %03d installed\n",
                dmasound.mach.name, (DMASOUND_CORE_REVISION<<4) +
@@ -1424,8 +1410,6 @@ int dmasound_init(void)
        return 0;
 }
 
-#ifdef MODULE
-
 void dmasound_deinit(void)
 {
        if (irq_installed) {
@@ -1444,9 +1428,7 @@ void dmasound_deinit(void)
                unregister_sound_dsp(sq_unit);
 }
 
-#else /* !MODULE */
-
-static int dmasound_setup(char *str)
+static int __maybe_unused dmasound_setup(char *str)
 {
        int ints[6], size;
 
@@ -1489,8 +1471,6 @@ static int dmasound_setup(char *str)
 
 __setup("dmasound=", dmasound_setup);
 
-#endif /* !MODULE */
-
     /*
      *  Conversion tables
      */
@@ -1577,9 +1557,7 @@ char dmasound_alaw2dma8[] = {
 
 EXPORT_SYMBOL(dmasound);
 EXPORT_SYMBOL(dmasound_init);
-#ifdef MODULE
 EXPORT_SYMBOL(dmasound_deinit);
-#endif
 EXPORT_SYMBOL(dmasound_write_sq);
 EXPORT_SYMBOL(dmasound_catchRadius);
 #ifdef HAS_8BIT_TABLES
index bba4dae8dcc70eb758bc239ec37381ffb31bba9f..50e30704bf6f9ef745c18dc990da6d6931573075 100644 (file)
@@ -844,8 +844,8 @@ snd_ad1889_create(struct snd_card *card, struct pci_dev *pci)
 }
 
 static int
-snd_ad1889_probe(struct pci_dev *pci,
-                const struct pci_device_id *pci_id)
+__snd_ad1889_probe(struct pci_dev *pci,
+                  const struct pci_device_id *pci_id)
 {
        int err;
        static int devno;
@@ -904,6 +904,12 @@ snd_ad1889_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_ad1889_probe(struct pci_dev *pci,
+                           const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_ad1889_probe(pci, pci_id));
+}
+
 static const struct pci_device_id snd_ad1889_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_ANALOG_DEVICES, PCI_DEVICE_ID_AD1889JS) },
        { 0, },
index 92eb59db106de94805eb6b0cc5ba137611f3edd1..2378a39abaebec573a150184cf374d61636d7f7e 100644 (file)
@@ -2124,8 +2124,8 @@ static int snd_ali_create(struct snd_card *card,
        return 0;
 }
 
-static int snd_ali_probe(struct pci_dev *pci,
-                        const struct pci_device_id *pci_id)
+static int __snd_ali_probe(struct pci_dev *pci,
+                          const struct pci_device_id *pci_id)
 {
        struct snd_card *card;
        struct snd_ali *codec;
@@ -2170,6 +2170,12 @@ static int snd_ali_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_ali_probe(struct pci_dev *pci,
+                        const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_ali_probe(pci, pci_id));
+}
+
 static struct pci_driver ali5451_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_ali_ids,
index b86565dcdbe41fe3fb9a86ed00b080296ee5a8fd..c70aff0601205ede92ea2c97a2a999ed21ddbf42 100644 (file)
@@ -708,7 +708,7 @@ static int snd_als300_probe(struct pci_dev *pci,
 
        err = snd_als300_create(card, pci, chip_type);
        if (err < 0)
-               return err;
+               goto error;
 
        strcpy(card->driver, "ALS300");
        if (chip->chip_type == DEVICE_ALS300_PLUS)
@@ -723,11 +723,15 @@ static int snd_als300_probe(struct pci_dev *pci,
 
        err = snd_card_register(card);
        if (err < 0)
-               return err;
+               goto error;
 
        pci_set_drvdata(pci, card);
        dev++;
        return 0;
+
+ error:
+       snd_card_free(card);
+       return err;
 }
 
 static struct pci_driver als300_driver = {
index 535eccd124bee3d1e4a5adb726a4fccad77cad4d..f33aeb692a112ae4e9a539b63792857c024e2f8c 100644 (file)
@@ -806,8 +806,8 @@ static void snd_card_als4000_free( struct snd_card *card )
        snd_als4000_free_gameport(acard);
 }
 
-static int snd_card_als4000_probe(struct pci_dev *pci,
-                                 const struct pci_device_id *pci_id)
+static int __snd_card_als4000_probe(struct pci_dev *pci,
+                                   const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -930,6 +930,12 @@ static int snd_card_als4000_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_card_als4000_probe(struct pci_dev *pci,
+                                 const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_card_als4000_probe(pci, pci_id));
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int snd_als4000_suspend(struct device *dev)
 {
index b8e035d5930d25055dcf0f234b57b7e23d683c61..43d01f1847ed7d3a8301104422bee1b3e2290ef0 100644 (file)
@@ -1572,8 +1572,8 @@ static int snd_atiixp_init(struct snd_card *card, struct pci_dev *pci)
 }
 
 
-static int snd_atiixp_probe(struct pci_dev *pci,
-                           const struct pci_device_id *pci_id)
+static int __snd_atiixp_probe(struct pci_dev *pci,
+                             const struct pci_device_id *pci_id)
 {
        struct snd_card *card;
        struct atiixp *chip;
@@ -1623,6 +1623,12 @@ static int snd_atiixp_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_atiixp_probe(struct pci_dev *pci,
+                           const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_atiixp_probe(pci, pci_id));
+}
+
 static struct pci_driver atiixp_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_atiixp_ids,
index 178dce8ef1e993c28bc0ca48642edaf7b1fbf865..8864c4c3c7e136b6fc7d6f61029ac481bff35743 100644 (file)
@@ -1201,8 +1201,8 @@ static int snd_atiixp_init(struct snd_card *card, struct pci_dev *pci)
 }
 
 
-static int snd_atiixp_probe(struct pci_dev *pci,
-                           const struct pci_device_id *pci_id)
+static int __snd_atiixp_probe(struct pci_dev *pci,
+                             const struct pci_device_id *pci_id)
 {
        struct snd_card *card;
        struct atiixp_modem *chip;
@@ -1247,6 +1247,12 @@ static int snd_atiixp_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_atiixp_probe(struct pci_dev *pci,
+                           const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_atiixp_probe(pci, pci_id));
+}
+
 static struct pci_driver atiixp_modem_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_atiixp_ids,
index 342ef2a6655e3e48ac0d213fffb92b97f0a5c287..eb234153691bc8c7a68a0015ebb87c4a7217dfd3 100644 (file)
@@ -193,7 +193,7 @@ snd_vortex_create(struct snd_card *card, struct pci_dev *pci)
 
 // constructor -- see "Constructor" sub-section
 static int
-snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+__snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -310,6 +310,12 @@ snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
        return 0;
 }
 
+static int
+snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_vortex_probe(pci, pci_id));
+}
+
 // pci_driver definition
 static struct pci_driver vortex_driver = {
        .name = KBUILD_MODNAME,
index d56f126d6fdd92a51f58eddd83114e8fea61e517..29a4bcdec237a9e9da64ab1e95159ae8a3f6a77b 100644 (file)
@@ -275,7 +275,7 @@ static int snd_aw2_probe(struct pci_dev *pci,
        /* (3) Create main component */
        err = snd_aw2_create(card, pci);
        if (err < 0)
-               return err;
+               goto error;
 
        /* initialize mutex */
        mutex_init(&chip->mtx);
@@ -294,13 +294,17 @@ static int snd_aw2_probe(struct pci_dev *pci,
        /* (6) Register card instance */
        err = snd_card_register(card);
        if (err < 0)
-               return err;
+               goto error;
 
        /* (7) Set PCI driver data */
        pci_set_drvdata(pci, card);
 
        dev++;
        return 0;
+
+ error:
+       snd_card_free(card);
+       return err;
 }
 
 /* open callback */
index 089050470ff275dc129f0bfe6d28130f639ea6a0..7f329dfc5404a7089caad847e60cf398048a3097 100644 (file)
@@ -2427,7 +2427,7 @@ snd_azf3328_create(struct snd_card *card,
 }
 
 static int
-snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+__snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -2520,6 +2520,12 @@ snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
        return 0;
 }
 
+static int
+snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_azf3328_probe(pci, pci_id));
+}
+
 #ifdef CONFIG_PM_SLEEP
 static inline void
 snd_azf3328_suspend_regs(const struct snd_azf3328 *chip,
index d23f931638410cc8cf75dd070878745ab3eafe32..621985bfee5d7de0244550ea7c4585bba554b4d3 100644 (file)
@@ -805,8 +805,8 @@ static int snd_bt87x_detect_card(struct pci_dev *pci)
        return SND_BT87X_BOARD_UNKNOWN;
 }
 
-static int snd_bt87x_probe(struct pci_dev *pci,
-                          const struct pci_device_id *pci_id)
+static int __snd_bt87x_probe(struct pci_dev *pci,
+                            const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -889,6 +889,12 @@ static int snd_bt87x_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_bt87x_probe(struct pci_dev *pci,
+                          const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_bt87x_probe(pci, pci_id));
+}
+
 /* default entries for all Bt87x cards - it's not exported */
 /* driver_data is set to 0 to call detection */
 static const struct pci_device_id snd_bt87x_default_ids[] = {
index 8577f9fa5ea6e1a293aa547e6ec801d23d3f6113..cf1bac7a435f1a29f846c5f506cbd54a764b8335 100644 (file)
@@ -1725,8 +1725,8 @@ static int snd_ca0106_midi(struct snd_ca0106 *chip, unsigned int channel)
 }
 
 
-static int snd_ca0106_probe(struct pci_dev *pci,
-                                       const struct pci_device_id *pci_id)
+static int __snd_ca0106_probe(struct pci_dev *pci,
+                             const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -1786,6 +1786,12 @@ static int snd_ca0106_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_ca0106_probe(struct pci_dev *pci,
+                           const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_ca0106_probe(pci, pci_id));
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int snd_ca0106_suspend(struct device *dev)
 {
index dab801d9d3b481f2b86af757b3b45db9c3ede283..727db6d4339161912e446a7181c6c55313d414e2 100644 (file)
@@ -3247,15 +3247,19 @@ static int snd_cmipci_probe(struct pci_dev *pci,
 
        err = snd_cmipci_create(card, pci, dev);
        if (err < 0)
-               return err;
+               goto error;
 
        err = snd_card_register(card);
        if (err < 0)
-               return err;
+               goto error;
 
        pci_set_drvdata(pci, card);
        dev++;
        return 0;
+
+ error:
+       snd_card_free(card);
+       return err;
 }
 
 #ifdef CONFIG_PM_SLEEP
index e7367402b84a34ff1d79cb41d5fecd78ceb10a14..0c9cadf7b3b802f99ad9d820f3e667c0952c41ab 100644 (file)
@@ -1827,8 +1827,8 @@ static void snd_cs4281_opl3_command(struct snd_opl3 *opl3, unsigned short cmd,
        spin_unlock_irqrestore(&opl3->reg_lock, flags);
 }
 
-static int snd_cs4281_probe(struct pci_dev *pci,
-                           const struct pci_device_id *pci_id)
+static int __snd_cs4281_probe(struct pci_dev *pci,
+                             const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -1888,6 +1888,12 @@ static int snd_cs4281_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_cs4281_probe(struct pci_dev *pci,
+                           const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_cs4281_probe(pci, pci_id));
+}
+
 /*
  * Power Management
  */
index 499fa0148f9a42508e484b7e65ba9592ef514e29..440b8f9b40c964b2c5c9cc771b3d492318a463cf 100644 (file)
@@ -281,8 +281,8 @@ static int snd_cs5535audio_create(struct snd_card *card,
        return 0;
 }
 
-static int snd_cs5535audio_probe(struct pci_dev *pci,
-                                const struct pci_device_id *pci_id)
+static int __snd_cs5535audio_probe(struct pci_dev *pci,
+                                  const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -331,6 +331,12 @@ static int snd_cs5535audio_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_cs5535audio_probe(struct pci_dev *pci,
+                                const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_cs5535audio_probe(pci, pci_id));
+}
+
 static struct pci_driver cs5535audio_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_cs5535audio_ids,
index 25b012ef5c3e680f720c7da345e292c31ea5c566..c70c3ac4e99a530d847f240c178d58dc4138a286 100644 (file)
@@ -1970,8 +1970,8 @@ static int snd_echo_create(struct snd_card *card,
 }
 
 /* constructor */
-static int snd_echo_probe(struct pci_dev *pci,
-                         const struct pci_device_id *pci_id)
+static int __snd_echo_probe(struct pci_dev *pci,
+                           const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -2139,6 +2139,11 @@ static int snd_echo_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_echo_probe(struct pci_dev *pci,
+                         const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_echo_probe(pci, pci_id));
+}
 
 
 #if defined(CONFIG_PM_SLEEP)
index c49c44dc10820e67efb8628ed0f2727f17c1c3c8..89043392f3ec73796eaedd612891a8227192aa5c 100644 (file)
@@ -1491,8 +1491,8 @@ static int snd_emu10k1x_midi(struct emu10k1x *emu)
        return 0;
 }
 
-static int snd_emu10k1x_probe(struct pci_dev *pci,
-                             const struct pci_device_id *pci_id)
+static int __snd_emu10k1x_probe(struct pci_dev *pci,
+                               const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -1554,6 +1554,12 @@ static int snd_emu10k1x_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_emu10k1x_probe(struct pci_dev *pci,
+                             const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_emu10k1x_probe(pci, pci_id));
+}
+
 // PCI IDs
 static const struct pci_device_id snd_emu10k1x_ids[] = {
        { PCI_VDEVICE(CREATIVE, 0x0006), 0 },   /* Dell OEM version (EMU10K1) */
index 2651f0c64c062ba72ad8f8a9f3b8ef4ba3875415..94efe347a97a986f3d5f175bcb3f320278e6f284 100644 (file)
@@ -2304,8 +2304,8 @@ static irqreturn_t snd_audiopci_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-static int snd_audiopci_probe(struct pci_dev *pci,
-                             const struct pci_device_id *pci_id)
+static int __snd_audiopci_probe(struct pci_dev *pci,
+                               const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -2369,6 +2369,12 @@ static int snd_audiopci_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_audiopci_probe(struct pci_dev *pci,
+                             const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_audiopci_probe(pci, pci_id));
+}
+
 static struct pci_driver ens137x_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_audiopci_ids,
index 00b976f42a3dbdb5857b41b1aa2337fd0a02ab1e..e34ec6f89e7e00449790c9d3b5a0e52f985215d3 100644 (file)
@@ -1716,8 +1716,8 @@ static int snd_es1938_mixer(struct es1938 *chip)
 }
        
 
-static int snd_es1938_probe(struct pci_dev *pci,
-                           const struct pci_device_id *pci_id)
+static int __snd_es1938_probe(struct pci_dev *pci,
+                             const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -1796,6 +1796,12 @@ static int snd_es1938_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_es1938_probe(struct pci_dev *pci,
+                           const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_es1938_probe(pci, pci_id));
+}
+
 static struct pci_driver es1938_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_es1938_ids,
index 6a8a02a9ecf41d494913450bacf47f6693ba1296..4a7e20bb11bcae4d7d1fdeccd62f42bea450474a 100644 (file)
@@ -2741,8 +2741,8 @@ static int snd_es1968_create(struct snd_card *card,
 
 /*
  */
-static int snd_es1968_probe(struct pci_dev *pci,
-                           const struct pci_device_id *pci_id)
+static int __snd_es1968_probe(struct pci_dev *pci,
+                             const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -2848,6 +2848,12 @@ static int snd_es1968_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_es1968_probe(struct pci_dev *pci,
+                           const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_es1968_probe(pci, pci_id));
+}
+
 static struct pci_driver es1968_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_es1968_ids,
index 9c22ff19e56d26aeef5001b46e2383e388c707a2..62b3cb126c6d01291ab3ea811855c9410747f0df 100644 (file)
@@ -1268,8 +1268,8 @@ static int snd_fm801_create(struct snd_card *card,
        return 0;
 }
 
-static int snd_card_fm801_probe(struct pci_dev *pci,
-                               const struct pci_device_id *pci_id)
+static int __snd_card_fm801_probe(struct pci_dev *pci,
+                                 const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -1333,6 +1333,12 @@ static int snd_card_fm801_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_card_fm801_probe(struct pci_dev *pci,
+                               const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_card_fm801_probe(pci, pci_id));
+}
+
 #ifdef CONFIG_PM_SLEEP
 static const unsigned char saved_regs[] = {
        FM801_PCM_VOL, FM801_I2S_VOL, FM801_FM_VOL, FM801_REC_SRC,
index 3e086eebf88d007b508e1aee2746ceebaf2bfd18..31fe41795571280ef6d6c29c637157edb0f321d5 100644 (file)
@@ -1395,7 +1395,7 @@ static int hdmi_find_pcm_slot(struct hdmi_spec *spec,
 
  last_try:
        /* the last try; check the empty slots in pins */
-       for (i = 0; i < spec->num_nids; i++) {
+       for (i = 0; i < spec->pcm_used; i++) {
                if (!test_bit(i, &spec->pcm_bitmap))
                        return i;
        }
@@ -2325,7 +2325,9 @@ static int generic_hdmi_build_pcms(struct hda_codec *codec)
         * dev_num is the device entry number in a pin
         */
 
-       if (codec->mst_no_extra_pcms)
+       if (spec->dyn_pcm_no_legacy && codec->mst_no_extra_pcms)
+               pcm_num = spec->num_cvts;
+       else if (codec->mst_no_extra_pcms)
                pcm_num = spec->num_nids;
        else
                pcm_num = spec->num_nids + spec->dev_num - 1;
@@ -4551,6 +4553,7 @@ HDA_CODEC_ENTRY(0x80862819, "DG2 HDMI",   patch_i915_adlp_hdmi),
 HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi),
 HDA_CODEC_ENTRY(0x8086281b, "Elkhartlake HDMI",        patch_i915_icl_hdmi),
 HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_adlp_hdmi),
+HDA_CODEC_ENTRY(0x8086281f, "Raptorlake-P HDMI",       patch_i915_adlp_hdmi),
 HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
 HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI",        patch_i915_byt_hdmi),
 HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI",   patch_i915_byt_hdmi),
index 4e12af24b4d35d3a58add1d4def14ac3425ad603..ad292df7d805cae2e70d9875e4e00822051c85c0 100644 (file)
@@ -937,6 +937,9 @@ static int alc_init(struct hda_codec *codec)
        return 0;
 }
 
+#define alc_free       snd_hda_gen_free
+
+#ifdef CONFIG_PM
 static inline void alc_shutup(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
@@ -950,9 +953,6 @@ static inline void alc_shutup(struct hda_codec *codec)
                alc_shutup_pins(codec);
 }
 
-#define alc_free       snd_hda_gen_free
-
-#ifdef CONFIG_PM
 static void alc_power_eapd(struct hda_codec *codec)
 {
        alc_auto_setup_eapd(codec, false);
@@ -966,9 +966,7 @@ static int alc_suspend(struct hda_codec *codec)
                spec->power_hook(codec);
        return 0;
 }
-#endif
 
-#ifdef CONFIG_PM
 static int alc_resume(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
@@ -2619,6 +2617,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
        SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
        SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+       SND_PCI_QUIRK(0x1558, 0x65f5, "Clevo PD50PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
        SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
        SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
        SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
@@ -6779,6 +6778,41 @@ static void alc256_fixup_mic_no_presence_and_resume(struct hda_codec *codec,
        }
 }
 
+static void alc_fixup_dell4_mic_no_presence_quiet(struct hda_codec *codec,
+                                                 const struct hda_fixup *fix,
+                                                 int action)
+{
+       struct alc_spec *spec = codec->spec;
+       struct hda_input_mux *imux = &spec->gen.input_mux;
+       int i;
+
+       alc269_fixup_limit_int_mic_boost(codec, fix, action);
+
+       switch (action) {
+       case HDA_FIXUP_ACT_PRE_PROBE:
+               /**
+                * Set the vref of pin 0x19 (Headset Mic) and pin 0x1b (Headphone Mic)
+                * to Hi-Z to avoid pop noises at startup and when plugging and
+                * unplugging headphones.
+                */
+               snd_hda_codec_set_pin_target(codec, 0x19, PIN_VREFHIZ);
+               snd_hda_codec_set_pin_target(codec, 0x1b, PIN_VREFHIZ);
+               break;
+       case HDA_FIXUP_ACT_PROBE:
+               /**
+                * Make the internal mic (0x12) the default input source to
+                * prevent pop noises on cold boot.
+                */
+               for (i = 0; i < imux->num_items; i++) {
+                       if (spec->gen.imux_pins[i] == 0x12) {
+                               spec->gen.cur_mux[0] = i;
+                               break;
+                       }
+               }
+               break;
+       }
+}
+
 enum {
        ALC269_FIXUP_GPIO2,
        ALC269_FIXUP_SONY_VAIO,
@@ -6820,6 +6854,7 @@ enum {
        ALC269_FIXUP_DELL2_MIC_NO_PRESENCE,
        ALC269_FIXUP_DELL3_MIC_NO_PRESENCE,
        ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+       ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET,
        ALC269_FIXUP_HEADSET_MODE,
        ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC,
        ALC269_FIXUP_ASPIRE_HEADSET_MIC,
@@ -7005,11 +7040,13 @@ enum {
        ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE,
        ALC287_FIXUP_LEGION_16ACHG6,
        ALC287_FIXUP_CS35L41_I2C_2,
+       ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED,
        ALC245_FIXUP_CS35L41_SPI_2,
        ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED,
        ALC245_FIXUP_CS35L41_SPI_4,
        ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED,
        ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED,
+       ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -8767,6 +8804,14 @@ static const struct hda_fixup alc269_fixups[] = {
        [ALC287_FIXUP_CS35L41_I2C_2] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cs35l41_fixup_i2c_two,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
+       },
+       [ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cs35l41_fixup_i2c_two,
+               .chained = true,
+               .chain_id = ALC285_FIXUP_HP_MUTE_LED,
        },
        [ALC245_FIXUP_CS35L41_SPI_2] = {
                .type = HDA_FIXUP_FUNC,
@@ -8798,6 +8843,21 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC285_FIXUP_HP_MUTE_LED,
        },
+       [ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_dell4_mic_no_presence_quiet,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+       },
+       [ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x02a1112c }, /* use as headset mic, without its own jack detect */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8888,6 +8948,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
        SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1028, 0x0a38, "Dell Latitude 7520", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET),
        SND_PCI_QUIRK(0x1028, 0x0a58, "Dell", ALC255_FIXUP_DELL_HEADSET_MIC),
        SND_PCI_QUIRK(0x1028, 0x0a61, "Dell XPS 15 9510", ALC289_FIXUP_DUAL_SPK),
        SND_PCI_QUIRK(0x1028, 0x0a62, "Dell Precision 5560", ALC289_FIXUP_DUAL_SPK),
@@ -9015,21 +9076,22 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
        SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED),
-       SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2),
-       SND_PCI_QUIRK(0x103c, 0x8971, "HP EliteBook 830 G9", ALC245_FIXUP_CS35L41_SPI_2),
-       SND_PCI_QUIRK(0x103c, 0x8972, "HP EliteBook 840 G9", ALC245_FIXUP_CS35L41_SPI_2),
-       SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2),
-       SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2),
-       SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8971, "HP EliteBook 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8972, "HP EliteBook 840 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4),
        SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x103c, 0x8991, "HP EliteBook 845 G9", ALC287_FIXUP_CS35L41_I2C_2),
+       SND_PCI_QUIRK(0x103c, 0x8991, "HP EliteBook 845 G9", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8992, "HP EliteBook 845 G9", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x103c, 0x8994, "HP EliteBook 855 G9", ALC287_FIXUP_CS35L41_I2C_2),
+       SND_PCI_QUIRK(0x103c, 0x8994, "HP EliteBook 855 G9", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8995, "HP EliteBook 855 G9", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x103c, 0x89a4, "HP ProBook 440 G9", ALC236_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x89a6, "HP ProBook 450 G9", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x89aa, "HP EliteBook 630 G9", ALC236_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x89ac, "HP EliteBook 640 G9", ALC236_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x89ae, "HP EliteBook 650 G9", ALC236_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x89c3, "Zbook Studio G9", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
@@ -9162,6 +9224,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[57][0-9]RZ[Q]", ALC269_FIXUP_DMIC),
        SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x866d, "Clevo NP5[05]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x867c, "Clevo NP7[01]PNP", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x867d, "Clevo NP7[01]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME),
@@ -9236,6 +9299,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
        SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME),
        SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS),
+       SND_PCI_QUIRK(0x17aa, 0x3820, "Yoga Duet 7 13ITL6", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
        SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS),
        SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF),
        SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
@@ -9264,6 +9328,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x508b, "Thinkpad X12 Gen 1", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
        SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
@@ -9277,6 +9342,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802),
        SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X),
        SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS),
+       SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x1100, "TongFang GKxNRxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x1111, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x1119, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x1129, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
        SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
        SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
@@ -9284,6 +9357,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
        SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
        SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
+       SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
 
 #if 0
        /* Below is a quirk table taken from the old code.
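
A note on the mechanism behind the quirk rows added above: each SND_PCI_QUIRK() entry keys a fixup ID to a PCI subsystem vendor/device pair, and the driver resolves it with snd_pci_quirk_lookup(). A minimal sketch of that shape — the table, fixup ID and helper names are illustrative; only the two ALSA helpers are real:

#include <linux/pci.h>
#include <sound/core.h>

/* Illustrative fixup ID, standing in for the ALC269_FIXUP_* enum values. */
enum { MY_FIXUP_HEADSET_MIC = 1 };

/* Maps a PCI subsystem vendor/device pair to a fixup ID; the name string
 * is used for logging only, and the empty entry terminates the table. */
static const struct snd_pci_quirk my_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x0a38, "Dell Latitude 7520", MY_FIXUP_HEADSET_MIC),
        { }
};

static int my_lookup_fixup(struct pci_dev *pci)
{
        const struct snd_pci_quirk *q = snd_pci_quirk_lookup(pci, my_fixup_tbl);

        return q ? q->value : -1;       /* -1: no board-specific fixup */
}
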
index f6275868877a751b83d6622a024f70f4e3f12ef2..6fab2ad85bbec2bcfcd11e4b2b70dd5df08836fc 100644 (file)
@@ -2519,8 +2519,8 @@ static int snd_vt1724_create(struct snd_card *card,
  *
  */
 
-static int snd_vt1724_probe(struct pci_dev *pci,
-                           const struct pci_device_id *pci_id)
+static int __snd_vt1724_probe(struct pci_dev *pci,
+                             const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -2662,6 +2662,12 @@ static int snd_vt1724_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_vt1724_probe(struct pci_dev *pci,
+                           const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_vt1724_probe(pci, pci_id));
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int snd_vt1724_suspend(struct device *dev)
 {
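
The vt1724 hunk above is the first of roughly a dozen identical conversions in this series: the probe body is renamed with a __ prefix and a thin wrapper funnels its result through snd_card_free_on_error(), so a card allocated with snd_devm_card_new() is released immediately when any later probe step fails, instead of lingering until devres teardown. A minimal sketch of the shape, with illustrative driver names:

#include <linux/module.h>
#include <linux/pci.h>
#include <sound/core.h>

static int __my_probe(struct pci_dev *pci, const struct pci_device_id *id)
{
        struct snd_card *card;
        int err;

        /* devres-managed card: freed automatically on driver detach */
        err = snd_devm_card_new(&pci->dev, -1, NULL, THIS_MODULE, 0, &card);
        if (err < 0)
                return err;

        /* ... chip setup; any error return below is caught by the wrapper ... */

        err = snd_card_register(card);
        if (err < 0)
                return err;

        pci_set_drvdata(pci, card);
        return 0;
}

static int my_probe(struct pci_dev *pci, const struct pci_device_id *id)
{
        /* frees the card at once if __my_probe() returned an error */
        return snd_card_free_on_error(&pci->dev, __my_probe(pci, id));
}
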
index a51032b3ac4d85c7483fe514381c3155da461564..ae285c0a629c82a518af20812d681526b4e61a73 100644 (file)
@@ -3109,8 +3109,8 @@ static int check_default_spdif_aclink(struct pci_dev *pci)
        return 0;
 }
 
-static int snd_intel8x0_probe(struct pci_dev *pci,
-                             const struct pci_device_id *pci_id)
+static int __snd_intel8x0_probe(struct pci_dev *pci,
+                               const struct pci_device_id *pci_id)
 {
        struct snd_card *card;
        struct intel8x0 *chip;
@@ -3189,6 +3189,12 @@ static int snd_intel8x0_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_intel8x0_probe(struct pci_dev *pci,
+                             const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_intel8x0_probe(pci, pci_id));
+}
+
 static struct pci_driver intel8x0_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_intel8x0_ids,
index 7de3cb2f17b525023f2ebd0d3c1456fe9bf949ee..2845cc006d0cfb8d13b6cf22044f5cd1a550b5dd 100644 (file)
@@ -1178,8 +1178,8 @@ static struct shortname_table {
        { 0 },
 };
 
-static int snd_intel8x0m_probe(struct pci_dev *pci,
-                              const struct pci_device_id *pci_id)
+static int __snd_intel8x0m_probe(struct pci_dev *pci,
+                                const struct pci_device_id *pci_id)
 {
        struct snd_card *card;
        struct intel8x0m *chip;
@@ -1225,6 +1225,12 @@ static int snd_intel8x0m_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_intel8x0m_probe(struct pci_dev *pci,
+                              const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_intel8x0m_probe(pci, pci_id));
+}
+
 static struct pci_driver intel8x0m_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_intel8x0m_ids,
index 5c9e240ff6a9c514658b4b905368e1857a7e980e..33b4f95d65b3fa91b9d12dec68a45ec87c0b52d5 100644 (file)
@@ -2355,7 +2355,7 @@ snd_korg1212_probe(struct pci_dev *pci,
 
        err = snd_korg1212_create(card, pci);
        if (err < 0)
-               return err;
+               goto error;
 
        strcpy(card->driver, "korg1212");
        strcpy(card->shortname, "korg1212");
@@ -2366,10 +2366,14 @@ snd_korg1212_probe(struct pci_dev *pci,
 
        err = snd_card_register(card);
        if (err < 0)
-               return err;
+               goto error;
        pci_set_drvdata(pci, card);
        dev++;
        return 0;
+
+ error:
+       snd_card_free(card);
+       return err;
 }
 
 static struct pci_driver korg1212_driver = {
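
The korg1212 hunk above takes the other route to the same fix (lx6464es, hdsp, hdspm and rme9652 below follow suit): every early exit between card creation and registration jumps to an explicit snd_card_free(), releasing the card right away rather than at devres-teardown time. A sketch, with a hypothetical my_chip_create() standing in for the hardware setup:

#include <linux/module.h>
#include <linux/pci.h>
#include <sound/core.h>

static int my_chip_create(struct snd_card *card, struct pci_dev *pci)
{
        return 0;       /* placeholder for the driver's hardware setup */
}

static int my_probe(struct pci_dev *pci, const struct pci_device_id *id)
{
        struct snd_card *card;
        int err;

        err = snd_devm_card_new(&pci->dev, -1, NULL, THIS_MODULE, 0, &card);
        if (err < 0)
                return err;

        err = my_chip_create(card, pci);
        if (err < 0)
                goto error;

        err = snd_card_register(card);
        if (err < 0)
                goto error;

        pci_set_drvdata(pci, card);
        return 0;

 error:
        snd_card_free(card);    /* release now, not via deferred devres */
        return err;
}
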
index 5269a1d396a5bfe48cec1f2d3be5a426c5c3ac03..1aa30e90b86a797f1a6f91c63bf1c9bb56bbcd4a 100644 (file)
@@ -637,8 +637,8 @@ static int lola_create(struct snd_card *card, struct pci_dev *pci, int dev)
        return 0;
 }
 
-static int lola_probe(struct pci_dev *pci,
-                     const struct pci_device_id *pci_id)
+static int __lola_probe(struct pci_dev *pci,
+                       const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -687,6 +687,12 @@ static int lola_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int lola_probe(struct pci_dev *pci,
+                     const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __lola_probe(pci, pci_id));
+}
+
 /* PCI IDs */
 static const struct pci_device_id lola_ids[] = {
        { PCI_VDEVICE(DIGIGRAM, 0x0001) },
index 168a1084f7303925980a01d5a14d16946a2ede3b..bd9b6148dd6fbe820e452e5369c632a3d83efda7 100644 (file)
@@ -1019,7 +1019,7 @@ static int snd_lx6464es_probe(struct pci_dev *pci,
        err = snd_lx6464es_create(card, pci);
        if (err < 0) {
                dev_err(card->dev, "error during snd_lx6464es_create\n");
-               return err;
+               goto error;
        }
 
        strcpy(card->driver, "LX6464ES");
@@ -1036,12 +1036,16 @@ static int snd_lx6464es_probe(struct pci_dev *pci,
 
        err = snd_card_register(card);
        if (err < 0)
-               return err;
+               goto error;
 
        dev_dbg(chip->card->dev, "initialization successful\n");
        pci_set_drvdata(pci, card);
        dev++;
        return 0;
+
+ error:
+       snd_card_free(card);
+       return err;
 }
 
 static struct pci_driver lx6464es_driver = {
index 056838ead21d6f49ead0b71ac078efe4374b373f..261850775c8071f648f8cf8ed632d5734b858be9 100644 (file)
@@ -2637,7 +2637,7 @@ snd_m3_create(struct snd_card *card, struct pci_dev *pci,
 /*
  */
 static int
-snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+__snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -2702,6 +2702,12 @@ snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
        return 0;
 }
 
+static int
+snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_m3_probe(pci, pci_id));
+}
+
 static struct pci_driver m3_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_m3_ids,
index c9c178504959ea1d382137a991700b3ddc740f74..f99a1e96e9231e82a6a2cdcae7c9a7ed340e8d86 100644 (file)
@@ -1573,7 +1573,6 @@ snd_nm256_create(struct snd_card *card, struct pci_dev *pci)
        chip->coeffs_current = 0;
 
        snd_nm256_init_chip(chip);
-       card->private_free = snd_nm256_free;
 
        // pci_set_master(pci); /* needed? */
        return 0;
@@ -1680,6 +1679,7 @@ static int snd_nm256_probe(struct pci_dev *pci,
        err = snd_card_register(card);
        if (err < 0)
                return err;
+       card->private_free = snd_nm256_free;
 
        pci_set_drvdata(pci, card);
        return 0;
index 4fb3f2484fdba92787c5dcab2a7c8b5b382a4ad6..92ffe9dc20c55699a0c95cc9b4b1ac2ba8c3f98d 100644 (file)
@@ -576,7 +576,7 @@ static void oxygen_card_free(struct snd_card *card)
        mutex_destroy(&chip->mutex);
 }
 
-int oxygen_pci_probe(struct pci_dev *pci, int index, char *id,
+static int __oxygen_pci_probe(struct pci_dev *pci, int index, char *id,
                     struct module *owner,
                     const struct pci_device_id *ids,
                     int (*get_model)(struct oxygen *chip,
@@ -701,6 +701,16 @@ int oxygen_pci_probe(struct pci_dev *pci, int index, char *id,
        pci_set_drvdata(pci, card);
        return 0;
 }
+
+int oxygen_pci_probe(struct pci_dev *pci, int index, char *id,
+                    struct module *owner,
+                    const struct pci_device_id *ids,
+                    int (*get_model)(struct oxygen *chip,
+                                     const struct pci_device_id *id))
+{
+       return snd_card_free_on_error(&pci->dev,
+                                     __oxygen_pci_probe(pci, index, id, owner, ids, get_model));
+}
 EXPORT_SYMBOL(oxygen_pci_probe);
 
 #ifdef CONFIG_PM_SLEEP
index 5a987c683c41c14685674803ac0f3079d206d3b3..b37c877c2c16057505fae4b206d74f96657c66f3 100644 (file)
@@ -2023,7 +2023,7 @@ static void snd_riptide_joystick_remove(struct pci_dev *pci)
 #endif
 
 static int
-snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+__snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -2124,6 +2124,12 @@ snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
        return 0;
 }
 
+static int
+snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_card_riptide_probe(pci, pci_id));
+}
+
 static struct pci_driver driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_riptide_ids,
index 5b6bd9f0b2f77df56084e213f282f69020d351df..9c0ac025e14320387bd44e1c2fe7f4cbc102cf07 100644 (file)
@@ -1875,7 +1875,7 @@ static void snd_rme32_card_free(struct snd_card *card)
 }
 
 static int
-snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+__snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 {
        static int dev;
        struct rme32 *rme32;
@@ -1927,6 +1927,12 @@ snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
        return 0;
 }
 
+static int
+snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_rme32_probe(pci, pci_id));
+}
+
 static struct pci_driver rme32_driver = {
        .name =         KBUILD_MODNAME,
        .id_table =     snd_rme32_ids,
index 8fc8115049203b5dbe65ed048d0466625d1593de..bccb7e0d3d116c9ec8e8cdef1d581a95573598a5 100644 (file)
@@ -2430,8 +2430,8 @@ static void snd_rme96_card_free(struct snd_card *card)
 }
 
 static int
-snd_rme96_probe(struct pci_dev *pci,
-               const struct pci_device_id *pci_id)
+__snd_rme96_probe(struct pci_dev *pci,
+                 const struct pci_device_id *pci_id)
 {
        static int dev;
        struct rme96 *rme96;
@@ -2498,6 +2498,12 @@ snd_rme96_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_rme96_probe(struct pci_dev *pci,
+                          const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_rme96_probe(pci, pci_id));
+}
+
 static struct pci_driver rme96_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_rme96_ids,
index 96c12dfb24cf9d00cbd98d498f95d4438096415b..3db641318d3ae4819cbf7cc10944b78cebf1bfd6 100644 (file)
@@ -5444,17 +5444,21 @@ static int snd_hdsp_probe(struct pci_dev *pci,
        hdsp->pci = pci;
        err = snd_hdsp_create(card, hdsp);
        if (err)
-               return err;
+               goto error;
 
        strcpy(card->shortname, "Hammerfall DSP");
        sprintf(card->longname, "%s at 0x%lx, irq %d", hdsp->card_name,
                hdsp->port, hdsp->irq);
        err = snd_card_register(card);
        if (err)
-               return err;
+               goto error;
        pci_set_drvdata(pci, card);
        dev++;
        return 0;
+
+ error:
+       snd_card_free(card);
+       return err;
 }
 
 static struct pci_driver hdsp_driver = {
index ff06ee82607cf784e8ebc1d0ef24baada4d6aa60..fa1812e7a49dca872a3325510d7649d24fdf1e54 100644 (file)
@@ -6895,7 +6895,7 @@ static int snd_hdspm_probe(struct pci_dev *pci,
 
        err = snd_hdspm_create(card, hdspm);
        if (err < 0)
-               return err;
+               goto error;
 
        if (hdspm->io_type != MADIface) {
                snprintf(card->shortname, sizeof(card->shortname), "%s_%x",
@@ -6914,12 +6914,16 @@ static int snd_hdspm_probe(struct pci_dev *pci,
 
        err = snd_card_register(card);
        if (err < 0)
-               return err;
+               goto error;
 
        pci_set_drvdata(pci, card);
 
        dev++;
        return 0;
+
+ error:
+       snd_card_free(card);
+       return err;
 }
 
 static struct pci_driver hdspm_driver = {
index 7755e19aa77617f140c259cc5b2ed942f39cfca0..1d614fe89a6ae12e17d02b5bbeb2e2351ffc6e27 100644 (file)
@@ -2572,7 +2572,7 @@ static int snd_rme9652_probe(struct pci_dev *pci,
        rme9652->pci = pci;
        err = snd_rme9652_create(card, rme9652, precise_ptr[dev]);
        if (err)
-               return err;
+               goto error;
 
        strcpy(card->shortname, rme9652->card_name);
 
@@ -2580,10 +2580,14 @@ static int snd_rme9652_probe(struct pci_dev *pci,
                card->shortname, rme9652->port, rme9652->irq);
        err = snd_card_register(card);
        if (err)
-               return err;
+               goto error;
        pci_set_drvdata(pci, card);
        dev++;
        return 0;
+
+ error:
+       snd_card_free(card);
+       return err;
 }
 
 static struct pci_driver rme9652_driver = {
index 0b722b0e0604bfc546121ab6b970458299928130..fabe393607f8fa2862db6b73e5247d7bd1f63ccf 100644 (file)
@@ -1331,8 +1331,8 @@ static int sis_chip_create(struct snd_card *card,
        return 0;
 }
 
-static int snd_sis7019_probe(struct pci_dev *pci,
-                            const struct pci_device_id *pci_id)
+static int __snd_sis7019_probe(struct pci_dev *pci,
+                              const struct pci_device_id *pci_id)
 {
        struct snd_card *card;
        struct sis7019 *sis;
@@ -1352,8 +1352,8 @@ static int snd_sis7019_probe(struct pci_dev *pci,
        if (!codecs)
                codecs = SIS_PRIMARY_CODEC_PRESENT;
 
-       rc = snd_card_new(&pci->dev, index, id, THIS_MODULE,
-                         sizeof(*sis), &card);
+       rc = snd_devm_card_new(&pci->dev, index, id, THIS_MODULE,
+                              sizeof(*sis), &card);
        if (rc < 0)
                return rc;
 
@@ -1386,6 +1386,12 @@ static int snd_sis7019_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_sis7019_probe(struct pci_dev *pci,
+                            const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_sis7019_probe(pci, pci_id));
+}
+
 static struct pci_driver sis7019_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_sis7019_ids,
index c8c49881008fd78b461df2dbbb1a28c5c1598ccc..f91cbf6eeca0f6ef151760ccde8b360d6aed8041 100644 (file)
@@ -1387,8 +1387,8 @@ static int snd_sonicvibes_midi(struct sonicvibes *sonic,
        return 0;
 }
 
-static int snd_sonic_probe(struct pci_dev *pci,
-                          const struct pci_device_id *pci_id)
+static int __snd_sonic_probe(struct pci_dev *pci,
+                            const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -1459,6 +1459,12 @@ static int snd_sonic_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_sonic_probe(struct pci_dev *pci,
+                          const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_sonic_probe(pci, pci_id));
+}
+
 static struct pci_driver sonicvibes_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_sonic_ids,
index 65514f7e42d7d2f00f855bc981f6129e168d8873..361b83fd721e61650e0697164e7061fe8b88b7cf 100644 (file)
@@ -2458,8 +2458,8 @@ static int check_dxs_list(struct pci_dev *pci, int revision)
        return VIA_DXS_48K;
 };
 
-static int snd_via82xx_probe(struct pci_dev *pci,
-                            const struct pci_device_id *pci_id)
+static int __snd_via82xx_probe(struct pci_dev *pci,
+                              const struct pci_device_id *pci_id)
 {
        struct snd_card *card;
        struct via82xx *chip;
@@ -2569,6 +2569,12 @@ static int snd_via82xx_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_via82xx_probe(struct pci_dev *pci,
+                            const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_via82xx_probe(pci, pci_id));
+}
+
 static struct pci_driver via82xx_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_via82xx_ids,
index 234f7fbed2364318ef73f0d61cf1b5a52f879c90..ca7f024bf8ec6efe5387d205512120cdbc8872aa 100644 (file)
@@ -1103,8 +1103,8 @@ static int snd_via82xx_create(struct snd_card *card,
 }
 
 
-static int snd_via82xx_probe(struct pci_dev *pci,
-                            const struct pci_device_id *pci_id)
+static int __snd_via82xx_probe(struct pci_dev *pci,
+                              const struct pci_device_id *pci_id)
 {
        struct snd_card *card;
        struct via82xx_modem *chip;
@@ -1157,6 +1157,12 @@ static int snd_via82xx_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_via82xx_probe(struct pci_dev *pci,
+                            const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_via82xx_probe(pci, pci_id));
+}
+
 static struct pci_driver via82xx_modem_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_via82xx_modem_ids,
index 1a7802fbf23c193e79f85ae0f6b0f5f0197ba248..a3856c73e221497a2e403328d8f0f291ada01a4d 100644 (file)
@@ -966,6 +966,7 @@ static int mchp_pdmc_process(struct snd_pcm_substream *substream,
 
 static struct snd_dmaengine_pcm_config mchp_pdmc_config = {
        .process = mchp_pdmc_process,
+       .prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config,
 };
 
 static int mchp_pdmc_probe(struct platform_device *pdev)
index 33e43013ff770c7b782f23e00cd2b9c215e263bf..0d639a33ad969a6b920a47b8523e0a6c084a5e47 100644 (file)
  */
 #undef ENABLE_MIC_INPUT
 
-static struct clk *mclk;
-
-static int at91sam9g20ek_set_bias_level(struct snd_soc_card *card,
-                                       struct snd_soc_dapm_context *dapm,
-                                       enum snd_soc_bias_level level)
-{
-       static int mclk_on;
-       int ret = 0;
-
-       switch (level) {
-       case SND_SOC_BIAS_ON:
-       case SND_SOC_BIAS_PREPARE:
-               if (!mclk_on)
-                       ret = clk_enable(mclk);
-               if (ret == 0)
-                       mclk_on = 1;
-               break;
-
-       case SND_SOC_BIAS_OFF:
-       case SND_SOC_BIAS_STANDBY:
-               if (mclk_on)
-                       clk_disable(mclk);
-               mclk_on = 0;
-               break;
-       }
-
-       return ret;
-}
-
 static const struct snd_soc_dapm_widget at91sam9g20ek_dapm_widgets[] = {
        SND_SOC_DAPM_MIC("Int Mic", NULL),
        SND_SOC_DAPM_SPK("Ext Spk", NULL),
@@ -135,7 +106,6 @@ static struct snd_soc_card snd_soc_at91sam9g20ek = {
        .owner = THIS_MODULE,
        .dai_link = &at91sam9g20ek_dai,
        .num_links = 1,
-       .set_bias_level = at91sam9g20ek_set_bias_level,
 
        .dapm_widgets = at91sam9g20ek_dapm_widgets,
        .num_dapm_widgets = ARRAY_SIZE(at91sam9g20ek_dapm_widgets),
@@ -148,7 +118,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct device_node *codec_np, *cpu_np;
-       struct clk *pllb;
        struct snd_soc_card *card = &snd_soc_at91sam9g20ek;
        int ret;
 
@@ -162,31 +131,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       /*
-        * Codec MCLK is supplied by PCK0 - set it up.
-        */
-       mclk = clk_get(NULL, "pck0");
-       if (IS_ERR(mclk)) {
-               dev_err(&pdev->dev, "Failed to get MCLK\n");
-               ret = PTR_ERR(mclk);
-               goto err;
-       }
-
-       pllb = clk_get(NULL, "pllb");
-       if (IS_ERR(pllb)) {
-               dev_err(&pdev->dev, "Failed to get PLLB\n");
-               ret = PTR_ERR(pllb);
-               goto err_mclk;
-       }
-       ret = clk_set_parent(mclk, pllb);
-       clk_put(pllb);
-       if (ret != 0) {
-               dev_err(&pdev->dev, "Failed to set MCLK parent\n");
-               goto err_mclk;
-       }
-
-       clk_set_rate(mclk, MCLK_RATE);
-
        card->dev = &pdev->dev;
 
        /* Parse device node info */
@@ -230,9 +174,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev)
 
        return ret;
 
-err_mclk:
-       clk_put(mclk);
-       mclk = NULL;
 err:
        atmel_ssc_put_audio(0);
        return ret;
@@ -242,8 +183,6 @@ static int at91sam9g20ek_audio_remove(struct platform_device *pdev)
 {
        struct snd_soc_card *card = platform_get_drvdata(pdev);
 
-       clk_disable(mclk);
-       mclk = NULL;
        snd_soc_unregister_card(card);
        atmel_ssc_put_audio(0);
 
index e5a56bcbb223db92b5be11d95d3286723f8b3004..aa6823fbd1a4df01b9cc739e40eb8b0d82f9c5e2 100644 (file)
@@ -822,8 +822,8 @@ int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap)
        word_offset = otp_map_match->word_offset;
 
        for (i = 0; i < otp_map_match->num_elements; i++) {
-               dev_dbg(dev, "bitoffset= %d, word_offset=%d, bit_sum mod 32=%d\n",
-                       bit_offset, word_offset, bit_sum % 32);
+               dev_dbg(dev, "bitoffset= %d, word_offset=%d, bit_sum mod 32=%d otp_map[i].size = %d\n",
+                       bit_offset, word_offset, bit_sum % 32, otp_map[i].size);
                if (bit_offset + otp_map[i].size - 1 >= 32) {
                        otp_val = (otp_mem[word_offset] &
                                        GENMASK(31, bit_offset)) >> bit_offset;
@@ -831,12 +831,14 @@ int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap)
                                        GENMASK(bit_offset + otp_map[i].size - 33, 0)) <<
                                        (32 - bit_offset);
                        bit_offset += otp_map[i].size - 32;
-               } else {
+               } else if (bit_offset + otp_map[i].size - 1 >= 0) {
                        otp_val = (otp_mem[word_offset] &
                                   GENMASK(bit_offset + otp_map[i].size - 1, bit_offset)
                                  ) >> bit_offset;
                        bit_offset += otp_map[i].size;
-               }
+               } else /* both bit_offset and otp_map[i].size are 0 */
+                       otp_val = 0;
+
                bit_sum += otp_map[i].size;
 
                if (bit_offset == 32) {
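
The cs35l41 hunk above closes a hole in the OTP field extraction: a zero-width map entry previously fell through to the in-word branch and computed an invalid GENMASK(bit_offset - 1, bit_offset) range. The helper below models the corrected three-way logic — fields wholly inside one 32-bit word, fields straddling into the next word, and zero-size fields that must yield 0. It is an illustrative distillation, not the driver function:

#include <linux/bits.h>
#include <linux/types.h>

static u32 otp_extract(const u32 *mem, unsigned int word,
                       unsigned int bit_offset, unsigned int size)
{
        u32 val;

        if (size == 0)
                return 0;       /* the new else branch: nothing to extract */

        if (bit_offset + size > 32) {
                /* field straddles two words: stitch low and high parts */
                val = (mem[word] & GENMASK(31, bit_offset)) >> bit_offset;
                val |= (mem[word + 1] &
                        GENMASK(bit_offset + size - 33, 0)) << (32 - bit_offset);
        } else {
                /* field fits in one word */
                val = (mem[word] &
                       GENMASK(bit_offset + size - 1, bit_offset)) >> bit_offset;
        }
        return val;
}
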
index 13009d08b09ac5ed3271ffdc66b60d7d57cd1b41..c7493549a9a506a538c6575d7b620eaed7d0436a 100644 (file)
@@ -446,7 +446,7 @@ static int da7219_tonegen_freq_put(struct snd_kcontrol *kcontrol,
        struct soc_mixer_control *mixer_ctrl =
                (struct soc_mixer_control *) kcontrol->private_value;
        unsigned int reg = mixer_ctrl->reg;
-       __le16 val;
+       __le16 val_new, val_old;
        int ret;
 
        /*
@@ -454,13 +454,19 @@ static int da7219_tonegen_freq_put(struct snd_kcontrol *kcontrol,
         * Therefore we need to convert to little endian here to align with
         * HW registers.
         */
-       val = cpu_to_le16(ucontrol->value.integer.value[0]);
+       val_new = cpu_to_le16(ucontrol->value.integer.value[0]);
 
        mutex_lock(&da7219->ctrl_lock);
-       ret = regmap_raw_write(da7219->regmap, reg, &val, sizeof(val));
+       ret = regmap_raw_read(da7219->regmap, reg, &val_old, sizeof(val_old));
+       if (ret == 0 && (val_old != val_new))
+               ret = regmap_raw_write(da7219->regmap, reg,
+                               &val_new, sizeof(val_new));
        mutex_unlock(&da7219->ctrl_lock);
 
-       return ret;
+       if (ret < 0)
+               return ret;
+
+       return val_old != val_new;
 }
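
This da7219 change — like the max98090, wm8958, meson aiu and g12a mux fixes elsewhere in this series — enforces the ALSA control convention: put() returns 1 when the value actually changed (prompting the core to send a change notification to userspace), 0 for a no-op write, and a negative errno on failure. A minimal illustration of the contract, with a static variable standing in for driver state:

#include <sound/control.h>

static int my_switch_put(struct snd_kcontrol *kcontrol,
                         struct snd_ctl_elem_value *ucontrol)
{
        static int cur;         /* stands in for the real driver state */
        int val = !!ucontrol->value.integer.value[0];

        if (val == cur)
                return 0;       /* unchanged: no event generated */

        cur = val;
        /* ... program the hardware here; return a negative errno on failure ... */
        return 1;               /* changed: userspace gets a notification */
}
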
 
 
index 6884ae505e33c150d5b1d8f161f98d4f6cb4c409..3143f9cd7277e4c81294ffe3014c95c866e1d32f 100644 (file)
@@ -3566,12 +3566,16 @@ static int rx_macro_probe(struct platform_device *pdev)
                return PTR_ERR(rx->pds);
 
        base = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(base))
-               return PTR_ERR(base);
+       if (IS_ERR(base)) {
+               ret = PTR_ERR(base);
+               goto err;
+       }
 
        rx->regmap = devm_regmap_init_mmio(dev, base, &rx_regmap_config);
-       if (IS_ERR(rx->regmap))
-               return PTR_ERR(rx->regmap);
+       if (IS_ERR(rx->regmap)) {
+               ret = PTR_ERR(rx->regmap);
+               goto err;
+       }
 
        dev_set_drvdata(dev, rx);
 
@@ -3632,6 +3636,8 @@ err_mclk:
 err_dcodec:
        clk_disable_unprepare(rx->macro);
 err:
+       lpass_macro_pds_exit(rx->pds);
+
        return ret;
 }
 
index 714a411d53379efff549329b3c5ca7a7666dd971..55503ba480bb6350fb482f12e24c77581d0bb16a 100644 (file)
@@ -1828,8 +1828,10 @@ static int tx_macro_probe(struct platform_device *pdev)
                return PTR_ERR(tx->pds);
 
        base = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(base))
-               return PTR_ERR(base);
+       if (IS_ERR(base)) {
+               ret = PTR_ERR(base);
+               goto err;
+       }
 
        /* Update defaults for lpass sc7280 */
        if (of_device_is_compatible(np, "qcom,sc7280-lpass-tx-macro")) {
@@ -1846,8 +1848,10 @@ static int tx_macro_probe(struct platform_device *pdev)
        }
 
        tx->regmap = devm_regmap_init_mmio(dev, base, &tx_regmap_config);
-       if (IS_ERR(tx->regmap))
-               return PTR_ERR(tx->regmap);
+       if (IS_ERR(tx->regmap)) {
+               ret = PTR_ERR(tx->regmap);
+               goto err;
+       }
 
        dev_set_drvdata(dev, tx);
 
@@ -1907,6 +1911,8 @@ err_mclk:
 err_dcodec:
        clk_disable_unprepare(tx->macro);
 err:
+       lpass_macro_pds_exit(tx->pds);
+
        return ret;
 }
 
index f3cb596058e02ab680e5aafcc23710b71b0ea894..d18b56e6043305a3ae0e7670286727f6a97eb761 100644 (file)
@@ -1434,8 +1434,10 @@ static int va_macro_probe(struct platform_device *pdev)
                va->dmic_clk_div = VA_MACRO_CLK_DIV_2;
        } else {
                ret = va_macro_validate_dmic_sample_rate(sample_rate, va);
-               if (!ret)
-                       return -EINVAL;
+               if (!ret) {
+                       ret = -EINVAL;
+                       goto err;
+               }
        }
 
        base = devm_platform_ioremap_resource(pdev, 0);
@@ -1492,6 +1494,8 @@ err_mclk:
 err_dcodec:
        clk_disable_unprepare(va->macro);
 err:
+       lpass_macro_pds_exit(va->pds);
+
        return ret;
 }
 
index b45ec35cd63c3393548cd45cbb67eaf14bc1477d..62b41ca050a20cd037e96b681ddbe93c8e130df9 100644 (file)
@@ -413,6 +413,9 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol,
 
        val = (val >> mc->shift) & mask;
 
+       if (sel < 0 || sel > mc->max)
+               return -EINVAL;
+
        *select = sel;
 
        /* Setting a volume is only valid if it is already On */
@@ -427,7 +430,7 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol,
                mask << mc->shift,
                sel << mc->shift);
 
-       return 0;
+       return *select != val;
 }
 
 static const char *max98090_perf_pwr_text[] =
index 9ad7fc0baf072678b40063b96fa8450738b06d70..20a07c92b2fc29d5749c21453f04d0020bc823ff 100644 (file)
@@ -1206,9 +1206,16 @@ static int msm8916_wcd_digital_probe(struct platform_device *pdev)
 
        dev_set_drvdata(dev, priv);
 
-       return devm_snd_soc_register_component(dev, &msm8916_wcd_digital,
+       ret = devm_snd_soc_register_component(dev, &msm8916_wcd_digital,
                                      msm8916_wcd_digital_dai,
                                      ARRAY_SIZE(msm8916_wcd_digital_dai));
+       if (ret)
+               goto err_mclk;
+
+       return 0;
+
+err_mclk:
+       clk_disable_unprepare(priv->mclk);
 err_clk:
        clk_disable_unprepare(priv->ahbclk);
        return ret;
index 8fffe378618d0ec3f946e3d3985c3d2887638aae..cce6f4e7992f552f31c1bec749596047c7b79258 100644 (file)
@@ -489,7 +489,7 @@ static int rk817_platform_probe(struct platform_device *pdev)
 
        rk817_codec_parse_dt_property(&pdev->dev, rk817_codec_data);
 
-       rk817_codec_data->mclk = clk_get(pdev->dev.parent, "mclk");
+       rk817_codec_data->mclk = devm_clk_get(pdev->dev.parent, "mclk");
        if (IS_ERR(rk817_codec_data->mclk)) {
                dev_dbg(&pdev->dev, "Unable to get mclk\n");
                ret = -ENXIO;
index be68d573a4906406ac3ebebef450ede8a21eef63..2b6c6d6b9771e09b8e776f0c54db33890b57e543 100644 (file)
@@ -1100,6 +1100,15 @@ void rt5682_jack_detect_handler(struct work_struct *work)
                return;
        }
 
+       if (rt5682->is_sdw) {
+               if (pm_runtime_status_suspended(rt5682->slave->dev.parent)) {
+                       dev_dbg(&rt5682->slave->dev,
+                               "%s: parent device is pm_runtime_status_suspended, skipping jack detection\n",
+                               __func__);
+                       return;
+               }
+       }
+
        dapm = snd_soc_component_get_dapm(rt5682->component);
 
        snd_soc_dapm_mutex_lock(dapm);
@@ -2822,14 +2831,11 @@ static int rt5682_bclk_set_rate(struct clk_hw *hw, unsigned long rate,
 
        for_each_component_dais(component, dai)
                if (dai->id == RT5682_AIF1)
-                       break;
-       if (!dai) {
-               dev_err(rt5682->i2c_dev, "dai %d not found in component\n",
-                       RT5682_AIF1);
-               return -ENODEV;
-       }
+                       return rt5682_set_bclk1_ratio(dai, factor);
 
-       return rt5682_set_bclk1_ratio(dai, factor);
+       dev_err(rt5682->i2c_dev, "dai %d not found in component\n",
+               RT5682_AIF1);
+       return -ENODEV;
 }
 
 static const struct clk_ops rt5682_dai_clk_ops[RT5682_DAI_NUM_CLKS] = {
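
The rt5682 rework above (mirrored for rt5682s just below) fixes a classic list-iterator misuse: when for_each_component_dais() runs to completion, the cursor ends up pointing at the list head container — never NULL — so the old post-loop `if (!dai)` check could not detect the not-found case. Acting while the cursor is still valid, as in this illustrative helper, is the safe shape:

#include <linux/list.h>

struct item {
        int id;
        struct list_head node;
};

static struct item *find_item(struct list_head *head, int id)
{
        struct item *it;

        list_for_each_entry(it, head, node)
                if (it->id == id)
                        return it;      /* act while the cursor is valid */

        return NULL;    /* fell off the end: never test or use `it` here */
}
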
index 1cba8ec7cedb915e47ea8ec2d0acc7085fa0dd99..b55f3ac3a2673e9467cf83cbaf3651780757df6d 100644 (file)
@@ -2687,14 +2687,11 @@ static int rt5682s_bclk_set_rate(struct clk_hw *hw, unsigned long rate,
 
        for_each_component_dais(component, dai)
                if (dai->id == RT5682S_AIF1)
-                       break;
-       if (!dai) {
-               dev_err(component->dev, "dai %d not found in component\n",
-                       RT5682S_AIF1);
-               return -ENODEV;
-       }
+                       return rt5682s_set_bclk1_ratio(dai, factor);
 
-       return rt5682s_set_bclk1_ratio(dai, factor);
+       dev_err(component->dev, "dai %d not found in component\n",
+               RT5682S_AIF1);
+       return -ENODEV;
 }
 
 static const struct clk_ops rt5682s_dai_clk_ops[RT5682S_DAI_NUM_CLKS] = {
index 6770825d037a8a73a7cd43e80805acb834f6c8a4..ea25fd58d43a987cc9fa264497c7fd76bb6005d6 100644 (file)
@@ -245,6 +245,13 @@ static void rt711_jack_detect_handler(struct work_struct *work)
        if (!rt711->component->card->instantiated)
                return;
 
+       if (pm_runtime_status_suspended(rt711->slave->dev.parent)) {
+               dev_dbg(&rt711->slave->dev,
+                       "%s: parent device is pm_runtime_status_suspended, skipping jack detection\n",
+                       __func__);
+               return;
+       }
+
        reg = RT711_VERB_GET_PIN_SENSE | RT711_HP_OUT;
        ret = regmap_read(rt711->regmap, reg, &jack_status);
        if (ret < 0)
index 7aa1772a915f322b7e3a0fce8fcfdba1a4458a2e..6e0d7cf0c8c92647f41e6cffa591ad20c719405e 100644 (file)
@@ -341,7 +341,6 @@ static int rt9120_get_reg_size(unsigned int reg)
 {
        switch (reg) {
        case 0x00:
-       case 0x09:
        case 0x20 ... 0x27:
                return 2;
        case 0x30 ... 0x3D:
index 1e75e93cf28f285657a84f55bb5d9104ceba512a..6298ebe96e941a2bf31d7e0cbb1e861a0c6d3293 100644 (file)
@@ -1274,29 +1274,7 @@ static int wcd934x_set_sido_input_src(struct wcd934x_codec *wcd, int sido_src)
        if (sido_src == wcd->sido_input_src)
                return 0;
 
-       if (sido_src == SIDO_SOURCE_INTERNAL) {
-               regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
-                                  WCD934X_ANA_BUCK_HI_ACCU_EN_MASK, 0);
-               usleep_range(100, 110);
-               regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
-                                  WCD934X_ANA_BUCK_HI_ACCU_PRE_ENX_MASK, 0x0);
-               usleep_range(100, 110);
-               regmap_update_bits(wcd->regmap, WCD934X_ANA_RCO,
-                                  WCD934X_ANA_RCO_BG_EN_MASK, 0);
-               usleep_range(100, 110);
-               regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
-                                  WCD934X_ANA_BUCK_PRE_EN1_MASK,
-                                  WCD934X_ANA_BUCK_PRE_EN1_ENABLE);
-               usleep_range(100, 110);
-               regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
-                                  WCD934X_ANA_BUCK_PRE_EN2_MASK,
-                                  WCD934X_ANA_BUCK_PRE_EN2_ENABLE);
-               usleep_range(100, 110);
-               regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
-                                  WCD934X_ANA_BUCK_HI_ACCU_EN_MASK,
-                                  WCD934X_ANA_BUCK_HI_ACCU_ENABLE);
-               usleep_range(100, 110);
-       } else if (sido_src == SIDO_SOURCE_RCO_BG) {
+       if (sido_src == SIDO_SOURCE_RCO_BG) {
                regmap_update_bits(wcd->regmap, WCD934X_ANA_RCO,
                                   WCD934X_ANA_RCO_BG_EN_MASK,
                                   WCD934X_ANA_RCO_BG_ENABLE);
@@ -1382,8 +1360,6 @@ static int wcd934x_disable_ana_bias_and_syclk(struct wcd934x_codec *wcd)
        regmap_update_bits(wcd->regmap, WCD934X_CLK_SYS_MCLK_PRG,
                           WCD934X_EXT_CLK_BUF_EN_MASK |
                           WCD934X_MCLK_EN_MASK, 0x0);
-       wcd934x_set_sido_input_src(wcd, SIDO_SOURCE_INTERNAL);
-
        regmap_update_bits(wcd->regmap, WCD934X_ANA_BIAS,
                           WCD934X_ANA_BIAS_EN_MASK, 0);
        regmap_update_bits(wcd->regmap, WCD934X_ANA_BIAS,
index 5d4949c2ec9baba2fe218138a7a2bc921d5cf8a5..b14c6d104e6d99ca73a84779aabdd6d260c01999 100644 (file)
@@ -602,7 +602,7 @@ static int wm8731_hw_init(struct device *dev, struct wm8731_priv *wm8731)
        ret = wm8731_reset(wm8731->regmap);
        if (ret < 0) {
                dev_err(dev, "Failed to issue reset: %d\n", ret);
-               goto err_regulator_enable;
+               goto err;
        }
 
        /* Clear POWEROFF, keep everything else disabled */
@@ -619,10 +619,7 @@ static int wm8731_hw_init(struct device *dev, struct wm8731_priv *wm8731)
 
        regcache_mark_dirty(wm8731->regmap);
 
-err_regulator_enable:
-       /* Regulators will be enabled by bias management */
-       regulator_bulk_disable(ARRAY_SIZE(wm8731->supplies), wm8731->supplies);
-
+err:
        return ret;
 }
 
@@ -760,21 +757,27 @@ static int wm8731_i2c_probe(struct i2c_client *i2c,
                ret = PTR_ERR(wm8731->regmap);
                dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
                        ret);
-               return ret;
+               goto err_regulator_enable;
        }
 
        ret = wm8731_hw_init(&i2c->dev, wm8731);
        if (ret != 0)
-               return ret;
+               goto err_regulator_enable;
 
        ret = devm_snd_soc_register_component(&i2c->dev,
                        &soc_component_dev_wm8731, &wm8731_dai, 1);
        if (ret != 0) {
                dev_err(&i2c->dev, "Failed to register CODEC: %d\n", ret);
-               return ret;
+               goto err_regulator_enable;
        }
 
        return 0;
+
+err_regulator_enable:
+       /* Regulators will be enabled by bias management */
+       regulator_bulk_disable(ARRAY_SIZE(wm8731->supplies), wm8731->supplies);
+
+       return ret;
 }
 
 static const struct i2c_device_id wm8731_i2c_id[] = {
index e4018ba3b19a2ccfac18d85b0a0addfd35adcd3c..7878c7a58ff10165a5834280255f8cfe66e043fe 100644 (file)
@@ -530,7 +530,7 @@ static int wm8958_mbc_put(struct snd_kcontrol *kcontrol,
 
        wm8958_dsp_apply(component, mbc, wm8994->mbc_ena[mbc]);
 
-       return 0;
+       return 1;
 }
 
 #define WM8958_MBC_SWITCH(xname, xval) {\
@@ -656,7 +656,7 @@ static int wm8958_vss_put(struct snd_kcontrol *kcontrol,
 
        wm8958_dsp_apply(component, vss, wm8994->vss_ena[vss]);
 
-       return 0;
+       return 1;
 }
 
 
@@ -730,7 +730,7 @@ static int wm8958_hpf_put(struct snd_kcontrol *kcontrol,
 
        wm8958_dsp_apply(component, hpf % 3, ucontrol->value.integer.value[0]);
 
-       return 0;
+       return 1;
 }
 
 #define WM8958_HPF_SWITCH(xname, xval) {\
@@ -824,7 +824,7 @@ static int wm8958_enh_eq_put(struct snd_kcontrol *kcontrol,
 
        wm8958_dsp_apply(component, eq, ucontrol->value.integer.value[0]);
 
-       return 0;
+       return 1;
 }
 
 #define WM8958_ENH_EQ_SWITCH(xname, xval) {\
index 4650a6931a946cc89576e9c7833ad52a6391d7f0..ffc24afb5a7acc2a9ded4c5730421c9044b5030d 100644 (file)
@@ -372,7 +372,7 @@ static int fsl_sai_set_bclk(struct snd_soc_dai *dai, bool tx, u32 freq)
                        continue;
                if (ratio == 1 && !support_1_1_ratio)
                        continue;
-               else if (ratio & 1)
+               if ((ratio & 1) && ratio > 1)
                        continue;
 
                diff = abs((long)clk_rate - ratio * freq);
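
The one-line fsl_sai change above is subtler than it looks: the old `else if (ratio & 1)` also rejected ratio 1, since 1 is odd, so the 1:1 bypass case could never be selected even on SoCs that support it. A small model of the corrected search, with an illustrative upper bound:

#include <linux/kernel.h>
#include <linux/limits.h>
#include <linux/types.h>

static int best_bclk_ratio(long clk_rate, long freq, bool support_1_1_ratio)
{
        long diff, best_diff = LONG_MAX;
        int ratio, best = -1;

        for (ratio = 1; ratio <= 512; ratio++) {
                if (ratio == 1 && !support_1_1_ratio)
                        continue;       /* 1:1 bypass not supported here */
                if ((ratio & 1) && ratio > 1)
                        continue;       /* odd dividers other than 1 are invalid */
                diff = abs(clk_rate - ratio * freq);
                if (diff < best_diff) {
                        best_diff = diff;
                        best = ratio;
                }
        }
        return best;    /* -1 if no usable ratio was found */
}
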
index 8e037835bc583f4d7e0a83119ad935bf2d9d242a..da0c27828ce60f5f02b92cfdc58a6739408a9406 100644 (file)
@@ -322,7 +322,7 @@ void asoc_simple_shutdown(struct snd_pcm_substream *substream)
 
                if (props->mclk_fs && !dai->clk_fixed && !snd_soc_dai_active(cpu_dai))
                        snd_soc_dai_set_sysclk(cpu_dai,
-                                              0, 0, SND_SOC_CLOCK_IN);
+                                              0, 0, SND_SOC_CLOCK_OUT);
 
                asoc_simple_clk_disable(dai);
        }
@@ -364,13 +364,15 @@ static int asoc_simple_set_tdm(struct snd_soc_dai *dai,
                                struct snd_pcm_hw_params *params)
 {
        int sample_bits = params_width(params);
-       int slot_width = simple_dai->slot_width;
-       int slot_count = simple_dai->slots;
+       int slot_width, slot_count;
        int i, ret;
 
        if (!simple_dai || !simple_dai->tdm_width_map)
                return 0;
 
+       slot_width = simple_dai->slot_width;
+       slot_count = simple_dai->slots;
+
        if (slot_width == 0)
                slot_width = sample_bits;
 
index 5e0529aa4f1d2880a4058ddd9b267402dccbdab8..9d617831dd206e2a6c60e07c93734d03ae3821a5 100644 (file)
 #define SOF_ES8336_SSP_CODEC(quirk)            ((quirk) & GENMASK(3, 0))
 #define SOF_ES8336_SSP_CODEC_MASK              (GENMASK(3, 0))
 
-#define SOF_ES8336_TGL_GPIO_QUIRK              BIT(4)
+#define SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK     BIT(4)
 #define SOF_ES8336_ENABLE_DMIC                 BIT(5)
 #define SOF_ES8336_JD_INVERTED                 BIT(6)
+#define SOF_ES8336_HEADPHONE_GPIO              BIT(7)
+#define SOC_ES8336_HEADSET_MIC1                        BIT(8)
 
 static unsigned long quirk;
 
@@ -39,7 +41,7 @@ MODULE_PARM_DESC(quirk, "Board-specific quirk override");
 
 struct sof_es8336_private {
        struct device *codec_dev;
-       struct gpio_desc *gpio_pa;
+       struct gpio_desc *gpio_speakers, *gpio_headphone;
        struct snd_soc_jack jack;
        struct list_head hdmi_pcm_list;
        bool speaker_en;
@@ -51,19 +53,31 @@ struct sof_hdmi_pcm {
        int device;
 };
 
-static const struct acpi_gpio_params pa_enable_gpio = { 0, 0, true };
-static const struct acpi_gpio_mapping acpi_es8336_gpios[] = {
-       { "pa-enable-gpios", &pa_enable_gpio, 1 },
+static const struct acpi_gpio_params enable_gpio0 = { 0, 0, true };
+static const struct acpi_gpio_params enable_gpio1 = { 1, 0, true };
+
+static const struct acpi_gpio_mapping acpi_speakers_enable_gpio0[] = {
+       { "speakers-enable-gpios", &enable_gpio0, 1 },
        { }
 };
 
-static const struct acpi_gpio_params quirk_pa_enable_gpio = { 1, 0, true };
-static const struct acpi_gpio_mapping quirk_acpi_es8336_gpios[] = {
-       { "pa-enable-gpios", &quirk_pa_enable_gpio, 1 },
+static const struct acpi_gpio_mapping acpi_speakers_enable_gpio1[] = {
+       { "speakers-enable-gpios", &enable_gpio1, 1 },
+       { }
+};
+
+static const struct acpi_gpio_mapping acpi_enable_both_gpios[] = {
+       { "speakers-enable-gpios", &enable_gpio0, 1 },
+       { "headphone-enable-gpios", &enable_gpio1, 1 },
        { }
 };
 
-static const struct acpi_gpio_mapping *gpio_mapping = acpi_es8336_gpios;
+static const struct acpi_gpio_mapping acpi_enable_both_gpios_rev_order[] = {
+       { "speakers-enable-gpios", &enable_gpio1, 1 },
+       { "headphone-enable-gpios", &enable_gpio0, 1 },
+       { }
+};
+
+static const struct acpi_gpio_mapping *gpio_mapping = acpi_speakers_enable_gpio0;
 
 static void log_quirks(struct device *dev)
 {
@@ -71,10 +85,14 @@ static void log_quirks(struct device *dev)
        dev_info(dev, "quirk SSP%ld\n",  SOF_ES8336_SSP_CODEC(quirk));
        if (quirk & SOF_ES8336_ENABLE_DMIC)
                dev_info(dev, "quirk DMIC enabled\n");
-       if (quirk & SOF_ES8336_TGL_GPIO_QUIRK)
-               dev_info(dev, "quirk TGL GPIO enabled\n");
+       if (quirk & SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK)
+               dev_info(dev, "Speakers GPIO1 quirk enabled\n");
+       if (quirk & SOF_ES8336_HEADPHONE_GPIO)
+               dev_info(dev, "quirk headphone GPIO enabled\n");
        if (quirk & SOF_ES8336_JD_INVERTED)
                dev_info(dev, "quirk JD inverted enabled\n");
+       if (quirk & SOC_ES8336_HEADSET_MIC1)
+               dev_info(dev, "quirk headset at mic1 port enabled\n");
 }
 
 static int sof_es8316_speaker_power_event(struct snd_soc_dapm_widget *w,
@@ -83,12 +101,23 @@ static int sof_es8316_speaker_power_event(struct snd_soc_dapm_widget *w,
        struct snd_soc_card *card = w->dapm->card;
        struct sof_es8336_private *priv = snd_soc_card_get_drvdata(card);
 
+       if (priv->speaker_en == !SND_SOC_DAPM_EVENT_ON(event))
+               return 0;
+
+       priv->speaker_en = !SND_SOC_DAPM_EVENT_ON(event);
+
+       if (SND_SOC_DAPM_EVENT_ON(event))
+               msleep(70);
+
+       gpiod_set_value_cansleep(priv->gpio_speakers, priv->speaker_en);
+
+       if (!(quirk & SOF_ES8336_HEADPHONE_GPIO))
+               return 0;
+
        if (SND_SOC_DAPM_EVENT_ON(event))
-               priv->speaker_en = false;
-       else
-               priv->speaker_en = true;
+               msleep(70);
 
-       gpiod_set_value_cansleep(priv->gpio_pa, priv->speaker_en);
+       gpiod_set_value_cansleep(priv->gpio_headphone, priv->speaker_en);
 
        return 0;
 }
@@ -114,18 +143,23 @@ static const struct snd_soc_dapm_route sof_es8316_audio_map[] = {
 
        /*
         * There is no separate speaker output instead the speakers are muxed to
-        * the HP outputs. The mux is controlled by the "Speaker Power" supply.
+        * the HP outputs. The mux is controlled by the Speaker and/or Headphone switch.
         */
        {"Speaker", NULL, "HPOL"},
        {"Speaker", NULL, "HPOR"},
        {"Speaker", NULL, "Speaker Power"},
 };
 
-static const struct snd_soc_dapm_route sof_es8316_intmic_in1_map[] = {
+static const struct snd_soc_dapm_route sof_es8316_headset_mic2_map[] = {
        {"MIC1", NULL, "Internal Mic"},
        {"MIC2", NULL, "Headset Mic"},
 };
 
+static const struct snd_soc_dapm_route sof_es8316_headset_mic1_map[] = {
+       {"MIC2", NULL, "Internal Mic"},
+       {"MIC1", NULL, "Headset Mic"},
+};
+
 static const struct snd_soc_dapm_route dmic_map[] = {
        /* digital mics */
        {"DMic", NULL, "SoC DMIC"},
@@ -199,8 +233,13 @@ static int sof_es8316_init(struct snd_soc_pcm_runtime *runtime)
 
        card->dapm.idle_bias_off = true;
 
-       custom_map = sof_es8316_intmic_in1_map;
-       num_routes = ARRAY_SIZE(sof_es8316_intmic_in1_map);
+       if (quirk & SOC_ES8336_HEADSET_MIC1) {
+               custom_map = sof_es8316_headset_mic1_map;
+               num_routes = ARRAY_SIZE(sof_es8316_headset_mic1_map);
+       } else {
+               custom_map = sof_es8316_headset_mic2_map;
+               num_routes = ARRAY_SIZE(sof_es8316_headset_mic2_map);
+       }
 
        ret = snd_soc_dapm_add_routes(&card->dapm, custom_map, num_routes);
        if (ret)
@@ -233,8 +272,14 @@ static int sof_es8336_quirk_cb(const struct dmi_system_id *id)
 {
        quirk = (unsigned long)id->driver_data;
 
-       if (quirk & SOF_ES8336_TGL_GPIO_QUIRK)
-               gpio_mapping = quirk_acpi_es8336_gpios;
+       if (quirk & SOF_ES8336_HEADPHONE_GPIO) {
+               if (quirk & SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK)
+                       gpio_mapping = acpi_enable_both_gpios;
+               else
+                       gpio_mapping = acpi_enable_both_gpios_rev_order;
+       } else if (quirk & SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK) {
+               gpio_mapping = acpi_speakers_enable_gpio1;
+       }
 
        return 1;
 }
@@ -257,7 +302,16 @@ static const struct dmi_system_id sof_es8336_quirk_table[] = {
                        DMI_MATCH(DMI_SYS_VENDOR, "IP3 tech"),
                        DMI_MATCH(DMI_BOARD_NAME, "WN1"),
                },
-               .driver_data = (void *)(SOF_ES8336_TGL_GPIO_QUIRK)
+               .driver_data = (void *)(SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK)
+       },
+       {
+               .callback = sof_es8336_quirk_cb,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "HUAWEI"),
+                       DMI_MATCH(DMI_BOARD_NAME, "BOHB-WAX9-PCB-B2"),
+               },
+               .driver_data = (void *)(SOF_ES8336_HEADPHONE_GPIO |
+                                       SOC_ES8336_HEADSET_MIC1)
        },
        {}
 };
@@ -585,10 +639,17 @@ static int sof_es8336_probe(struct platform_device *pdev)
        if (ret)
                dev_warn(codec_dev, "unable to add GPIO mapping table\n");
 
-       priv->gpio_pa = gpiod_get_optional(codec_dev, "pa-enable", GPIOD_OUT_LOW);
-       if (IS_ERR(priv->gpio_pa)) {
-               ret = dev_err_probe(dev, PTR_ERR(priv->gpio_pa),
-                                   "could not get pa-enable GPIO\n");
+       priv->gpio_speakers = gpiod_get_optional(codec_dev, "speakers-enable", GPIOD_OUT_LOW);
+       if (IS_ERR(priv->gpio_speakers)) {
+               ret = dev_err_probe(dev, PTR_ERR(priv->gpio_speakers),
+                                   "could not get speakers-enable GPIO\n");
+               goto err_put_codec;
+       }
+
+       priv->gpio_headphone = gpiod_get_optional(codec_dev, "headphone-enable", GPIOD_OUT_LOW);
+       if (IS_ERR(priv->gpio_headphone)) {
+               ret = dev_err_probe(dev, PTR_ERR(priv->gpio_headphone),
+                                   "could not get headphone-enable GPIO\n");
                goto err_put_codec;
        }
 
@@ -604,7 +665,7 @@ static int sof_es8336_probe(struct platform_device *pdev)
 
        ret = devm_snd_soc_register_card(dev, card);
        if (ret) {
-               gpiod_put(priv->gpio_pa);
+               gpiod_put(priv->gpio_speakers);
                dev_err(dev, "snd_soc_register_card failed: %d\n", ret);
                goto err_put_codec;
        }
@@ -622,7 +683,7 @@ static int sof_es8336_remove(struct platform_device *pdev)
        struct snd_soc_card *card = platform_get_drvdata(pdev);
        struct sof_es8336_private *priv = snd_soc_card_get_drvdata(card);
 
-       gpiod_put(priv->gpio_pa);
+       gpiod_put(priv->gpio_speakers);
        device_remove_software_node(priv->codec_dev);
        put_device(priv->codec_dev);
 
index ebec4d15edaac945e5197512719f33a3aad88961..7126fcb63d9040f121800f00a84ae9f15d3e3664 100644 (file)
@@ -212,6 +212,19 @@ static const struct dmi_system_id sof_rt5682_quirk_table[] = {
                                        SOF_SSP_BT_OFFLOAD_PRESENT),
 
        },
+       {
+               .callback = sof_rt5682_quirk_cb,
+               .matches = {
+                       DMI_MATCH(DMI_PRODUCT_FAMILY, "Google_Brya"),
+                       DMI_MATCH(DMI_OEM_STRING, "AUDIO-MAX98360_ALC5682I_I2S_AMP_SSP2"),
+               },
+               .driver_data = (void *)(SOF_RT5682_MCLK_EN |
+                                       SOF_RT5682_SSP_CODEC(0) |
+                                       SOF_SPEAKER_AMP_PRESENT |
+                                       SOF_MAX98360A_SPEAKER_AMP_PRESENT |
+                                       SOF_RT5682_SSP_AMP(2) |
+                                       SOF_RT5682_NUM_HDMIDEV(4)),
+       },
        {}
 };
 
index 6edc9b7108cd58af79091eb5a610accdfb7ac64f..ef19150e7b2e9a72b10b568aa7a62d3fdb8ecdc4 100644 (file)
@@ -132,13 +132,13 @@ static const struct snd_soc_acpi_adr_device mx8373_1_adr[] = {
        {
                .adr = 0x000123019F837300ull,
                .num_endpoints = 1,
-               .endpoints = &spk_l_endpoint,
+               .endpoints = &spk_r_endpoint,
                .name_prefix = "Right"
        },
        {
                .adr = 0x000127019F837300ull,
                .num_endpoints = 1,
-               .endpoints = &spk_r_endpoint,
+               .endpoints = &spk_l_endpoint,
                .name_prefix = "Left"
        }
 };
index 27a6d3259c50ad6fa26a913cf7b5b45b8c186d97..3776b073a3dbb090333bdb495ca128ec265f17c9 100644 (file)
@@ -58,7 +58,7 @@ static int aiu_acodec_ctrl_mux_put_enum(struct snd_kcontrol *kcontrol,
 
        snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
 
-       return 0;
+       return 1;
 }
 
 static SOC_ENUM_SINGLE_DECL(aiu_acodec_ctrl_mux_enum, AIU_ACODEC_CTRL,
@@ -193,6 +193,9 @@ static const struct snd_soc_component_driver aiu_acodec_ctrl_component = {
        .of_xlate_dai_name      = aiu_acodec_of_xlate_dai_name,
        .endianness             = 1,
        .non_legacy_dai_naming  = 1,
+#ifdef CONFIG_DEBUG_FS
+       .debugfs_prefix         = "acodec",
+#endif
 };
 
 int aiu_acodec_ctrl_register_component(struct device *dev)
index c3ea733fce91fdbe79c05bc4ce53a1aed69a95ea..286ac4983d40c22589fdd713689a4ed6700bd34b 100644 (file)
@@ -57,7 +57,7 @@ static int aiu_codec_ctrl_mux_put_enum(struct snd_kcontrol *kcontrol,
 
        snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
 
-       return 0;
+       return 1;
 }
 
 static SOC_ENUM_SINGLE_DECL(aiu_hdmi_ctrl_mux_enum, AIU_HDMI_CLK_DATA_CTRL,
@@ -140,6 +140,9 @@ static const struct snd_soc_component_driver aiu_hdmi_ctrl_component = {
        .of_xlate_dai_name      = aiu_hdmi_of_xlate_dai_name,
        .endianness             = 1,
        .non_legacy_dai_naming  = 1,
+#ifdef CONFIG_DEBUG_FS
+       .debugfs_prefix         = "hdmi",
+#endif
 };
 
 int aiu_hdmi_ctrl_register_component(struct device *dev)
index d299a70db7e598300ec2aaac09101806ea0b305e..88e611e64d14f54fcd10caa5c5c0e49f43dec680 100644 (file)
@@ -103,6 +103,9 @@ static const struct snd_soc_component_driver aiu_cpu_component = {
        .pointer                = aiu_fifo_pointer,
        .probe                  = aiu_cpu_component_probe,
        .remove                 = aiu_cpu_component_remove,
+#ifdef CONFIG_DEBUG_FS
+       .debugfs_prefix         = "cpu",
+#endif
 };
 
 static struct snd_soc_dai_driver aiu_cpu_dai_drv[] = {
index cbbaa55d92a662c6f43ba90b7d0f1a508899e0eb..2b77010c2c5ce8398c330e8c8e13cc9248daa828 100644 (file)
@@ -320,7 +320,6 @@ static int axg_card_add_link(struct snd_soc_card *card, struct device_node *np,
 
        dai_link->cpus = cpu;
        dai_link->num_cpus = 1;
-       dai_link->nonatomic = true;
 
        ret = meson_card_parse_dai(card, np, &dai_link->cpus->of_node,
                                   &dai_link->cpus->dai_name);
index 0c31934a96301b4b3b0fa5c6ba88051694f89515..e076ced30025741af2ad13d0574ba1ab15ff6777 100644 (file)
@@ -351,29 +351,13 @@ static int axg_tdm_iface_hw_free(struct snd_pcm_substream *substream,
        return 0;
 }
 
-static int axg_tdm_iface_trigger(struct snd_pcm_substream *substream,
-                                int cmd,
+static int axg_tdm_iface_prepare(struct snd_pcm_substream *substream,
                                 struct snd_soc_dai *dai)
 {
-       struct axg_tdm_stream *ts =
-               snd_soc_dai_get_dma_data(dai, substream);
-
-       switch (cmd) {
-       case SNDRV_PCM_TRIGGER_START:
-       case SNDRV_PCM_TRIGGER_RESUME:
-       case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
-               axg_tdm_stream_start(ts);
-               break;
-       case SNDRV_PCM_TRIGGER_SUSPEND:
-       case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
-       case SNDRV_PCM_TRIGGER_STOP:
-               axg_tdm_stream_stop(ts);
-               break;
-       default:
-               return -EINVAL;
-       }
+       struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream);
 
-       return 0;
+       /* Force all attached formatters to update */
+       return axg_tdm_stream_reset(ts);
 }
 
 static int axg_tdm_iface_remove_dai(struct snd_soc_dai *dai)
@@ -413,8 +397,8 @@ static const struct snd_soc_dai_ops axg_tdm_iface_ops = {
        .set_fmt        = axg_tdm_iface_set_fmt,
        .startup        = axg_tdm_iface_startup,
        .hw_params      = axg_tdm_iface_hw_params,
+       .prepare        = axg_tdm_iface_prepare,
        .hw_free        = axg_tdm_iface_hw_free,
-       .trigger        = axg_tdm_iface_trigger,
 };
 
 /* TDM Backend DAIs */
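This moves the formatter sync out of .trigger, which runs in atomic context for ordinary PCMs and therefore must not sleep, and into .prepare, which may sleep; that also lines up with the axg-card hunk above dropping dai_link->nonatomic. A sketch of the two callback contexts (names hypothetical):

#include <sound/soc.h>

/* Hypothetical stubs illustrating which DAI callbacks may sleep. */
static int my_prepare(struct snd_pcm_substream *substream,
		      struct snd_soc_dai *dai)
{
	/* .prepare may sleep: safe for regmap/clock work */
	return 0;
}

static int my_trigger(struct snd_pcm_substream *substream, int cmd,
		      struct snd_soc_dai *dai)
{
	/* .trigger is atomic unless the PCM is marked nonatomic */
	return 0;
}

static const struct snd_soc_dai_ops my_dai_ops = {
	.prepare	= my_prepare,
	.trigger	= my_trigger,
};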
index 9b2b59536ced042b82ab328be9e2f873bf38d77d..6c99052feafd89048f167e19700761e47dcdbbe8 100644 (file)
@@ -67,7 +67,7 @@ static int g12a_tohdmitx_i2s_mux_put_enum(struct snd_kcontrol *kcontrol,
 
        snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
 
-       return 0;
+       return 1;
 }
 
 static SOC_ENUM_SINGLE_DECL(g12a_tohdmitx_i2s_mux_enum, TOHDMITX_CTRL0,
index ce153ac2c3ab6b95a307546596f14915f948391c..8c7da82a62cab057e5f532e3e3e79059480d3a9f 100644 (file)
@@ -2587,6 +2587,11 @@ int snd_soc_component_initialize(struct snd_soc_component *component,
        component->dev          = dev;
        component->driver       = driver;
 
+#ifdef CONFIG_DEBUG_FS
+       if (!component->debugfs_prefix)
+               component->debugfs_prefix = driver->debugfs_prefix;
+#endif
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(snd_soc_component_initialize);
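This is what makes the aiu .debugfs_prefix fields above take effect: a driver registering several components on one device can give each its own debugfs directory, and the prefix set in the component driver is copied to the component at init unless the component already chose one. Sketch, guarded like the code above since the field only exists under CONFIG_DEBUG_FS:

#include <sound/soc.h>

static const struct snd_soc_component_driver my_ctrl_component = {
	.name			= "my-ctrl",	/* hypothetical */
#ifdef CONFIG_DEBUG_FS
	/* distinguishes this component's debugfs dir from its siblings */
	.debugfs_prefix		= "ctrl",
#endif
};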
index b435b5c4cfb7d43af4835b9694eece04f0fe31fa..ca917a849c423de2348542547868f0eccac1964d 100644 (file)
@@ -1687,8 +1687,7 @@ static void dapm_seq_run(struct snd_soc_card *card,
                switch (w->id) {
                case snd_soc_dapm_pre:
                        if (!w->event)
-                               list_for_each_entry_safe_continue(w, n, list,
-                                                                 power_list);
+                               continue;
 
                        if (event == SND_SOC_DAPM_STREAM_START)
                                ret = w->event(w,
@@ -1700,8 +1699,7 @@ static void dapm_seq_run(struct snd_soc_card *card,
 
                case snd_soc_dapm_post:
                        if (!w->event)
-                               list_for_each_entry_safe_continue(w, n, list,
-                                                                 power_list);
+                               continue;
 
                        if (event == SND_SOC_DAPM_STREAM_START)
                                ret = w->event(w,
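The calls replaced here were a misuse of the list iterator: invoked as a bare statement, list_for_each_entry_safe_continue() is itself a for-loop, and the trailing semicolon gave it an empty body, so instead of skipping one widget it wound the cursor to the end of the list. Inside the enclosing list_for_each_entry_safe() walk, a plain continue is what was intended:

/* What the old bare statement expanded to (sketch, not verbatim):
 *
 *	list_for_each_entry_safe_continue(w, n, list, power_list);
 * ==>
 *	for (w = next-widget, n = widget-after-next;
 *	     &w->power_list != (list);
 *	     w = n, n = next-of-n)
 *		;	// empty body: walks w to the list head sentinel
 *
 * leaving w pointing at the sentinel rather than the next widget.
 */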
index 2ab2ddc1294dd51e4f52d54fbfb5564c726ceba5..285441d6aeed475044901c414fe507b745386475 100644 (file)
@@ -86,10 +86,10 @@ static int dmaengine_pcm_hw_params(struct snd_soc_component *component,
 
        memset(&slave_config, 0, sizeof(slave_config));
 
-       if (pcm->config && pcm->config->prepare_slave_config)
-               prepare_slave_config = pcm->config->prepare_slave_config;
-       else
+       if (!pcm->config)
                prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config;
+       else
+               prepare_slave_config = pcm->config->prepare_slave_config;
 
        if (prepare_slave_config) {
                int ret = prepare_slave_config(substream, params, &slave_config);
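The reordering restores a subtle distinction: a driver that supplies a pcm->config with a NULL .prepare_slave_config deliberately opts out of dmaengine_slave_config(), while only a completely missing config falls back to the generic snd_dmaengine_pcm_prepare_slave_config(). A sketch of the opt-out case (the prealloc size is a made-up value):

#include <sound/dmaengine_pcm.h>

static const struct snd_dmaengine_pcm_config my_pcm_config = {
	/* No .prepare_slave_config on purpose: the DMA channel is
	 * preconfigured elsewhere, so the core must not touch it.
	 */
	.prealloc_buffer_size	= 64 * 1024,
};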
index a0ca58ba16273b3767107512ede7cc214b1495cc..e693070f51fe8113dd5a6d57c378ccb7d460e302 100644 (file)
@@ -461,7 +461,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol,
                        ret = err;
                }
        }
-       return err;
+       return ret;
 }
 EXPORT_SYMBOL_GPL(snd_soc_put_volsw_sx);
 
@@ -519,7 +519,15 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol,
        unsigned int mask = (1 << fls(max)) - 1;
        unsigned int invert = mc->invert;
        unsigned int val, val_mask;
-       int err, ret;
+       int err, ret, tmp;
+
+       tmp = ucontrol->value.integer.value[0];
+       if (tmp < 0)
+               return -EINVAL;
+       if (mc->platform_max && tmp > mc->platform_max)
+               return -EINVAL;
+       if (tmp > mc->max - mc->min + 1)
+               return -EINVAL;
 
        if (invert)
                val = (max - ucontrol->value.integer.value[0]) & mask;
@@ -534,6 +542,14 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol,
        ret = err;
 
        if (snd_soc_volsw_is_stereo(mc)) {
+               tmp = ucontrol->value.integer.value[1];
+               if (tmp < 0)
+                       return -EINVAL;
+               if (mc->platform_max && tmp > mc->platform_max)
+                       return -EINVAL;
+               if (tmp > mc->max - mc->min + 1)
+                       return -EINVAL;
+
                if (invert)
                        val = (max - ucontrol->value.integer.value[1]) & mask;
                else
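Both channels now get the same validation: the raw value from userspace is range-checked before any inversion or masking, and an out-of-range write fails with -EINVAL instead of being silently wrapped into the register mask. Condensed into a helper for illustration (the helper name is hypothetical; the fields are those of struct soc_mixer_control):

#include <sound/soc.h>

static int volsw_range_check(const struct soc_mixer_control *mc, long tmp)
{
	if (tmp < 0)
		return -EINVAL;
	if (mc->platform_max && tmp > mc->platform_max)
		return -EINVAL;
	if (tmp > mc->max - mc->min + 1)
		return -EINVAL;
	return 0;
}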
index 9a954680d492890133dfcb3ecb3e4fe4433262a3..11c9853e9e80775dc9801290e4f0ef2f9d7b7e94 100644 (file)
@@ -1214,7 +1214,7 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe,
                be_substream->pcm->nonatomic = 1;
        }
 
-       dpcm = kzalloc(sizeof(struct snd_soc_dpcm), GFP_ATOMIC);
+       dpcm = kzalloc(sizeof(struct snd_soc_dpcm), GFP_KERNEL);
        if (!dpcm)
                return -ENOMEM;
 
index 72e50df7052c3a835c8a0e90f7892e7cd9d1b071..3bb90a8196504b67ddd87d9a8a3df4a01fff3fbb 100644 (file)
@@ -1436,12 +1436,12 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg,
        template.num_kcontrols = le32_to_cpu(w->num_kcontrols);
        kc = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(*kc), GFP_KERNEL);
        if (!kc)
-               goto err;
+               goto hdr_err;
 
        kcontrol_type = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(unsigned int),
                                     GFP_KERNEL);
        if (!kcontrol_type)
-               goto err;
+               goto hdr_err;
 
        for (i = 0; i < le32_to_cpu(w->num_kcontrols); i++) {
                control_hdr = (struct snd_soc_tplg_ctl_hdr *)tplg->pos;
index 4c95967428444e5ac3a5a3c97e6817d27dd11ec9..7fa2649e56e57c6c330606c5f5396bd35b65899d 100644 (file)
@@ -83,7 +83,14 @@ static const struct dmi_system_id sof_tplg_table[] = {
                },
                .driver_data = "sof-adl-max98357a-rt5682-2way.tplg",
        },
-
+       {
+               .callback = sof_tplg_cb,
+               .matches = {
+                       DMI_MATCH(DMI_PRODUCT_FAMILY, "Google_Brya"),
+                       DMI_MATCH(DMI_OEM_STRING, "AUDIO-MAX98360_ALC5682I_I2S_AMP_SSP2"),
+               },
+               .driver_data = "sof-adl-max98357a-rt5682.tplg",
+       },
        {}
 };
 
@@ -146,6 +153,11 @@ int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 
        dev_dbg(&pci->dev, "PCI DSP detected");
 
+       if (!desc) {
+               dev_err(dev, "error: no matching PCI descriptor\n");
+               return -ENODEV;
+       }
+
        if (!desc->ops) {
                dev_err(dev, "error: no matching PCI descriptor ops\n");
                return -ENODEV;
index 9b11e9795a7a04378cfba08323991bad053ac166..3e5b319b44c754bfd567d8c4ce159db6b6335b1c 100644 (file)
@@ -904,8 +904,10 @@ static int sof_control_load(struct snd_soc_component *scomp, int index,
                return -ENOMEM;
 
        scontrol->name = kstrdup(hdr->name, GFP_KERNEL);
-       if (!scontrol->name)
+       if (!scontrol->name) {
+               kfree(scontrol);
                return -ENOMEM;
+       }
 
        scontrol->scomp = scomp;
        scontrol->access = kc->access;
@@ -941,11 +943,13 @@ static int sof_control_load(struct snd_soc_component *scomp, int index,
        default:
                dev_warn(scomp->dev, "control type not supported %d:%d:%d\n",
                         hdr->ops.get, hdr->ops.put, hdr->ops.info);
+               kfree(scontrol->name);
                kfree(scontrol);
                return 0;
        }
 
        if (ret < 0) {
+               kfree(scontrol->name);
                kfree(scontrol);
                return ret;
        }
@@ -1068,6 +1072,46 @@ static int sof_connect_dai_widget(struct snd_soc_component *scomp,
        return 0;
 }
 
+static void sof_disconnect_dai_widget(struct snd_soc_component *scomp,
+                                     struct snd_soc_dapm_widget *w)
+{
+       struct snd_soc_card *card = scomp->card;
+       struct snd_soc_pcm_runtime *rtd;
+       struct snd_soc_dai *cpu_dai;
+       int i;
+
+       if (!w->sname)
+               return;
+
+       list_for_each_entry(rtd, &card->rtd_list, list) {
+               /* does stream match DAI link? */
+               if (!rtd->dai_link->stream_name ||
+                   strcmp(w->sname, rtd->dai_link->stream_name))
+                       continue;
+
+               switch (w->id) {
+               case snd_soc_dapm_dai_out:
+                       for_each_rtd_cpu_dais(rtd, i, cpu_dai) {
+                               if (cpu_dai->capture_widget == w) {
+                                       cpu_dai->capture_widget = NULL;
+                                       break;
+                               }
+                       }
+                       break;
+               case snd_soc_dapm_dai_in:
+                       for_each_rtd_cpu_dais(rtd, i, cpu_dai) {
+                               if (cpu_dai->playback_widget == w) {
+                                       cpu_dai->playback_widget = NULL;
+                                       break;
+                               }
+                       }
+                       break;
+               default:
+                       break;
+               }
+       }
+}
+
 /* bind PCM ID to host component ID */
 static int spcm_bind(struct snd_soc_component *scomp, struct snd_sof_pcm *spcm,
                     int dir)
@@ -1353,6 +1397,9 @@ static int sof_widget_unload(struct snd_soc_component *scomp,
 
                if (dai)
                        list_del(&dai->list);
+
+               sof_disconnect_dai_widget(scomp, widget);
+
                break;
        default:
                break;
@@ -1380,6 +1427,7 @@ static int sof_widget_unload(struct snd_soc_component *scomp,
                }
                kfree(scontrol->ipc_control_data);
                list_del(&scontrol->list);
+               kfree(scontrol->name);
                kfree(scontrol);
        }
 
index 2c01649c70f619d6e9994e81bdc16c171bb6e471..7c6ca2b433a53ee718b093b768e2e442ac9f6c3a 100644 (file)
@@ -1194,6 +1194,7 @@ static void snd_usbmidi_output_drain(struct snd_rawmidi_substream *substream)
                } while (drain_urbs && timeout);
                finish_wait(&ep->drain_wait, &wait);
        }
+       port->active = 0;
        spin_unlock_irq(&ep->buffer_lock);
 }
 
index 64f5544d0a0aa624f01f781c143db72865f353dc..7ef7a8abcc2b11ac0eadeaeda7676ffafa5c01f9 100644 (file)
@@ -599,6 +599,10 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
                .id = USB_ID(0x0db0, 0x419c),
                .map = msi_mpg_x570s_carbon_max_wifi_alc4080_map,
        },
+       {       /* MSI MAG X570S Torpedo Max */
+               .id = USB_ID(0x0db0, 0xa073),
+               .map = msi_mpg_x570s_carbon_max_wifi_alc4080_map,
+       },
        {       /* MSI TRX40 */
                .id = USB_ID(0x0db0, 0x543d),
                .map = trx40_mobo_map,
index cec6e91afea2403cc4fd56580c07a01291be3b26..6d699065e81a21bae12505f929131eb74cce7961 100644 (file)
@@ -669,9 +669,9 @@ static const struct snd_pcm_hardware snd_usb_hardware =
                                SNDRV_PCM_INFO_PAUSE,
        .channels_min =         1,
        .channels_max =         256,
-       .buffer_bytes_max =     1024 * 1024,
+       .buffer_bytes_max =     INT_MAX, /* limited by BUFFER_TIME later */
        .period_bytes_min =     64,
-       .period_bytes_max =     512 * 1024,
+       .period_bytes_max =     INT_MAX, /* limited by PERIOD_TIME later */
        .periods_min =          2,
        .periods_max =          1024,
 };
@@ -1064,6 +1064,18 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre
                        return err;
        }
 
+       /* set max period and buffer sizes for 1 and 2 seconds, respectively */
+       err = snd_pcm_hw_constraint_minmax(runtime,
+                                          SNDRV_PCM_HW_PARAM_PERIOD_TIME,
+                                          0, 1000000);
+       if (err < 0)
+               return err;
+       err = snd_pcm_hw_constraint_minmax(runtime,
+                                          SNDRV_PCM_HW_PARAM_BUFFER_TIME,
+                                          0, 2000000);
+       if (err < 0)
+               return err;
+
        /* additional hw constraints for implicit fb */
        err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT,
                                  hw_rule_format_implicit_fb, subs,
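The fixed byte caps being removed above were the real limit for high-rate streams; expressing the caps as time (1 s per period, 2 s per buffer, per the constraints added here) lets them scale with the stream parameters. A worked example of why:

/* One second of 192 kHz, 8-channel, 32-bit audio is
 *	192000 * 8 * 4 = 6144000 bytes (~5.9 MiB),
 * well above the old 1 MiB buffer_bytes_max, while the time-based
 * constraints scale with the actual rate and channel count.
 */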
index 0ea39565e62329c98bf283bcff25826c0fd0a11b..40a5e3eb4ef262ec0b589b3bf0c5aa7f1ed0632a 100644 (file)
@@ -3235,6 +3235,15 @@ YAMAHA_DEVICE(0x7010, "UB99"),
        }
 },
 
+/* Rane SL-1 */
+{
+       USB_DEVICE(0x13e5, 0x0001),
+       .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+               .ifnum = QUIRK_ANY_INTERFACE,
+               .type = QUIRK_AUDIO_STANDARD_INTERFACE
+        }
+},
+
 /* disabled due to regression for other devices;
  * see https://bugzilla.kernel.org/show_bug.cgi?id=199905
  */
index ab9f3da49941fc97f715e9270044f1cdd12ff291..fbbe59054c3fb1da77878095f8d74319d9bced41 100644 (file)
@@ -1822,6 +1822,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
                   QUIRK_FLAG_IGNORE_CTL_ERROR),
        DEVICE_FLG(0x06f8, 0xd002, /* Hercules DJ Console (Macintosh Edition) */
                   QUIRK_FLAG_IGNORE_CTL_ERROR),
+       DEVICE_FLG(0x0711, 0x5800, /* MCT Trigger 5 USB-to-HDMI */
+                  QUIRK_FLAG_GET_SAMPLE_RATE),
        DEVICE_FLG(0x074d, 0x3553, /* Outlaw RR2150 (Micronas UAC3553B) */
                   QUIRK_FLAG_GET_SAMPLE_RATE),
        DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */
index 167834133b9bc8b81a8333f32938fceba6eb7810..b8359a0aa008a0a9f5dfcffb232cbe46ff2094d6 100644 (file)
@@ -8,7 +8,7 @@
  */
 
 /* handling of USB vendor/product ID pairs as 32-bit numbers */
-#define USB_ID(vendor, product) (((vendor) << 16) | (product))
+#define USB_ID(vendor, product) (((unsigned int)(vendor) << 16) | (product))
 #define USB_ID_VENDOR(id) ((id) >> 16)
 #define USB_ID_PRODUCT(id) ((u16)(id))
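The cast matters because a vendor ID with bit 15 set would otherwise be shifted into the sign bit of a signed int, which is undefined behaviour:

/* USB_ID(0x8086, 0x0808):
 *	0x8086 << 16                  -> signed overflow, undefined
 *	(unsigned int)0x8086 << 16    -> 0x80860000, well defined
 * so the combined ID is 0x80860808 on every compiler.
 */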
 
index b00634663346b99a242dc55b7353415816724da6..0d828e35b40191b55d91fdc49aa5e48353466d39 100644 (file)
@@ -1652,7 +1652,7 @@ static void hdmi_lpe_audio_free(struct snd_card *card)
  * This function is called when the i915 driver creates the
  * hdmi-lpe-audio platform device.
  */
-static int hdmi_lpe_audio_probe(struct platform_device *pdev)
+static int __hdmi_lpe_audio_probe(struct platform_device *pdev)
 {
        struct snd_card *card;
        struct snd_intelhad_card *card_ctx;
@@ -1815,6 +1815,11 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
        return 0;
 }
 
+static int hdmi_lpe_audio_probe(struct platform_device *pdev)
+{
+       return snd_card_free_on_error(&pdev->dev, __hdmi_lpe_audio_probe(pdev));
+}
+
 static const struct dev_pm_ops hdmi_lpe_audio_pm = {
        SET_SYSTEM_SLEEP_PM_OPS(hdmi_lpe_audio_suspend, hdmi_lpe_audio_resume)
 };
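The probe split uses what has become the idiomatic cleanup pattern for devres-managed cards: do the real work in a __probe helper and let snd_card_free_on_error() release the card if it failed, instead of open-coding the error unwinding. The shape of the pattern (names hypothetical):

#include <linux/platform_device.h>
#include <sound/core.h>

static int __my_probe(struct platform_device *pdev)
{
	/* ... allocate the card with snd_devm_card_new(), set it up ... */
	return 0;
}

static int my_probe(struct platform_device *pdev)
{
	/* frees the devres-managed card when __my_probe() fails */
	return snd_card_free_on_error(&pdev->dev, __my_probe(pdev));
}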
index db2f7b8ebed59a0d067c48e5dc19f3ac9927d9e2..724134f0e56c982fadcf4bb2d30bccee5dbbdfcc 100644 (file)
@@ -24,6 +24,7 @@ help:
        @echo '  intel-speed-select     - Intel Speed Select tool'
        @echo '  kvm_stat               - top-like utility for displaying kvm statistics'
        @echo '  leds                   - LEDs  tools'
+       @echo '  nolibc                 - nolibc headers testing and installation'
        @echo '  objtool                - an ELF object analysis tool'
        @echo '  pci                    - PCI tools'
        @echo '  perf                   - Linux performance measurement and analysis tool'
@@ -74,6 +75,9 @@ bpf/%: FORCE
 libapi: FORCE
        $(call descend,lib/api)
 
+nolibc_%: FORCE
+       $(call descend,include/nolibc,$(patsubst nolibc_%,%,$@))
+
 # The perf build does not follow the descend function setup,
 # invoking it via it's own make rule.
 PERF_O   = $(if $(O),$(O)/tools/perf,)
index 9afcc6467a095c350840eee1f4037f4cff2e0cda..e09d6908a21d36b1ee2feb65b477ff4859cce725 100644 (file)
@@ -75,6 +75,7 @@
 #define ARM_CPU_PART_CORTEX_A77                0xD0D
 #define ARM_CPU_PART_NEOVERSE_V1       0xD40
 #define ARM_CPU_PART_CORTEX_A78                0xD41
+#define ARM_CPU_PART_CORTEX_A78AE      0xD42
 #define ARM_CPU_PART_CORTEX_X1         0xD44
 #define ARM_CPU_PART_CORTEX_A510       0xD46
 #define ARM_CPU_PART_CORTEX_A710       0xD47
 #define MIDR_CORTEX_A77        MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
 #define MIDR_NEOVERSE_V1       MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
 #define MIDR_CORTEX_A78        MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
+#define MIDR_CORTEX_A78AE      MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
 #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
 #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
 #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
index 1231d63f836d81386f122c1022a7f5a25d503ef1..1ae0fab7d902b0a722563cccf89aa770e4eb7f35 100644 (file)
  * cpu_feature_enabled().
  */
 
-#ifdef CONFIG_X86_SMAP
-# define DISABLE_SMAP  0
-#else
-# define DISABLE_SMAP  (1<<(X86_FEATURE_SMAP & 31))
-#endif
-
 #ifdef CONFIG_X86_UMIP
 # define DISABLE_UMIP  0
 #else
@@ -80,7 +74,7 @@
 #define DISABLED_MASK6 0
 #define DISABLED_MASK7 (DISABLE_PTI)
 #define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX)
+#define DISABLED_MASK9 (DISABLE_SGX)
 #define DISABLED_MASK10        0
 #define DISABLED_MASK11        0
 #define DISABLED_MASK12        0
index 0eb90d21049e84a287f20b5abaf83ba359e0c824..ee15311b6be1d99e2bea11bd4c03a8a36fd8c706 100644 (file)
 #define TSX_CTRL_RTM_DISABLE           BIT(0)  /* Disable RTM feature */
 #define TSX_CTRL_CPUID_CLEAR           BIT(1)  /* Disable TSX enumeration */
 
-/* SRBDS support */
 #define MSR_IA32_MCU_OPT_CTRL          0x00000123
-#define RNGDS_MITG_DIS                 BIT(0)
+#define RNGDS_MITG_DIS                 BIT(0)  /* SRBDS support */
+#define RTM_ALLOW                      BIT(1)  /* TSX development mode */
 
 #define MSR_IA32_SYSENTER_CS           0x00000174
 #define MSR_IA32_SYSENTER_ESP          0x00000175
index 91af2850b505742906b0f2bd24b851abf5de8988..7678af364793f2f209597e9fdd54499679b62524 100644 (file)
@@ -828,8 +828,10 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped)
                        s->map_cnt = %zu;                           \n\
                        s->map_skel_sz = sizeof(*s->maps);          \n\
                        s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\
-                       if (!s->maps)                               \n\
+                       if (!s->maps) {                             \n\
+                               err = -ENOMEM;                      \n\
                                goto err;                           \n\
+                       }                                           \n\
                ",
                map_cnt
        );
@@ -870,8 +872,10 @@ codegen_progs_skeleton(struct bpf_object *obj, size_t prog_cnt, bool populate_li
                        s->prog_cnt = %zu;                          \n\
                        s->prog_skel_sz = sizeof(*s->progs);        \n\
                        s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);\n\
-                       if (!s->progs)                              \n\
+                       if (!s->progs) {                            \n\
+                               err = -ENOMEM;                      \n\
                                goto err;                           \n\
+                       }                                           \n\
                ",
                prog_cnt
        );
@@ -1182,10 +1186,13 @@ static int do_skeleton(int argc, char **argv)
                %1$s__create_skeleton(struct %1$s *obj)                     \n\
                {                                                           \n\
                        struct bpf_object_skeleton *s;                      \n\
+                       int err;                                            \n\
                                                                            \n\
                        s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
-                       if (!s)                                             \n\
+                       if (!s) {                                           \n\
+                               err = -ENOMEM;                              \n\
                                goto err;                                   \n\
+                       }                                                   \n\
                                                                            \n\
                        s->sz = sizeof(*s);                                 \n\
                        s->name = \"%1$s\";                                 \n\
@@ -1206,7 +1213,7 @@ static int do_skeleton(int argc, char **argv)
                        return 0;                                           \n\
                err:                                                        \n\
                        bpf_object__destroy_skeleton(s);                    \n\
-                       return -ENOMEM;                                     \n\
+                       return err;                                         \n\
                }                                                           \n\
                                                                            \n\
                static inline const void *%2$s__elf_bytes(size_t *sz)       \n\
@@ -1466,12 +1473,12 @@ static int do_subskeleton(int argc, char **argv)
                                                                            \n\
                        obj = (struct %1$s *)calloc(1, sizeof(*obj));       \n\
                        if (!obj) {                                         \n\
-                               errno = ENOMEM;                             \n\
+                               err = -ENOMEM;                              \n\
                                goto err;                                   \n\
                        }                                                   \n\
                        s = (struct bpf_object_subskeleton *)calloc(1, sizeof(*s));\n\
                        if (!s) {                                           \n\
-                               errno = ENOMEM;                             \n\
+                               err = -ENOMEM;                              \n\
                                goto err;                                   \n\
                        }                                                   \n\
                        s->sz = sizeof(*s);                                 \n\
@@ -1483,7 +1490,7 @@ static int do_subskeleton(int argc, char **argv)
                        s->var_cnt = %2$d;                                  \n\
                        s->vars = (struct bpf_var_skeleton *)calloc(%2$d, sizeof(*s->vars));\n\
                        if (!s->vars) {                                     \n\
-                               errno = ENOMEM;                             \n\
+                               err = -ENOMEM;                              \n\
                                goto err;                                   \n\
                        }                                                   \n\
                ",
@@ -1538,6 +1545,7 @@ static int do_subskeleton(int argc, char **argv)
                        return obj;                                         \n\
                err:                                                        \n\
                        %1$s__destroy(obj);                                 \n\
+                       errno = -err;                                       \n\
                        return NULL;                                        \n\
                }                                                           \n\
                                                                            \n\
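Net effect of these generator changes: the emitted skeleton code carries the first failure in a local err, funnels every failure through one cleanup label, and reports the real cause (via the return value, or errno for subskeletons) instead of hard-coding ENOMEM. The pattern, restated outside the string literals:

#include <errno.h>
#include <stdlib.h>

/* Sketch of the error-propagation shape the generator now emits. */
static void *create_thing(void)
{
	int err;
	void *s = calloc(1, 64);

	if (!s) {
		err = -ENOMEM;
		goto err_out;
	}
	/* ... later steps set err and goto err_out on failure ... */
	return s;

err_out:
	free(s);
	errno = -err;	/* the real cause, not always ENOMEM */
	return NULL;
}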
index ae61f464043a11fbe78d7fcebb5f2266a1b291a7..c6a48d0ef9ff06489a7a32340f2ee90e1b159efc 100644 (file)
@@ -98,6 +98,7 @@ FEATURE_TESTS_EXTRA :=                  \
          llvm-version                   \
          clang                          \
          libbpf                         \
+         libbpf-btf__load_from_kernel_by_id \
          libpfm4                        \
          libdebuginfod                 \
          clang-bpf-co-re
index 1480910c792e2cb3fc6f63f858cf089e18222d70..cb4a2a4fa2e48ebf444a0b1604b0a2619b26a58c 100644 (file)
@@ -57,6 +57,7 @@ FILES=                                          \
          test-lzma.bin                          \
          test-bpf.bin                           \
          test-libbpf.bin                        \
+         test-libbpf-btf__load_from_kernel_by_id.bin   \
          test-get_cpuid.bin                     \
          test-sdt.bin                           \
          test-cxx.bin                           \
@@ -217,9 +218,16 @@ strip-libs = $(filter-out -l%,$(1))
 PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
 PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
 PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
-PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
 
+ifeq ($(CC_NO_CLANG), 0)
+  PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
+  PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS))
+  PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+  FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro
+endif
+
 $(OUTPUT)test-libperl.bin:
        $(BUILD) $(FLAGS_PERL_EMBED)
 
@@ -280,6 +288,9 @@ $(OUTPUT)test-bpf.bin:
 $(OUTPUT)test-libbpf.bin:
        $(BUILD) -lbpf
 
+$(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin:
+       $(BUILD) -lbpf
+
 $(OUTPUT)test-sdt.bin:
        $(BUILD)
 
diff --git a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
new file mode 100644 (file)
index 0000000..f7c0844
--- /dev/null
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/libbpf.h>
+
+int main(void)
+{
+       return btf__load_from_kernel_by_id(20151128, NULL);
+}
index 586d35720f135f62b9d7434beccacb2e22a5bd2d..b9c1474a571e1c2e9c66c82b09135584de8aa480 100644 (file)
@@ -40,6 +40,8 @@ struct unwind_hint {
 
 #ifdef CONFIG_STACK_VALIDATION
 
+#include <asm/asm.h>
+
 #ifndef __ASSEMBLY__
 
 #define UNWIND_HINT(sp_reg, sp_offset, type, end)              \
@@ -137,7 +139,7 @@ struct unwind_hint {
 
 .macro STACK_FRAME_NON_STANDARD func:req
        .pushsection .discard.func_stack_frame_non_standard, "aw"
-               .long \func - .
+       _ASM_PTR \func
        .popsection
 .endm
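_ASM_PTR comes from the newly included <asm/asm.h> and expands to .long on 32-bit and .quad on 64-bit x86, so the section entry becomes an absolute, pointer-sized reference to the function rather than a 32-bit PC-relative offset. Roughly:

/* Shape of the helper (sketch, not verbatim from asm.h): */
#ifdef CONFIG_X86_32
# define _ASM_PTR	" .long "
#else
# define _ASM_PTR	" .quad "
#endif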
 
index f41d8a0eb1a4204bf312d89de69a066b7d2a0ae1..0616409513eb7947409f23045dd6395a9b68ce0e 100644 (file)
@@ -28,7 +28,13 @@ static inline void *kzalloc(size_t size, gfp_t gfp)
        return kmalloc(size, gfp | __GFP_ZERO);
 }
 
-void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
+struct list_lru;
+
+void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *, int flags);
+static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
+{
+       return kmem_cache_alloc_lru(cachep, NULL, flags);
+}
 void kmem_cache_free(struct kmem_cache *cachep, void *objp);
 
 struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
new file mode 100644 (file)
index 0000000..7a16d91
--- /dev/null
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for nolibc installation and tests
+include ../../scripts/Makefile.include
+
+# we're in ".../tools/include/nolibc"
+ifeq ($(srctree),)
+srctree := $(patsubst %/tools/include/,%,$(dir $(CURDIR)))
+endif
+
+nolibc_arch := $(patsubst arm64,aarch64,$(ARCH))
+arch_file := arch-$(nolibc_arch).h
+all_files := ctype.h errno.h nolibc.h signal.h std.h stdio.h stdlib.h string.h \
+             sys.h time.h types.h unistd.h
+
+# install all headers needed to support a bare-metal compiler
+all:
+
+# Note: when ARCH is "x86" we concatenate both x86_64 and i386
+headers:
+       $(Q)mkdir -p $(OUTPUT)sysroot
+       $(Q)mkdir -p $(OUTPUT)sysroot/include
+       $(Q)cp $(all_files) $(OUTPUT)sysroot/include/
+       $(Q)if [ "$(ARCH)" = "x86" ]; then      \
+               sed -e                          \
+                 's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \
+                 arch-x86_64.h;                \
+               sed -e                          \
+                 's,^#ifndef _NOLIBC_ARCH_I386_H,#if !defined(_NOLIBC_ARCH_I386_H) \&\& !defined(__x86_64__),' \
+                 arch-i386.h;                  \
+       elif [ -e "$(arch_file)" ]; then        \
+               cat $(arch_file);               \
+       else                                    \
+               echo "Fatal: architecture $(ARCH) not yet supported by nolibc." >&2; \
+               exit 1;                         \
+       fi > $(OUTPUT)sysroot/include/arch.h
+
+headers_standalone: headers
+       $(Q)$(MAKE) -C $(srctree) headers
+       $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)/sysroot
+
+clean:
+       $(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot"
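For ARCH=x86 the two sed invocations rewrite the include guards so that both headers can be concatenated into a single arch.h and the compiler selects the right half. The resulting guard shape, sketched:

/* Generated arch.h for ARCH=x86 (shape only): */
#if !defined(_NOLIBC_ARCH_X86_64_H) && defined(__x86_64__)
/* ... arch-x86_64.h body ... */
#endif
#if !defined(_NOLIBC_ARCH_I386_H) && !defined(__x86_64__)
/* ... arch-i386.h body ... */
#endif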
diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
new file mode 100644 (file)
index 0000000..f68baf8
--- /dev/null
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * AARCH64 specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_AARCH64_H
+#define _NOLIBC_ARCH_AARCH64_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY            0
+#define O_WRONLY            1
+#define O_RDWR              2
+#define O_CREAT          0x40
+#define O_EXCL           0x80
+#define O_NOCTTY        0x100
+#define O_TRUNC         0x200
+#define O_APPEND        0x400
+#define O_NONBLOCK      0x800
+#define O_DIRECTORY    0x4000
+
+/* The struct returned by the newfstatat() syscall. It differs slightly from
+ * x86_64's stat struct in field ordering, so be careful.
+ */
+struct sys_stat_struct {
+       unsigned long   st_dev;
+       unsigned long   st_ino;
+       unsigned int    st_mode;
+       unsigned int    st_nlink;
+       unsigned int    st_uid;
+       unsigned int    st_gid;
+
+       unsigned long   st_rdev;
+       unsigned long   __pad1;
+       long            st_size;
+       int             st_blksize;
+       int             __pad2;
+
+       long            st_blocks;
+       long            st_atime;
+       unsigned long   st_atime_nsec;
+       long            st_mtime;
+
+       unsigned long   st_mtime_nsec;
+       long            st_ctime;
+       unsigned long   st_ctime_nsec;
+       unsigned int    __unused[2];
+};
+
+/* Syscalls for AARCH64:
+ *   - registers are 64-bit
+ *   - stack is 16-byte aligned
+ *   - syscall number is passed in x8
+ *   - arguments are in x0, x1, x2, x3, x4, x5
+ *   - the system call is performed by calling svc #0
+ *   - syscall return comes in x0.
+ *   - the arguments are cast to long and assigned into the target registers
+ *     which are then simply passed as registers to the asm code, so that we
+ *     don't run into register-constraint issues.
+ *
+ * On aarch64, select() is not implemented, so we have to use pselect6().
+ */
+#define __ARCH_WANT_SYS_PSELECT6
+
+#define my_syscall0(num)                                                      \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall1(num, arg1)                                                \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1),                                                 \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall2(num, arg1, arg2)                                          \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1), "r"(_arg2),                                     \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3)                                    \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("x2") = (long)(arg3);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("x2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("x3") = (long)(arg4);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("x2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("x3") = (long)(arg4);                    \
+       register long _arg5 __asm__ ("x4") = (long)(arg5);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r" (_arg1)                                                \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
+({                                                                            \
+       register long _num  __asm__ ("x8") = (num);                           \
+       register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("x2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("x3") = (long)(arg4);                    \
+       register long _arg5 __asm__ ("x4") = (long)(arg5);                    \
+       register long _arg6 __asm__ ("x5") = (long)(arg6);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r" (_arg1)                                                \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "r"(_arg6), "r"(_num)                                       \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+/* startup code */
+__asm__ (".section .text\n"
+    ".weak _start\n"
+    "_start:\n"
+    "ldr x0, [sp]\n"              // argc (x0) was in the stack
+    "add x1, sp, 8\n"             // argv (x1) = sp
+    "lsl x2, x0, 3\n"             // envp (x2) = 8*argc ...
+    "add x2, x2, 8\n"             //           + 8 (skip null)
+    "add x2, x2, x1\n"            //           + argv
+    "and sp, x1, -16\n"           // sp must be 16-byte aligned in the callee
+    "bl main\n"                   // main() returns the status code, we'll exit with it.
+    "mov x8, 93\n"                // NR_exit == 93
+    "svc #0\n"
+    "");
+
+#endif // _NOLIBC_ARCH_AARCH64_H
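A usage sketch of these macros, assuming the generic aarch64 syscall table where __NR_write is 64: the expansion yields the raw kernel result, so negative values encode -errno, which is how the nolibc wrappers consume it.

/* Raw write(2) via my_syscall3 on aarch64 (sketch; 64 == __NR_write). */
static long raw_write(int fd, const void *buf, unsigned long count)
{
	return my_syscall3(64, fd, buf, count);	/* < 0 means -errno */
}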
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
new file mode 100644 (file)
index 0000000..f31be8e
--- /dev/null
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * ARM specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_ARM_H
+#define _NOLIBC_ARCH_ARM_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY            0
+#define O_WRONLY            1
+#define O_RDWR              2
+#define O_CREAT          0x40
+#define O_EXCL           0x80
+#define O_NOCTTY        0x100
+#define O_TRUNC         0x200
+#define O_APPEND        0x400
+#define O_NONBLOCK      0x800
+#define O_DIRECTORY    0x4000
+
+/* The struct returned by the stat() syscall, 32-bit only; the syscall returns
+ * exactly 56 bytes (it stops before the unused array). On big endian, the
+ * layout differs, as the device fields are returned as shorts only.
+ */
+struct sys_stat_struct {
+#if defined(__ARMEB__)
+       unsigned short st_dev;
+       unsigned short __pad1;
+#else
+       unsigned long  st_dev;
+#endif
+       unsigned long  st_ino;
+       unsigned short st_mode;
+       unsigned short st_nlink;
+       unsigned short st_uid;
+       unsigned short st_gid;
+
+#if defined(__ARMEB__)
+       unsigned short st_rdev;
+       unsigned short __pad2;
+#else
+       unsigned long  st_rdev;
+#endif
+       unsigned long  st_size;
+       unsigned long  st_blksize;
+       unsigned long  st_blocks;
+
+       unsigned long  st_atime;
+       unsigned long  st_atime_nsec;
+       unsigned long  st_mtime;
+       unsigned long  st_mtime_nsec;
+
+       unsigned long  st_ctime;
+       unsigned long  st_ctime_nsec;
+       unsigned long  __unused[2];
+};
+
+/* Syscalls for ARM in ARM or Thumb modes:
+ *   - registers are 32-bit
+ *   - stack is 8-byte aligned
+ *     ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html)
+ *   - syscall number is passed in r7
+ *   - arguments are in r0, r1, r2, r3, r4, r5
+ *   - the system call is performed by calling svc #0
+ *   - syscall return comes in r0.
+ *   - only lr is clobbered.
+ *   - the arguments are cast to long and assigned into the target registers
+ *     which are then simply passed as registers to the asm code, so that we
+ *     don't run into register-constraint issues.
+ *   - the syscall number is always specified last so that some registers can
+ *     be forced before it (gcc refuses a %-register in the last position).
+ *
+ * Also, ARM supports the old_select syscall if newselect is not available
+ */
+#define __ARCH_WANT_SYS_OLD_SELECT
+
+#define my_syscall0(num)                                                      \
+({                                                                            \
+       register long _num __asm__ ("r7") = (num);                            \
+       register long _arg1 __asm__ ("r0");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_num)                                                   \
+               : "memory", "cc", "lr"                                        \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall1(num, arg1)                                                \
+({                                                                            \
+       register long _num __asm__ ("r7") = (num);                            \
+       register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1),                                                 \
+                 "r"(_num)                                                   \
+               : "memory", "cc", "lr"                                        \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall2(num, arg1, arg2)                                          \
+({                                                                            \
+       register long _num __asm__ ("r7") = (num);                            \
+       register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1), "r"(_arg2),                                     \
+                 "r"(_num)                                                   \
+               : "memory", "cc", "lr"                                        \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3)                                    \
+({                                                                            \
+       register long _num __asm__ ("r7") = (num);                            \
+       register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
+                 "r"(_num)                                                   \
+               : "memory", "cc", "lr"                                        \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+({                                                                            \
+       register long _num __asm__ ("r7") = (num);                            \
+       register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("r3") = (long)(arg4);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r"(_arg1)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
+                 "r"(_num)                                                   \
+               : "memory", "cc", "lr"                                        \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+({                                                                            \
+       register long _num __asm__ ("r7") = (num);                            \
+       register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("r3") = (long)(arg4);                    \
+       register long _arg5 __asm__ ("r4") = (long)(arg5);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "svc #0\n"                                                    \
+               : "=r" (_arg1)                                                \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "r"(_num)                                                   \
+               : "memory", "cc", "lr"                                        \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+/* startup code */
+__asm__ (".section .text\n"
+    ".weak _start\n"
+    "_start:\n"
+#if defined(__THUMBEB__) || defined(__THUMBEL__)
+    /* We enter here in 32-bit mode, but if some previous functions were in
+     * 16-bit mode, the assembler cannot know, so we need to tell it we're in
+     * 32-bit now, then switch to 16-bit (is there a better way to do it than
+     * adding 1 by hand?) and tell the asm we're now in 16-bit mode so that
+     * it generates correct instructions. Note that we do not support thumb1.
+     */
+    ".code 32\n"
+    "add     r0, pc, #1\n"
+    "bx      r0\n"
+    ".code 16\n"
+#endif
+    "pop {%r0}\n"                 // argc was in the stack
+    "mov %r1, %sp\n"              // argv = sp
+    "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
+    "add %r2, %r2, $4\n"          //        ... + 4
+    "and %r3, %r1, $-8\n"         // AAPCS : sp must be 8-byte aligned in the
+    "mov %sp, %r3\n"              //         callee, an bl doesn't push (lr=pc)
+    "bl main\n"                   // main() returns the status code, we'll exit with it.
+    "movs r7, $1\n"               // NR_exit == 1
+    "svc $0x00\n"
+    "");
+
+#endif // _NOLIBC_ARCH_ARM_H
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
new file mode 100644 (file)
index 0000000..d7e7212
--- /dev/null
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * i386 specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_I386_H
+#define _NOLIBC_ARCH_I386_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY            0
+#define O_WRONLY            1
+#define O_RDWR              2
+#define O_CREAT          0x40
+#define O_EXCL           0x80
+#define O_NOCTTY        0x100
+#define O_TRUNC         0x200
+#define O_APPEND        0x400
+#define O_NONBLOCK      0x800
+#define O_DIRECTORY   0x10000
+
+/* The struct returned by the stat() syscall, 32-bit only; the syscall returns
+ * exactly 56 bytes (it stops before the unused array).
+ */
+struct sys_stat_struct {
+       unsigned long  st_dev;
+       unsigned long  st_ino;
+       unsigned short st_mode;
+       unsigned short st_nlink;
+       unsigned short st_uid;
+       unsigned short st_gid;
+
+       unsigned long  st_rdev;
+       unsigned long  st_size;
+       unsigned long  st_blksize;
+       unsigned long  st_blocks;
+
+       unsigned long  st_atime;
+       unsigned long  st_atime_nsec;
+       unsigned long  st_mtime;
+       unsigned long  st_mtime_nsec;
+
+       unsigned long  st_ctime;
+       unsigned long  st_ctime_nsec;
+       unsigned long  __unused[2];
+};
+
+/* Syscalls for i386:
+ *   - mostly similar to x86_64
+ *   - registers are 32-bit
+ *   - syscall number is passed in eax
+ *   - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
+ *   - all registers are preserved (except eax of course)
+ *   - the system call is performed by calling int $0x80
+ *   - syscall return comes in eax
+ *   - the arguments are cast to long and assigned into the target registers
+ *     which are then simply passed as registers to the asm code, so that we
+ *     don't run into register-constraint issues.
+ *   - the syscall number is always specified last so that some registers can
+ *     be forced before it (gcc refuses a %-register in the last position).
+ *
+ * Also, i386 supports the old_select syscall if newselect is not available
+ */
+#define __ARCH_WANT_SYS_OLD_SELECT
+
+#define my_syscall0(num)                                                      \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num __asm__ ("eax") = (num);                           \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "int $0x80\n"                                                 \
+               : "=a" (_ret)                                                 \
+               : "0"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall1(num, arg1)                                                \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num __asm__ ("eax") = (num);                           \
+       register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "int $0x80\n"                                                 \
+               : "=a" (_ret)                                                 \
+               : "r"(_arg1),                                                 \
+                 "0"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall2(num, arg1, arg2)                                          \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num __asm__ ("eax") = (num);                           \
+       register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("ecx") = (long)(arg2);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "int $0x80\n"                                                 \
+               : "=a" (_ret)                                                 \
+               : "r"(_arg1), "r"(_arg2),                                     \
+                 "0"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3)                                    \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num __asm__ ("eax") = (num);                           \
+       register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("ecx") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("edx") = (long)(arg3);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "int $0x80\n"                                                 \
+               : "=a" (_ret)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
+                 "0"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num __asm__ ("eax") = (num);                           \
+       register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("ecx") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("edx") = (long)(arg3);                   \
+       register long _arg4 __asm__ ("esi") = (long)(arg4);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "int $0x80\n"                                                 \
+               : "=a" (_ret)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
+                 "0"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num __asm__ ("eax") = (num);                           \
+       register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("ecx") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("edx") = (long)(arg3);                   \
+       register long _arg4 __asm__ ("esi") = (long)(arg4);                   \
+       register long _arg5 __asm__ ("edi") = (long)(arg5);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "int $0x80\n"                                                 \
+               : "=a" (_ret)                                                 \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "0"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)   \
+({                                                             \
+       long _eax  = (long)(num);                               \
+       long _arg6 = (long)(arg6); /* Always in memory */       \
+       __asm__ volatile (                                      \
+               "pushl  %[_arg6]\n\t"                           \
+               "pushl  %%ebp\n\t"                              \
+               "movl   4(%%esp),%%ebp\n\t"                     \
+               "int    $0x80\n\t"                              \
+               "popl   %%ebp\n\t"                              \
+               "addl   $4,%%esp\n\t"                           \
+               : "+a"(_eax)            /* %eax */              \
+               : "b"(arg1),            /* %ebx */              \
+                 "c"(arg2),            /* %ecx */              \
+                 "d"(arg3),            /* %edx */              \
+                 "S"(arg4),            /* %esi */              \
+                 "D"(arg5),            /* %edi */              \
+                 [_arg6]"m"(_arg6)     /* memory */            \
+               : "memory", "cc"                                \
+       );                                                      \
+       _eax;                                                   \
+})
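
For illustration, a sketch of how these macros are typically consumed (assuming
__NR_write from <asm/unistd.h>, and the size_t/ssize_t types that nolibc
defines elsewhere):

    static ssize_t sys_write(int fd, const void *buf, size_t count)
    {
            /* the macro casts each argument to long and pins it to the
             * register assigned to that position (ebx, ecx, edx here) */
            return my_syscall3(__NR_write, fd, buf, count);
    }

A negative return value encodes -errno, as with all raw Linux syscalls.
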
+
+/* startup code */
+/*
+ * i386 System V ABI mandates:
+ * 1) the last pushed argument must be 16-byte aligned.
+ * 2) the deepest stack frame pointer (%ebp) must be set to zero.
+ */
+__asm__ (".section .text\n"
+    ".weak _start\n"
+    "_start:\n"
+    "pop %eax\n"                // argc   (first arg, %eax)
+    "mov %esp, %ebx\n"          // argv[] (second arg, %ebx)
+    "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
+    "xor %ebp, %ebp\n"          // zero the stack frame
+    "and $-16, %esp\n"          // x86 ABI : esp must be 16-byte aligned before
+    "sub $4, %esp\n"            // the call instruction (args are aligned)
+    "push %ecx\n"               // push all registers on the stack so that we
+    "push %ebx\n"               // support both regparm and plain stack modes
+    "push %eax\n"
+    "call main\n"               // main() returns the status code in %eax
+    "mov %eax, %ebx\n"          // retrieve exit code (32-bit int)
+    "movl $1, %eax\n"           // NR_exit == 1
+    "int $0x80\n"               // exit now
+    "hlt\n"                     // ensure it does not
+    "");
+
+#endif // _NOLIBC_ARCH_I386_H
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
new file mode 100644 (file)
index 0000000..5fc5b80
--- /dev/null
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * MIPS specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_MIPS_H
+#define _NOLIBC_ARCH_MIPS_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY            0
+#define O_WRONLY            1
+#define O_RDWR              2
+#define O_APPEND       0x0008
+#define O_NONBLOCK     0x0080
+#define O_CREAT        0x0100
+#define O_TRUNC        0x0200
+#define O_EXCL         0x0400
+#define O_NOCTTY       0x0800
+#define O_DIRECTORY   0x10000
+
+/* The struct returned by the stat() syscall; the syscall fills in exactly
+ * 88 bytes.
+ */
+struct sys_stat_struct {
+       unsigned int  st_dev;
+       long          st_pad1[3];
+       unsigned long st_ino;
+       unsigned int  st_mode;
+       unsigned int  st_nlink;
+       unsigned int  st_uid;
+       unsigned int  st_gid;
+       unsigned int  st_rdev;
+       long          st_pad2[2];
+       long          st_size;
+       long          st_pad3;
+
+       long          st_atime;
+       long          st_atime_nsec;
+       long          st_mtime;
+       long          st_mtime_nsec;
+
+       long          st_ctime;
+       long          st_ctime_nsec;
+       long          st_blksize;
+       long          st_blocks;
+       long          st_pad4[14];
+};
+
+/* Syscalls for MIPS ABI O32 :
+ *   - WARNING! there's always a branch delay slot!
+ *   - WARNING again, the syntax is different: registers take a '$' and numbers
+ *     do not.
+ *   - registers are 32-bit
+ *   - stack is 8-byte aligned
+ *   - syscall number is passed in v0 (O32 numbers start at 4000, i.e. 0xfa0).
+ *   - arguments are in a0, a1, a2, a3, then the stack. The caller needs to
+ *     leave some room on the stack for the callee to save a0..a3 if needed.
+ *   - Many registers are clobbered, in fact only a0..a2 and s0..s8 are
+ *     preserved. See: https://www.linux-mips.org/wiki/Syscall as well as
+ *     scall32-o32.S in the kernel sources.
+ *   - the system call is performed by calling "syscall"
+ *   - syscall return comes in v0, and register a3 needs to be checked to know
+ *     if an error occurred, in which case errno is in v0.
+ *   - the arguments are cast to long and assigned into the target registers
+ *     which are then simply passed as registers to the asm code, so that we
+ *     don't have to experience issues with register constraints.
+ */
+
+#define my_syscall0(num)                                                      \
+({                                                                            \
+       register long _num __asm__ ("v0") = (num);                            \
+       register long _arg4 __asm__ ("a3");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "addiu $sp, $sp, -32\n"                                       \
+               "syscall\n"                                                   \
+               "addiu $sp, $sp, 32\n"                                        \
+               : "=r"(_num), "=r"(_arg4)                                     \
+               : "r"(_num)                                                   \
+               : "memory", "cc", "at", "v1", "hi", "lo",                     \
+                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+       );                                                                    \
+       _arg4 ? -_num : _num;                                                 \
+})
+
+#define my_syscall1(num, arg1)                                                \
+({                                                                            \
+       register long _num __asm__ ("v0") = (num);                            \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg4 __asm__ ("a3");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "addiu $sp, $sp, -32\n"                                       \
+               "syscall\n"                                                   \
+               "addiu $sp, $sp, 32\n"                                        \
+               : "=r"(_num), "=r"(_arg4)                                     \
+               : "0"(_num),                                                  \
+                 "r"(_arg1)                                                  \
+               : "memory", "cc", "at", "v1", "hi", "lo",                     \
+                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+       );                                                                    \
+       _arg4 ? -_num : _num;                                                 \
+})
+
+#define my_syscall2(num, arg1, arg2)                                          \
+({                                                                            \
+       register long _num __asm__ ("v0") = (num);                            \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg4 __asm__ ("a3");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "addiu $sp, $sp, -32\n"                                       \
+               "syscall\n"                                                   \
+               "addiu $sp, $sp, 32\n"                                        \
+               : "=r"(_num), "=r"(_arg4)                                     \
+               : "0"(_num),                                                  \
+                 "r"(_arg1), "r"(_arg2)                                      \
+               : "memory", "cc", "at", "v1", "hi", "lo",                     \
+                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+       );                                                                    \
+       _arg4 ? -_num : _num;                                                 \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3)                                    \
+({                                                                            \
+       register long _num __asm__ ("v0")  = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("a3");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "addiu $sp, $sp, -32\n"                                       \
+               "syscall\n"                                                   \
+               "addiu $sp, $sp, 32\n"                                        \
+               : "=r"(_num), "=r"(_arg4)                                     \
+               : "0"(_num),                                                  \
+                 "r"(_arg1), "r"(_arg2), "r"(_arg3)                          \
+               : "memory", "cc", "at", "v1", "hi", "lo",                     \
+                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+       );                                                                    \
+       _arg4 ? -_num : _num;                                                 \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+({                                                                            \
+       register long _num __asm__ ("v0") = (num);                            \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "addiu $sp, $sp, -32\n"                                       \
+               "syscall\n"                                                   \
+               "addiu $sp, $sp, 32\n"                                        \
+               : "=r" (_num), "=r"(_arg4)                                    \
+               : "0"(_num),                                                  \
+                 "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4)              \
+               : "memory", "cc", "at", "v1", "hi", "lo",                     \
+                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+       );                                                                    \
+       _arg4 ? -_num : _num;                                                 \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+({                                                                            \
+       register long _num __asm__ ("v0") = (num);                            \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
+       register long _arg5 = (long)(arg5);                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "addiu $sp, $sp, -32\n"                                       \
+               "sw %7, 16($sp)\n"                                            \
+               "syscall\n  "                                                 \
+               "addiu $sp, $sp, 32\n"                                        \
+               : "=r" (_num), "=r"(_arg4)                                    \
+               : "0"(_num),                                                  \
+                 "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5)  \
+               : "memory", "cc", "at", "v1", "hi", "lo",                     \
+                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+       );                                                                    \
+       _arg4 ? -_num : _num;                                                 \
+})
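
The `_arg4 ? -_num : _num` tail of each macro above is what folds the MIPS O32
convention (error flag in a3, errno value in v0) back into the usual Linux
negative-errno return. A hypothetical helper showing the same logic in plain C:

    static long mips_syscall_ret(long v0, long a3)
    {
            /* when a3 is non-zero, v0 holds a positive errno value */
            return a3 ? -v0 : v0;
    }
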
+
+/* startup code, note that it's called __start on MIPS */
+__asm__ (".section .text\n"
+    ".weak __start\n"
+    ".set nomips16\n"
+    ".set    noreorder\n"
+    ".option pic0\n"
+    ".ent __start\n"
+    "__start:\n"
+    "lw $a0,($sp)\n"              // argc was in the stack
+    "addiu  $a1, $sp, 4\n"        // argv = sp + 4
+    "sll $a2, $a0, 2\n"           // a2 = argc * 4
+    "add   $a2, $a2, $a1\n"       // envp = argv + 4*argc ...
+    "addiu $a2, $a2, 4\n"         //        ... + 4
+    "li $t0, -8\n"
+    "and $sp, $sp, $t0\n"         // sp must be 8-byte aligned
+    "addiu $sp,$sp,-16\n"         // the callee expects to save a0..a3 there!
+    "jal main\n"                  // main() returns the status code, we'll exit with it.
+    "nop\n"                       // delayed slot
+    "move $a0, $v0\n"             // retrieve 32-bit exit code from v0
+    "li $v0, 4001\n"              // NR_exit == 4001
+    "syscall\n"
+    ".end __start\n"
+    "");
+
+#endif // _NOLIBC_ARCH_MIPS_H
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
new file mode 100644 (file)
index 0000000..95e2b79
--- /dev/null
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * RISCV (32 and 64) specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_RISCV_H
+#define _NOLIBC_ARCH_RISCV_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY            0
+#define O_WRONLY            1
+#define O_RDWR              2
+#define O_CREAT         0x100
+#define O_EXCL          0x200
+#define O_NOCTTY        0x400
+#define O_TRUNC        0x1000
+#define O_APPEND       0x2000
+#define O_NONBLOCK     0x4000
+#define O_DIRECTORY  0x200000
+
+struct sys_stat_struct {
+       unsigned long   st_dev;         /* Device.  */
+       unsigned long   st_ino;         /* File serial number.  */
+       unsigned int    st_mode;        /* File mode.  */
+       unsigned int    st_nlink;       /* Link count.  */
+       unsigned int    st_uid;         /* User ID of the file's owner.  */
+       unsigned int    st_gid;         /* Group ID of the file's group. */
+       unsigned long   st_rdev;        /* Device number, if device.  */
+       unsigned long   __pad1;
+       long            st_size;        /* Size of file, in bytes.  */
+       int             st_blksize;     /* Optimal block size for I/O.  */
+       int             __pad2;
+       long            st_blocks;      /* Number of 512-byte blocks allocated. */
+       long            st_atime;       /* Time of last access.  */
+       unsigned long   st_atime_nsec;
+       long            st_mtime;       /* Time of last modification.  */
+       unsigned long   st_mtime_nsec;
+       long            st_ctime;       /* Time of last status change.  */
+       unsigned long   st_ctime_nsec;
+       unsigned int    __unused4;
+       unsigned int    __unused5;
+};
+
+#if   __riscv_xlen == 64
+#define PTRLOG "3"
+#define SZREG  "8"
+#define REG_L  "ld"
+#elif __riscv_xlen == 32
+#define PTRLOG "2"
+#define SZREG  "4"
+#define REG_L  "lw"
+#endif
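
These are string literals so that they concatenate directly into the asm
templates further down. A minimal sketch of the mechanism (names reused here
only for illustration):

    #define SZREG "8"
    /* adjacent string literals merge at compile time: */
    static const char insn[] = "add   a1, sp, " SZREG "\n"; /* "add   a1, sp, 8\n" */
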
+
+/* Syscalls for RISCV :
+ *   - stack is 16-byte aligned
+ *   - syscall number is passed in a7
+ *   - arguments are in a0, a1, a2, a3, a4, a5
+ *   - the system call is performed by calling ecall
+ *   - syscall return comes in a0
+ *   - the arguments are cast to long and assigned into the target
+ *     registers which are then simply passed as registers to the asm code,
+ *     so that we don't have to experience issues with register constraints.
+ *
+ * On riscv, select() is not implemented, so we have to use pselect6().
+ */
+#define __ARCH_WANT_SYS_PSELECT6
+
+#define my_syscall0(num)                                                      \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0");                                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n\t"                                                   \
+               : "=r"(_arg1)                                                 \
+               : "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall1(num, arg1)                                                \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n"                                                     \
+               : "+r"(_arg1)                                                 \
+               : "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall2(num, arg1, arg2)                                          \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n"                                                     \
+               : "+r"(_arg1)                                                 \
+               : "r"(_arg2),                                                 \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3)                                    \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n\t"                                                   \
+               : "+r"(_arg1)                                                 \
+               : "r"(_arg2), "r"(_arg3),                                     \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n"                                                     \
+               : "+r"(_arg1)                                                 \
+               : "r"(_arg2), "r"(_arg3), "r"(_arg4),                         \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
+       register long _arg5 __asm__ ("a4") = (long)(arg5);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n"                                                     \
+               : "+r"(_arg1)                                                 \
+               : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5),             \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
+({                                                                            \
+       register long _num  __asm__ ("a7") = (num);                           \
+       register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+       register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+       register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+       register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
+       register long _arg5 __asm__ ("a4") = (long)(arg5);                    \
+       register long _arg6 __asm__ ("a5") = (long)(arg6);                    \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "ecall\n"                                                     \
+               : "+r"(_arg1)                                                 \
+               : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
+                 "r"(_num)                                                   \
+               : "memory", "cc"                                              \
+       );                                                                    \
+       _arg1;                                                                \
+})
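
A sketch of what the __ARCH_WANT_SYS_PSELECT6 knob above implies: select() can
be emulated through pselect6() with a NULL sigmask, converting the timeval into
a timespec (fd_set, struct timeval and struct timespec are assumed to come from
nolibc's shared type definitions):

    static int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds,
                          struct timeval *timeout)
    {
            struct timespec t;

            if (timeout) {
                    t.tv_sec  = timeout->tv_sec;
                    t.tv_nsec = timeout->tv_usec * 1000;
            }
            return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds,
                               timeout ? &t : NULL, NULL);
    }
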
+
+/* startup code */
+__asm__ (".section .text\n"
+    ".weak _start\n"
+    "_start:\n"
+    ".option push\n"
+    ".option norelax\n"
+    "lla   gp, __global_pointer$\n"
+    ".option pop\n"
+    "ld    a0, 0(sp)\n"          // argc (a0) was in the stack
+    "add   a1, sp, "SZREG"\n"    // argv (a1) = sp
+    "slli  a2, a0, "PTRLOG"\n"   // envp (a2) = SZREG*argc ...
+    "add   a2, a2, "SZREG"\n"    //             + SZREG (skip null)
+    "add   a2,a2,a1\n"           //             + argv
+    "andi  sp,a1,-16\n"          // sp must be 16-byte aligned
+    "call  main\n"               // main() returns the status code, we'll exit with it.
+    "li a7, 93\n"                // NR_exit == 93
+    "ecall\n"
+    "");
+
+#endif // _NOLIBC_ARCH_RISCV_H
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
new file mode 100644 (file)
index 0000000..0e1e9eb
--- /dev/null
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * x86_64 specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_X86_64_H
+#define _NOLIBC_ARCH_X86_64_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY            0
+#define O_WRONLY            1
+#define O_RDWR              2
+#define O_CREAT          0x40
+#define O_EXCL           0x80
+#define O_NOCTTY        0x100
+#define O_TRUNC         0x200
+#define O_APPEND        0x400
+#define O_NONBLOCK      0x800
+#define O_DIRECTORY   0x10000
+
+/* The struct returned by the stat() syscall, equivalent to stat64(). The
+ * syscall returns 116 bytes and stops in the middle of __unused.
+ */
+struct sys_stat_struct {
+       unsigned long st_dev;
+       unsigned long st_ino;
+       unsigned long st_nlink;
+       unsigned int  st_mode;
+       unsigned int  st_uid;
+
+       unsigned int  st_gid;
+       unsigned int  __pad0;
+       unsigned long st_rdev;
+       long          st_size;
+       long          st_blksize;
+
+       long          st_blocks;
+       unsigned long st_atime;
+       unsigned long st_atime_nsec;
+       unsigned long st_mtime;
+
+       unsigned long st_mtime_nsec;
+       unsigned long st_ctime;
+       unsigned long st_ctime_nsec;
+       long          __unused[3];
+};
+
+/* Syscalls for x86_64 :
+ *   - registers are 64-bit
+ *   - syscall number is passed in rax
+ *   - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
+ *   - the system call is performed by calling the syscall instruction
+ *   - syscall return comes in rax
+ *   - rcx and r11 are clobbered, others are preserved.
+ *   - the arguments are cast to long and assigned into the target registers
+ *     which are then simply passed as registers to the asm code, so that we
+ *     don't have to experience issues with register constraints.
+ *   - the syscall number is always specified last in order to allow forcing
+ *     some registers before it (gcc refuses a %-register in the last position).
+ *   - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
+ *     Calling Conventions.
+ *
+ * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/home
+ *
+ */
+
+#define my_syscall0(num)                                                      \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "0"(_num)                                                   \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall1(num, arg1)                                                \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+       register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "r"(_arg1),                                                 \
+                 "0"(_num)                                                   \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall2(num, arg1, arg2)                                          \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+       register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2),                                     \
+                 "0"(_num)                                                   \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3)                                    \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+       register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("rdx") = (long)(arg3);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
+                 "0"(_num)                                                   \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+       register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("rdx") = (long)(arg3);                   \
+       register long _arg4 __asm__ ("r10") = (long)(arg4);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
+                 "0"(_num)                                                   \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+       register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("rdx") = (long)(arg3);                   \
+       register long _arg4 __asm__ ("r10") = (long)(arg4);                   \
+       register long _arg5 __asm__ ("r8")  = (long)(arg5);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "0"(_num)                                                   \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
+({                                                                            \
+       long _ret;                                                            \
+       register long _num  __asm__ ("rax") = (num);                          \
+       register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
+       register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
+       register long _arg3 __asm__ ("rdx") = (long)(arg3);                   \
+       register long _arg4 __asm__ ("r10") = (long)(arg4);                   \
+       register long _arg5 __asm__ ("r8")  = (long)(arg5);                   \
+       register long _arg6 __asm__ ("r9")  = (long)(arg6);                   \
+                                                                             \
+       __asm__  volatile (                                                   \
+               "syscall\n"                                                   \
+               : "=a"(_ret)                                                  \
+               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+                 "r"(_arg6), "0"(_num)                                       \
+               : "rcx", "r11", "memory", "cc"                                \
+       );                                                                    \
+       _ret;                                                                 \
+})
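
As on the other architectures, a wrapper is then a one-liner; for instance a
hypothetical sys_getpid (assuming __NR_getpid from <asm/unistd.h> and pid_t
from nolibc's type definitions):

    static pid_t sys_getpid(void)
    {
            return my_syscall0(__NR_getpid);
    }
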
+
+/* startup code */
+/*
+ * x86-64 System V ABI mandates:
+ * 1) %rsp must be 16-byte aligned right before the function call.
+ * 2) the deepest stack frame pointer (%rbp) should be zero.
+ */
+__asm__ (".section .text\n"
+    ".weak _start\n"
+    "_start:\n"
+    "pop %rdi\n"                // argc   (first arg, %rdi)
+    "mov %rsp, %rsi\n"          // argv[] (second arg, %rsi)
+    "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
+    "xor %ebp, %ebp\n"          // zero the stack frame
+    "and $-16, %rsp\n"          // x86 ABI : esp must be 16-byte aligned before call
+    "call main\n"               // main() returns the status code, we'll exit with it.
+    "mov %eax, %edi\n"          // retrieve exit code (32 bit)
+    "mov $60, %eax\n"           // NR_exit == 60
+    "syscall\n"                 // really exit
+    "hlt\n"                     // ensure it does not return
+    "");
+
+#endif // _NOLIBC_ARCH_X86_64_H
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
new file mode 100644 (file)
index 0000000..4c69923
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+/* Below comes the architecture-specific code. For each architecture, we have
+ * the syscall declarations and the _start code definition. This is the only
+ * global part. On all architectures the kernel puts everything on the stack
+ * before jumping to _start just above us, without any return address (_start
+ * is not a function but an entry point). So at the stack pointer we find argc.
+ * Then argv[] begins, and ends at the first NULL. Then comes envp, which
+ * ends with a NULL as well. So envp = argv + argc + 1.
+ */
+
+#ifndef _NOLIBC_ARCH_H
+#define _NOLIBC_ARCH_H
+
+#if defined(__x86_64__)
+#include "arch-x86_64.h"
+#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
+#include "arch-i386.h"
+#elif defined(__ARM_EABI__)
+#include "arch-arm.h"
+#elif defined(__aarch64__)
+#include "arch-aarch64.h"
+#elif defined(__mips__) && defined(_ABIO32)
+#include "arch-mips.h"
+#elif defined(__riscv)
+#include "arch-riscv.h"
+#endif
+
+#endif /* _NOLIBC_ARCH_H */
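
An illustrative consequence of that layout: a program can recover its
environment from main()'s arguments alone, without any global variable:

    int main(int argc, char **argv)
    {
            char **envp = argv + argc + 1;  /* skip argv[] and its NULL */

            while (*envp)                   /* envp[] also ends with a NULL */
                    envp++;
            return 0;
    }
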
diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h
new file mode 100644 (file)
index 0000000..e3000b2
--- /dev/null
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * ctype function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_CTYPE_H
+#define _NOLIBC_CTYPE_H
+
+#include "std.h"
+
+/*
+ * As much as possible, please keep functions alphabetically sorted.
+ */
+
+static __attribute__((unused))
+int isascii(int c)
+{
+       /* 0x00..0x7f */
+       return (unsigned int)c <= 0x7f;
+}
+
+static __attribute__((unused))
+int isblank(int c)
+{
+       return c == '\t' || c == ' ';
+}
+
+static __attribute__((unused))
+int iscntrl(int c)
+{
+       /* 0x00..0x1f, 0x7f */
+       return (unsigned int)c < 0x20 || c == 0x7f;
+}
+
+static __attribute__((unused))
+int isdigit(int c)
+{
+       return (unsigned int)(c - '0') < 10;
+}
+
+static __attribute__((unused))
+int isgraph(int c)
+{
+       /* 0x21..0x7e */
+       return (unsigned int)(c - 0x21) < 0x5e;
+}
+
+static __attribute__((unused))
+int islower(int c)
+{
+       return (unsigned int)(c - 'a') < 26;
+}
+
+static __attribute__((unused))
+int isprint(int c)
+{
+       /* 0x20..0x7e */
+       return (unsigned int)(c - 0x20) < 0x5f;
+}
+
+static __attribute__((unused))
+int isspace(int c)
+{
+       /* \t is 0x9, \n is 0xA, \v is 0xB, \f is 0xC, \r is 0xD */
+       return ((unsigned int)c == ' ') || (unsigned int)(c - 0x09) < 5;
+}
+
+static __attribute__((unused))
+int isupper(int c)
+{
+       return (unsigned int)(c - 'A') < 26;
+}
+
+static __attribute__((unused))
+int isxdigit(int c)
+{
+       return isdigit(c) || (unsigned int)(c - 'A') < 6 || (unsigned int)(c - 'a') < 6;
+}
+
+static __attribute__((unused))
+int isalpha(int c)
+{
+       return islower(c) || isupper(c);
+}
+
+static __attribute__((unused))
+int isalnum(int c)
+{
+       return isalpha(c) || isdigit(c);
+}
+
+static __attribute__((unused))
+int ispunct(int c)
+{
+       return isgraph(c) && !isalnum(c);
+}
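
All of these predicates rely on the same unsigned range-check idiom: casting to
unsigned makes values below the lower bound wrap to huge numbers, so a single
comparison tests both ends of the range. An expanded equivalent, under a
hypothetical name:

    static int islower_expanded(int c)
    {
            /* same result as (unsigned int)(c - 'a') < 26 */
            return c >= 'a' && c <= 'z';
    }
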
+
+#endif /* _NOLIBC_CTYPE_H */
diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h
new file mode 100644 (file)
index 0000000..06893d6
--- /dev/null
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Minimal errno definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ERRNO_H
+#define _NOLIBC_ERRNO_H
+
+#include <asm/errno.h>
+
+/* this way it will be removed if unused */
+static int errno;
+
+#ifndef NOLIBC_IGNORE_ERRNO
+#define SET_ERRNO(v) do { errno = (v); } while (0)
+#else
+#define SET_ERRNO(v) do { } while (0)
+#endif
+
+
+/* errno codes all ensure that they will not conflict with a valid pointer
+ * because they all correspond to the highest addressable memory page.
+ */
+#define MAX_ERRNO 4095
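
For illustration, a hypothetical wrapper showing how SET_ERRNO() and MAX_ERRNO
combine to convert the kernel's negative-errno returns into the libc
convention:

    static long sysret(long ret)
    {
            if (ret < 0 && ret >= -MAX_ERRNO) {
                    SET_ERRNO(-ret);        /* e.g. -2 becomes errno = ENOENT */
                    return -1;
            }
            return ret;
    }
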
+
+#endif /* _NOLIBC_ERRNO_H */
index c1c285fe494aa6ca1f334a1130889a80ec8330b0..b2bc48d3cfe4b9321f5478162dc29a623c1fc497 100644 (file)
  * having to specify anything.
  *
  * Finally some very common libc-level functions are provided. It is the case
- * for a few functions usually found in string.h, ctype.h, or stdlib.h. Nothing
- * is currently provided regarding stdio emulation.
+ * for a few functions usually found in string.h, ctype.h, or stdlib.h.
  *
- * The macro NOLIBC is always defined, so that it is possible for a program to
- * check this macro to know if it is being built against and decide to disable
- * some features or simply not to include some standard libc files.
- *
- * Ideally this file should be split in multiple files for easier long term
- * maintenance, but provided as a single file as it is now, it's quite
- * convenient to use. Maybe some variations involving a set of includes at the
- * top could work.
+ * The nolibc.h file is only a convenient entry point which includes all other
+ * files. It also defines the NOLIBC macro, so that it is possible for a
+ * program to check this macro to know if it is being built against and decide
+ * to disable some features or simply not to include some standard libc files.
  *
  * A simple static executable may be built this way :
  *      $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
  *            -static -include nolibc.h -o hello hello.c -lgcc
  *
+ * Simple programs meant to be reasonably portable to various libc and using
+ * only a few common includes, may also be built by simply making the include
+ * path point to the nolibc directory:
+ *      $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+ *            -I../nolibc -o hello hello.c -lgcc
+ *
+ * The available standard (but limited) include files are:
+ *   ctype.h, errno.h, signal.h, stdio.h, stdlib.h, string.h, time.h
+ *
+ * In addition, the following ones are expected to be provided by the compiler:
+ *   float.h, stdarg.h, stddef.h
+ *
+ * The following ones, which are part of the C standard, are not provided:
+ *   assert.h, locale.h, math.h, setjmp.h, limits.h
+ *
  * A very useful calling convention table may be found here :
  *      http://man7.org/linux/man-pages/man2/syscall.2.html
  *
  *      https://w3challs.com/syscalls/
  *
  */
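
A matching hello.c for the build commands above could be as small as this
(sketch; printf() comes from the stdio.h listed among the provided includes):

    #include <stdio.h>

    int main(void)
    {
            printf("hello world\n");
            return 0;
    }
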
+#ifndef _NOLIBC_H
+#define _NOLIBC_H
 
-#include <asm/unistd.h>
-#include <asm/ioctls.h>
-#include <asm/errno.h>
-#include <linux/fs.h>
-#include <linux/loop.h>
-#include <linux/time.h>
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+#include "ctype.h"
+#include "signal.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "time.h"
+#include "unistd.h"
 
+/* Used by programs to avoid std includes */
 #define NOLIBC
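
A sketch of how a program may consume this macro, as the comment above
suggests (the guarded header is just an example):

    #ifndef NOLIBC
    #include <sys/types.h>  /* only needed when building against a full libc */
    #endif
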
 
-/* this way it will be removed if unused */
-static int errno;
-
-#ifndef NOLIBC_IGNORE_ERRNO
-#define SET_ERRNO(v) do { errno = (v); } while (0)
-#else
-#define SET_ERRNO(v) do { } while (0)
-#endif
-
-/* errno codes all ensure that they will not conflict with a valid pointer
- * because they all correspond to the highest addressable memory page.
- */
-#define MAX_ERRNO 4095
-
-/* Declare a few quite common macros and types that usually are in stdlib.h,
- * stdint.h, ctype.h, unistd.h and a few other common locations.
- */
-
-#define NULL ((void *)0)
-
-/* stdint types */
-typedef unsigned char       uint8_t;
-typedef   signed char        int8_t;
-typedef unsigned short     uint16_t;
-typedef   signed short      int16_t;
-typedef unsigned int       uint32_t;
-typedef   signed int        int32_t;
-typedef unsigned long long uint64_t;
-typedef   signed long long  int64_t;
-typedef unsigned long        size_t;
-typedef   signed long       ssize_t;
-typedef unsigned long     uintptr_t;
-typedef   signed long      intptr_t;
-typedef   signed long     ptrdiff_t;
-
-/* for stat() */
-typedef unsigned int          dev_t;
-typedef unsigned long         ino_t;
-typedef unsigned int         mode_t;
-typedef   signed int          pid_t;
-typedef unsigned int          uid_t;
-typedef unsigned int          gid_t;
-typedef unsigned long       nlink_t;
-typedef   signed long         off_t;
-typedef   signed long     blksize_t;
-typedef   signed long      blkcnt_t;
-typedef   signed long        time_t;
-
-/* for poll() */
-struct pollfd {
-       int fd;
-       short int events;
-       short int revents;
-};
-
-/* for getdents64() */
-struct linux_dirent64 {
-       uint64_t       d_ino;
-       int64_t        d_off;
-       unsigned short d_reclen;
-       unsigned char  d_type;
-       char           d_name[];
-};
-
-/* commonly an fd_set represents 256 FDs */
-#define FD_SETSIZE 256
-typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set;
-
-/* needed by wait4() */
-struct rusage {
-       struct timeval ru_utime;
-       struct timeval ru_stime;
-       long   ru_maxrss;
-       long   ru_ixrss;
-       long   ru_idrss;
-       long   ru_isrss;
-       long   ru_minflt;
-       long   ru_majflt;
-       long   ru_nswap;
-       long   ru_inblock;
-       long   ru_oublock;
-       long   ru_msgsnd;
-       long   ru_msgrcv;
-       long   ru_nsignals;
-       long   ru_nvcsw;
-       long   ru_nivcsw;
-};
-
-/* stat flags (WARNING, octal here) */
-#define S_IFDIR       0040000
-#define S_IFCHR       0020000
-#define S_IFBLK       0060000
-#define S_IFREG       0100000
-#define S_IFIFO       0010000
-#define S_IFLNK       0120000
-#define S_IFSOCK      0140000
-#define S_IFMT        0170000
-
-#define S_ISDIR(mode)  (((mode) & S_IFDIR) == S_IFDIR)
-#define S_ISCHR(mode)  (((mode) & S_IFCHR) == S_IFCHR)
-#define S_ISBLK(mode)  (((mode) & S_IFBLK) == S_IFBLK)
-#define S_ISREG(mode)  (((mode) & S_IFREG) == S_IFREG)
-#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO)
-#define S_ISLNK(mode)  (((mode) & S_IFLNK) == S_IFLNK)
-#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
-
-#define DT_UNKNOWN 0
-#define DT_FIFO    1
-#define DT_CHR     2
-#define DT_DIR     4
-#define DT_BLK     6
-#define DT_REG     8
-#define DT_LNK    10
-#define DT_SOCK   12
-
-/* all the *at functions */
-#ifndef AT_FDCWD
-#define AT_FDCWD             -100
-#endif
-
-/* lseek */
-#define SEEK_SET        0
-#define SEEK_CUR        1
-#define SEEK_END        2
-
-/* reboot */
-#define LINUX_REBOOT_MAGIC1         0xfee1dead
-#define LINUX_REBOOT_MAGIC2         0x28121969
-#define LINUX_REBOOT_CMD_HALT       0xcdef0123
-#define LINUX_REBOOT_CMD_POWER_OFF  0x4321fedc
-#define LINUX_REBOOT_CMD_RESTART    0x01234567
-#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
-
-
-/* The format of the struct as returned by the libc to the application, which
- * significantly differs from the format returned by the stat() syscall flavours.
- */
-struct stat {
-       dev_t     st_dev;     /* ID of device containing file */
-       ino_t     st_ino;     /* inode number */
-       mode_t    st_mode;    /* protection */
-       nlink_t   st_nlink;   /* number of hard links */
-       uid_t     st_uid;     /* user ID of owner */
-       gid_t     st_gid;     /* group ID of owner */
-       dev_t     st_rdev;    /* device ID (if special file) */
-       off_t     st_size;    /* total size, in bytes */
-       blksize_t st_blksize; /* blocksize for file system I/O */
-       blkcnt_t  st_blocks;  /* number of 512B blocks allocated */
-       time_t    st_atime;   /* time of last access */
-       time_t    st_mtime;   /* time of last modification */
-       time_t    st_ctime;   /* time of last status change */
-};
-
-#define WEXITSTATUS(status)   (((status) & 0xff00) >> 8)
-#define WIFEXITED(status)     (((status) & 0x7f) == 0)
-
-/* for SIGCHLD */
-#include <asm/signal.h>
-
-/* Below comes the architecture-specific code. For each architecture, we have
- * the syscall declarations and the _start code definition. This is the only
- * global part. On all architectures the kernel puts everything on the stack
- * before jumping to _start just above us, without any return address (_start
- * is not a function but an entry point). So at the stack pointer we find argc.
- * Then argv[] begins, terminated by a NULL pointer, after which envp starts,
- * itself terminated by a NULL. Hence envp = argv + argc + 1.
- */
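-
-/* For illustration, the initial stack thus looks like this on entry to
- * _start (one slot = one register-sized word):
- *
- *     sp ->  [ argc          ]
- *            [ argv[0]       ]
- *            [ ...           ]
- *            [ argv[argc-1]  ]
- *            [ NULL          ]
- *     envp-> [ envp[0]       ]
- *            [ ...           ]
- *            [ NULL          ]
- */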
-
-#if defined(__x86_64__)
-/* Syscalls for x86_64 :
- *   - registers are 64-bit
- *   - syscall number is passed in rax
- *   - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
- *   - the system call is performed by calling the syscall instruction
- *   - syscall return comes in rax
- *   - rcx and r11 are clobbered, others are preserved.
- *   - the arguments are cast to long and assigned into the target registers
- *     which are then simply passed as registers to the asm code, so that we
- *     don't have to experience issues with register constraints.
- *   - the syscall number is always specified last so that the other
- *     registers can be forced first (gcc refuses a %-register constraint in
- *     the last position).
- *   - see also the x86-64 psABI, section A.2 "AMD64 Linux Kernel
- *     Conventions", A.2.1 "Calling Conventions":
- *
- * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI
- *
- */
-
-#define my_syscall0(num)                                                      \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "0"(_num)                                                   \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall1(num, arg1)                                                \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-       register long _arg1 asm("rdi") = (long)(arg1);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "r"(_arg1),                                                 \
-                 "0"(_num)                                                   \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall2(num, arg1, arg2)                                          \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-       register long _arg1 asm("rdi") = (long)(arg1);                        \
-       register long _arg2 asm("rsi") = (long)(arg2);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "r"(_arg1), "r"(_arg2),                                     \
-                 "0"(_num)                                                   \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3)                                    \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-       register long _arg1 asm("rdi") = (long)(arg1);                        \
-       register long _arg2 asm("rsi") = (long)(arg2);                        \
-       register long _arg3 asm("rdx") = (long)(arg3);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
-                 "0"(_num)                                                   \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
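-
-/* Usage sketch (illustrative only): a write(2) wrapper on top of
- * my_syscall3(), assuming __NR_write from <asm/unistd.h> and the integer
- * types defined earlier in this file; the wrapper name is an assumption,
- * not part of this file.
- */
-static __attribute__((unused))
-ssize_t sys_write_example(int fd, const void *buf, size_t count)
-{
-       return my_syscall3(__NR_write, fd, buf, count);
-}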
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-       register long _arg1 asm("rdi") = (long)(arg1);                        \
-       register long _arg2 asm("rsi") = (long)(arg2);                        \
-       register long _arg3 asm("rdx") = (long)(arg3);                        \
-       register long _arg4 asm("r10") = (long)(arg4);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
-                 "0"(_num)                                                   \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-       register long _arg1 asm("rdi") = (long)(arg1);                        \
-       register long _arg2 asm("rsi") = (long)(arg2);                        \
-       register long _arg3 asm("rdx") = (long)(arg3);                        \
-       register long _arg4 asm("r10") = (long)(arg4);                        \
-       register long _arg5 asm("r8")  = (long)(arg5);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
-                 "0"(_num)                                                   \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num  asm("rax") = (num);                               \
-       register long _arg1 asm("rdi") = (long)(arg1);                        \
-       register long _arg2 asm("rsi") = (long)(arg2);                        \
-       register long _arg3 asm("rdx") = (long)(arg3);                        \
-       register long _arg4 asm("r10") = (long)(arg4);                        \
-       register long _arg5 asm("r8")  = (long)(arg5);                        \
-       register long _arg6 asm("r9")  = (long)(arg6);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "syscall\n"                                                   \
-               : "=a"(_ret)                                                  \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
-                 "r"(_arg6), "0"(_num)                                       \
-               : "rcx", "r11", "memory", "cc"                                \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-/* startup code */
-/*
- * x86-64 System V ABI mandates:
- * 1) %rsp must be 16-byte aligned right before the function call.
- * 2) The frame pointer (%rbp) of the deepest stack frame must be zero.
- *
- */
-asm(".section .text\n"
-    ".global _start\n"
-    "_start:\n"
-    "pop %rdi\n"                // argc   (first arg, %rdi)
-    "mov %rsp, %rsi\n"          // argv[] (second arg, %rsi)
-    "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
-    "xor %ebp, %ebp\n"          // zero the stack frame
-    "and $-16, %rsp\n"          // x86 ABI : esp must be 16-byte aligned before call
-    "call main\n"               // main() returns the status code, we'll exit with it.
-    "mov %eax, %edi\n"          // retrieve exit code (32 bit)
-    "mov $60, %eax\n"           // NR_exit == 60
-    "syscall\n"                 // really exit
-    "hlt\n"                     // ensure it does not return
-    "");
-
-/* fcntl / open */
-#define O_RDONLY            0
-#define O_WRONLY            1
-#define O_RDWR              2
-#define O_CREAT          0x40
-#define O_EXCL           0x80
-#define O_NOCTTY        0x100
-#define O_TRUNC         0x200
-#define O_APPEND        0x400
-#define O_NONBLOCK      0x800
-#define O_DIRECTORY   0x10000
-
-/* The struct returned by the stat() syscall, equivalent to stat64(). The
- * syscall returns 116 bytes and stops in the middle of __unused.
- */
-struct sys_stat_struct {
-       unsigned long st_dev;
-       unsigned long st_ino;
-       unsigned long st_nlink;
-       unsigned int  st_mode;
-       unsigned int  st_uid;
-
-       unsigned int  st_gid;
-       unsigned int  __pad0;
-       unsigned long st_rdev;
-       long          st_size;
-       long          st_blksize;
-
-       long          st_blocks;
-       unsigned long st_atime;
-       unsigned long st_atime_nsec;
-       unsigned long st_mtime;
-
-       unsigned long st_mtime_nsec;
-       unsigned long st_ctime;
-       unsigned long st_ctime_nsec;
-       long          __unused[3];
-};
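-
-/* Illustrative sketch of the conversion a stat() wrapper must perform from
- * the kernel layout above to the libc-level struct stat: a plain
- * field-by-field copy (the field names match in both structs; the function
- * name is an assumption, not part of this file).
- */
-static __attribute__((unused))
-void stat_convert_example(const struct sys_stat_struct *in, struct stat *out)
-{
-       out->st_dev     = in->st_dev;
-       out->st_ino     = in->st_ino;
-       out->st_mode    = in->st_mode;
-       out->st_nlink   = in->st_nlink;
-       out->st_uid     = in->st_uid;
-       out->st_gid     = in->st_gid;
-       out->st_rdev    = in->st_rdev;
-       out->st_size    = in->st_size;
-       out->st_blksize = in->st_blksize;
-       out->st_blocks  = in->st_blocks;
-       out->st_atime   = in->st_atime;
-       out->st_mtime   = in->st_mtime;
-       out->st_ctime   = in->st_ctime;
-}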
-
-#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
-/* Syscalls for i386 :
- *   - mostly similar to x86_64
- *   - registers are 32-bit
- *   - syscall number is passed in eax
- *   - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
- *   - all registers are preserved (except eax of course)
- *   - the system call is performed by calling int $0x80
- *   - syscall return comes in eax
- *   - the arguments are cast to long and assigned into the target registers
- *     which are then simply passed as registers to the asm code, so that we
- *     don't have to experience issues with register constraints.
- *   - the syscall number is always specified last so that the other
- *     registers can be forced first (gcc refuses a %-register constraint in
- *     the last position).
- *
- * Also, i386 supports the old_select syscall if newselect is not available
- * (see the sketch after the syscall macros below).
- */
-#define __ARCH_WANT_SYS_OLD_SELECT
-
-#define my_syscall0(num)                                                      \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num asm("eax") = (num);                                \
-                                                                             \
-       asm volatile (                                                        \
-               "int $0x80\n"                                                 \
-               : "=a" (_ret)                                                 \
-               : "0"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall1(num, arg1)                                                \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num asm("eax") = (num);                                \
-       register long _arg1 asm("ebx") = (long)(arg1);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "int $0x80\n"                                                 \
-               : "=a" (_ret)                                                 \
-               : "r"(_arg1),                                                 \
-                 "0"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall2(num, arg1, arg2)                                          \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num asm("eax") = (num);                                \
-       register long _arg1 asm("ebx") = (long)(arg1);                        \
-       register long _arg2 asm("ecx") = (long)(arg2);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "int $0x80\n"                                                 \
-               : "=a" (_ret)                                                 \
-               : "r"(_arg1), "r"(_arg2),                                     \
-                 "0"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3)                                    \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num asm("eax") = (num);                                \
-       register long _arg1 asm("ebx") = (long)(arg1);                        \
-       register long _arg2 asm("ecx") = (long)(arg2);                        \
-       register long _arg3 asm("edx") = (long)(arg3);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "int $0x80\n"                                                 \
-               : "=a" (_ret)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
-                 "0"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num asm("eax") = (num);                                \
-       register long _arg1 asm("ebx") = (long)(arg1);                        \
-       register long _arg2 asm("ecx") = (long)(arg2);                        \
-       register long _arg3 asm("edx") = (long)(arg3);                        \
-       register long _arg4 asm("esi") = (long)(arg4);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "int $0x80\n"                                                 \
-               : "=a" (_ret)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
-                 "0"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _ret;                                                                 \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
-({                                                                            \
-       long _ret;                                                            \
-       register long _num asm("eax") = (num);                                \
-       register long _arg1 asm("ebx") = (long)(arg1);                        \
-       register long _arg2 asm("ecx") = (long)(arg2);                        \
-       register long _arg3 asm("edx") = (long)(arg3);                        \
-       register long _arg4 asm("esi") = (long)(arg4);                        \
-       register long _arg5 asm("edi") = (long)(arg5);                        \
-                                                                             \
-       asm volatile (                                                        \
-               "int $0x80\n"                                                 \
-               : "=a" (_ret)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
-                 "0"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _ret;                                                                 \
-})
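-
-/* Illustrative sketch of the old_select convention requested by
- * __ARCH_WANT_SYS_OLD_SELECT above: the five arguments travel in memory
- * through a single struct whose address is the sole syscall argument. The
- * struct and function names here are assumptions, not part of this file.
- */
-struct old_sel_arg_struct {
-       unsigned long n;
-       fd_set *r, *w, *e;
-       struct timeval *t;
-};
-
-static __attribute__((unused))
-int old_select_example(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds,
-                       struct timeval *timeout)
-{
-       struct old_sel_arg_struct arg = { nfds, rfds, wfds, efds, timeout };
-
-       return my_syscall1(__NR_select, &arg);
-}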
-
-/* startup code */
-/*
- * i386 System V ABI mandates:
- * 1) last pushed argument must be 16-byte aligned.
- * 2) The frame pointer (%ebp) of the deepest stack frame must be set to zero.
- *
- */
-asm(".section .text\n"
-    ".global _start\n"
-    "_start:\n"
-    "pop %eax\n"                // argc   (first arg, %eax)
-    "mov %esp, %ebx\n"          // argv[] (second arg, %ebx)
-    "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
-    "xor %ebp, %ebp\n"          // zero the stack frame
-    "and $-16, %esp\n"          // x86 ABI : esp must be 16-byte aligned before
-    "sub $4, %esp\n"            // the call instruction (args are aligned)
-    "push %ecx\n"               // push all registers on the stack so that we
-    "push %ebx\n"               // support both regparm and plain stack modes
-    "push %eax\n"
-    "call main\n"               // main() returns the status code in %eax
-    "mov %eax, %ebx\n"          // retrieve exit code (32-bit int)
-    "movl $1, %eax\n"           // NR_exit == 1
-    "int $0x80\n"               // exit now
-    "hlt\n"                     // ensure it does not
-    "");
-
-/* fcntl / open */
-#define O_RDONLY            0
-#define O_WRONLY            1
-#define O_RDWR              2
-#define O_CREAT          0x40
-#define O_EXCL           0x80
-#define O_NOCTTY        0x100
-#define O_TRUNC         0x200
-#define O_APPEND        0x400
-#define O_NONBLOCK      0x800
-#define O_DIRECTORY   0x10000
-
-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
- * exactly 56 bytes (stops before the unused array).
- */
-struct sys_stat_struct {
-       unsigned long  st_dev;
-       unsigned long  st_ino;
-       unsigned short st_mode;
-       unsigned short st_nlink;
-       unsigned short st_uid;
-       unsigned short st_gid;
-
-       unsigned long  st_rdev;
-       unsigned long  st_size;
-       unsigned long  st_blksize;
-       unsigned long  st_blocks;
-
-       unsigned long  st_atime;
-       unsigned long  st_atime_nsec;
-       unsigned long  st_mtime;
-       unsigned long  st_mtime_nsec;
-
-       unsigned long  st_ctime;
-       unsigned long  st_ctime_nsec;
-       unsigned long  __unused[2];
-};
-
-#elif defined(__ARM_EABI__)
-/* Syscalls for ARM in ARM or Thumb modes :
- *   - registers are 32-bit
- *   - stack is 8-byte aligned
- *     ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html)
- *   - syscall number is passed in r7
- *   - arguments are in r0, r1, r2, r3, r4, r5
- *   - the system call is performed by calling svc #0
- *   - syscall return comes in r0.
- *   - only lr is clobbered.
- *   - the arguments are cast to long and assigned into the target registers
- *     which are then simply passed as registers to the asm code, so that we
- *     don't have to experience issues with register constraints.
- *   - the syscall number is always specified last so that the other
- *     registers can be forced first (gcc refuses a %-register constraint in
- *     the last position).
- *
- * Also, ARM supports the old_select syscall if newselect is not available
- */
-#define __ARCH_WANT_SYS_OLD_SELECT
-
-#define my_syscall0(num)                                                      \
-({                                                                            \
-       register long _num asm("r7") = (num);                                 \
-       register long _arg1 asm("r0");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_num)                                                   \
-               : "memory", "cc", "lr"                                        \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall1(num, arg1)                                                \
-({                                                                            \
-       register long _num asm("r7") = (num);                                 \
-       register long _arg1 asm("r0") = (long)(arg1);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1),                                                 \
-                 "r"(_num)                                                   \
-               : "memory", "cc", "lr"                                        \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall2(num, arg1, arg2)                                          \
-({                                                                            \
-       register long _num asm("r7") = (num);                                 \
-       register long _arg1 asm("r0") = (long)(arg1);                         \
-       register long _arg2 asm("r1") = (long)(arg2);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1), "r"(_arg2),                                     \
-                 "r"(_num)                                                   \
-               : "memory", "cc", "lr"                                        \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3)                                    \
-({                                                                            \
-       register long _num asm("r7") = (num);                                 \
-       register long _arg1 asm("r0") = (long)(arg1);                         \
-       register long _arg2 asm("r1") = (long)(arg2);                         \
-       register long _arg3 asm("r2") = (long)(arg3);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
-                 "r"(_num)                                                   \
-               : "memory", "cc", "lr"                                        \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
-({                                                                            \
-       register long _num asm("r7") = (num);                                 \
-       register long _arg1 asm("r0") = (long)(arg1);                         \
-       register long _arg2 asm("r1") = (long)(arg2);                         \
-       register long _arg3 asm("r2") = (long)(arg3);                         \
-       register long _arg4 asm("r3") = (long)(arg4);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
-                 "r"(_num)                                                   \
-               : "memory", "cc", "lr"                                        \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
-({                                                                            \
-       register long _num asm("r7") = (num);                                 \
-       register long _arg1 asm("r0") = (long)(arg1);                         \
-       register long _arg2 asm("r1") = (long)(arg2);                         \
-       register long _arg3 asm("r2") = (long)(arg3);                         \
-       register long _arg4 asm("r3") = (long)(arg4);                         \
-       register long _arg5 asm("r4") = (long)(arg5);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r" (_arg1)                                                \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
-                 "r"(_num)                                                   \
-               : "memory", "cc", "lr"                                        \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-/* startup code */
-asm(".section .text\n"
-    ".global _start\n"
-    "_start:\n"
-#if defined(__THUMBEB__) || defined(__THUMBEL__)
-    /* We enter here in 32-bit mode but if some previous functions were in
-     * 16-bit mode, the assembler cannot know, so we need to tell it we're in
-     * 32-bit now, then switch to 16-bit (is there a better way to do it than
-     * adding 1 by hand?) and tell the asm we're now in 16-bit mode so that
-     * it generates correct instructions. Note that we do not support Thumb-1.
-     */
-     */
-    ".code 32\n"
-    "add     r0, pc, #1\n"
-    "bx      r0\n"
-    ".code 16\n"
-#endif
-    "pop {%r0}\n"                 // argc was in the stack
-    "mov %r1, %sp\n"              // argv = sp
-    "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
-    "add %r2, %r2, $4\n"          //        ... + 4
-    "and %r3, %r1, $-8\n"         // AAPCS : sp must be 8-byte aligned in the
-    "mov %sp, %r3\n"              //         callee, an bl doesn't push (lr=pc)
-    "bl main\n"                   // main() returns the status code, we'll exit with it.
-    "movs r7, $1\n"               // NR_exit == 1
-    "svc $0x00\n"
-    "");
-
-/* fcntl / open */
-#define O_RDONLY            0
-#define O_WRONLY            1
-#define O_RDWR              2
-#define O_CREAT          0x40
-#define O_EXCL           0x80
-#define O_NOCTTY        0x100
-#define O_TRUNC         0x200
-#define O_APPEND        0x400
-#define O_NONBLOCK      0x800
-#define O_DIRECTORY    0x4000
-
-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
- * exactly 56 bytes (stops before the unused array). In big endian, the format
- * differs as devices are returned as short only.
- */
-struct sys_stat_struct {
-#if defined(__ARMEB__)
-       unsigned short st_dev;
-       unsigned short __pad1;
-#else
-       unsigned long  st_dev;
-#endif
-       unsigned long  st_ino;
-       unsigned short st_mode;
-       unsigned short st_nlink;
-       unsigned short st_uid;
-       unsigned short st_gid;
-#if defined(__ARMEB__)
-       unsigned short st_rdev;
-       unsigned short __pad2;
-#else
-       unsigned long  st_rdev;
-#endif
-       unsigned long  st_size;
-       unsigned long  st_blksize;
-       unsigned long  st_blocks;
-       unsigned long  st_atime;
-       unsigned long  st_atime_nsec;
-       unsigned long  st_mtime;
-       unsigned long  st_mtime_nsec;
-       unsigned long  st_ctime;
-       unsigned long  st_ctime_nsec;
-       unsigned long  __unused[2];
-};
-
-#elif defined(__aarch64__)
-/* Syscalls for AARCH64 :
- *   - registers are 64-bit
- *   - stack is 16-byte aligned
- *   - syscall number is passed in x8
- *   - arguments are in x0, x1, x2, x3, x4, x5
- *   - the system call is performed by calling svc 0
- *   - syscall return comes in x0.
- *   - the arguments are cast to long and assigned into the target registers
- *     which are then simply passed as registers to the asm code, so that we
- *     don't have to experience issues with register constraints.
- *
- * On aarch64, select() is not implemented so we have to use pselect6()
- * (see the sketch after the syscall macros below).
- */
-#define __ARCH_WANT_SYS_PSELECT6
-
-#define my_syscall0(num)                                                      \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall1(num, arg1)                                                \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0") = (long)(arg1);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1),                                                 \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall2(num, arg1, arg2)                                          \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0") = (long)(arg1);                         \
-       register long _arg2 asm("x1") = (long)(arg2);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1), "r"(_arg2),                                     \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3)                                    \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0") = (long)(arg1);                         \
-       register long _arg2 asm("x1") = (long)(arg2);                         \
-       register long _arg3 asm("x2") = (long)(arg3);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0") = (long)(arg1);                         \
-       register long _arg2 asm("x1") = (long)(arg2);                         \
-       register long _arg3 asm("x2") = (long)(arg3);                         \
-       register long _arg4 asm("x3") = (long)(arg4);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r"(_arg1)                                                 \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0") = (long)(arg1);                         \
-       register long _arg2 asm("x1") = (long)(arg2);                         \
-       register long _arg3 asm("x2") = (long)(arg3);                         \
-       register long _arg4 asm("x3") = (long)(arg4);                         \
-       register long _arg5 asm("x4") = (long)(arg5);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r" (_arg1)                                                \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
-({                                                                            \
-       register long _num  asm("x8") = (num);                                \
-       register long _arg1 asm("x0") = (long)(arg1);                         \
-       register long _arg2 asm("x1") = (long)(arg2);                         \
-       register long _arg3 asm("x2") = (long)(arg3);                         \
-       register long _arg4 asm("x3") = (long)(arg4);                         \
-       register long _arg5 asm("x4") = (long)(arg5);                         \
-       register long _arg6 asm("x5") = (long)(arg6);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "svc #0\n"                                                    \
-               : "=r" (_arg1)                                                \
-               : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
-                 "r"(_arg6), "r"(_num)                                       \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
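-
-/* Illustrative sketch of the pselect6() fallback mentioned above: select()
- * can be emulated by converting the timeval to a timespec and passing a
- * NULL sigmask. The function name and conversion details are assumptions,
- * not part of this file.
- */
-static __attribute__((unused))
-int select_example(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds,
-                   struct timeval *timeout)
-{
-       struct timespec t;
-
-       if (timeout) {
-               t.tv_sec  = timeout->tv_sec;
-               t.tv_nsec = timeout->tv_usec * 1000;
-       }
-       return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds,
-                          timeout ? &t : NULL, NULL);
-}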
-
-/* startup code */
-asm(".section .text\n"
-    ".global _start\n"
-    "_start:\n"
-    "ldr x0, [sp]\n"              // argc (x0) was in the stack
-    "add x1, sp, 8\n"             // argv (x1) = sp
-    "lsl x2, x0, 3\n"             // envp (x2) = 8*argc ...
-    "add x2, x2, 8\n"             //           + 8 (skip null)
-    "add x2, x2, x1\n"            //           + argv
-    "and sp, x1, -16\n"           // sp must be 16-byte aligned in the callee
-    "bl main\n"                   // main() returns the status code, we'll exit with it.
-    "mov x8, 93\n"                // NR_exit == 93
-    "svc #0\n"
-    "");
-
-/* fcntl / open */
-#define O_RDONLY            0
-#define O_WRONLY            1
-#define O_RDWR              2
-#define O_CREAT          0x40
-#define O_EXCL           0x80
-#define O_NOCTTY        0x100
-#define O_TRUNC         0x200
-#define O_APPEND        0x400
-#define O_NONBLOCK      0x800
-#define O_DIRECTORY    0x4000
-
-/* The struct returned by the newfstatat() syscall. It differs slightly from
- * x86_64's stat struct in field ordering, so be careful.
- */
-struct sys_stat_struct {
-       unsigned long   st_dev;
-       unsigned long   st_ino;
-       unsigned int    st_mode;
-       unsigned int    st_nlink;
-       unsigned int    st_uid;
-       unsigned int    st_gid;
-
-       unsigned long   st_rdev;
-       unsigned long   __pad1;
-       long            st_size;
-       int             st_blksize;
-       int             __pad2;
-
-       long            st_blocks;
-       long            st_atime;
-       unsigned long   st_atime_nsec;
-       long            st_mtime;
-
-       unsigned long   st_mtime_nsec;
-       long            st_ctime;
-       unsigned long   st_ctime_nsec;
-       unsigned int    __unused[2];
-};
-
-#elif defined(__mips__) && defined(_ABIO32)
-/* Syscalls for MIPS ABI O32 :
- *   - WARNING! there's always a branch delay slot!
- *   - WARNING again, the syntax is different: registers take a '$' and
- *     numbers do not.
- *   - registers are 32-bit
- *   - stack is 8-byte aligned
- *   - syscall number is passed in v0 (O32 ABI numbers start at 4000 = 0xfa0).
- *   - arguments are in a0, a1, a2, a3, then the stack. The caller needs to
- *     leave some room on the stack for the callee to save a0..a3 if needed.
- *   - many registers are clobbered; in fact only a0..a2 and s0..s8 are
- *     preserved. See: https://www.linux-mips.org/wiki/Syscall as well as
- *     scall32-o32.S in the kernel sources.
- *   - the system call is performed by calling "syscall"
- *   - syscall return comes in v0, and register a3 needs to be checked to know
- *     if an error occurred, in which case the error code is in v0 (see the
- *     errno sketch after the syscall macros below).
- *   - the arguments are cast to long and assigned into the target registers
- *     which are then simply passed as registers to the asm code, so that we
- *     don't have to experience issues with register constraints.
- */
-
-#define my_syscall0(num)                                                      \
-({                                                                            \
-       register long _num asm("v0") = (num);                                 \
-       register long _arg4 asm("a3");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "addiu $sp, $sp, -32\n"                                       \
-               "syscall\n"                                                   \
-               "addiu $sp, $sp, 32\n"                                        \
-               : "=r"(_num), "=r"(_arg4)                                     \
-               : "r"(_num)                                                   \
-               : "memory", "cc", "at", "v1", "hi", "lo",                     \
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
-       );                                                                    \
-       _arg4 ? -_num : _num;                                                 \
-})
-
-#define my_syscall1(num, arg1)                                                \
-({                                                                            \
-       register long _num asm("v0") = (num);                                 \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg4 asm("a3");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "addiu $sp, $sp, -32\n"                                       \
-               "syscall\n"                                                   \
-               "addiu $sp, $sp, 32\n"                                        \
-               : "=r"(_num), "=r"(_arg4)                                     \
-               : "0"(_num),                                                  \
-                 "r"(_arg1)                                                  \
-               : "memory", "cc", "at", "v1", "hi", "lo",                     \
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
-       );                                                                    \
-       _arg4 ? -_num : _num;                                                 \
-})
-
-#define my_syscall2(num, arg1, arg2)                                          \
-({                                                                            \
-       register long _num asm("v0") = (num);                                 \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg4 asm("a3");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "addiu $sp, $sp, -32\n"                                       \
-               "syscall\n"                                                   \
-               "addiu $sp, $sp, 32\n"                                        \
-               : "=r"(_num), "=r"(_arg4)                                     \
-               : "0"(_num),                                                  \
-                 "r"(_arg1), "r"(_arg2)                                      \
-               : "memory", "cc", "at", "v1", "hi", "lo",                     \
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
-       );                                                                    \
-       _arg4 ? -_num : _num;                                                 \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3)                                    \
-({                                                                            \
-       register long _num asm("v0")  = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-       register long _arg4 asm("a3");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "addiu $sp, $sp, -32\n"                                       \
-               "syscall\n"                                                   \
-               "addiu $sp, $sp, 32\n"                                        \
-               : "=r"(_num), "=r"(_arg4)                                     \
-               : "0"(_num),                                                  \
-                 "r"(_arg1), "r"(_arg2), "r"(_arg3)                          \
-               : "memory", "cc", "at", "v1", "hi", "lo",                     \
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
-       );                                                                    \
-       _arg4 ? -_num : _num;                                                 \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
-({                                                                            \
-       register long _num asm("v0") = (num);                                 \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-       register long _arg4 asm("a3") = (long)(arg4);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "addiu $sp, $sp, -32\n"                                       \
-               "syscall\n"                                                   \
-               "addiu $sp, $sp, 32\n"                                        \
-               : "=r" (_num), "=r"(_arg4)                                    \
-               : "0"(_num),                                                  \
-                 "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4)              \
-               : "memory", "cc", "at", "v1", "hi", "lo",                     \
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
-       );                                                                    \
-       _arg4 ? -_num : _num;                                                 \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
-({                                                                            \
-       register long _num asm("v0") = (num);                                 \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-       register long _arg4 asm("a3") = (long)(arg4);                         \
-       register long _arg5 = (long)(arg5);                                   \
-                                                                             \
-       asm volatile (                                                        \
-               "addiu $sp, $sp, -32\n"                                       \
-               "sw %7, 16($sp)\n"                                            \
-               "syscall\n"                                                   \
-               "addiu $sp, $sp, 32\n"                                        \
-               : "=r" (_num), "=r"(_arg4)                                    \
-               : "0"(_num),                                                  \
-                 "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5)  \
-               : "memory", "cc", "at", "v1", "hi", "lo",                     \
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
-       );                                                                    \
-       _arg4 ? -_num : _num;                                                 \
-})
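-
-/* Illustrative sketch of the a3-based error convention described above: the
- * macros already fold a3 into a negative return value, so an errno-setting
- * wrapper only needs to test the sign. errno is assumed to be provided by
- * this libc, and the function name is an assumption, not part of this file.
- */
-static __attribute__((unused))
-int chdir_example(const char *path)
-{
-       int ret = my_syscall1(__NR_chdir, path);
-
-       if (ret < 0) {
-               errno = -ret;
-               ret = -1;
-       }
-       return ret;
-}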
-
-/* startup code, note that it's called __start on MIPS */
-asm(".section .text\n"
-    ".set nomips16\n"
-    ".global __start\n"
-    ".set    noreorder\n"
-    ".option pic0\n"
-    ".ent __start\n"
-    "__start:\n"
-    "lw $a0,($sp)\n"              // argc was in the stack
-    "addiu  $a1, $sp, 4\n"        // argv = sp + 4
-    "sll $a2, $a0, 2\n"           // a2 = argc * 4
-    "add   $a2, $a2, $a1\n"       // envp = argv + 4*argc ...
-    "addiu $a2, $a2, 4\n"         //        ... + 4
-    "li $t0, -8\n"
-    "and $sp, $sp, $t0\n"         // sp must be 8-byte aligned
-    "addiu $sp,$sp,-16\n"         // the callee expects to save a0..a3 there!
-    "jal main\n"                  // main() returns the status code, we'll exit with it.
-    "nop\n"                       // delayed slot
-    "move $a0, $v0\n"             // retrieve 32-bit exit code from v0
-    "li $v0, 4001\n"              // NR_exit == 4001
-    "syscall\n"
-    ".end __start\n"
-    "");
-
-/* fcntl / open */
-#define O_RDONLY            0
-#define O_WRONLY            1
-#define O_RDWR              2
-#define O_APPEND       0x0008
-#define O_NONBLOCK     0x0080
-#define O_CREAT        0x0100
-#define O_TRUNC        0x0200
-#define O_EXCL         0x0400
-#define O_NOCTTY       0x0800
-#define O_DIRECTORY   0x10000
-
-/* The struct returned by the stat() syscall. 88 bytes are returned by the
- * syscall.
- */
-struct sys_stat_struct {
-       unsigned int  st_dev;
-       long          st_pad1[3];
-       unsigned long st_ino;
-       unsigned int  st_mode;
-       unsigned int  st_nlink;
-       unsigned int  st_uid;
-       unsigned int  st_gid;
-       unsigned int  st_rdev;
-       long          st_pad2[2];
-       long          st_size;
-       long          st_pad3;
-       long          st_atime;
-       long          st_atime_nsec;
-       long          st_mtime;
-       long          st_mtime_nsec;
-       long          st_ctime;
-       long          st_ctime_nsec;
-       long          st_blksize;
-       long          st_blocks;
-       long          st_pad4[14];
-};
-
-#elif defined(__riscv)
-
-#if   __riscv_xlen == 64
-#define PTRLOG "3"
-#define SZREG  "8"
-#elif __riscv_xlen == 32
-#define PTRLOG "2"
-#define SZREG  "4"
-#endif
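-
-/* These constants get spliced into the startup asm so that the same code
- * handles rv32 and rv64, e.g. (illustrative):
- *
- *   "add  a1, sp, " SZREG "\n"    // argv = sp + one register-sized word
- *   "slli a2, a0, " PTRLOG "\n"   // envp offset = argc << PTRLOG
- */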
-
-/* Syscalls for RISCV :
- *   - stack is 16-byte aligned
- *   - syscall number is passed in a7
- *   - arguments are in a0, a1, a2, a3, a4, a5
- *   - the system call is performed by calling ecall
- *   - syscall return comes in a0
- *   - the arguments are cast to long and assigned into the target
- *     registers which are then simply passed as registers to the asm code,
- *     so that we don't have to experience issues with register constraints.
- */
-
-#define my_syscall0(num)                                                      \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0");                                        \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n\t"                                                   \
-               : "=r"(_arg1)                                                 \
-               : "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall1(num, arg1)                                                \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n"                                                     \
-               : "+r"(_arg1)                                                 \
-               : "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall2(num, arg1, arg2)                                          \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n"                                                     \
-               : "+r"(_arg1)                                                 \
-               : "r"(_arg2),                                                 \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3)                                    \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n\t"                                                   \
-               : "+r"(_arg1)                                                 \
-               : "r"(_arg2), "r"(_arg3),                                     \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-       register long _arg4 asm("a3") = (long)(arg4);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n"                                                     \
-               : "+r"(_arg1)                                                 \
-               : "r"(_arg2), "r"(_arg3), "r"(_arg4),                         \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-       register long _arg4 asm("a3") = (long)(arg4);                         \
-       register long _arg5 asm("a4") = (long)(arg5);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n"                                                     \
-               : "+r"(_arg1)                                                 \
-               : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5),             \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
-({                                                                            \
-       register long _num  asm("a7") = (num);                                \
-       register long _arg1 asm("a0") = (long)(arg1);                         \
-       register long _arg2 asm("a1") = (long)(arg2);                         \
-       register long _arg3 asm("a2") = (long)(arg3);                         \
-       register long _arg4 asm("a3") = (long)(arg4);                         \
-       register long _arg5 asm("a4") = (long)(arg5);                         \
-       register long _arg6 asm("a5") = (long)(arg6);                         \
-                                                                             \
-       asm volatile (                                                        \
-               "ecall\n"                                                     \
-               : "+r"(_arg1)                                                 \
-               : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
-                 "r"(_num)                                                   \
-               : "memory", "cc"                                              \
-       );                                                                    \
-       _arg1;                                                                \
-})
-
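-/* Illustrative only (not part of nolibc): issuing a raw one-argument syscall
- * with the macros above, e.g. close(), number 57 in the generic syscall
- * table that RISC-V uses:
- *
- *   long ret = my_syscall1(57, fd);   // 0 on success or -errno
- */
-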
-/* startup code */
-asm(".section .text\n"
-    ".global _start\n"
-    "_start:\n"
-    ".option push\n"
-    ".option norelax\n"
-    "lla   gp, __global_pointer$\n"
-    ".option pop\n"
-    "ld    a0, 0(sp)\n"          // argc (a0) was in the stack
-    "add   a1, sp, "SZREG"\n"    // argv (a1) = sp
-    "slli  a2, a0, "PTRLOG"\n"   // envp (a2) = SZREG*argc ...
-    "add   a2, a2, "SZREG"\n"    //             + SZREG (skip null)
-    "add   a2,a2,a1\n"           //             + argv
-    "andi  sp,a1,-16\n"          // sp must be 16-byte aligned
-    "call  main\n"               // main() returns the status code, we'll exit with it.
-    "li a7, 93\n"                // NR_exit == 93
-    "ecall\n"
-    "");
-
-/* fcntl / open */
-#define O_RDONLY            0
-#define O_WRONLY            1
-#define O_RDWR              2
-#define O_CREAT         0x100
-#define O_EXCL          0x200
-#define O_NOCTTY        0x400
-#define O_TRUNC        0x1000
-#define O_APPEND       0x2000
-#define O_NONBLOCK     0x4000
-#define O_DIRECTORY  0x200000
-
-struct sys_stat_struct {
-       unsigned long   st_dev;         /* Device.  */
-       unsigned long   st_ino;         /* File serial number.  */
-       unsigned int    st_mode;        /* File mode.  */
-       unsigned int    st_nlink;       /* Link count.  */
-       unsigned int    st_uid;         /* User ID of the file's owner.  */
-       unsigned int    st_gid;         /* Group ID of the file's group. */
-       unsigned long   st_rdev;        /* Device number, if device.  */
-       unsigned long   __pad1;
-       long            st_size;        /* Size of file, in bytes.  */
-       int             st_blksize;     /* Optimal block size for I/O.  */
-       int             __pad2;
-       long            st_blocks;      /* Number of 512-byte blocks allocated. */
-       long            st_atime;       /* Time of last access.  */
-       unsigned long   st_atime_nsec;
-       long            st_mtime;       /* Time of last modification.  */
-       unsigned long   st_mtime_nsec;
-       long            st_ctime;       /* Time of last status change.  */
-       unsigned long   st_ctime_nsec;
-       unsigned int    __unused4;
-       unsigned int    __unused5;
-};
-
-#endif
-
-
-/* Below are the C functions used to implement the raw syscalls. They try to
- * be architecture-agnostic, and return either a success value or a negative
- * error code (-errno). Declaring them static will lead to them being inlined
- * in most cases, but it's still possible to reference them by a pointer if
- * needed.
- */
-static __attribute__((unused))
-void *sys_brk(void *addr)
-{
-       return (void *)my_syscall1(__NR_brk, addr);
-}
-
-static __attribute__((noreturn,unused))
-void sys_exit(int status)
-{
-       my_syscall1(__NR_exit, status & 255);
-       while(1); // silence the "noreturn" warning.
-}
-
-static __attribute__((unused))
-int sys_chdir(const char *path)
-{
-       return my_syscall1(__NR_chdir, path);
-}
-
-static __attribute__((unused))
-int sys_chmod(const char *path, mode_t mode)
-{
-#ifdef __NR_fchmodat
-       return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0);
-#elif defined(__NR_chmod)
-       return my_syscall2(__NR_chmod, path, mode);
-#else
-#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod()
-#endif
-}
-
-static __attribute__((unused))
-int sys_chown(const char *path, uid_t owner, gid_t group)
-{
-#ifdef __NR_fchownat
-       return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0);
-#elif defined(__NR_chown)
-       return my_syscall3(__NR_chown, path, owner, group);
-#else
-#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown()
-#endif
-}
-
-static __attribute__((unused))
-int sys_chroot(const char *path)
-{
-       return my_syscall1(__NR_chroot, path);
-}
-
-static __attribute__((unused))
-int sys_close(int fd)
-{
-       return my_syscall1(__NR_close, fd);
-}
-
-static __attribute__((unused))
-int sys_dup(int fd)
-{
-       return my_syscall1(__NR_dup, fd);
-}
-
-#ifdef __NR_dup3
-static __attribute__((unused))
-int sys_dup3(int old, int new, int flags)
-{
-       return my_syscall3(__NR_dup3, old, new, flags);
-}
-#endif
-
-static __attribute__((unused))
-int sys_dup2(int old, int new)
-{
-#ifdef __NR_dup3
-       return my_syscall3(__NR_dup3, old, new, 0);
-#elif defined(__NR_dup2)
-       return my_syscall2(__NR_dup2, old, new);
-#else
-#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2()
-#endif
-}
-
-static __attribute__((unused))
-int sys_execve(const char *filename, char *const argv[], char *const envp[])
-{
-       return my_syscall3(__NR_execve, filename, argv, envp);
-}
-
-static __attribute__((unused))
-pid_t sys_fork(void)
-{
-#ifdef __NR_clone
-       /* note: some archs only have clone() and not fork(). The clone() API
-        * differs between archs, but most of them take the flags in the first
-        * argument and ignore the remaining arguments when no extra flag is
-        * set.
-        */
-       return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0);
-#elif defined(__NR_fork)
-       return my_syscall0(__NR_fork);
-#else
-#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork()
-#endif
-}
-
-static __attribute__((unused))
-int sys_fsync(int fd)
-{
-       return my_syscall1(__NR_fsync, fd);
-}
-
-static __attribute__((unused))
-int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count)
-{
-       return my_syscall3(__NR_getdents64, fd, dirp, count);
-}
-
-static __attribute__((unused))
-pid_t sys_getpgid(pid_t pid)
-{
-       return my_syscall1(__NR_getpgid, pid);
-}
-
-static __attribute__((unused))
-pid_t sys_getpgrp(void)
-{
-       return sys_getpgid(0);
-}
-
-static __attribute__((unused))
-pid_t sys_getpid(void)
-{
-       return my_syscall0(__NR_getpid);
-}
-
-static __attribute__((unused))
-pid_t sys_gettid(void)
-{
-       return my_syscall0(__NR_gettid);
-}
-
-static __attribute__((unused))
-int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
-       return my_syscall2(__NR_gettimeofday, tv, tz);
-}
-
-static __attribute__((unused))
-int sys_ioctl(int fd, unsigned long req, void *value)
-{
-       return my_syscall3(__NR_ioctl, fd, req, value);
-}
-
-static __attribute__((unused))
-int sys_kill(pid_t pid, int signal)
-{
-       return my_syscall2(__NR_kill, pid, signal);
-}
-
-static __attribute__((unused))
-int sys_link(const char *old, const char *new)
-{
-#ifdef __NR_linkat
-       return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0);
-#elif defined(__NR_link)
-       return my_syscall2(__NR_link, old, new);
-#else
-#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link()
-#endif
-}
-
-static __attribute__((unused))
-off_t sys_lseek(int fd, off_t offset, int whence)
-{
-       return my_syscall3(__NR_lseek, fd, offset, whence);
-}
-
-static __attribute__((unused))
-int sys_mkdir(const char *path, mode_t mode)
-{
-#ifdef __NR_mkdirat
-       return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode);
-#elif defined(__NR_mkdir)
-       return my_syscall2(__NR_mkdir, path, mode);
-#else
-#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir()
-#endif
-}
-
-static __attribute__((unused))
-long sys_mknod(const char *path, mode_t mode, dev_t dev)
-{
-#ifdef __NR_mknodat
-       return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
-#elif defined(__NR_mknod)
-       return my_syscall3(__NR_mknod, path, mode, dev);
-#else
-#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod()
-#endif
-}
-
-static __attribute__((unused))
-int sys_mount(const char *src, const char *tgt, const char *fst,
-             unsigned long flags, const void *data)
-{
-       return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
-}
-
-static __attribute__((unused))
-int sys_open(const char *path, int flags, mode_t mode)
-{
-#ifdef __NR_openat
-       return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
-#elif defined(__NR_open)
-       return my_syscall3(__NR_open, path, flags, mode);
-#else
-#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open()
-#endif
-}
-
-static __attribute__((unused))
-int sys_pivot_root(const char *new, const char *old)
-{
-       return my_syscall2(__NR_pivot_root, new, old);
-}
-
-static __attribute__((unused))
-int sys_poll(struct pollfd *fds, int nfds, int timeout)
-{
-#if defined(__NR_ppoll)
-       struct timespec t;
-
-       if (timeout >= 0) {
-               t.tv_sec  = timeout / 1000;
-               t.tv_nsec = (timeout % 1000) * 1000000;
-       }
-       return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL);
-#elif defined(__NR_poll)
-       return my_syscall3(__NR_poll, fds, nfds, timeout);
-#else
-#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll()
-#endif
-}
-
-static __attribute__((unused))
-ssize_t sys_read(int fd, void *buf, size_t count)
-{
-       return my_syscall3(__NR_read, fd, buf, count);
-}
-
-static __attribute__((unused))
-ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
-{
-       return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
-}
-
-static __attribute__((unused))
-int sys_sched_yield(void)
-{
-       return my_syscall0(__NR_sched_yield);
-}
-
-static __attribute__((unused))
-int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
-#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
-       struct sel_arg_struct {
-               unsigned long n;
-               fd_set *r, *w, *e;
-               struct timeval *t;
-       } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
-       return my_syscall1(__NR_select, &arg);
-#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6)
-       struct timespec t;
-
-       if (timeout) {
-               t.tv_sec  = timeout->tv_sec;
-               t.tv_nsec = timeout->tv_usec * 1000;
-       }
-       return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
-#elif defined(__NR__newselect) || defined(__NR_select)
-#ifndef __NR__newselect
-#define __NR__newselect __NR_select
-#endif
-       return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
-#else
-#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select()
-#endif
-}
-
-static __attribute__((unused))
-int sys_setpgid(pid_t pid, pid_t pgid)
-{
-       return my_syscall2(__NR_setpgid, pid, pgid);
-}
-
-static __attribute__((unused))
-pid_t sys_setsid(void)
-{
-       return my_syscall0(__NR_setsid);
-}
-
-static __attribute__((unused))
-int sys_stat(const char *path, struct stat *buf)
-{
-       struct sys_stat_struct stat;
-       long ret;
-
-#ifdef __NR_newfstatat
-       /* newfstatat() is the only stat variant available on arm64 */
-       ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0);
-#elif defined(__NR_stat)
-       ret = my_syscall2(__NR_stat, path, &stat);
-#else
-#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat()
-#endif
-       buf->st_dev     = stat.st_dev;
-       buf->st_ino     = stat.st_ino;
-       buf->st_mode    = stat.st_mode;
-       buf->st_nlink   = stat.st_nlink;
-       buf->st_uid     = stat.st_uid;
-       buf->st_gid     = stat.st_gid;
-       buf->st_rdev    = stat.st_rdev;
-       buf->st_size    = stat.st_size;
-       buf->st_blksize = stat.st_blksize;
-       buf->st_blocks  = stat.st_blocks;
-       buf->st_atime   = stat.st_atime;
-       buf->st_mtime   = stat.st_mtime;
-       buf->st_ctime   = stat.st_ctime;
-       return ret;
-}
-
-
-static __attribute__((unused))
-int sys_symlink(const char *old, const char *new)
-{
-#ifdef __NR_symlinkat
-       return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new);
-#elif defined(__NR_symlink)
-       return my_syscall2(__NR_symlink, old, new);
-#else
-#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink()
-#endif
-}
-
-static __attribute__((unused))
-mode_t sys_umask(mode_t mode)
-{
-       return my_syscall1(__NR_umask, mode);
-}
-
-static __attribute__((unused))
-int sys_umount2(const char *path, int flags)
-{
-       return my_syscall2(__NR_umount2, path, flags);
-}
-
-static __attribute__((unused))
-int sys_unlink(const char *path)
-{
-#ifdef __NR_unlinkat
-       return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0);
-#elif defined(__NR_unlink)
-       return my_syscall1(__NR_unlink, path);
-#else
-#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink()
-#endif
-}
-
-static __attribute__((unused))
-pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
-       return my_syscall4(__NR_wait4, pid, status, options, rusage);
-}
-
-static __attribute__((unused))
-pid_t sys_waitpid(pid_t pid, int *status, int options)
-{
-       return sys_wait4(pid, status, options, 0);
-}
-
-static __attribute__((unused))
-pid_t sys_wait(int *status)
-{
-       return sys_waitpid(-1, status, 0);
-}
-
-static __attribute__((unused))
-ssize_t sys_write(int fd, const void *buf, size_t count)
-{
-       return my_syscall3(__NR_write, fd, buf, count);
-}
-
-
-/* Below are the libc-compatible syscalls which return the expected value on
- * success, or -1 with errno set on failure. They rely on the functions above.
- * Similarly they're marked static so that it is possible to assign pointers
- * to them if needed.
- */
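-/* SET_ERRNO() is defined earlier in this file; as a rough sketch, when errno
- * support is enabled it does something like:
- *   #define SET_ERRNO(errcode) do { errno = (errcode); } while (0)
- * and expands to nothing when errno support is disabled.
- */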
-
-static __attribute__((unused))
-int brk(void *addr)
-{
-       void *ret = sys_brk(addr);
-
-       if (!ret) {
-               SET_ERRNO(ENOMEM);
-               return -1;
-       }
-       return 0;
-}
-
-static __attribute__((noreturn,unused))
-void exit(int status)
-{
-       sys_exit(status);
-}
-
-static __attribute__((unused))
-int chdir(const char *path)
-{
-       int ret = sys_chdir(path);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int chmod(const char *path, mode_t mode)
-{
-       int ret = sys_chmod(path, mode);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int chown(const char *path, uid_t owner, gid_t group)
-{
-       int ret = sys_chown(path, owner, group);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int chroot(const char *path)
-{
-       int ret = sys_chroot(path);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int close(int fd)
-{
-       int ret = sys_close(fd);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int dup(int fd)
-{
-       int ret = sys_dup(fd);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int dup2(int old, int new)
-{
-       int ret = sys_dup2(old, new);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-#ifdef __NR_dup3
-static __attribute__((unused))
-int dup3(int old, int new, int flags)
-{
-       int ret = sys_dup3(old, new, flags);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-#endif
-
-static __attribute__((unused))
-int execve(const char *filename, char *const argv[], char *const envp[])
-{
-       int ret = sys_execve(filename, argv, envp);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t fork(void)
-{
-       pid_t ret = sys_fork();
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int fsync(int fd)
-{
-       int ret = sys_fsync(fd);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int getdents64(int fd, struct linux_dirent64 *dirp, int count)
-{
-       int ret = sys_getdents64(fd, dirp, count);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t getpgid(pid_t pid)
-{
-       pid_t ret = sys_getpgid(pid);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t getpgrp(void)
-{
-       pid_t ret = sys_getpgrp();
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t getpid(void)
-{
-       pid_t ret = sys_getpid();
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t gettid(void)
-{
-       pid_t ret = sys_gettid();
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int gettimeofday(struct timeval *tv, struct timezone *tz)
-{
-       int ret = sys_gettimeofday(tv, tz);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int ioctl(int fd, unsigned long req, void *value)
-{
-       int ret = sys_ioctl(fd, req, value);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int kill(pid_t pid, int signal)
-{
-       int ret = sys_kill(pid, signal);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int link(const char *old, const char *new)
-{
-       int ret = sys_link(old, new);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-off_t lseek(int fd, off_t offset, int whence)
-{
-       off_t ret = sys_lseek(fd, offset, whence);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int mkdir(const char *path, mode_t mode)
-{
-       int ret = sys_mkdir(path, mode);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int mknod(const char *path, mode_t mode, dev_t dev)
-{
-       int ret = sys_mknod(path, mode, dev);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int mount(const char *src, const char *tgt,
-         const char *fst, unsigned long flags,
-         const void *data)
-{
-       int ret = sys_mount(src, tgt, fst, flags, data);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int open(const char *path, int flags, mode_t mode)
-{
-       int ret = sys_open(path, flags, mode);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int pivot_root(const char *new, const char *old)
-{
-       int ret = sys_pivot_root(new, old);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int poll(struct pollfd *fds, int nfds, int timeout)
-{
-       int ret = sys_poll(fds, nfds, timeout);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-ssize_t read(int fd, void *buf, size_t count)
-{
-       ssize_t ret = sys_read(fd, buf, count);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int reboot(int cmd)
-{
-       int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-void *sbrk(intptr_t inc)
-{
-       void *ret;
-
-       /* first call to find current end */
-       if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc))
-               return ret + inc;
-
-       SET_ERRNO(ENOMEM);
-       return (void *)-1;
-}
-
-static __attribute__((unused))
-int sched_yield(void)
-{
-       int ret = sys_sched_yield();
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
-       int ret = sys_select(nfds, rfds, wfds, efds, timeout);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int setpgid(pid_t pid, pid_t pgid)
-{
-       int ret = sys_setpgid(pid, pgid);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t setsid(void)
-{
-       pid_t ret = sys_setsid();
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-unsigned int sleep(unsigned int seconds)
-{
-       struct timeval my_timeval = { seconds, 0 };
-
-       if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
-               return my_timeval.tv_sec + !!my_timeval.tv_usec;
-       else
-               return 0;
-}
-
-static __attribute__((unused))
-int msleep(unsigned int msecs)
-{
-       struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
-
-       if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
-               return (my_timeval.tv_sec * 1000) +
-                       (my_timeval.tv_usec / 1000) +
-                       !!(my_timeval.tv_usec % 1000);
-       else
-               return 0;
-}
-
-static __attribute__((unused))
-int stat(const char *path, struct stat *buf)
-{
-       int ret = sys_stat(path, buf);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int symlink(const char *old, const char *new)
-{
-       int ret = sys_symlink(old, new);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int tcsetpgrp(int fd, pid_t pid)
-{
-       return ioctl(fd, TIOCSPGRP, &pid);
-}
-
-static __attribute__((unused))
-mode_t umask(mode_t mode)
-{
-       return sys_umask(mode);
-}
-
-static __attribute__((unused))
-int umount2(const char *path, int flags)
-{
-       int ret = sys_umount2(path, flags);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-int unlink(const char *path)
-{
-       int ret = sys_unlink(path);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
-       pid_t ret = sys_wait4(pid, status, options, rusage);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t waitpid(pid_t pid, int *status, int options)
-{
-       pid_t ret = sys_waitpid(pid, status, options);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-pid_t wait(int *status)
-{
-       pid_t ret = sys_wait(status);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-ssize_t write(int fd, const void *buf, size_t count)
-{
-       ssize_t ret = sys_write(fd, buf, count);
-
-       if (ret < 0) {
-               SET_ERRNO(-ret);
-               ret = -1;
-       }
-       return ret;
-}
-
-/* Some size-optimized reimplementations of a few common str* and mem*
- * functions. They're marked static, except for memcpy() and raise(), which
- * libgcc uses on ARM; those are marked weak instead so that they don't
- * cause link errors when a program is built from multiple files (not
- * recommended).
- */
-
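-/* memmove() copies forward when dst is below src and backward otherwise,
- * so that overlapping areas are handled correctly with a single loop.
- */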
-static __attribute__((unused))
-void *memmove(void *dst, const void *src, size_t len)
-{
-       ssize_t pos = (dst <= src) ? -1 : (long)len;
-       void *ret = dst;
-
-       while (len--) {
-               pos += (dst <= src) ? 1 : -1;
-               ((char *)dst)[pos] = ((char *)src)[pos];
-       }
-       return ret;
-}
-
-static __attribute__((unused))
-void *memset(void *dst, int b, size_t len)
-{
-       char *p = dst;
-
-       while (len--)
-               *(p++) = b;
-       return dst;
-}
-
-static __attribute__((unused))
-int memcmp(const void *s1, const void *s2, size_t n)
-{
-       size_t ofs = 0;
-       int c1 = 0;
-
-       while (ofs < n && !(c1 = ((unsigned char *)s1)[ofs] - ((unsigned char *)s2)[ofs])) {
-               ofs++;
-       }
-       return c1;
-}
-
-static __attribute__((unused))
-char *strcpy(char *dst, const char *src)
-{
-       char *ret = dst;
-
-       while ((*dst++ = *src++));
-       return ret;
-}
-
-static __attribute__((unused))
-char *strchr(const char *s, int c)
-{
-       while (*s) {
-               if (*s == (char)c)
-                       return (char *)s;
-               s++;
-       }
-       return NULL;
-}
-
-static __attribute__((unused))
-char *strrchr(const char *s, int c)
-{
-       const char *ret = NULL;
-
-       while (*s) {
-               if (*s == (char)c)
-                       ret = s;
-               s++;
-       }
-       return (char *)ret;
-}
-
-static __attribute__((unused))
-size_t nolibc_strlen(const char *str)
-{
-       size_t len;
-
-       for (len = 0; str[len]; len++);
-       return len;
-}
-
-#define strlen(str) ({                          \
-       __builtin_constant_p((str)) ?           \
-               __builtin_strlen((str)) :       \
-               nolibc_strlen((str));           \
-})
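-
-/* The __builtin_constant_p() test above lets the compiler fold
- * strlen("literal") into a build-time constant while still routing
- * runtime strings through nolibc_strlen().
- */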
-
-static __attribute__((unused))
-int isdigit(int c)
-{
-       return (unsigned int)(c - '0') <= 9;
-}
-
-static __attribute__((unused))
-long atol(const char *s)
-{
-       unsigned long ret = 0;
-       unsigned long d;
-       int neg = 0;
-
-       if (*s == '-') {
-               neg = 1;
-               s++;
-       }
-
-       while (1) {
-               d = (*s++) - '0';
-               if (d > 9)
-                       break;
-               ret *= 10;
-               ret += d;
-       }
-
-       return neg ? -ret : ret;
-}
-
-static __attribute__((unused))
-int atoi(const char *s)
-{
-       return atol(s);
-}
-
-static __attribute__((unused))
-const char *ltoa(long in)
-{
-       /* large enough for -9223372036854775808 */
-       static char buffer[21];
-       char       *pos = buffer + sizeof(buffer) - 1;
-       int         neg = in < 0;
-       unsigned long n = neg ? -in : in;
-
-       *pos-- = '\0';
-       do {
-               *pos-- = '0' + n % 10;
-               n /= 10;
-               if (pos < buffer)
-                       return pos + 1;
-       } while (n);
-
-       if (neg)
-               *pos-- = '-';
-       return pos + 1;
-}
-
-__attribute__((weak,unused))
-void *memcpy(void *dst, const void *src, size_t len)
-{
-       return memmove(dst, src, len);
-}
-
-/* needed by libgcc for divide by zero */
-__attribute__((weak,unused))
-int raise(int signal)
-{
-       return kill(getpid(), signal);
-}
-
-/* Here come a few helper functions */
-
-static __attribute__((unused))
-void FD_ZERO(fd_set *set)
-{
-       memset(set, 0, sizeof(*set));
-}
-
-static __attribute__((unused))
-void FD_SET(int fd, fd_set *set)
-{
-       if (fd < 0 || fd >= FD_SETSIZE)
-               return;
-       set->fd32[fd / 32] |= 1 << (fd & 31);
-}
-
-/* WARNING: this only handles the first 4096 majors and the first 256 minors */
-static __attribute__((unused))
-dev_t makedev(unsigned int major, unsigned int minor)
-{
-       return ((major & 0xfff) << 8) | (minor & 0xff);
-}
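-
-/* e.g. makedev(1, 3) yields 0x0103, the traditional dev_t of /dev/null */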
+#endif /* _NOLIBC_H */
diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h
new file mode 100644
index 0000000..ef47e71
--- /dev/null
+++ b/tools/include/nolibc/signal.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * signal function definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_SIGNAL_H
+#define _NOLIBC_SIGNAL_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+/* This one is not marked static as it's needed by libgcc for divide by zero */
+__attribute__((weak,unused,section(".text.nolibc_raise")))
+int raise(int signal)
+{
+       return sys_kill(sys_getpid(), signal);
+}
+
+#endif /* _NOLIBC_SIGNAL_H */
diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h
new file mode 100644
index 0000000..1747ae1
--- /dev/null
+++ b/tools/include/nolibc/std.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Standard definitions and types for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STD_H
+#define _NOLIBC_STD_H
+
+/* Declare a few quite common macros and types that usually are in stdlib.h,
+ * stdint.h, ctype.h, unistd.h and a few other common locations. Please place
+ * integer type definitions and generic macros here, but avoid OS-specific and
+ * syscall-specific stuff, as this file is expected to be included very early.
+ */
+
+/* note: may already be defined */
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+/* stdint types */
+typedef unsigned char       uint8_t;
+typedef   signed char        int8_t;
+typedef unsigned short     uint16_t;
+typedef   signed short      int16_t;
+typedef unsigned int       uint32_t;
+typedef   signed int        int32_t;
+typedef unsigned long long uint64_t;
+typedef   signed long long  int64_t;
+typedef unsigned long        size_t;
+typedef   signed long       ssize_t;
+typedef unsigned long     uintptr_t;
+typedef   signed long      intptr_t;
+typedef   signed long     ptrdiff_t;
+
+/* those are commonly provided by sys/types.h */
+typedef unsigned int          dev_t;
+typedef unsigned long         ino_t;
+typedef unsigned int         mode_t;
+typedef   signed int          pid_t;
+typedef unsigned int          uid_t;
+typedef unsigned int          gid_t;
+typedef unsigned long       nlink_t;
+typedef   signed long         off_t;
+typedef   signed long     blksize_t;
+typedef   signed long      blkcnt_t;
+typedef   signed long        time_t;
+
+#endif /* _NOLIBC_STD_H */
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
new file mode 100644
index 0000000..15dedf8
--- /dev/null
+++ b/tools/include/nolibc/stdio.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * minimal stdio function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STDIO_H
+#define _NOLIBC_STDIO_H
+
+#include <stdarg.h>
+
+#include "std.h"
+#include "arch.h"
+#include "errno.h"
+#include "types.h"
+#include "sys.h"
+#include "stdlib.h"
+#include "string.h"
+
+#ifndef EOF
+#define EOF (-1)
+#endif
+
+/* just define FILE as a non-empty type */
+typedef struct FILE {
+       char dummy[1];
+} FILE;
+
+/* We define the 3 common stdio files as constant invalid pointers that
+ * are easily recognized.
+ */
+static __attribute__((unused)) FILE* const stdin  = (FILE*)-3;
+static __attribute__((unused)) FILE* const stdout = (FILE*)-2;
+static __attribute__((unused)) FILE* const stderr = (FILE*)-1;
+
+/* getc(), fgetc(), getchar() */
+
+#define getc(stream) fgetc(stream)
+
+static __attribute__((unused))
+int fgetc(FILE* stream)
+{
+       unsigned char ch;
+       int fd;
+
+       if (stream < stdin || stream > stderr)
+               return EOF;
+
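+       /* stdin/stdout/stderr are (FILE*)-3/-2/-1, so this maps them to fds 0/1/2 */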
+       fd = 3 + (long)stream;
+
+       if (read(fd, &ch, 1) <= 0)
+               return EOF;
+       return ch;
+}
+
+static __attribute__((unused))
+int getchar(void)
+{
+       return fgetc(stdin);
+}
+
+
+/* putc(), fputc(), putchar() */
+
+#define putc(c, stream) fputc(c, stream)
+
+static __attribute__((unused))
+int fputc(int c, FILE* stream)
+{
+       unsigned char ch = c;
+       int fd;
+
+       if (stream < stdin || stream > stderr)
+               return EOF;
+
+       fd = 3 + (long)stream;
+
+       if (write(fd, &ch, 1) <= 0)
+               return EOF;
+       return ch;
+}
+
+static __attribute__((unused))
+int putchar(int c)
+{
+       return fputc(c, stdout);
+}
+
+
+/* fwrite(), puts(), fputs(). Note that puts() appends a trailing '\n' while fputs() does not. */
+
+/* internal fwrite()-like function which only takes a size and returns 0 on
+ * success or EOF on error. It automatically retries on short writes.
+ */
+static __attribute__((unused))
+int _fwrite(const void *buf, size_t size, FILE *stream)
+{
+       ssize_t ret;
+       int fd;
+
+       if (stream < stdin || stream > stderr)
+               return EOF;
+
+       fd = 3 + (long)stream;
+
+       while (size) {
+               ret = write(fd, buf, size);
+               if (ret <= 0)
+                       return EOF;
+               size -= ret;
+               buf += ret;
+       }
+       return 0;
+}
+
+static __attribute__((unused))
+size_t fwrite(const void *s, size_t size, size_t nmemb, FILE *stream)
+{
+       size_t written;
+
+       for (written = 0; written < nmemb; written++) {
+               if (_fwrite(s, size, stream) != 0)
+                       break;
+               s += size;
+       }
+       return written;
+}
+
+static __attribute__((unused))
+int fputs(const char *s, FILE *stream)
+{
+       return _fwrite(s, strlen(s), stream);
+}
+
+static __attribute__((unused))
+int puts(const char *s)
+{
+       if (fputs(s, stdout) == EOF)
+               return EOF;
+       return putchar('\n');
+}
+
+
+/* fgets() */
+static __attribute__((unused))
+char *fgets(char *s, int size, FILE *stream)
+{
+       int ofs;
+       int c;
+
+       for (ofs = 0; ofs + 1 < size;) {
+               c = fgetc(stream);
+               if (c == EOF)
+                       break;
+               s[ofs++] = c;
+               if (c == '\n')
+                       break;
+       }
+       if (ofs < size)
+               s[ofs] = 0;
+       return ofs ? s : NULL;
+}
+
+
+/* minimal vfprintf(). It supports the following formats:
+ *  - %[l*]{d,u,c,x,p}
+ *  - %s
+ * Unknown modifiers are ignored.
+ */
+static __attribute__((unused))
+int vfprintf(FILE *stream, const char *fmt, va_list args)
+{
+       char escape, lpref, c;
+       unsigned long long v;
+       unsigned int written;
+       size_t len, ofs;
+       char tmpbuf[21];
+       const char *outstr;
+
+       written = ofs = escape = lpref = 0;
+       while (1) {
+               c = fmt[ofs++];
+
+               if (escape) {
+                       /* we're in an escape sequence, ofs == 1 */
+                       escape = 0;
+                       if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') {
+                               char *out = tmpbuf;
+
+                               if (c == 'p')
+                                       v = va_arg(args, unsigned long);
+                               else if (lpref) {
+                                       if (lpref > 1)
+                                               v = va_arg(args, unsigned long long);
+                                       else
+                                               v = va_arg(args, unsigned long);
+                               } else
+                                       v = va_arg(args, unsigned int);
+
+                               if (c == 'd') {
+                                       /* sign-extend the value */
+                                       if (lpref == 0)
+                                               v = (long long)(int)v;
+                                       else if (lpref == 1)
+                                               v = (long long)(long)v;
+                               }
+
+                               switch (c) {
+                               case 'c':
+                                       out[0] = v;
+                                       out[1] = 0;
+                                       break;
+                               case 'd':
+                                       i64toa_r(v, out);
+                                       break;
+                               case 'u':
+                                       u64toa_r(v, out);
+                                       break;
+                               case 'p':
+                                       *(out++) = '0';
+                                       *(out++) = 'x';
+                                       /* fall through */
+                               default: /* 'x' and 'p' above */
+                                       u64toh_r(v, out);
+                                       break;
+                               }
+                               outstr = tmpbuf;
+                       }
+                       else if (c == 's') {
+                               outstr = va_arg(args, char *);
+                               if (!outstr)
+                                       outstr="(null)";
+                       }
+                       else if (c == '%') {
+                               /* queue it verbatim */
+                               continue;
+                       }
+                       else {
+                               /* modifiers or final 0 */
+                               if (c == 'l') {
+                                       /* long format prefix, maintain the escape */
+                                       lpref++;
+                               }
+                               escape = 1;
+                               goto do_escape;
+                       }
+                       len = strlen(outstr);
+                       goto flush_str;
+               }
+
+               /* not an escape sequence */
+               if (c == 0 || c == '%') {
+                       /* flush pending data on escape or end */
+                       escape = 1;
+                       lpref = 0;
+                       outstr = fmt;
+                       len = ofs - 1;
+               flush_str:
+                       if (_fwrite(outstr, len, stream) != 0)
+                               break;
+
+                       written += len;
+               do_escape:
+                       if (c == 0)
+                               break;
+                       fmt += ofs;
+                       ofs = 0;
+                       continue;
+               }
+
+               /* literal char, just queue it */
+       }
+       return written;
+}
+
+static __attribute__((unused))
+int fprintf(FILE *stream, const char *fmt, ...)
+{
+       va_list args;
+       int ret;
+
+       va_start(args, fmt);
+       ret = vfprintf(stream, fmt, args);
+       va_end(args);
+       return ret;
+}
+
+static __attribute__((unused))
+int printf(const char *fmt, ...)
+{
+       va_list args;
+       int ret;
+
+       va_start(args, fmt);
+       ret = vfprintf(stdout, fmt, args);
+       va_end(args);
+       return ret;
+}
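+
+/* Example (illustrative): only the formats listed above vfprintf() are
+ * supported, e.g. printf("%s=%d ptr=%p\n", "x", 42, (void *)0);
+ */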
+
+static __attribute__((unused))
+void perror(const char *msg)
+{
+       fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno);
+}
+
+#endif /* _NOLIBC_STDIO_H */
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
new file mode 100644
index 0000000..8fd32ea
--- /dev/null
+++ b/tools/include/nolibc/stdlib.h
@@ -0,0 +1,423 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * stdlib function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STDLIB_H
+#define _NOLIBC_STDLIB_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+#include "string.h"
+
+struct nolibc_heap {
+       size_t  len;
+       char    user_p[] __attribute__((__aligned__));
+};
+
+/* Buffer used to store int-to-ASCII conversions. It is only emitted in the
+ * binary if one of the related functions is actually used. The area is large
+ * enough to store "18446744073709551615" or "-9223372036854775808" and the
+ * final zero.
+ */
+static __attribute__((unused)) char itoa_buffer[21];
+
+/*
+ * As much as possible, please keep functions alphabetically sorted.
+ */
+
+/* must be exported, as it's used by libgcc for various divide functions */
+__attribute__((weak,unused,noreturn,section(".text.nolibc_abort")))
+void abort(void)
+{
+       sys_kill(sys_getpid(), SIGABRT);
+       for (;;);
+}
+
+static __attribute__((unused))
+long atol(const char *s)
+{
+       unsigned long ret = 0;
+       unsigned long d;
+       int neg = 0;
+
+       if (*s == '-') {
+               neg = 1;
+               s++;
+       }
+
+       while (1) {
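+               /* any non-digit wraps to a value larger than 9 thanks to unsigned math */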
+               d = (*s++) - '0';
+               if (d > 9)
+                       break;
+               ret *= 10;
+               ret += d;
+       }
+
+       return neg ? -ret : ret;
+}
+
+static __attribute__((unused))
+int atoi(const char *s)
+{
+       return atol(s);
+}
+
+static __attribute__((unused))
+void free(void *ptr)
+{
+       struct nolibc_heap *heap;
+
+       if (!ptr)
+               return;
+
+       heap = container_of(ptr, struct nolibc_heap, user_p);
+       munmap(heap, heap->len);
+}
+
+/* getenv() tries to find the environment variable named <name> in the
+ * environment array pointed to by global variable "environ" which must be
+ * declared as a char **, and must be terminated by a NULL (it is recommended
+ * to set this variable to the "envp" argument of main()). If the requested
+ * environment variable exists, its value is returned; otherwise NULL is
+ * returned. getenv() is forcefully inlined so that the reference to "environ"
+ * will be dropped if unused, even at -O0.
+ */
+static __attribute__((unused))
+char *_getenv(const char *name, char **environ)
+{
+       int idx, i;
+
+       if (environ) {
+               for (idx = 0; environ[idx]; idx++) {
+                       for (i = 0; name[i] && name[i] == environ[idx][i];)
+                               i++;
+                       if (!name[i] && environ[idx][i] == '=')
+                               return &environ[idx][i+1];
+               }
+       }
+       return NULL;
+}
+
+static inline __attribute__((unused,always_inline))
+char *getenv(const char *name)
+{
+       extern char **environ;
+       return _getenv(name, environ);
+}
+
+static __attribute__((unused))
+void *malloc(size_t len)
+{
+       struct nolibc_heap *heap;
+
+       /* Always allocate memory with a size that is a multiple of 4096. */
+       len  = sizeof(*heap) + len;
+       len  = (len + 4095UL) & -4096UL;
+       heap = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,
+                   -1, 0);
+       if (__builtin_expect(heap == MAP_FAILED, 0))
+               return NULL;
+
+       heap->len = len;
+       return heap->user_p;
+}
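+
+/* For instance malloc(100) maps a single 4096-byte page: sizeof(struct
+ * nolibc_heap) + 100, rounded up to 4096, with user_p pointing just past
+ * the header.
+ */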
+
+static __attribute__((unused))
+void *calloc(size_t size, size_t nmemb)
+{
+       void *orig;
+       size_t res = 0;
+
+       if (__builtin_expect(__builtin_mul_overflow(nmemb, size, &res), 0)) {
+               SET_ERRNO(ENOMEM);
+               return NULL;
+       }
+
+       /*
+        * No need to zero the heap, the MAP_ANONYMOUS in malloc()
+        * already does it.
+        */
+       return malloc(res);
+}
+
+static __attribute__((unused))
+void *realloc(void *old_ptr, size_t new_size)
+{
+       struct nolibc_heap *heap;
+       size_t user_p_len;
+       void *ret;
+
+       if (!old_ptr)
+               return malloc(new_size);
+
+       heap = container_of(old_ptr, struct nolibc_heap, user_p);
+       user_p_len = heap->len - sizeof(*heap);
+       /*
+        * Don't reallocate if @user_p_len >= @new_size: the current block
+        * of memory is already large enough to hold @new_size bytes. Just
+        * return the same pointer.
+        */
+       if (user_p_len >= new_size)
+               return old_ptr;
+
+       ret = malloc(new_size);
+       if (__builtin_expect(!ret, 0))
+               return NULL;
+
+       memcpy(ret, heap->user_p, user_p_len); /* copy only the user area */
+       munmap(heap, heap->len);
+       return ret;
+}
+
+/* Converts the unsigned long integer <in> to its hex representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (17 bytes for "ffffffffffffffff" or 9 for "ffffffff"). The
+ * buffer is filled from the first byte, and the number of characters emitted
+ * (not counting the trailing zero) is returned. The function is constructed
+ * in a way to optimize the code size and avoid any divide that could add a
+ * dependency on large external functions.
+ */
+static __attribute__((unused))
+int utoh_r(unsigned long in, char *buffer)
+{
+       signed char pos = (~0UL > 0xfffffffful) ? 60 : 28;
+       int digits = 0;
+       int dig;
+
+       do {
+               dig = in >> pos;
+               in -= (uint64_t)dig << pos;
+               pos -= 4;
+               if (dig || digits || pos < 0) {
+                       if (dig > 9)
+                               dig += 'a' - '0' - 10;
+                       buffer[digits++] = '0' + dig;
+               }
+       } while (pos >= 0);
+
+       buffer[digits] = 0;
+       return digits;
+}
+
+/* converts unsigned long <in> to a hex string using the static itoa_buffer
+ * and returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *utoh(unsigned long in)
+{
+       utoh_r(in, itoa_buffer);
+       return itoa_buffer;
+}
+
+/* Converts the unsigned long integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for 18446744073709551615 in 64-bit, 11 for
+ * 4294967295 in 32-bit). The buffer is filled from the first byte, and the
+ * number of characters emitted (not counting the trailing zero) is returned.
+ * The function is constructed in a way to optimize the code size and avoid
+ * any divide that could add a dependency on large external functions.
+ */
+static __attribute__((unused))
+int utoa_r(unsigned long in, char *buffer)
+{
+       unsigned long lim;
+       int digits = 0;
+       int pos = (~0UL > 0xfffffffful) ? 19 : 9;
+       int dig;
+
+       do {
+               for (dig = 0, lim = 1; dig < pos; dig++)
+                       lim *= 10;
+
+               if (digits || in >= lim || !pos) {
+                       for (dig = 0; in >= lim; dig++)
+                               in -= lim;
+                       buffer[digits++] = '0' + dig;
+               }
+       } while (pos--);
+
+       buffer[digits] = 0;
+       return digits;
+}
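+
+/* Worked example: for in = 305 on 64-bit, nothing is emitted until pos == 2
+ * (lim == 100); then three subtractions of 100 emit '3', none of 10 emit
+ * '0', and five subtractions of 1 emit '5', producing "305".
+ */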
+
+/* Converts the signed long integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for -9223372036854775808 in 64-bit, 12 for
+ * -2147483648 in 32-bit). The buffer is filled from the first byte, and the
+ * number of characters emitted (not counting the trailing zero) is returned.
+ */
+static __attribute__((unused))
+int itoa_r(long in, char *buffer)
+{
+       char *ptr = buffer;
+       int len = 0;
+
+       if (in < 0) {
+               in = -in;
+               *(ptr++) = '-';
+               len++;
+       }
+       len += utoa_r(in, ptr);
+       return len;
+}
+
+/* for historical compatibility, same as above but returns the pointer to the
+ * buffer.
+ */
+static inline __attribute__((unused))
+char *ltoa_r(long in, char *buffer)
+{
+       itoa_r(in, buffer);
+       return buffer;
+}
+
+/* converts long integer <in> to a string using the static itoa_buffer and
+ * returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *itoa(long in)
+{
+       itoa_r(in, itoa_buffer);
+       return itoa_buffer;
+}
+
+/* converts long integer <in> to a string using the static itoa_buffer and
+ * returns the pointer to that string. Same as above, for compatibility.
+ */
+static inline __attribute__((unused))
+char *ltoa(long in)
+{
+       itoa_r(in, itoa_buffer);
+       return itoa_buffer;
+}
+
+/* converts unsigned long integer <in> to a string using the static itoa_buffer
+ * and returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *utoa(unsigned long in)
+{
+       utoa_r(in, itoa_buffer);
+       return itoa_buffer;
+}
+
+/* Converts the unsigned 64-bit integer <in> to its hex representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (17 bytes for "ffffffffffffffff"). The buffer is filled from
+ * the first byte, and the number of characters emitted (not counting the
+ * trailing zero) is returned. The function is constructed in a way to optimize
+ * the code size and avoid any divide that could add a dependency on large
+ * external functions.
+ */
+static __attribute__((unused))
+int u64toh_r(uint64_t in, char *buffer)
+{
+       signed char pos = 60;
+       int digits = 0;
+       int dig;
+
+       do {
+               if (sizeof(long) >= 8) {
+                       dig = (in >> pos) & 0xF;
+               } else {
+                       /* 32-bit platforms: avoid a 64-bit shift */
+                       uint32_t d = (pos >= 32) ? (in >> 32) : in;
+                       dig = (d >> (pos & 31)) & 0xF;
+               }
+               if (dig > 9)
+                       dig += 'a' - '0' - 10;
+               pos -= 4;
+               if (dig || digits || pos < 0)
+                       buffer[digits++] = '0' + dig;
+       } while (pos >= 0);
+
+       buffer[digits] = 0;
+       return digits;
+}
+
+/* converts uint64_t <in> to a hex string using the static itoa_buffer and
+ * returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *u64toh(uint64_t in)
+{
+       u64toh_r(in, itoa_buffer);
+       return itoa_buffer;
+}
+
+/* Converts the unsigned 64-bit integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for 18446744073709551615). The buffer is filled from
+ * the first byte, and the number of characters emitted (not counting the
+ * trailing zero) is returned. The function is constructed in a way to optimize
+ * the code size and avoid any divide that could add a dependency on large
+ * external functions.
+ */
+static __attribute__((unused))
+int u64toa_r(uint64_t in, char *buffer)
+{
+       unsigned long long lim;
+       int digits = 0;
+       int pos = 19; /* start with the highest possible digit */
+       int dig;
+
+       do {
+               for (dig = 0, lim = 1; dig < pos; dig++)
+                       lim *= 10;
+
+               if (digits || in >= lim || !pos) {
+                       for (dig = 0; in >= lim; dig++)
+                               in -= lim;
+                       buffer[digits++] = '0' + dig;
+               }
+       } while (pos--);
+
+       buffer[digits] = 0;
+       return digits;
+}
+
+/* Converts the signed 64-bit integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for -9223372036854775808). The buffer is filled from
+ * the first byte, and the number of characters emitted (not counting the
+ * trailing zero) is returned.
+ */
+static __attribute__((unused))
+int i64toa_r(int64_t in, char *buffer)
+{
+       char *ptr = buffer;
+       int len = 0;
+
+       if (in < 0) {
+               in = -in;
+               *(ptr++) = '-';
+               len++;
+       }
+       len += u64toa_r(in, ptr);
+       return len;
+}
+
+/* converts int64_t <in> to a string using the static itoa_buffer and returns
+ * the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *i64toa(int64_t in)
+{
+       i64toa_r(in, itoa_buffer);
+       return itoa_buffer;
+}
+
+/* converts uint64_t <in> to a string using the static itoa_buffer and returns
+ * the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *u64toa(uint64_t in)
+{
+       u64toa_r(in, itoa_buffer);
+       return itoa_buffer;
+}
+
+#endif /* _NOLIBC_STDLIB_H */
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
new file mode 100644 (file)
index 0000000..bef35be
--- /dev/null
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * string function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STRING_H
+#define _NOLIBC_STRING_H
+
+#include "std.h"
+
+static void *malloc(size_t len);
+
+/*
+ * As much as possible, please keep functions alphabetically sorted.
+ */
+
+static __attribute__((unused))
+int memcmp(const void *s1, const void *s2, size_t n)
+{
+       size_t ofs = 0;
+       int c1 = 0;
+
+       while (ofs < n &&
+              !(c1 = ((unsigned char *)s1)[ofs] - ((unsigned char *)s2)[ofs])) {
+               ofs++;
+       }
+       return c1;
+}
+
+static __attribute__((unused))
+void *_nolibc_memcpy_up(void *dst, const void *src, size_t len)
+{
+       size_t pos = 0;
+
+       while (pos < len) {
+               ((char *)dst)[pos] = ((const char *)src)[pos];
+               pos++;
+       }
+       return dst;
+}
+
+static __attribute__((unused))
+void *_nolibc_memcpy_down(void *dst, const void *src, size_t len)
+{
+       while (len) {
+               len--;
+               ((char *)dst)[len] = ((const char *)src)[len];
+       }
+       return dst;
+}
+
+/* may be dropped by the compiler when built without -ffreestanding, and then
+ * reported as missing at link time.
+ */
+__attribute__((weak,unused,section(".text.nolibc_memmove")))
+void *memmove(void *dst, const void *src, size_t len)
+{
+       size_t dir, pos;
+
+       pos = len;
+       dir = -1;
+
+       if (dst < src) {
+               pos = -1;
+               dir = 1;
+       }
+
+       while (len) {
+               pos += dir;
+               ((char *)dst)[pos] = ((const char *)src)[pos];
+               len--;
+       }
+       return dst;
+}
+
+/* must be exported, as it's used by libgcc on ARM */
+__attribute__((weak,unused,section(".text.nolibc_memcpy")))
+void *memcpy(void *dst, const void *src, size_t len)
+{
+       return _nolibc_memcpy_up(dst, src, len);
+}
+
+/* may be dropped by the compiler when built without -ffreestanding, and then
+ * reported as missing at link time.
+ */
+__attribute__((weak,unused,section(".text.nolibc_memset")))
+void *memset(void *dst, int b, size_t len)
+{
+       char *p = dst;
+
+       while (len--)
+               *(p++) = b;
+       return dst;
+}
+
+static __attribute__((unused))
+char *strchr(const char *s, int c)
+{
+       while (*s) {
+               if (*s == (char)c)
+                       return (char *)s;
+               s++;
+       }
+       return NULL;
+}
+
+static __attribute__((unused))
+int strcmp(const char *a, const char *b)
+{
+       unsigned int c;
+       int diff;
+
+       while (!(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c)
+               ;
+       return diff;
+}
+
+static __attribute__((unused))
+char *strcpy(char *dst, const char *src)
+{
+       char *ret = dst;
+
+       while ((*dst++ = *src++));
+       return ret;
+}
+
+/* this function is only used when the argument is not a constant, or when
+ * that cannot be known because optimizations are disabled.
+ */
+static __attribute__((unused))
+size_t nolibc_strlen(const char *str)
+{
+       size_t len;
+
+       for (len = 0; str[len]; len++);
+       return len;
+}
+
+/* do not trust __builtin_constant_p() at -O0, as clang will emit the test and
+ * both branches, and will then rely on an external definition of strlen().
+ */
+#if defined(__OPTIMIZE__)
+#define strlen(str) ({                          \
+       __builtin_constant_p((str)) ?           \
+               __builtin_strlen((str)) :       \
+               nolibc_strlen((str));           \
+})
+#else
+#define strlen(str) nolibc_strlen((str))
+#endif
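
(Aside: a minimal sketch of what the strlen() macro buys under -O; the function
name below is illustrative. Constant strings fold at compile time, anything
else falls back to nolibc_strlen().)

    static size_t example_strlen(const char *runtime_str)
    {
            size_t a = strlen("hello");      /* folded to 5 by __builtin_strlen() */
            size_t b = strlen(runtime_str);  /* expands to nolibc_strlen() */

            return a + b;
    }
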
+
+static __attribute__((unused))
+size_t strnlen(const char *str, size_t maxlen)
+{
+       size_t len;
+
+       for (len = 0; (len < maxlen) && str[len]; len++);
+       return len;
+}
+
+static __attribute__((unused))
+char *strdup(const char *str)
+{
+       size_t len;
+       char *ret;
+
+       len = strlen(str);
+       ret = malloc(len + 1);
+       if (__builtin_expect(ret != NULL, 1))
+               memcpy(ret, str, len + 1);
+
+       return ret;
+}
+
+static __attribute__((unused))
+char *strndup(const char *str, size_t maxlen)
+{
+       size_t len;
+       char *ret;
+
+       len = strnlen(str, maxlen);
+       ret = malloc(len + 1);
+       if (__builtin_expect(ret != NULL, 1)) {
+               memcpy(ret, str, len);
+               ret[len] = '\0';
+       }
+
+       return ret;
+}
+
+static __attribute__((unused))
+size_t strlcat(char *dst, const char *src, size_t size)
+{
+       size_t len;
+       char c;
+
+       for (len = 0; dst[len]; len++)
+               ;
+
+       for (;;) {
+               c = *src;
+               if (len + 1 < size)
+                       dst[len] = c;
+               else if (len + 1 == size)
+                       dst[len] = '\0'; /* keep a truncated result terminated */
+               if (!c)
+                       break;
+               len++;
+               src++;
+       }
+
+       return len;
+}
+
+static __attribute__((unused))
+size_t strlcpy(char *dst, const char *src, size_t size)
+{
+       size_t len;
+       char c;
+
+       for (len = 0;;) {
+               c = src[len];
+               if (len + 1 < size)
+                       dst[len] = c;
+               else if (len + 1 == size)
+                       dst[len] = '\0'; /* keep a truncated result terminated */
+               if (!c)
+                       break;
+               len++;
+       }
+       return len;
+}
+
+static __attribute__((unused))
+char *strncat(char *dst, const char *src, size_t size)
+{
+       char *orig = dst;
+
+       while (*dst)
+               dst++;
+
+       while (size && (*dst = *src)) {
+               src++;
+               dst++;
+               size--;
+       }
+
+       *dst = 0;
+       return orig;
+}
+
+static __attribute__((unused))
+int strncmp(const char *a, const char *b, size_t size)
+{
+       unsigned int c;
+       int diff = 0;
+
+       while (size-- &&
+              !(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c)
+               ;
+
+       return diff;
+}
+
+static __attribute__((unused))
+char *strncpy(char *dst, const char *src, size_t size)
+{
+       size_t len;
+
+       for (len = 0; len < size; len++)
+               if ((dst[len] = *src))
+                       src++;
+       return dst;
+}
+
+static __attribute__((unused))
+char *strrchr(const char *s, int c)
+{
+       const char *ret = NULL;
+
+       while (*s) {
+               if (*s == (char)c)
+                       ret = s;
+               s++;
+       }
+       return (char *)ret;
+}
+
+#endif /* _NOLIBC_STRING_H */
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
new file mode 100644 (file)
index 0000000..0849107
--- /dev/null
@@ -0,0 +1,1247 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Syscall definitions for NOLIBC (those in man(2))
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_SYS_H
+#define _NOLIBC_SYS_H
+
+#include <stdarg.h>
+#include "std.h"
+
+/* system includes */
+#include <asm/unistd.h>
+#include <asm/signal.h>  // for SIGCHLD
+#include <asm/ioctls.h>
+#include <asm/mman.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#include <linux/time.h>
+
+#include "arch.h"
+#include "errno.h"
+#include "types.h"
+
+
+/* Functions in this file only describe syscalls. They're declared static so
+ * that the compiler usually decides to inline them, while the program remains
+ * free to take a pointer to any of their instances. Each syscall exists in
+ * two versions:
+ *   - the "internal" ones, which match the raw syscall interface at the
+ *     kernel level and may sometimes slightly differ from the documented
+ *     libc-level ones. For example, most of them return either a valid
+ *     value or -errno. All of these are prefixed with "sys_". They may be
+ *     called by non-portable applications if desired.
+ *
+ *   - the "exported" ones, whose interface must closely match the one
+ *     documented in man(2), and which applications are supposed to expect.
+ *     These ones rely on the internal ones, and set errno.
+ *
+ * Each syscall is thus defined as a pair of functions, sorted in alphabetical
+ * order of the exported names.
+ *
+ * In case of doubt about whether a function belongs here: only those which
+ * set errno should be defined in this file. Wrappers like those appearing in
+ * man(3) should not be placed here.
+ */
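
(Aside: a hedged sketch of how the two levels look from a caller's side; the
function name is illustrative and the flag values come from the arch headers
included above.)

    static void example_syscall_levels(void)
    {
            /* raw level: the return value is a valid fd or a -errno code */
            int fd = sys_open("/etc/hosts", O_RDONLY, 0);

            if (fd < 0) {
                    /* fd is -ENOENT, -EACCES, ... */
            }

            /* libc level: -1 on failure, with the error moved into errno */
            fd = open("/etc/hosts", O_RDONLY);
            if (fd == -1) {
                    /* errno is ENOENT, EACCES, ... */
            }
    }
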
+
+
+/*
+ * int brk(void *addr);
+ * void *sbrk(intptr_t inc)
+ */
+
+static __attribute__((unused))
+void *sys_brk(void *addr)
+{
+       return (void *)my_syscall1(__NR_brk, addr);
+}
+
+static __attribute__((unused))
+int brk(void *addr)
+{
+       void *ret = sys_brk(addr);
+
+       if (!ret) {
+               SET_ERRNO(ENOMEM);
+               return -1;
+       }
+       return 0;
+}
+
+static __attribute__((unused))
+void *sbrk(intptr_t inc)
+{
+       void *ret;
+
+       /* first call to find current end */
+       if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc))
+               return ret + inc;
+
+       SET_ERRNO(ENOMEM);
+       return (void *)-1;
+}
+
+
+/*
+ * int chdir(const char *path);
+ */
+
+static __attribute__((unused))
+int sys_chdir(const char *path)
+{
+       return my_syscall1(__NR_chdir, path);
+}
+
+static __attribute__((unused))
+int chdir(const char *path)
+{
+       int ret = sys_chdir(path);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int chmod(const char *path, mode_t mode);
+ */
+
+static __attribute__((unused))
+int sys_chmod(const char *path, mode_t mode)
+{
+#ifdef __NR_fchmodat
+       return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0);
+#elif defined(__NR_chmod)
+       return my_syscall2(__NR_chmod, path, mode);
+#else
+#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod()
+#endif
+}
+
+static __attribute__((unused))
+int chmod(const char *path, mode_t mode)
+{
+       int ret = sys_chmod(path, mode);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int chown(const char *path, uid_t owner, gid_t group);
+ */
+
+static __attribute__((unused))
+int sys_chown(const char *path, uid_t owner, gid_t group)
+{
+#ifdef __NR_fchownat
+       return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0);
+#elif defined(__NR_chown)
+       return my_syscall3(__NR_chown, path, owner, group);
+#else
+#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown()
+#endif
+}
+
+static __attribute__((unused))
+int chown(const char *path, uid_t owner, gid_t group)
+{
+       int ret = sys_chown(path, owner, group);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int chroot(const char *path);
+ */
+
+static __attribute__((unused))
+int sys_chroot(const char *path)
+{
+       return my_syscall1(__NR_chroot, path);
+}
+
+static __attribute__((unused))
+int chroot(const char *path)
+{
+       int ret = sys_chroot(path);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int close(int fd);
+ */
+
+static __attribute__((unused))
+int sys_close(int fd)
+{
+       return my_syscall1(__NR_close, fd);
+}
+
+static __attribute__((unused))
+int close(int fd)
+{
+       int ret = sys_close(fd);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int dup(int fd);
+ */
+
+static __attribute__((unused))
+int sys_dup(int fd)
+{
+       return my_syscall1(__NR_dup, fd);
+}
+
+static __attribute__((unused))
+int dup(int fd)
+{
+       int ret = sys_dup(fd);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int dup2(int old, int new);
+ */
+
+static __attribute__((unused))
+int sys_dup2(int old, int new)
+{
+#ifdef __NR_dup3
+       return my_syscall3(__NR_dup3, old, new, 0);
+#elif defined(__NR_dup2)
+       return my_syscall2(__NR_dup2, old, new);
+#else
+#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2()
+#endif
+}
+
+static __attribute__((unused))
+int dup2(int old, int new)
+{
+       int ret = sys_dup2(old, new);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int dup3(int old, int new, int flags);
+ */
+
+#ifdef __NR_dup3
+static __attribute__((unused))
+int sys_dup3(int old, int new, int flags)
+{
+       return my_syscall3(__NR_dup3, old, new, flags);
+}
+
+static __attribute__((unused))
+int dup3(int old, int new, int flags)
+{
+       int ret = sys_dup3(old, new, flags);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+#endif
+
+
+/*
+ * int execve(const char *filename, char *const argv[], char *const envp[]);
+ */
+
+static __attribute__((unused))
+int sys_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       return my_syscall3(__NR_execve, filename, argv, envp);
+}
+
+static __attribute__((unused))
+int execve(const char *filename, char *const argv[], char *const envp[])
+{
+       int ret = sys_execve(filename, argv, envp);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * void exit(int status);
+ */
+
+static __attribute__((noreturn,unused))
+void sys_exit(int status)
+{
+       my_syscall1(__NR_exit, status & 255);
+       while (1); /* shut the "noreturn" warnings */
+}
+
+static __attribute__((noreturn,unused))
+void exit(int status)
+{
+       sys_exit(status);
+}
+
+
+/*
+ * pid_t fork(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_fork(void)
+{
+#ifdef __NR_clone
+       /* note: some archs only have clone() and not fork(). The clone() API
+        * differs between archs, but most of them take the flags in the first
+        * argument and ignore the remaining ones when no other flag is set.
+        */
+       return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0);
+#elif defined(__NR_fork)
+       return my_syscall0(__NR_fork);
+#else
+#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork()
+#endif
+}
+
+static __attribute__((unused))
+pid_t fork(void)
+{
+       pid_t ret = sys_fork();
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int fsync(int fd);
+ */
+
+static __attribute__((unused))
+int sys_fsync(int fd)
+{
+       return my_syscall1(__NR_fsync, fd);
+}
+
+static __attribute__((unused))
+int fsync(int fd)
+{
+       int ret = sys_fsync(fd);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int getdents64(int fd, struct linux_dirent64 *dirp, int count);
+ */
+
+static __attribute__((unused))
+int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count)
+{
+       return my_syscall3(__NR_getdents64, fd, dirp, count);
+}
+
+static __attribute__((unused))
+int getdents64(int fd, struct linux_dirent64 *dirp, int count)
+{
+       int ret = sys_getdents64(fd, dirp, count);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * pid_t getpgid(pid_t pid);
+ */
+
+static __attribute__((unused))
+pid_t sys_getpgid(pid_t pid)
+{
+       return my_syscall1(__NR_getpgid, pid);
+}
+
+static __attribute__((unused))
+pid_t getpgid(pid_t pid)
+{
+       pid_t ret = sys_getpgid(pid);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * pid_t getpgrp(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_getpgrp(void)
+{
+       return sys_getpgid(0);
+}
+
+static __attribute__((unused))
+pid_t getpgrp(void)
+{
+       return sys_getpgrp();
+}
+
+
+/*
+ * pid_t getpid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_getpid(void)
+{
+       return my_syscall0(__NR_getpid);
+}
+
+static __attribute__((unused))
+pid_t getpid(void)
+{
+       return sys_getpid();
+}
+
+
+/*
+ * pid_t getppid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_getppid(void)
+{
+       return my_syscall0(__NR_getppid);
+}
+
+static __attribute__((unused))
+pid_t getppid(void)
+{
+       return sys_getppid();
+}
+
+
+/*
+ * pid_t gettid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_gettid(void)
+{
+       return my_syscall0(__NR_gettid);
+}
+
+static __attribute__((unused))
+pid_t gettid(void)
+{
+       return sys_gettid();
+}
+
+
+/*
+ * int gettimeofday(struct timeval *tv, struct timezone *tz);
+ */
+
+static __attribute__((unused))
+int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+       return my_syscall2(__NR_gettimeofday, tv, tz);
+}
+
+static __attribute__((unused))
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+       int ret = sys_gettimeofday(tv, tz);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int ioctl(int fd, unsigned long req, void *value);
+ */
+
+static __attribute__((unused))
+int sys_ioctl(int fd, unsigned long req, void *value)
+{
+       return my_syscall3(__NR_ioctl, fd, req, value);
+}
+
+static __attribute__((unused))
+int ioctl(int fd, unsigned long req, void *value)
+{
+       int ret = sys_ioctl(fd, req, value);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+/*
+ * int kill(pid_t pid, int signal);
+ */
+
+static __attribute__((unused))
+int sys_kill(pid_t pid, int signal)
+{
+       return my_syscall2(__NR_kill, pid, signal);
+}
+
+static __attribute__((unused))
+int kill(pid_t pid, int signal)
+{
+       int ret = sys_kill(pid, signal);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int link(const char *old, const char *new);
+ */
+
+static __attribute__((unused))
+int sys_link(const char *old, const char *new)
+{
+#ifdef __NR_linkat
+       return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0);
+#elif defined(__NR_link)
+       return my_syscall2(__NR_link, old, new);
+#else
+#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link()
+#endif
+}
+
+static __attribute__((unused))
+int link(const char *old, const char *new)
+{
+       int ret = sys_link(old, new);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * off_t lseek(int fd, off_t offset, int whence);
+ */
+
+static __attribute__((unused))
+off_t sys_lseek(int fd, off_t offset, int whence)
+{
+       return my_syscall3(__NR_lseek, fd, offset, whence);
+}
+
+static __attribute__((unused))
+off_t lseek(int fd, off_t offset, int whence)
+{
+       off_t ret = sys_lseek(fd, offset, whence);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int mkdir(const char *path, mode_t mode);
+ */
+
+static __attribute__((unused))
+int sys_mkdir(const char *path, mode_t mode)
+{
+#ifdef __NR_mkdirat
+       return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode);
+#elif defined(__NR_mkdir)
+       return my_syscall2(__NR_mkdir, path, mode);
+#else
+#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir()
+#endif
+}
+
+static __attribute__((unused))
+int mkdir(const char *path, mode_t mode)
+{
+       int ret = sys_mkdir(path, mode);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int mknod(const char *path, mode_t mode, dev_t dev);
+ */
+
+static __attribute__((unused))
+long sys_mknod(const char *path, mode_t mode, dev_t dev)
+{
+#ifdef __NR_mknodat
+       return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
+#elif defined(__NR_mknod)
+       return my_syscall3(__NR_mknod, path, mode, dev);
+#else
+#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod()
+#endif
+}
+
+static __attribute__((unused))
+int mknod(const char *path, mode_t mode, dev_t dev)
+{
+       int ret = sys_mknod(path, mode, dev);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+#ifndef MAP_SHARED
+#define MAP_SHARED             0x01    /* Share changes */
+#define MAP_PRIVATE            0x02    /* Changes are private */
+#define MAP_SHARED_VALIDATE    0x03    /* share + validate extension flags */
+#endif
+
+#ifndef MAP_FAILED
+#define MAP_FAILED ((void *)-1)
+#endif
+
+static __attribute__((unused))
+void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
+              off_t offset)
+{
+#ifndef my_syscall6
+       /* Function not implemented. */
+       return (void *)-ENOSYS;
+#else
+
+       int n;
+
+#if defined(__i386__)
+       n = __NR_mmap2;
+       offset >>= 12;
+#else
+       n = __NR_mmap;
+#endif
+
+       return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset);
+#endif
+}
+
+static __attribute__((unused))
+void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+       void *ret = sys_mmap(addr, length, prot, flags, fd, offset);
+
+       if ((unsigned long)ret >= -4095UL) {
+               SET_ERRNO(-(long)ret);
+               ret = MAP_FAILED;
+       }
+       return ret;
+}
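
(Aside: the (unsigned long)ret >= -4095UL test above mirrors the kernel's
MAX_ERRNO convention, so only the last 4095 address values are treated as
error codes. A minimal usage sketch, assuming the PROT_* and MAP_* flags from
<asm/mman.h>:)

    static void *example_mmap(void)
    {
            void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED)
                    return NULL; /* errno was set from the -errno return */
            return p;
    }
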
+
+static __attribute__((unused))
+int sys_munmap(void *addr, size_t length)
+{
+       return my_syscall2(__NR_munmap, addr, length);
+}
+
+static __attribute__((unused))
+int munmap(void *addr, size_t length)
+{
+       int ret = sys_munmap(addr, length);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+/*
+ * int mount(const char *source, const char *target,
+ *           const char *fstype, unsigned long flags,
+ *           const void *data);
+ */
+static __attribute__((unused))
+int sys_mount(const char *src, const char *tgt, const char *fst,
+                     unsigned long flags, const void *data)
+{
+       return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
+}
+
+static __attribute__((unused))
+int mount(const char *src, const char *tgt,
+          const char *fst, unsigned long flags,
+          const void *data)
+{
+       int ret = sys_mount(src, tgt, fst, flags, data);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int open(const char *path, int flags[, mode_t mode]);
+ */
+
+static __attribute__((unused))
+int sys_open(const char *path, int flags, mode_t mode)
+{
+#ifdef __NR_openat
+       return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
+#elif defined(__NR_open)
+       return my_syscall3(__NR_open, path, flags, mode);
+#else
+#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open()
+#endif
+}
+
+static __attribute__((unused))
+int open(const char *path, int flags, ...)
+{
+       mode_t mode = 0;
+       int ret;
+
+       if (flags & O_CREAT) {
+               va_list args;
+
+               va_start(args, flags);
+               mode = va_arg(args, mode_t);
+               va_end(args);
+       }
+
+       ret = sys_open(path, flags, mode);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
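
(Aside: the mode argument of the variadic open() above is only read when
O_CREAT is set, so two- and three-argument calls both work. A quick sketch with
an illustrative path:)

    static void example_open(void)
    {
            int fd1 = open("/tmp/demo", O_WRONLY | O_CREAT, 0644); /* mode used */
            int fd2 = open("/tmp/demo", O_RDONLY);                 /* mode is 0 */

            (void)fd1; (void)fd2;
    }
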
+
+
+/*
+ * int pivot_root(const char *new, const char *old);
+ */
+
+static __attribute__((unused))
+int sys_pivot_root(const char *new, const char *old)
+{
+       return my_syscall2(__NR_pivot_root, new, old);
+}
+
+static __attribute__((unused))
+int pivot_root(const char *new, const char *old)
+{
+       int ret = sys_pivot_root(new, old);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int poll(struct pollfd *fds, int nfds, int timeout);
+ */
+
+static __attribute__((unused))
+int sys_poll(struct pollfd *fds, int nfds, int timeout)
+{
+#if defined(__NR_ppoll)
+       struct timespec t;
+
+       if (timeout >= 0) {
+               t.tv_sec  = timeout / 1000;
+               t.tv_nsec = (timeout % 1000) * 1000000;
+       }
+       return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL);
+#elif defined(__NR_poll)
+       return my_syscall3(__NR_poll, fds, nfds, timeout);
+#else
+#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll()
+#endif
+}
+
+static __attribute__((unused))
+int poll(struct pollfd *fds, int nfds, int timeout)
+{
+       int ret = sys_poll(fds, nfds, timeout);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * ssize_t read(int fd, void *buf, size_t count);
+ */
+
+static __attribute__((unused))
+ssize_t sys_read(int fd, void *buf, size_t count)
+{
+       return my_syscall3(__NR_read, fd, buf, count);
+}
+
+static __attribute__((unused))
+ssize_t read(int fd, void *buf, size_t count)
+{
+       ssize_t ret = sys_read(fd, buf, count);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int reboot(int cmd);
+ * <cmd> is among LINUX_REBOOT_CMD_*
+ */
+
+static __attribute__((unused))
+ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
+{
+       return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
+}
+
+static __attribute__((unused))
+int reboot(int cmd)
+{
+       int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
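
(Aside: the wrapper fills in the two magic numbers, so a caller only passes the
command; the LINUX_REBOOT_CMD_* values are defined in types.h later in this
series. Illustrative sketch:)

    static void example_poweroff(void)
    {
            reboot(LINUX_REBOOT_CMD_POWER_OFF); /* e.g. from a pid-1 init */
    }
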
+
+
+/*
+ * int sched_yield(void);
+ */
+
+static __attribute__((unused))
+int sys_sched_yield(void)
+{
+       return my_syscall0(__NR_sched_yield);
+}
+
+static __attribute__((unused))
+int sched_yield(void)
+{
+       int ret = sys_sched_yield();
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int select(int nfds, fd_set *read_fds, fd_set *write_fds,
+ *            fd_set *except_fds, struct timeval *timeout);
+ */
+
+static __attribute__((unused))
+int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
+       struct sel_arg_struct {
+               unsigned long n;
+               fd_set *r, *w, *e;
+               struct timeval *t;
+       } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
+       return my_syscall1(__NR_select, &arg);
+#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6)
+       struct timespec t;
+
+       if (timeout) {
+               t.tv_sec  = timeout->tv_sec;
+               t.tv_nsec = timeout->tv_usec * 1000;
+       }
+       return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#elif defined(__NR__newselect) || defined(__NR_select)
+#ifndef __NR__newselect
+#define __NR__newselect __NR_select
+#endif
+       return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
+#else
+#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select()
+#endif
+}
+
+static __attribute__((unused))
+int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+       int ret = sys_select(nfds, rfds, wfds, efds, timeout);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int setpgid(pid_t pid, pid_t pgid);
+ */
+
+static __attribute__((unused))
+int sys_setpgid(pid_t pid, pid_t pgid)
+{
+       return my_syscall2(__NR_setpgid, pid, pgid);
+}
+
+static __attribute__((unused))
+int setpgid(pid_t pid, pid_t pgid)
+{
+       int ret = sys_setpgid(pid, pgid);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * pid_t setsid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_setsid(void)
+{
+       return my_syscall0(__NR_setsid);
+}
+
+static __attribute__((unused))
+pid_t setsid(void)
+{
+       pid_t ret = sys_setsid();
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int stat(const char *path, struct stat *buf);
+ * Warning: the struct stat's layout is arch-dependent.
+ */
+
+static __attribute__((unused))
+int sys_stat(const char *path, struct stat *buf)
+{
+       struct sys_stat_struct stat;
+       long ret;
+
+#ifdef __NR_newfstatat
+       /* only solution for arm64 */
+       ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0);
+#elif defined(__NR_stat)
+       ret = my_syscall2(__NR_stat, path, &stat);
+#else
+#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat()
+#endif
+       buf->st_dev     = stat.st_dev;
+       buf->st_ino     = stat.st_ino;
+       buf->st_mode    = stat.st_mode;
+       buf->st_nlink   = stat.st_nlink;
+       buf->st_uid     = stat.st_uid;
+       buf->st_gid     = stat.st_gid;
+       buf->st_rdev    = stat.st_rdev;
+       buf->st_size    = stat.st_size;
+       buf->st_blksize = stat.st_blksize;
+       buf->st_blocks  = stat.st_blocks;
+       buf->st_atime   = stat.st_atime;
+       buf->st_mtime   = stat.st_mtime;
+       buf->st_ctime   = stat.st_ctime;
+       return ret;
+}
+
+static __attribute__((unused))
+int stat(const char *path, struct stat *buf)
+{
+       int ret = sys_stat(path, buf);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int symlink(const char *old, const char *new);
+ */
+
+static __attribute__((unused))
+int sys_symlink(const char *old, const char *new)
+{
+#ifdef __NR_symlinkat
+       return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new);
+#elif defined(__NR_symlink)
+       return my_syscall2(__NR_symlink, old, new);
+#else
+#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink()
+#endif
+}
+
+static __attribute__((unused))
+int symlink(const char *old, const char *new)
+{
+       int ret = sys_symlink(old, new);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * mode_t umask(mode_t mode);
+ */
+
+static __attribute__((unused))
+mode_t sys_umask(mode_t mode)
+{
+       return my_syscall1(__NR_umask, mode);
+}
+
+static __attribute__((unused))
+mode_t umask(mode_t mode)
+{
+       return sys_umask(mode);
+}
+
+
+/*
+ * int umount2(const char *path, int flags);
+ */
+
+static __attribute__((unused))
+int sys_umount2(const char *path, int flags)
+{
+       return my_syscall2(__NR_umount2, path, flags);
+}
+
+static __attribute__((unused))
+int umount2(const char *path, int flags)
+{
+       int ret = sys_umount2(path, flags);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * int unlink(const char *path);
+ */
+
+static __attribute__((unused))
+int sys_unlink(const char *path)
+{
+#ifdef __NR_unlinkat
+       return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0);
+#elif defined(__NR_unlink)
+       return my_syscall1(__NR_unlink, path);
+#else
+#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink()
+#endif
+}
+
+static __attribute__((unused))
+int unlink(const char *path)
+{
+       int ret = sys_unlink(path);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+/*
+ * pid_t wait(int *status);
+ * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage);
+ * pid_t waitpid(pid_t pid, int *status, int options);
+ */
+
+static __attribute__((unused))
+pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+       return my_syscall4(__NR_wait4, pid, status, options, rusage);
+}
+
+static __attribute__((unused))
+pid_t wait(int *status)
+{
+       pid_t ret = sys_wait4(-1, status, 0, NULL);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+static __attribute__((unused))
+pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+       pid_t ret = sys_wait4(pid, status, options, rusage);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+static __attribute__((unused))
+pid_t waitpid(pid_t pid, int *status, int options)
+{
+       pid_t ret = sys_wait4(pid, status, options, NULL);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
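
(Aside: a hedged fork/wait round-trip using the wrappers above; WIFEXITED() and
WEXITSTATUS() are provided by types.h later in this series, and the function
name is illustrative.)

    static int example_fork_wait(void)
    {
            int status;
            pid_t pid = fork();

            if (pid == 0)
                    exit(42);                   /* child */
            if (pid > 0 && waitpid(pid, &status, 0) == pid && WIFEXITED(status))
                    return WEXITSTATUS(status); /* 42 */
            return -1;
    }
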
+
+
+/*
+ * ssize_t write(int fd, const void *buf, size_t count);
+ */
+
+static __attribute__((unused))
+ssize_t sys_write(int fd, const void *buf, size_t count)
+{
+       return my_syscall3(__NR_write, fd, buf, count);
+}
+
+static __attribute__((unused))
+ssize_t write(int fd, const void *buf, size_t count)
+{
+       ssize_t ret = sys_write(fd, buf, count);
+
+       if (ret < 0) {
+               SET_ERRNO(-ret);
+               ret = -1;
+       }
+       return ret;
+}
+
+
+#endif /* _NOLIBC_SYS_H */
diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h
new file mode 100644 (file)
index 0000000..d18b766
--- /dev/null
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * time function definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_TIME_H
+#define _NOLIBC_TIME_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+static __attribute__((unused))
+time_t time(time_t *tptr)
+{
+       struct timeval tv;
+
+       /* note, cannot fail here */
+       sys_gettimeofday(&tv, NULL);
+
+       if (tptr)
+               *tptr = tv.tv_sec;
+       return tv.tv_sec;
+}
+
+#endif /* _NOLIBC_TIME_H */
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
new file mode 100644 (file)
index 0000000..9599970
--- /dev/null
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Special types used by various syscalls for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_TYPES_H
+#define _NOLIBC_TYPES_H
+
+#include "std.h"
+#include <linux/time.h>
+
+
+/* Only the generic macros and types may be defined here. The arch-specific
+ * ones such as the O_RDONLY and related macros used by fcntl() and open(), or
+ * the layout of sys_stat_struct must not be defined here.
+ */
+
+/* stat flags (WARNING, octal here) */
+#define S_IFDIR        0040000
+#define S_IFCHR        0020000
+#define S_IFBLK        0060000
+#define S_IFREG        0100000
+#define S_IFIFO        0010000
+#define S_IFLNK        0120000
+#define S_IFSOCK       0140000
+#define S_IFMT         0170000
+
+#define S_ISDIR(mode)  (((mode) & S_IFDIR)  == S_IFDIR)
+#define S_ISCHR(mode)  (((mode) & S_IFCHR)  == S_IFCHR)
+#define S_ISBLK(mode)  (((mode) & S_IFBLK)  == S_IFBLK)
+#define S_ISREG(mode)  (((mode) & S_IFREG)  == S_IFREG)
+#define S_ISFIFO(mode) (((mode) & S_IFIFO)  == S_IFIFO)
+#define S_ISLNK(mode)  (((mode) & S_IFLNK)  == S_IFLNK)
+#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
+
+/* dirent types */
+#define DT_UNKNOWN     0x0
+#define DT_FIFO        0x1
+#define DT_CHR         0x2
+#define DT_DIR         0x4
+#define DT_BLK         0x6
+#define DT_REG         0x8
+#define DT_LNK         0xa
+#define DT_SOCK        0xc
+
+/* commonly an fd_set represents 256 FDs */
+#ifndef FD_SETSIZE
+#define FD_SETSIZE     256
+#endif
+
+/* PATH_MAX and MAXPATHLEN are often used and found with plenty of different
+ * values.
+ */
+#ifndef PATH_MAX
+#define PATH_MAX       4096
+#endif
+
+#ifndef MAXPATHLEN
+#define MAXPATHLEN     (PATH_MAX)
+#endif
+
+/* Special FD used by all the *at functions */
+#ifndef AT_FDCWD
+#define AT_FDCWD       (-100)
+#endif
+
+/* whence values for lseek() */
+#define SEEK_SET       0
+#define SEEK_CUR       1
+#define SEEK_END       2
+
+/* cmd for reboot() */
+#define LINUX_REBOOT_MAGIC1         0xfee1dead
+#define LINUX_REBOOT_MAGIC2         0x28121969
+#define LINUX_REBOOT_CMD_HALT       0xcdef0123
+#define LINUX_REBOOT_CMD_POWER_OFF  0x4321fedc
+#define LINUX_REBOOT_CMD_RESTART    0x01234567
+#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
+
+/* Macros used on waitpid()'s return status */
+#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
+#define WIFEXITED(status)   (((status) & 0x7f) == 0)
+
+/* waitpid() flags */
+#define WNOHANG      1
+
+/* standard exit() codes */
+#define EXIT_SUCCESS 0
+#define EXIT_FAILURE 1
+
+/* for select() */
+typedef struct {
+       uint32_t fd32[(FD_SETSIZE + 31) / 32];
+} fd_set;
+
+#define FD_CLR(fd, set) do {                                            \
+               fd_set *__set = (set);                                  \
+               int __fd = (fd);                                        \
+               if (__fd >= 0)                                          \
+                       __set->fd32[__fd / 32] &= ~(1U << (__fd & 31)); \
+       } while (0)
+
+#define FD_SET(fd, set) do {                                            \
+               fd_set *__set = (set);                                  \
+               int __fd = (fd);                                        \
+               if (__fd >= 0)                                          \
+                       __set->fd32[__fd / 32] |= 1U << (__fd & 31);    \
+       } while (0)
+
+#define FD_ISSET(fd, set) ({                                                  \
+               fd_set *__set = (set);                                        \
+               int __fd = (fd);                                              \
+               int __r = 0;                                                  \
+               if (__fd >= 0)                                                \
+                       __r = !!(__set->fd32[__fd / 32] & 1U << (__fd & 31)); \
+               __r;                                                          \
+       })
+
+#define FD_ZERO(set) do {                                               \
+               fd_set *__set = (set);                                  \
+               int __idx;                                              \
+               for (__idx = 0; __idx < (FD_SETSIZE + 31) / 32; __idx++)        \
+                       __set->fd32[__idx] = 0;                         \
+       } while (0)
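
(Aside: a minimal sketch of the fd_set macros together with the select()
wrapper from sys.h, watching stdin for up to one second:)

    static int example_select_stdin(void)
    {
            fd_set rfds;
            struct timeval tv = { 1, 0 }; /* one second */

            FD_ZERO(&rfds);
            FD_SET(0, &rfds);
            if (select(1, &rfds, NULL, NULL, &tv) > 0 && FD_ISSET(0, &rfds))
                    return 1; /* stdin is readable */
            return 0;
    }
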
+
+/* for poll() */
+#define POLLIN          0x0001
+#define POLLPRI         0x0002
+#define POLLOUT         0x0004
+#define POLLERR         0x0008
+#define POLLHUP         0x0010
+#define POLLNVAL        0x0020
+
+struct pollfd {
+       int fd;
+       short int events;
+       short int revents;
+};
+
+/* for getdents64() */
+struct linux_dirent64 {
+       uint64_t       d_ino;
+       int64_t        d_off;
+       unsigned short d_reclen;
+       unsigned char  d_type;
+       char           d_name[];
+};
+
+/* needed by wait4() */
+struct rusage {
+       struct timeval ru_utime;
+       struct timeval ru_stime;
+       long   ru_maxrss;
+       long   ru_ixrss;
+       long   ru_idrss;
+       long   ru_isrss;
+       long   ru_minflt;
+       long   ru_majflt;
+       long   ru_nswap;
+       long   ru_inblock;
+       long   ru_oublock;
+       long   ru_msgsnd;
+       long   ru_msgrcv;
+       long   ru_nsignals;
+       long   ru_nvcsw;
+       long   ru_nivcsw;
+};
+
+/* This is the format of the struct as returned by the libc to the
+ * application, which significantly differs from the format returned by the
+ * stat() syscall flavours.
+ */
+struct stat {
+       dev_t     st_dev;     /* ID of device containing file */
+       ino_t     st_ino;     /* inode number */
+       mode_t    st_mode;    /* protection */
+       nlink_t   st_nlink;   /* number of hard links */
+       uid_t     st_uid;     /* user ID of owner */
+       gid_t     st_gid;     /* group ID of owner */
+       dev_t     st_rdev;    /* device ID (if special file) */
+       off_t     st_size;    /* total size, in bytes */
+       blksize_t st_blksize; /* blocksize for file system I/O */
+       blkcnt_t  st_blocks;  /* number of 512B blocks allocated */
+       time_t    st_atime;   /* time of last access */
+       time_t    st_mtime;   /* time of last modification */
+       time_t    st_ctime;   /* time of last status change */
+};
+
+/* WARNING: these only deal with the first 4096 majors and first 256 minors */
+#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff)))
+#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff))
+#define minor(dev) ((unsigned int)(((dev) & 0xff)))
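
(Aside: a quick round-trip through the helpers above; /dev/null is
conventionally major 1, minor 3:)

    dev_t dev = makedev(1, 3);
    /* major(dev) == 1, minor(dev) == 3 */
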
+
+#ifndef offsetof
+#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD)
+#endif
+
+#ifndef container_of
+#define container_of(PTR, TYPE, FIELD) ({                      \
+       __typeof__(((TYPE *)0)->FIELD) *__FIELD_PTR = (PTR);    \
+       (TYPE *)((char *) __FIELD_PTR - offsetof(TYPE, FIELD)); \
+})
+#endif
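
(Aside: a minimal container_of() sketch recovering an enclosing struct from a
pointer to one of its members; the struct and function names are illustrative.)

    struct example_wrap {
            int cookie;
            struct timeval tv;
    };

    static void example_container_of(struct timeval *p)
    {
            /* valid when p really points at the tv member of an example_wrap */
            struct example_wrap *w = container_of(p, struct example_wrap, tv);

            (void)w;
    }
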
+
+#endif /* _NOLIBC_TYPES_H */
diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
new file mode 100644 (file)
index 0000000..1c25e20
--- /dev/null
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * unistd function definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_UNISTD_H
+#define _NOLIBC_UNISTD_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+
+static __attribute__((unused))
+int msleep(unsigned int msecs)
+{
+       struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
+
+       if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+               return (my_timeval.tv_sec * 1000) +
+                       (my_timeval.tv_usec / 1000) +
+                       !!(my_timeval.tv_usec % 1000);
+       else
+               return 0;
+}
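
(Aside: on Linux, select() updates the timeout it was given, which is what lets
msleep() above report how much time was left when it returns early:)

    static int example_msleep(void)
    {
            int left = msleep(250);

            /* left == 0 when the full delay elapsed, else roughly the ms left */
            return left;
    }
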
+
+static __attribute__((unused))
+unsigned int sleep(unsigned int seconds)
+{
+       struct timeval my_timeval = { seconds, 0 };
+
+       if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+               return my_timeval.tv_sec + !!my_timeval.tv_usec;
+       else
+               return 0;
+}
+
+static __attribute__((unused))
+int usleep(unsigned int usecs)
+{
+       struct timeval my_timeval = { usecs / 1000000, usecs % 1000000 };
+
+       return sys_select(0, 0, 0, 0, &my_timeval);
+}
+
+static __attribute__((unused))
+int tcsetpgrp(int fd, pid_t pid)
+{
+       return ioctl(fd, TIOCSPGRP, &pid);
+}
+
+#endif /* _NOLIBC_UNISTD_H */
index 91a6fe4e02c08c4b6ac6f1fb91f8f9e3fce85c0f..6a184d260c7f2e17d05831e702410175b18e550e 100644 (file)
@@ -445,7 +445,13 @@ struct kvm_run {
 #define KVM_SYSTEM_EVENT_RESET          2
 #define KVM_SYSTEM_EVENT_CRASH          3
                        __u32 type;
-                       __u64 flags;
+                       __u32 ndata;
+                       union {
+#ifndef __KERNEL__
+                               __u64 flags;
+#endif
+                               __u64 data[16];
+                       };
                } system_event;
                /* KVM_EXIT_S390_STSI */
                struct {
@@ -1144,6 +1150,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_MEM_OP_EXTENSION 211
 #define KVM_CAP_PMU_CAPABILITY 212
 #define KVM_CAP_DISABLE_QUIRKS2 213
+/* #define KVM_CAP_VM_TSC_CONTROL 214 */
+#define KVM_CAP_SYSTEM_EVENT_DATA 215
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
index c998860d7bbc4351c37c702ea69ea88a814b19cf..5d99e7c242a25e11dc579e059564aec4989b3387 100644 (file)
 /* Get the valid iova range */
 #define VHOST_VDPA_GET_IOVA_RANGE      _IOR(VHOST_VIRTIO, 0x78, \
                                             struct vhost_vdpa_iova_range)
+
+/* Get the config size */
+#define VHOST_VDPA_GET_CONFIG_SIZE     _IOR(VHOST_VIRTIO, 0x79, __u32)
+
+/* Get the count of all virtqueues */
+#define VHOST_VDPA_GET_VQS_COUNT       _IOR(VHOST_VIRTIO, 0x80, __u32)
+
 #endif
index 1b15ba13c477865b1c5e73edc602e84a859e778c..a09315538a303b788dd3d7107717d6aa3ffc2237 100644 (file)
@@ -577,7 +577,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
 {
        struct perf_evsel *evsel;
        const struct perf_cpu_map *cpus = evlist->user_requested_cpus;
-       const struct perf_thread_map *threads = evlist->threads;
 
        if (!ops || !ops->get || !ops->mmap)
                return -EINVAL;
@@ -589,7 +588,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
        perf_evlist__for_each_entry(evlist, evsel) {
                if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
                    evsel->sample_id == NULL &&
-                   perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
+                   perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0)
                        return -ENOMEM;
        }
 
index 9edd402704c4f2870d448a1d3f4c3b9f15d7962d..dab38904206a0ba0fea7ccd10469443fec1f396c 100644 (file)
@@ -54,7 +54,8 @@ klitmus7 Compatibility Table
             -- 4.14  7.48 --
        4.15 -- 4.19  7.49 --
        4.20 -- 5.5   7.54 --
-       5.6  --       7.56 --
+       5.6  -- 5.16  7.56 --
+       5.17 --       7.56.1 --
        ============  ==========
 
 
index 6de5085e3e5a948454d1b962c03137450dce5a6f..ca5b746030089c35c9c55788c75315de71aefb08 100644 (file)
@@ -184,6 +184,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
                "do_group_exit",
                "stop_this_cpu",
                "__invalid_creds",
+               "cpu_startup_entry",
        };
 
        if (!func)
@@ -559,12 +560,12 @@ static int add_dead_ends(struct objtool_file *file)
                else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
                        insn = find_last_insn(file, reloc->sym->sec);
                        if (!insn) {
-                               WARN("can't find unreachable insn at %s+0x%x",
+                               WARN("can't find unreachable insn at %s+0x%lx",
                                     reloc->sym->sec->name, reloc->addend);
                                return -1;
                        }
                } else {
-                       WARN("can't find unreachable insn at %s+0x%x",
+                       WARN("can't find unreachable insn at %s+0x%lx",
                             reloc->sym->sec->name, reloc->addend);
                        return -1;
                }
@@ -594,12 +595,12 @@ reachable:
                else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
                        insn = find_last_insn(file, reloc->sym->sec);
                        if (!insn) {
-                               WARN("can't find reachable insn at %s+0x%x",
+                               WARN("can't find reachable insn at %s+0x%lx",
                                     reloc->sym->sec->name, reloc->addend);
                                return -1;
                        }
                } else {
-                       WARN("can't find reachable insn at %s+0x%x",
+                       WARN("can't find reachable insn at %s+0x%lx",
                             reloc->sym->sec->name, reloc->addend);
                        return -1;
                }
@@ -1155,6 +1156,17 @@ static void annotate_call_site(struct objtool_file *file,
                                       : arch_nop_insn(insn->len));
 
                insn->type = sibling ? INSN_RETURN : INSN_NOP;
+
+               if (sibling) {
+                       /*
+                        * We've replaced the tail-call JMP insn by two new
+                        * insns: RET; INT3, except we only have a single struct
+                        * insn here. Mark it retpoline_safe to avoid the SLS
+                        * warning, instead of adding another insn.
+                        */
+                       insn->retpoline_safe = true;
+               }
+
                return;
        }
 
@@ -1239,11 +1251,20 @@ static bool same_function(struct instruction *insn1, struct instruction *insn2)
        return insn1->func->pfunc == insn2->func->pfunc;
 }
 
-static bool is_first_func_insn(struct instruction *insn)
+static bool is_first_func_insn(struct objtool_file *file, struct instruction *insn)
 {
-       return insn->offset == insn->func->offset ||
-              (insn->type == INSN_ENDBR &&
-               insn->offset == insn->func->offset + insn->len);
+       if (insn->offset == insn->func->offset)
+               return true;
+
+       if (ibt) {
+               struct instruction *prev = prev_insn_same_sym(file, insn);
+
+               if (prev && prev->type == INSN_ENDBR &&
+                   insn->offset == insn->func->offset + prev->len)
+                       return true;
+       }
+
+       return false;
 }
 
 /*
@@ -1251,12 +1272,19 @@ static bool is_first_func_insn(struct instruction *insn)
  */
 static int add_jump_destinations(struct objtool_file *file)
 {
-       struct instruction *insn;
+       struct instruction *insn, *jump_dest;
        struct reloc *reloc;
        struct section *dest_sec;
        unsigned long dest_off;
 
        for_each_insn(file, insn) {
+               if (insn->jump_dest) {
+                       /*
+                        * handle_group_alt() may have previously set
+                        * 'jump_dest' for some alternatives.
+                        */
+                       continue;
+               }
                if (!is_static_jump(insn))
                        continue;
 
@@ -1271,7 +1299,10 @@ static int add_jump_destinations(struct objtool_file *file)
                        add_retpoline_call(file, insn);
                        continue;
                } else if (insn->func) {
-                       /* internal or external sibling call (with reloc) */
+                       /*
+                        * External sibling call or internal sibling call with
+                        * STT_FUNC reloc.
+                        */
                        add_call_dest(file, insn, reloc->sym, true);
                        continue;
                } else if (reloc->sym->sec->idx) {
@@ -1283,17 +1314,8 @@ static int add_jump_destinations(struct objtool_file *file)
                        continue;
                }
 
-               insn->jump_dest = find_insn(file, dest_sec, dest_off);
-               if (!insn->jump_dest) {
-
-                       /*
-                        * This is a special case where an alt instruction
-                        * jumps past the end of the section.  These are
-                        * handled later in handle_group_alt().
-                        */
-                       if (!strcmp(insn->sec->name, ".altinstr_replacement"))
-                               continue;
-
+               jump_dest = find_insn(file, dest_sec, dest_off);
+               if (!jump_dest) {
                        WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
                                  insn->sec, insn->offset, dest_sec->name,
                                  dest_off);
@@ -1303,8 +1325,8 @@ static int add_jump_destinations(struct objtool_file *file)
                /*
                 * Cross-function jump.
                 */
-               if (insn->func && insn->jump_dest->func &&
-                   insn->func != insn->jump_dest->func) {
+               if (insn->func && jump_dest->func &&
+                   insn->func != jump_dest->func) {
 
                        /*
                         * For GCC 8+, create parent/child links for any cold
@@ -1322,16 +1344,22 @@ static int add_jump_destinations(struct objtool_file *file)
                         * subfunction is through a jump table.
                         */
                        if (!strstr(insn->func->name, ".cold") &&
-                           strstr(insn->jump_dest->func->name, ".cold")) {
-                               insn->func->cfunc = insn->jump_dest->func;
-                               insn->jump_dest->func->pfunc = insn->func;
+                           strstr(jump_dest->func->name, ".cold")) {
+                               insn->func->cfunc = jump_dest->func;
+                               jump_dest->func->pfunc = insn->func;
 
-                       } else if (!same_function(insn, insn->jump_dest) &&
-                                  is_first_func_insn(insn->jump_dest)) {
-                               /* internal sibling call (without reloc) */
-                               add_call_dest(file, insn, insn->jump_dest->func, true);
+                       } else if (!same_function(insn, jump_dest) &&
+                                  is_first_func_insn(file, jump_dest)) {
+                               /*
+                                * Internal sibling call without reloc or with
+                                * STT_SECTION reloc.
+                                */
+                               add_call_dest(file, insn, jump_dest->func, true);
+                               continue;
                        }
                }
+
+               insn->jump_dest = jump_dest;
        }
 
        return 0;
@@ -1520,13 +1548,13 @@ static int handle_group_alt(struct objtool_file *file,
                        continue;
 
                dest_off = arch_jump_destination(insn);
-               if (dest_off == special_alt->new_off + special_alt->new_len)
+               if (dest_off == special_alt->new_off + special_alt->new_len) {
                        insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
-
-               if (!insn->jump_dest) {
-                       WARN_FUNC("can't find alternative jump destination",
-                                 insn->sec, insn->offset);
-                       return -1;
+                       if (!insn->jump_dest) {
+                               WARN_FUNC("can't find alternative jump destination",
+                                         insn->sec, insn->offset);
+                               return -1;
+                       }
                }
        }
 
@@ -2225,14 +2253,14 @@ static int decode_sections(struct objtool_file *file)
                return ret;
 
        /*
-        * Must be before add_special_section_alts() as that depends on
-        * jump_dest being set.
+        * Must be before add_jump_destinations(), which depends on 'func'
+        * being set for alternatives, to enable proper sibling call detection.
         */
-       ret = add_jump_destinations(file);
+       ret = add_special_section_alts(file);
        if (ret)
                return ret;
 
-       ret = add_special_section_alts(file);
+       ret = add_jump_destinations(file);
        if (ret)
                return ret;
 
@@ -3190,9 +3218,8 @@ validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc)
 static void warn_noendbr(const char *msg, struct section *sec, unsigned long offset,
                         struct instruction *dest)
 {
-       WARN_FUNC("%srelocation to !ENDBR: %s+0x%lx", sec, offset, msg,
-                 dest->func ? dest->func->name : dest->sec->name,
-                 dest->func ? dest->offset - dest->func->offset : dest->offset);
+       WARN_FUNC("%srelocation to !ENDBR: %s", sec, offset, msg,
+                 offstr(dest->sec, dest->offset));
 }
 
 static void validate_ibt_dest(struct objtool_file *file, struct instruction *insn,
@@ -3283,7 +3310,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
        while (1) {
                next_insn = next_insn_to_validate(file, insn);
 
-               if (file->c_file && func && insn->func && func != insn->func->pfunc) {
+               if (func && insn->func && func != insn->func->pfunc) {
                        WARN("%s() falls through to next function %s()",
                             func->name, insn->func->name);
                        return 1;
@@ -3796,11 +3823,8 @@ static int validate_ibt(struct objtool_file *file)
                        struct instruction *dest;
 
                        dest = validate_ibt_reloc(file, reloc);
-                       if (is_data && dest && !dest->noendbr) {
-                               warn_noendbr("data ", reloc->sym->sec,
-                                            reloc->sym->offset + reloc->addend,
-                                            dest);
-                       }
+                       if (is_data && dest && !dest->noendbr)
+                               warn_noendbr("data ", sec, reloc->offset, dest);
                }
        }
 
index d7b99a737496bdda1458bf56a5ae074d8407d802..ebf2ba5755c1e1a115dbf06f09139a73ae4e8f8f 100644 (file)
@@ -546,7 +546,7 @@ static struct section *elf_create_reloc_section(struct elf *elf,
                                                int reltype);
 
 int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
-                 unsigned int type, struct symbol *sym, int addend)
+                 unsigned int type, struct symbol *sym, long addend)
 {
        struct reloc *reloc;
 
@@ -575,37 +575,180 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
        return 0;
 }
 
-int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
-                         unsigned long offset, unsigned int type,
-                         struct section *insn_sec, unsigned long insn_off)
+/*
+ * Ensure that any reloc section containing references to @sym is marked
+ * changed such that it will get re-generated in elf_rebuild_reloc_sections()
+ * with the new symbol index.
+ */
+static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym)
+{
+       struct section *sec;
+
+       list_for_each_entry(sec, &elf->sections, list) {
+               struct reloc *reloc;
+
+               if (sec->changed)
+                       continue;
+
+               list_for_each_entry(reloc, &sec->reloc_list, list) {
+                       if (reloc->sym == sym) {
+                               sec->changed = true;
+                               break;
+                       }
+               }
+       }
+}
+
+/*
+ * Move the first global symbol, as per sh_info, into a new, higher symbol
+ * index. This frees up a symbol index for a new local symbol.
+ */
+static int elf_move_global_symbol(struct elf *elf, struct section *symtab,
+                                 struct section *symtab_shndx)
 {
+       Elf_Data *data, *shndx_data = NULL;
+       Elf32_Word first_non_local;
        struct symbol *sym;
-       int addend;
+       Elf_Scn *s;
 
-       if (insn_sec->sym) {
-               sym = insn_sec->sym;
-               addend = insn_off;
+       first_non_local = symtab->sh.sh_info;
 
-       } else {
-               /*
-                * The Clang assembler strips section symbols, so we have to
-                * reference the function symbol instead:
-                */
-               sym = find_symbol_containing(insn_sec, insn_off);
-               if (!sym) {
-                       /*
-                        * Hack alert.  This happens when we need to reference
-                        * the NOP pad insn immediately after the function.
-                        */
-                       sym = find_symbol_containing(insn_sec, insn_off - 1);
+       sym = find_symbol_by_index(elf, first_non_local);
+       if (!sym) {
+               WARN("no non-local symbols !?");
+               return first_non_local;
+       }
+
+       s = elf_getscn(elf->elf, symtab->idx);
+       if (!s) {
+               WARN_ELF("elf_getscn");
+               return -1;
+       }
+
+       data = elf_newdata(s);
+       if (!data) {
+               WARN_ELF("elf_newdata");
+               return -1;
+       }
+
+       data->d_buf = &sym->sym;
+       data->d_size = sizeof(sym->sym);
+       data->d_align = 1;
+       data->d_type = ELF_T_SYM;
+
+       sym->idx = symtab->sh.sh_size / sizeof(sym->sym);
+       elf_dirty_reloc_sym(elf, sym);
+
+       symtab->sh.sh_info += 1;
+       symtab->sh.sh_size += data->d_size;
+       symtab->changed = true;
+
+       if (symtab_shndx) {
+               s = elf_getscn(elf->elf, symtab_shndx->idx);
+               if (!s) {
+                       WARN_ELF("elf_getscn");
+                       return -1;
                }
 
-               if (!sym) {
-                       WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off);
+               shndx_data = elf_newdata(s);
+               if (!shndx_data) {
+                       WARN_ELF("elf_newshndx_data");
                        return -1;
                }
 
-               addend = insn_off - sym->offset;
+               shndx_data->d_buf = &sym->sec->idx;
+               shndx_data->d_size = sizeof(Elf32_Word);
+               shndx_data->d_align = 4;
+               shndx_data->d_type = ELF_T_WORD;
+
+               symtab_shndx->sh.sh_size += 4;
+               symtab_shndx->changed = true;
+       }
+
+       return first_non_local;
+}
+
+static struct symbol *
+elf_create_section_symbol(struct elf *elf, struct section *sec)
+{
+       struct section *symtab, *symtab_shndx;
+       Elf_Data *shndx_data = NULL;
+       struct symbol *sym;
+       Elf32_Word shndx;
+
+       symtab = find_section_by_name(elf, ".symtab");
+       if (symtab) {
+               symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+               if (symtab_shndx)
+                       shndx_data = symtab_shndx->data;
+       } else {
+               WARN("no .symtab");
+               return NULL;
+       }
+
+       sym = malloc(sizeof(*sym));
+       if (!sym) {
+               perror("malloc");
+               return NULL;
+       }
+       memset(sym, 0, sizeof(*sym));
+
+       sym->idx = elf_move_global_symbol(elf, symtab, symtab_shndx);
+       if (sym->idx < 0) {
+               WARN("elf_move_global_symbol");
+               return NULL;
+       }
+
+       sym->name = sec->name;
+       sym->sec = sec;
+
+       // st_name 0
+       sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
+       // st_other 0
+       // st_value 0
+       // st_size 0
+       shndx = sec->idx;
+       if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) {
+               sym->sym.st_shndx = shndx;
+               if (!shndx_data)
+                       shndx = 0;
+       } else {
+               sym->sym.st_shndx = SHN_XINDEX;
+               if (!shndx_data) {
+                       WARN("no .symtab_shndx");
+                       return NULL;
+               }
+       }
+
+       if (!gelf_update_symshndx(symtab->data, shndx_data, sym->idx, &sym->sym, shndx)) {
+               WARN_ELF("gelf_update_symshndx");
+               return NULL;
+       }
+
+       elf_add_symbol(elf, sym);
+
+       return sym;
+}
+
+int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+                         unsigned long offset, unsigned int type,
+                         struct section *insn_sec, unsigned long insn_off)
+{
+       struct symbol *sym = insn_sec->sym;
+       int addend = insn_off;
+
+       if (!sym) {
+               /*
+                * Due to how weak functions work, we must use section based
+                * relocations. Symbol based relocations would result in the
+                * weak and non-weak function annotations being overlaid on the
+                * non-weak function after linking.
+                */
+               sym = elf_create_section_symbol(elf, insn_sec);
+               if (!sym)
+                       return -1;
+
+               insn_sec->sym = sym;
        }
 
        return elf_add_reloc(elf, sec, offset, type, sym, addend);
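
The symbol shuffling above relies on an ELF layout rule: all STB_LOCAL symbols must precede the globals, and the symbol table's sh_info field holds the index of the first non-local symbol. Moving one global to a higher index therefore opens a local slot at the old boundary. A minimal standalone check of that boundary, using plain libelf rather than objtool's wrappers (a sketch, compile with -lelf and pass any object file):

    #include <fcntl.h>
    #include <gelf.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            Elf_Scn *scn = NULL;
            GElf_Shdr shdr;
            Elf *elf;
            int fd;

            if (argc != 2)
                    return 1;
            fd = open(argv[1], O_RDONLY);
            if (fd < 0 || elf_version(EV_CURRENT) == EV_NONE)
                    return 1;

            elf = elf_begin(fd, ELF_C_READ, NULL);
            if (!elf)
                    return 1;
            while ((scn = elf_nextscn(elf, scn))) {
                    if (gelf_getshdr(scn, &shdr) && shdr.sh_type == SHT_SYMTAB)
                            /* sh_info = index of the first non-local symbol */
                            printf("first global at index %u\n", shdr.sh_info);
            }
            elf_end(elf);
            close(fd);
            return 0;
    }
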
index 22ba7e2b816e1388d6de0ed000534537c2e4b457..9b36802ed86f605606aaa674f19cb79121425a2e 100644 (file)
@@ -73,7 +73,7 @@ struct reloc {
        struct symbol *sym;
        unsigned long offset;
        unsigned int type;
-       int addend;
+       long addend;
        int idx;
        bool jump_table_start;
 };
@@ -135,7 +135,7 @@ struct elf *elf_open_read(const char *name, int flags);
 struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
 
 int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
-                 unsigned int type, struct symbol *sym, int addend);
+                 unsigned int type, struct symbol *sym, long addend);
 int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
                          unsigned long offset, unsigned int type,
                          struct section *insn_sec, unsigned long insn_off);
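
The int-to-long widening of 'addend' matters on 64-bit targets: a 64-bit relocation can legitimately carry an addend beyond INT_MAX, and an int field truncates it silently. A self-contained illustration of the failure mode (assuming an LP64 target, where long is 64-bit):

    #include <stdio.h>

    int main(void)
    {
            long addend = 0x1ffffffffL;   /* plausible for a 64-bit reloc */
            int truncated = (int)addend;  /* what the old 'int addend' kept */

            /* typically prints "long: 0x1ffffffff, int: 0xffffffff" */
            printf("long: %#lx, int: %#x\n", addend, truncated);
            return 0;
    }
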
index 7a5c13a78f87d612537f35e25112738403fb5e45..a6e72d916807d4e7e18f6c1b915a78afeab84cdc 100644 (file)
@@ -27,7 +27,7 @@ struct objtool_file {
        struct list_head static_call_list;
        struct list_head mcount_loc_list;
        struct list_head endbr_list;
-       bool ignore_unreachables, c_file, hints, rodata;
+       bool ignore_unreachables, hints, rodata;
 
        unsigned int nr_endbr;
        unsigned int nr_endbr_int;
index b09946f4e1d64279edc3224049b0b55eb22e3a69..843ff3c2f28e4aa6e08c09a8a527aa2ab5aafed7 100644 (file)
@@ -129,7 +129,6 @@ struct objtool_file *objtool_open_read(const char *_objname)
        INIT_LIST_HEAD(&file.static_call_list);
        INIT_LIST_HEAD(&file.mcount_loc_list);
        INIT_LIST_HEAD(&file.endbr_list);
-       file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
        file.ignore_unreachables = no_unreachable;
        file.hints = false;
 
index 9c330cdfa973abac7fb25f23f48cb1eeffee70b2..71ebdf8125de31b7c0c0be39c4d3762fa6722ff0 100644 (file)
@@ -83,7 +83,7 @@ linkperf:perf-buildid-list[1], linkperf:perf-c2c[1],
 linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1],
 linkperf:perf-evlist[1], linkperf:perf-ftrace[1],
 linkperf:perf-help[1], linkperf:perf-inject[1],
-linkperf:perf-intel-pt[1], linkperf:perf-kallsyms[1],
+linkperf:perf-intel-pt[1], linkperf:perf-iostat[1], linkperf:perf-kallsyms[1],
 linkperf:perf-kmem[1], linkperf:perf-kvm[1], linkperf:perf-lock[1],
 linkperf:perf-mem[1], linkperf:perf-probe[1], linkperf:perf-sched[1],
 linkperf:perf-script[1], linkperf:perf-test[1],
index 96ad944ca6a885cdb1a6c2dd1cd63d33026c2c8b..1bd64e7404b9fbd473c45fafbf15236a3b229246 100644 (file)
@@ -272,6 +272,9 @@ ifdef PYTHON_CONFIG
   PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
   PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
   FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
+  ifeq ($(CC_NO_CLANG), 0)
+    PYTHON_EMBED_CCOPTS := $(filter-out -ffat-lto-objects, $(PYTHON_EMBED_CCOPTS))
+  endif
 endif
 
 FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
@@ -550,9 +553,16 @@ ifndef NO_LIBELF
         ifeq ($(feature-libbpf), 1)
           EXTLIBS += -lbpf
           $(call detected,CONFIG_LIBBPF_DYNAMIC)
+
+          $(call feature_check,libbpf-btf__load_from_kernel_by_id)
+          ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1)
+            CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+          endif
         else
           dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
         endif
+      else
+       CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
       endif
     endif
 
@@ -790,6 +800,9 @@ else
     LDFLAGS += $(PERL_EMBED_LDFLAGS)
     EXTLIBS += $(PERL_EMBED_LIBADD)
     CFLAGS += -DHAVE_LIBPERL_SUPPORT
+    ifeq ($(CC_NO_CLANG), 0)
+      CFLAGS += -Wno-compound-token-split-by-macro
+    endif
     $(call detected,CONFIG_LIBPERL)
   endif
 endif
index 86e2e926aa0e16a3b078850b7dbd875261f73eb7..e8b577d33e531a9bf5d89faac10afb7f5114407d 100644 (file)
@@ -148,6 +148,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
        bool privileged = perf_event_paranoid_check(-1);
        struct evsel *tracking_evsel;
        int err;
+       u64 bit;
 
        sper->evlist = evlist;
 
@@ -239,6 +240,21 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
                arm_spe_set_timestamp(itr, arm_spe_evsel);
        }
 
+       /*
+        * Set this only so that perf report knows that SPE generates memory info. It has no effect
+        * on the opening of the event or the SPE data produced.
+        */
+       evsel__set_sample_bit(arm_spe_evsel, DATA_SRC);
+
+       /*
+        * The PHYS_ADDR flag does not affect the driver behaviour; it is used
+        * to indicate that the SPE samples in the resulting output contain
+        * physical addresses where applicable.
+        */
+       bit = perf_pmu__format_bits(&arm_spe_pmu->format, "pa_enable");
+       if (arm_spe_evsel->core.attr.config & bit)
+               evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);
+
        /* Add dummy event to keep tracking */
        err = parse_events(evlist, "dummy:u", NULL);
        if (err)
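
Stripped of the perf-internal evsel API, the two sample bits being set correspond to plain perf_event_attr flags; a minimal sketch of the equivalent raw setup (illustrative only, not the tool code):

    #include <linux/perf_event.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            /* samples carry memory info (data source) ... */
            attr.sample_type |= PERF_SAMPLE_DATA_SRC;
            /* ... and a physical address, when the PMU records one */
            attr.sample_type |= PERF_SAMPLE_PHYS_ADDR;

            printf("sample_type = %#llx\n", attr.sample_type);
            return 0;
    }
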
index d2ce31e28cd797763edd7aec0d7382f540dabe89..41c1596e520712a9ca690285146f1dcff6b40610 100644 (file)
@@ -8,27 +8,6 @@
 #include "callchain.h"
 #include "record.h"
 
-/* On arm64, kernel text segment starts at high memory address,
- * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
- * address, like 0xffff 0000 00ax xxxx. When only small amount of
- * memory is used by modules, gap between end of module's text segment
- * and start of kernel text segment may reach 2G.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-
-#define SYMBOL_LIMIT (1 << 12) /* 4K */
-
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
-       if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) ||
-                       (strchr(p->name, '[') == NULL && strchr(c->name, '[')))
-               /* Limit range of last symbol in module and kernel */
-               p->end += SYMBOL_LIMIT;
-       else
-               p->end = c->start;
-       pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
-
 void arch__add_leaf_frame_record_opts(struct record_opts *opts)
 {
        opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask;
index 8a79c4126e5b4b9e3c29b7a2f5432836007e3bf9..0115f3166568415c0b15d53939008e799913bd80 100644 (file)
@@ -1,5 +1,4 @@
 perf-y += header.o
-perf-y += machine.o
 perf-y += kvm-stat.o
 perf-y += perf_regs.o
 perf-y += mem-events.o
diff --git a/tools/perf/arch/powerpc/util/machine.c b/tools/perf/arch/powerpc/util/machine.c
deleted file mode 100644 (file)
index e652a1a..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <string.h>
-#include <internal/lib.h> // page_size
-#include "debug.h"
-#include "symbol.h"
-
-/* On powerpc kernel text segment start at memory addresses, 0xc000000000000000
- * whereas the modules are located at very high memory addresses,
- * for example 0xc00800000xxxxxxx. The gap between end of kernel text segment
- * and beginning of first module's text segment is very high.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
-       if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
-               /* Limit the range of last kernel symbol */
-               p->end += page_size;
-       else
-               p->end = c->start;
-       pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
index 7644a4f6d4a40384980c108eba52e6a9b3b2e209..98bc3f39d5f35e7a06df7a6b22e4af8d0564fb35 100644 (file)
@@ -35,19 +35,3 @@ int arch__fix_module_text_start(u64 *start, u64 *size, const char *name)
 
        return 0;
 }
-
-/* On s390 kernel text segment start is located at very low memory addresses,
- * for example 0x10000. Modules are located at very high memory addresses,
- * for example 0x3ff xxxx xxxx. The gap between end of kernel text segment
- * and beginning of first module's text segment is very big.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
-       if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
-               /* Last kernel symbol mapped to end of page */
-               p->end = roundup(p->end, page_size);
-       else
-               p->end = c->start;
-       pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
index 207c56805c551dec7ed651bab32d39cef39f86ec..0ed177991ad05e42f07efbe25c285d56806a6d67 100644 (file)
@@ -9,6 +9,8 @@
 #include "../../../util/perf_regs.h"
 #include "../../../util/debug.h"
 #include "../../../util/event.h"
+#include "../../../util/pmu.h"
+#include "../../../util/pmu-hybrid.h"
 
 const struct sample_reg sample_reg_masks[] = {
        SMPL_REG(AX, PERF_REG_X86_AX),
@@ -284,12 +286,22 @@ uint64_t arch__intr_reg_mask(void)
                .disabled               = 1,
                .exclude_kernel         = 1,
        };
+       struct perf_pmu *pmu;
        int fd;
        /*
         * In an unnamed union, init it here to build on older gcc versions
         */
        attr.sample_period = 1;
 
+       if (perf_pmu__has_hybrid()) {
+               /*
+                * The same register set is supported among different hybrid PMUs.
+                * Only check the first available one.
+                */
+               pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list);
+               attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+       }
+
        event_attr_init(&attr);
 
        fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
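
The PERF_PMU_TYPE_SHIFT packing places the PMU type in the upper 32 bits of attr.config, which is how hybrid systems disambiguate which core type an event targets. A hedged sketch of the encoding (the 32-bit shift and the PMU id are assumptions for illustration):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long config = 0;  /* e.g. a HW event encoding */
            unsigned int pmu_type = 8;      /* hypothetical cpu_core PMU id */

            /* type goes in config[63:32]; the event stays in config[31:0] */
            config |= (unsigned long long)pmu_type << 32;
            printf("config = %#llx\n", config);  /* 0x800000000 */
            return 0;
    }
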
index 134612bde0cb3c0c9f706e58dd24b5ce64ab6b74..4256dc5d6236d4aeef59b02cc2cbcfa3dec4c26b 100644 (file)
@@ -222,13 +222,20 @@ static void init_fdmaps(struct worker *w, int pct)
 static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
 {
        pthread_attr_t thread_attr, *attrp = NULL;
-       cpu_set_t cpuset;
+       cpu_set_t *cpuset;
        unsigned int i, j;
        int ret = 0;
+       int nrcpus;
+       size_t size;
 
        if (!noaffinity)
                pthread_attr_init(&thread_attr);
 
+       nrcpus = perf_cpu_map__nr(cpu);
+       cpuset = CPU_ALLOC(nrcpus);
+       BUG_ON(!cpuset);
+       size = CPU_ALLOC_SIZE(nrcpus);
+
        for (i = 0; i < nthreads; i++) {
                struct worker *w = &worker[i];
 
@@ -252,22 +259,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
                        init_fdmaps(w, 50);
 
                if (!noaffinity) {
-                       CPU_ZERO(&cpuset);
-                       CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+                       CPU_ZERO_S(size, cpuset);
+                       CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu,
+                                       size, cpuset);
 
-                       ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
-                       if (ret)
+                       ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
+                       if (ret) {
+                               CPU_FREE(cpuset);
                                err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+                       }
 
                        attrp = &thread_attr;
                }
 
                ret = pthread_create(&w->thread, attrp, workerfn,
                                     (void *)(struct worker *) w);
-               if (ret)
+               if (ret) {
+                       CPU_FREE(cpuset);
                        err(EXIT_FAILURE, "pthread_create");
+               }
        }
 
+       CPU_FREE(cpuset);
        if (!noaffinity)
                pthread_attr_destroy(&thread_attr);
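
The same conversion recurs throughout the futex and epoll benches below: a fixed cpu_set_t only covers CPU_SETSIZE (1024) CPUs, so the code switches to glibc's dynamically sized sets, passing CPU_ALLOC_SIZE() instead of sizeof(cpu_set_t) and freeing the set on every exit path. A standalone demonstration of the pattern (it pins the calling thread to CPU 0, assumed online):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int nrcpus = (int)sysconf(_SC_NPROCESSORS_CONF);
            cpu_set_t *set = CPU_ALLOC(nrcpus);    /* sized for nrcpus CPUs */
            size_t size = CPU_ALLOC_SIZE(nrcpus);  /* pass this to the kernel */

            if (!set)
                    return 1;
            CPU_ZERO_S(size, set);
            CPU_SET_S(0, size, set);
            if (sched_setaffinity(0, size, set))
                    perror("sched_setaffinity");
            CPU_FREE(set);                         /* free on all paths */
            return 0;
    }
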
 
index 37de970c97437ccb983c2154a250d3891c867546..2728b0140853fd6a3e89b90317e0944b0140df40 100644 (file)
@@ -291,9 +291,11 @@ static void print_summary(void)
 static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
 {
        pthread_attr_t thread_attr, *attrp = NULL;
-       cpu_set_t cpuset;
+       cpu_set_t *cpuset;
        unsigned int i, j;
        int ret = 0, events = EPOLLIN;
+       int nrcpus;
+       size_t size;
 
        if (oneshot)
                events |= EPOLLONESHOT;
@@ -306,6 +308,11 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
        if (!noaffinity)
                pthread_attr_init(&thread_attr);
 
+       nrcpus = perf_cpu_map__nr(cpu);
+       cpuset = CPU_ALLOC(nrcpus);
+       BUG_ON(!cpuset);
+       size = CPU_ALLOC_SIZE(nrcpus);
+
        for (i = 0; i < nthreads; i++) {
                struct worker *w = &worker[i];
 
@@ -341,22 +348,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
                }
 
                if (!noaffinity) {
-                       CPU_ZERO(&cpuset);
-                       CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+                       CPU_ZERO_S(size, cpuset);
+                       CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu,
+                                       size, cpuset);
 
-                       ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
-                       if (ret)
+                       ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
+                       if (ret) {
+                               CPU_FREE(cpuset);
                                err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+                       }
 
                        attrp = &thread_attr;
                }
 
                ret = pthread_create(&w->thread, attrp, workerfn,
                                     (void *)(struct worker *) w);
-               if (ret)
+               if (ret) {
+                       CPU_FREE(cpuset);
                        err(EXIT_FAILURE, "pthread_create");
+               }
        }
 
+       CPU_FREE(cpuset);
        if (!noaffinity)
                pthread_attr_destroy(&thread_attr);
 
index dbcecec4eedacec29b9f2c88654c7a0d8395fd51..f05db4cf983d6e0c8e32ab7920a6a0865002c870 100644 (file)
@@ -122,12 +122,14 @@ static void print_summary(void)
 int bench_futex_hash(int argc, const char **argv)
 {
        int ret = 0;
-       cpu_set_t cpuset;
+       cpu_set_t *cpuset;
        struct sigaction act;
        unsigned int i;
        pthread_attr_t thread_attr;
        struct worker *worker = NULL;
        struct perf_cpu_map *cpu;
+       int nrcpus;
+       size_t size;
 
        argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
        if (argc) {
@@ -170,25 +172,35 @@ int bench_futex_hash(int argc, const char **argv)
        threads_starting = params.nthreads;
        pthread_attr_init(&thread_attr);
        gettimeofday(&bench__start, NULL);
+
+       nrcpus = perf_cpu_map__nr(cpu);
+       cpuset = CPU_ALLOC(nrcpus);
+       BUG_ON(!cpuset);
+       size = CPU_ALLOC_SIZE(nrcpus);
+
        for (i = 0; i < params.nthreads; i++) {
                worker[i].tid = i;
                worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex));
                if (!worker[i].futex)
                        goto errmem;
 
-               CPU_ZERO(&cpuset);
-               CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+               CPU_ZERO_S(size, cpuset);
 
-               ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
-               if (ret)
+               CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
+               ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
+               if (ret) {
+                       CPU_FREE(cpuset);
                        err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
-
+               }
                ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
                                     (void *)(struct worker *) &worker[i]);
-               if (ret)
+               if (ret) {
+                       CPU_FREE(cpuset);
                        err(EXIT_FAILURE, "pthread_create");
+               }
 
        }
+       CPU_FREE(cpuset);
        pthread_attr_destroy(&thread_attr);
 
        pthread_mutex_lock(&thread_lock);
index 6fc9a3d55c1f768da88b33f2a88e23f4bfab84d9..0abb3f7ee24f78ece65b33c69e2f95461f250d35 100644 (file)
@@ -120,11 +120,17 @@ static void *workerfn(void *arg)
 static void create_threads(struct worker *w, pthread_attr_t thread_attr,
                           struct perf_cpu_map *cpu)
 {
-       cpu_set_t cpuset;
+       cpu_set_t *cpuset;
        unsigned int i;
+       int nrcpus = perf_cpu_map__nr(cpu);
+       size_t size;
 
        threads_starting = params.nthreads;
 
+       cpuset = CPU_ALLOC(nrcpus);
+       BUG_ON(!cpuset);
+       size = CPU_ALLOC_SIZE(nrcpus);
+
        for (i = 0; i < params.nthreads; i++) {
                worker[i].tid = i;
 
@@ -135,15 +141,20 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
                } else
                        worker[i].futex = &global_futex;
 
-               CPU_ZERO(&cpuset);
-               CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+               CPU_ZERO_S(size, cpuset);
+               CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
 
-               if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+               if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
+                       CPU_FREE(cpuset);
                        err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+               }
 
-               if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
+               if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) {
+                       CPU_FREE(cpuset);
                        err(EXIT_FAILURE, "pthread_create");
+               }
        }
+       CPU_FREE(cpuset);
 }
 
 int bench_futex_lock_pi(int argc, const char **argv)
index 2f59d5d1c50968cf906de919e303be1c472aa370..b6faabfafb8eed33d6046c4d74c7c842f23aca17 100644 (file)
@@ -123,22 +123,33 @@ static void *workerfn(void *arg __maybe_unused)
 static void block_threads(pthread_t *w,
                          pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
 {
-       cpu_set_t cpuset;
+       cpu_set_t *cpuset;
        unsigned int i;
+       int nrcpus = perf_cpu_map__nr(cpu);
+       size_t size;
 
        threads_starting = params.nthreads;
 
+       cpuset = CPU_ALLOC(nrcpus);
+       BUG_ON(!cpuset);
+       size = CPU_ALLOC_SIZE(nrcpus);
+
        /* create and block all threads */
        for (i = 0; i < params.nthreads; i++) {
-               CPU_ZERO(&cpuset);
-               CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+               CPU_ZERO_S(size, cpuset);
+               CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
 
-               if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+               if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
+                       CPU_FREE(cpuset);
                        err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+               }
 
-               if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+               if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) {
+                       CPU_FREE(cpuset);
                        err(EXIT_FAILURE, "pthread_create");
+               }
        }
+       CPU_FREE(cpuset);
 }
 
 static void toggle_done(int sig __maybe_unused,
index 861deb934745d279238b2a91c646bbb315274be8..e47f46a3a47e934db6aa875cfb16e6c1482241bf 100644 (file)
@@ -144,22 +144,33 @@ static void *blocked_workerfn(void *arg __maybe_unused)
 static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
                          struct perf_cpu_map *cpu)
 {
-       cpu_set_t cpuset;
+       cpu_set_t *cpuset;
        unsigned int i;
+       int nrcpus = perf_cpu_map__nr(cpu);
+       size_t size;
 
        threads_starting = params.nthreads;
 
+       cpuset = CPU_ALLOC(nrcpus);
+       BUG_ON(!cpuset);
+       size = CPU_ALLOC_SIZE(nrcpus);
+
        /* create and block all threads */
        for (i = 0; i < params.nthreads; i++) {
-               CPU_ZERO(&cpuset);
-               CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+               CPU_ZERO_S(size, cpuset);
+               CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
 
-               if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+               if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
+                       CPU_FREE(cpuset);
                        err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+               }
 
-               if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
+               if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) {
+                       CPU_FREE(cpuset);
                        err(EXIT_FAILURE, "pthread_create");
+               }
        }
+       CPU_FREE(cpuset);
 }
 
 static void print_run(struct thread_data *waking_worker, unsigned int run_num)
index cfda48bef1d72d954a81a71b44bb18a4c9f51efc..201a3555f09a2053fa2e30176faaae25e7003555 100644 (file)
@@ -97,22 +97,32 @@ static void print_summary(void)
 static void block_threads(pthread_t *w,
                          pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
 {
-       cpu_set_t cpuset;
+       cpu_set_t *cpuset;
        unsigned int i;
-
+       size_t size;
+       int nrcpus = perf_cpu_map__nr(cpu);
        threads_starting = params.nthreads;
 
+       cpuset = CPU_ALLOC(nrcpus);
+       BUG_ON(!cpuset);
+       size = CPU_ALLOC_SIZE(nrcpus);
+
        /* create and block all threads */
        for (i = 0; i < params.nthreads; i++) {
-               CPU_ZERO(&cpuset);
-               CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
+               CPU_ZERO_S(size, cpuset);
+               CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
 
-               if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+               if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
+                       CPU_FREE(cpuset);
                        err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+               }
 
-               if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+               if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) {
+                       CPU_FREE(cpuset);
                        err(EXIT_FAILURE, "pthread_create");
+               }
        }
+       CPU_FREE(cpuset);
 }
 
 static void toggle_done(int sig __maybe_unused,
index f2640179ada9edb617bbbd7ad4fab7691f224004..20eed1e53f8092836b35051d392d7bd98b737539 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/numa.h>
 #include <linux/zalloc.h>
 
+#include "../util/header.h"
 #include <numa.h>
 #include <numaif.h>
 
@@ -54,7 +55,7 @@
 
 struct thread_data {
        int                     curr_cpu;
-       cpu_set_t               bind_cpumask;
+       cpu_set_t               *bind_cpumask;
        int                     bind_node;
        u8                      *process_data;
        int                     process_nr;
@@ -266,71 +267,117 @@ static bool node_has_cpus(int node)
        return ret;
 }
 
-static cpu_set_t bind_to_cpu(int target_cpu)
+static cpu_set_t *bind_to_cpu(int target_cpu)
 {
-       cpu_set_t orig_mask, mask;
-       int ret;
+       int nrcpus = numa_num_possible_cpus();
+       cpu_set_t *orig_mask, *mask;
+       size_t size;
 
-       ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
-       BUG_ON(ret);
+       orig_mask = CPU_ALLOC(nrcpus);
+       BUG_ON(!orig_mask);
+       size = CPU_ALLOC_SIZE(nrcpus);
+       CPU_ZERO_S(size, orig_mask);
+
+       if (sched_getaffinity(0, size, orig_mask))
+               goto err_out;
 
-       CPU_ZERO(&mask);
+       mask = CPU_ALLOC(nrcpus);
+       if (!mask)
+               goto err_out;
+
+       CPU_ZERO_S(size, mask);
 
        if (target_cpu == -1) {
                int cpu;
 
                for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-                       CPU_SET(cpu, &mask);
+                       CPU_SET_S(cpu, size, mask);
        } else {
-               BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
-               CPU_SET(target_cpu, &mask);
+               if (target_cpu < 0 || target_cpu >= g->p.nr_cpus)
+                       goto err;
+
+               CPU_SET_S(target_cpu, size, mask);
        }
 
-       ret = sched_setaffinity(0, sizeof(mask), &mask);
-       BUG_ON(ret);
+       if (sched_setaffinity(0, size, mask))
+               goto err;
 
        return orig_mask;
+
+err:
+       CPU_FREE(mask);
+err_out:
+       CPU_FREE(orig_mask);
+
+       /* BUG_ON: mask allocation or sched_{get,set}affinity failed */
+       BUG_ON(-1);
+       return NULL;
 }
 
-static cpu_set_t bind_to_node(int target_node)
+static cpu_set_t *bind_to_node(int target_node)
 {
-       cpu_set_t orig_mask, mask;
+       int nrcpus = numa_num_possible_cpus();
+       size_t size;
+       cpu_set_t *orig_mask, *mask;
        int cpu;
-       int ret;
 
-       ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
-       BUG_ON(ret);
+       orig_mask = CPU_ALLOC(nrcpus);
+       BUG_ON(!orig_mask);
+       size = CPU_ALLOC_SIZE(nrcpus);
+       CPU_ZERO_S(size, orig_mask);
 
-       CPU_ZERO(&mask);
+       if (sched_getaffinity(0, size, orig_mask))
+               goto err_out;
+
+       mask = CPU_ALLOC(nrcpus);
+       if (!mask)
+               goto err_out;
+
+       CPU_ZERO_S(size, mask);
 
        if (target_node == NUMA_NO_NODE) {
                for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-                       CPU_SET(cpu, &mask);
+                       CPU_SET_S(cpu, size, mask);
        } else {
                struct bitmask *cpumask = numa_allocate_cpumask();
 
-               BUG_ON(!cpumask);
+               if (!cpumask)
+                       goto err;
+
                if (!numa_node_to_cpus(target_node, cpumask)) {
                        for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
                                if (numa_bitmask_isbitset(cpumask, cpu))
-                                       CPU_SET(cpu, &mask);
+                                       CPU_SET_S(cpu, size, mask);
                        }
                }
                numa_free_cpumask(cpumask);
        }
 
-       ret = sched_setaffinity(0, sizeof(mask), &mask);
-       BUG_ON(ret);
+       if (sched_setaffinity(0, size, mask))
+               goto err;
 
        return orig_mask;
+
+err:
+       CPU_FREE(mask);
+err_out:
+       CPU_FREE(orig_mask);
+
+       /* BUG_ON: mask allocation or sched_{get,set}affinity failed */
+       BUG_ON(-1);
+       return NULL;
 }
 
-static void bind_to_cpumask(cpu_set_t mask)
+static void bind_to_cpumask(cpu_set_t *mask)
 {
        int ret;
+       size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus());
 
-       ret = sched_setaffinity(0, sizeof(mask), &mask);
-       BUG_ON(ret);
+       ret = sched_setaffinity(0, size, mask);
+       if (ret) {
+               CPU_FREE(mask);
+               BUG_ON(ret);
+       }
 }
 
 static void mempol_restore(void)
@@ -376,7 +423,7 @@ do {                                                        \
 static u8 *alloc_data(ssize_t bytes0, int map_flags,
                      int init_zero, int init_cpu0, int thp, int init_random)
 {
-       cpu_set_t orig_mask;
+       cpu_set_t *orig_mask = NULL;
        ssize_t bytes;
        u8 *buf;
        int ret;
@@ -434,6 +481,7 @@ static u8 *alloc_data(ssize_t bytes0, int map_flags,
        /* Restore affinity: */
        if (init_cpu0) {
                bind_to_cpumask(orig_mask);
+               CPU_FREE(orig_mask);
                mempol_restore();
        }
 
@@ -585,10 +633,16 @@ static int parse_setup_cpu_list(void)
                        return -1;
                }
 
+               if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) {
+                       printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n");
+                       return -1;
+               }
+
                BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
                BUG_ON(bind_cpu_0 > bind_cpu_1);
 
                for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
+                       size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus);
                        int i;
 
                        for (i = 0; i < mul; i++) {
@@ -608,10 +662,15 @@ static int parse_setup_cpu_list(void)
                                        tprintf("%2d", bind_cpu);
                                }
 
-                               CPU_ZERO(&td->bind_cpumask);
+                               td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
+                               BUG_ON(!td->bind_cpumask);
+                               CPU_ZERO_S(size, td->bind_cpumask);
                                for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
-                                       BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
-                                       CPU_SET(cpu, &td->bind_cpumask);
+                                       if (cpu < 0 || cpu >= g->p.nr_cpus) {
+                                               CPU_FREE(td->bind_cpumask);
+                                               BUG_ON(-1);
+                                       }
+                                       CPU_SET_S(cpu, size, td->bind_cpumask);
                                }
                                t++;
                        }
@@ -752,8 +811,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
        return parse_node_list(arg);
 }
 
-#define BIT(x) (1ul << x)
-
 static inline uint32_t lfsr_32(uint32_t lfsr)
 {
        const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
@@ -1241,7 +1298,7 @@ static void *worker_thread(void *__tdata)
                 * by migrating to CPU#0:
                 */
                if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
-                       cpu_set_t orig_mask;
+                       cpu_set_t *orig_mask;
                        int target_cpu;
                        int this_cpu;
 
@@ -1265,6 +1322,7 @@ static void *worker_thread(void *__tdata)
                                printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
 
                        bind_to_cpumask(orig_mask);
+                       CPU_FREE(orig_mask);
                }
 
                if (details >= 3) {
@@ -1398,21 +1456,31 @@ static void init_thread_data(void)
 
        for (t = 0; t < g->p.nr_tasks; t++) {
                struct thread_data *td = g->threads + t;
+               size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus);
                int cpu;
 
                /* Allow all nodes by default: */
                td->bind_node = NUMA_NO_NODE;
 
                /* Allow all CPUs by default: */
-               CPU_ZERO(&td->bind_cpumask);
+               td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
+               BUG_ON(!td->bind_cpumask);
+               CPU_ZERO_S(cpuset_size, td->bind_cpumask);
                for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-                       CPU_SET(cpu, &td->bind_cpumask);
+                       CPU_SET_S(cpu, cpuset_size, td->bind_cpumask);
        }
 }
 
 static void deinit_thread_data(void)
 {
        ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+       int t;
+
+       /* Free the bind_cpumask allocated for thread_data */
+       for (t = 0; t < g->p.nr_tasks; t++) {
+               struct thread_data *td = g->threads + t;
+               CPU_FREE(td->bind_cpumask);
+       }
 
        free_data(g->threads, size);
 }
@@ -1672,7 +1740,7 @@ static int __bench_numa(const char *name)
                "GB/sec,", "total-speed",       "GB/sec total speed");
 
        if (g->p.show_details >= 2) {
-               char tname[14 + 2 * 10 + 1];
+               char tname[14 + 2 * 11 + 1];
                struct thread_data *td;
                for (p = 0; p < g->p.nr_proc; p++) {
                        for (t = 0; t < g->p.nr_threads; t++) {
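
The tname bump from 14 + 2 * 10 + 1 to 14 + 2 * 11 + 1 accounts for the sign: a 32-bit int formats to at most 11 characters ("-2147483648"), so two %d conversions need 22 bytes in the worst case, and newer compilers can flag the smaller buffer with format-truncation warnings. A quick check of the per-int bound:

    #include <limits.h>
    #include <stdio.h>

    int main(void)
    {
            char buf[12];

            /* snprintf returns the formatted length: 11 for INT_MIN */
            printf("%d\n", snprintf(buf, sizeof(buf), "%d", INT_MIN));
            return 0;
    }
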
index ba74fab02e6266e12448a7399426202cccaf5768..069825c48d404bc4dbe4901f0f45b8d97f683ff9 100644 (file)
@@ -989,8 +989,11 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
        struct mmap *overwrite_mmap = evlist->overwrite_mmap;
        struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
 
-       thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
-                                             thread_data->mask->maps.nbits);
+       if (cpu_map__is_dummy(cpus))
+               thread_data->nr_mmaps = nr_mmaps;
+       else
+               thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
+                                                     thread_data->mask->maps.nbits);
        if (mmap) {
                thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
                if (!thread_data->maps)
@@ -1007,16 +1010,17 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
                 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
 
        for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
-               if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
+               if (cpu_map__is_dummy(cpus) ||
+                   test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
                        if (thread_data->maps) {
                                thread_data->maps[tm] = &mmap[m];
                                pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
-                                         thread_data, cpus->map[m].cpu, tm, m);
+                                         thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
                        }
                        if (thread_data->overwrite_maps) {
                                thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
                                pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
-                                         thread_data, cpus->map[m].cpu, tm, m);
+                                         thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
                        }
                        tm++;
                }
@@ -3329,6 +3333,9 @@ static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_c
 {
        int c;
 
+       if (cpu_map__is_dummy(cpus))
+               return;
+
        for (c = 0; c < cpus->nr; c++)
                set_bit(cpus->map[c].cpu, mask->bits);
 }
@@ -3680,6 +3687,11 @@ static int record__init_thread_masks(struct record *rec)
        if (!record__threads_enabled(rec))
                return record__init_thread_default_masks(rec, cpus);
 
+       if (cpu_map__is_dummy(cpus)) {
+               pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
+               return -EINVAL;
+       }
+
        switch (rec->opts.threads_spec) {
        case THREAD_SPEC__CPU:
                ret = record__init_thread_cpu_masks(rec, cpus);
index 1ad75c7ba07408484eb116915c9814429fa2f02e..afe4a5539ecc70b71e681b38b7507aa25f03dd0a 100644 (file)
@@ -353,6 +353,7 @@ static int report__setup_sample_type(struct report *rep)
        struct perf_session *session = rep->session;
        u64 sample_type = evlist__combined_sample_type(session->evlist);
        bool is_pipe = perf_data__is_pipe(session->data);
+       struct evsel *evsel;
 
        if (session->itrace_synth_opts->callchain ||
            session->itrace_synth_opts->add_callchain ||
@@ -407,6 +408,19 @@ static int report__setup_sample_type(struct report *rep)
        }
 
        if (sort__mode == SORT_MODE__MEMORY) {
+               /*
+                * FIXUP: prior to kernel 5.18, Arm SPE missed to set
+                * PERF_SAMPLE_DATA_SRC bit in sample type.  For backward
+                * compatibility, set the bit if it's an old perf data file.
+                */
+               evlist__for_each_entry(session->evlist, evsel) {
+                       if (strstr(evsel->name, "arm_spe") &&
+                               !(sample_type & PERF_SAMPLE_DATA_SRC)) {
+                               evsel->core.attr.sample_type |= PERF_SAMPLE_DATA_SRC;
+                               sample_type |= PERF_SAMPLE_DATA_SRC;
+                       }
+               }
+
                if (!is_pipe && !(sample_type & PERF_SAMPLE_DATA_SRC)) {
                        ui__error("Selected --mem-mode but no mem data. "
                                  "Did you call perf record without -d?\n");
index a2f1179361886f91107fdadbd057b6c76e9edb3d..cf5eab5431b4c7f33ae788f905ab14591ad91e51 100644 (file)
@@ -461,7 +461,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
                return -EINVAL;
 
        if (PRINT_FIELD(DATA_SRC) &&
-           evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC))
+           evsel__do_check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC, allow_user_set))
                return -EINVAL;
 
        if (PRINT_FIELD(WEIGHT) &&
index 2f6b67189b426c33b07f66f6cd6209743afc297b..0170cb0819d6ad530695f75b0af4439725dec69b 100644 (file)
@@ -55,6 +55,7 @@ struct cmd_struct {
 };
 
 static struct cmd_struct commands[] = {
+       { "archive",    NULL,   0 },
        { "buildid-cache", cmd_buildid_cache, 0 },
        { "buildid-list", cmd_buildid_list, 0 },
        { "config",     cmd_config,     0 },
@@ -62,6 +63,7 @@ static struct cmd_struct commands[] = {
        { "diff",       cmd_diff,       0 },
        { "evlist",     cmd_evlist,     0 },
        { "help",       cmd_help,       0 },
+       { "iostat",     NULL,   0 },
        { "kallsyms",   cmd_kallsyms,   0 },
        { "list",       cmd_list,       0 },
        { "record",     cmd_record,     0 },
@@ -360,6 +362,8 @@ static void handle_internal_command(int argc, const char **argv)
 
        for (i = 0; i < ARRAY_SIZE(commands); i++) {
                struct cmd_struct *p = commands+i;
+               if (p->fn == NULL)
+                       continue;
                if (strcmp(p->cmd, cmd))
                        continue;
                exit(run_builtin(p, argc, argv));
@@ -434,7 +438,7 @@ void pthread__unblock_sigwinch(void)
 static int libperf_print(enum libperf_print_level level,
                         const char *fmt, va_list ap)
 {
-       return eprintf(level, verbose, fmt, ap);
+       return veprintf(level, verbose, fmt, ap);
 }
 
 int main(int argc, const char **argv)
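
The libperf_print change fixes a classic varargs bug: eprintf() takes "...", so handing it a va_list reinterprets the va_list object itself as the arguments; only the va_list-taking veprintf() can consume it, exactly as vprintf() pairs with printf(). A self-contained model of the correct shape:

    #include <stdarg.h>
    #include <stdio.h>

    /* va_list-aware worker, analogous to veprintf() */
    static int log_vprintf(const char *fmt, va_list ap)
    {
            return vfprintf(stderr, fmt, ap);
    }

    /* variadic front end, analogous to eprintf() */
    static int log_printf(const char *fmt, ...)
    {
            va_list ap;
            int ret;

            va_start(ap, fmt);
            ret = log_vprintf(fmt, ap);  /* never pass ap to a "..." function */
            va_end(ap);
            return ret;
    }

    int main(void)
    {
            return log_printf("%s %d\n", "hello", 42) < 0;
    }
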
index 454505d343fa5489077ad31a6524fd733c841ff9..eb3f7d4bb32458bfc87d3742e068e96a9ff491b0 100644 (file)
@@ -60,6 +60,7 @@ Following tests are defined (with perf commands):
   perf record -R kill                           (test-record-raw)
   perf record -c 2 -e arm_spe_0// -- kill       (test-record-spe-period)
   perf record -e arm_spe_0/period=3/ -- kill    (test-record-spe-period-term)
+  perf record -e arm_spe_0/pa_enable=1/ -- kill (test-record-spe-physical-address)
   perf stat -e cycles kill                      (test-stat-basic)
   perf stat kill                                (test-stat-default)
   perf stat -d kill                             (test-stat-detailed-1)
diff --git a/tools/perf/tests/attr/test-record-spe-physical-address b/tools/perf/tests/attr/test-record-spe-physical-address
new file mode 100644 (file)
index 0000000..7ebcf50
--- /dev/null
@@ -0,0 +1,12 @@
+[config]
+command = record
+args    = --no-bpf-event -e arm_spe_0/pa_enable=1/ -- kill >/dev/null 2>&1
+ret     = 1
+arch    = aarch64
+
+[event-10:base-record-spe]
+# 622727 is the decimal of IP|TID|TIME|CPU|IDENTIFIER|DATA_SRC|PHYS_ADDR
+sample_type=622727
+
+# dummy event
+[event-1:base-record-spe]
\ No newline at end of file
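
The 622727 in the expectation file is the sum of the named UAPI sample-type bits: IP (1<<0) + TID (1<<1) + TIME (1<<2) + CPU (1<<7) + DATA_SRC (1<<15) + IDENTIFIER (1<<16) + PHYS_ADDR (1<<19) = 1 + 2 + 4 + 128 + 32768 + 65536 + 524288 = 622727, which can be rechecked against the header:

    #include <linux/perf_event.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned long long st = PERF_SAMPLE_IP | PERF_SAMPLE_TID |
                                    PERF_SAMPLE_TIME | PERF_SAMPLE_CPU |
                                    PERF_SAMPLE_IDENTIFIER |
                                    PERF_SAMPLE_DATA_SRC |
                                    PERF_SAMPLE_PHYS_ADDR;

            printf("%llu\n", st);  /* 622727 */
            return 0;
    }
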
index 57b9591f7cbb422d3afc32e4336931e4c202b10a..17c023823713d4b96ef8404083ce02b794dffe9c 100644 (file)
@@ -222,11 +222,11 @@ static int __test__bpf(int idx)
 
        ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
                                       bpf_testcase_table[idx].prog_id,
-                                      true, NULL);
+                                      false, NULL);
        if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
                pr_debug("Unable to get BPF object, %s\n",
                         bpf_testcase_table[idx].msg_compile_fail);
-               if (idx == 0)
+               if ((idx == 0) || (ret == TEST_SKIP))
                        return TEST_SKIP;
                else
                        return TEST_FAIL;
@@ -364,9 +364,11 @@ static int test__bpf_prologue_test(struct test_suite *test __maybe_unused,
 static struct test_case bpf_tests[] = {
 #ifdef HAVE_LIBBPF_SUPPORT
        TEST_CASE("Basic BPF filtering", basic_bpf_test),
-       TEST_CASE("BPF pinning", bpf_pinning),
+       TEST_CASE_REASON("BPF pinning", bpf_pinning,
+                       "clang isn't installed or environment missing BPF support"),
 #ifdef HAVE_BPF_PROLOGUE
-       TEST_CASE("BPF prologue generation", bpf_prologue_test),
+       TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test,
+                       "clang isn't installed or environment missing BPF support"),
 #else
        TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"),
 #endif
index fac3717d9ba1bb9902fd2372477b175aa4646bea..d336cda94a115fdbdeb15fd71437755dcb75a59b 100644 (file)
@@ -279,6 +279,7 @@ static const char *shell_test__description(char *description, size_t size,
 {
        FILE *fp;
        char filename[PATH_MAX];
+       int ch;
 
        path__join(filename, sizeof(filename), path, name);
        fp = fopen(filename, "r");
@@ -286,7 +287,9 @@ static const char *shell_test__description(char *description, size_t size,
                return NULL;
 
        /* Skip shebang */
-       while (fgetc(fp) != '\n');
+       do {
+               ch = fgetc(fp);
+       } while (ch != EOF && ch != '\n');
 
        description = fgets(description, size, fp);
        fclose(fp);
@@ -417,7 +420,8 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width,
                        .priv = &st,
                };
 
-               if (!perf_test__matches(test_suite.desc, curr, argc, argv))
+               if (test_suite.desc == NULL ||
+                   !perf_test__matches(test_suite.desc, curr, argc, argv))
                        continue;
 
                st.file = ent->d_name;
index 2dab2d2620608b5b5bf458d76144aa72253321d5..afdca7f2959f07d87716c26508178bb6647bd9cf 100644 (file)
@@ -122,7 +122,7 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__thread(struct thread *thr
        }
 
        err = unwind__get_entries(unwind_entry, &cnt, thread,
-                                 &sample, MAX_STACK);
+                                 &sample, MAX_STACK, false);
        if (err)
                pr_debug("unwind failed\n");
        else if (cnt != MAX_STACK) {
index d12d0ad8180107f721426b6e437c34f4cb944de8..4ad0dfbc8b21fda12f5c6468b8fdd1095efa73f3 100644 (file)
        }                                       \
 }
 
+static int test__tsc_is_supported(struct test_suite *test __maybe_unused,
+                                 int subtest __maybe_unused)
+{
+       if (!TSC_IS_SUPPORTED) {
+               pr_debug("Test not supported on this architecture\n");
+               return TEST_SKIP;
+       }
+
+       return TEST_OK;
+}
+
 /**
  * test__perf_time_to_tsc - test converting perf time to TSC.
  *
@@ -70,7 +81,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
        struct perf_cpu_map *cpus = NULL;
        struct evlist *evlist = NULL;
        struct evsel *evsel = NULL;
-       int err = -1, ret, i;
+       int err = TEST_FAIL, ret, i;
        const char *comm1, *comm2;
        struct perf_tsc_conversion tc;
        struct perf_event_mmap_page *pc;
@@ -79,10 +90,6 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
        u64 test_time, comm1_time = 0, comm2_time = 0;
        struct mmap *md;
 
-       if (!TSC_IS_SUPPORTED) {
-               pr_debug("Test not supported on this architecture");
-               return TEST_SKIP;
-       }
 
        threads = thread_map__new(-1, getpid(), UINT_MAX);
        CHECK_NOT_NULL__(threads);
@@ -116,6 +123,10 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
                evsel->core.attr.enable_on_exec = 0;
        }
 
+       if (evlist__open(evlist) == -ENOENT) {
+               err = TEST_SKIP;
+               goto out_err;
+       }
        CHECK__(evlist__open(evlist));
 
        CHECK__(evlist__mmap(evlist, UINT_MAX));
@@ -124,8 +135,8 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
        ret = perf_read_tsc_conversion(pc, &tc);
        if (ret) {
                if (ret == -EOPNOTSUPP) {
-                       fprintf(stderr, " (not supported)");
-                       return 0;
+                       pr_debug("perf_read_tsc_conversion is not supported in current kernel\n");
+                       err = TEST_SKIP;
                }
                goto out_err;
        }
@@ -191,7 +202,7 @@ next_event:
            test_tsc >= comm2_tsc)
                goto out_err;
 
-       err = 0;
+       err = TEST_OK;
 
 out_err:
        evlist__delete(evlist);
@@ -200,4 +211,15 @@ out_err:
        return err;
 }
 
-DEFINE_SUITE("Convert perf time to TSC", perf_time_to_tsc);
+static struct test_case time_to_tsc_tests[] = {
+       TEST_CASE_REASON("TSC support", tsc_is_supported,
+                        "This architecture does not support"),
+       TEST_CASE_REASON("Perf time to TSC", perf_time_to_tsc,
+                        "perf_read_tsc_conversion is not supported"),
+       { .name = NULL, }
+};
+
+struct test_suite suite__perf_time_to_tsc = {
+       .desc = "Convert perf time to TSC",
+       .test_cases = time_to_tsc_tests,
+};
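
For context, a hedged sketch (simplified stand-ins, not perf's actual struct test_case/test_suite from tests/tests.h) of how a NULL-terminated test_cases array like the one above can be driven while honoring per-case skip reasons:

#include <stdio.h>

#define TEST_OK    0
#define TEST_FAIL (-1)
#define TEST_SKIP (-2)

struct fake_case {
	const char *name;
	const char *skip_reason;
	int (*run)(void);
};

static int run_cases(const struct fake_case *cases)
{
	int err = TEST_OK;

	for (; cases->name; cases++) {	/* { .name = NULL } terminates */
		int ret = cases->run();

		if (ret == TEST_SKIP)
			printf("%s: skip (%s)\n", cases->name,
			       cases->skip_reason);
		else if (ret != TEST_OK)
			err = TEST_FAIL;
	}
	return err;
}
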
index b30dba455f36c665227b2d90c26112f48fdb5974..9c9ef33e0b3c609e39be0449ba823f6096105e8a 100755 (executable)
@@ -5,6 +5,16 @@
 set -e
 
 for p in $(perf list --raw-dump pmu); do
+  # On powerpc, skip the hv_24x7 and hv_gpci events.
+  # These events need input values for core, chip and
+  # partition id to be filled in based on the system.
+  # Example: hv_24x7/CPM_ADJUNCT_INST,domain=?,core=?/
+  # hv_gpci/event,partition_id=?/
+  # Hence skip these events for ppc.
+  if echo "$p" | grep -Eq 'hv_24x7|hv_gpci' ; then
+    echo "Skipping event '$p' on powerpc"
+    continue
+  fi
   echo "Testing $p"
   result=$(perf stat -e "$p" true 2>&1)
   if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "<not supported>" ; then
index 6de53b7ef5ffd9a4ba8cb555b9ee8976a95a6017..e4cb4f1806ffa6b4b99e336abe08441e78ef7797 100755 (executable)
@@ -29,7 +29,6 @@ cleanup_files()
        rm -f ${file}
        rm -f "${perfdata}.old"
        trap - exit term int
-       kill -2 $$
        exit $glb_err
 }
 
index ee1e3dcbc0bdb185f0ce98bf20210eddee58f339..d23a9e322ff52868f3a5dbe70b04770c9a4b6c47 100644 (file)
@@ -109,6 +109,17 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
                        && strncmp(session->header.env.arch, "aarch64", 7))
                return TEST_SKIP;
 
+       /*
+        * On the powerpc pSeries platform, not all topology information
+        * is exposed via sysfs. Due to this restriction, details like
+        * physical_package_id will be set to -1. Hence skip this
+        * test if physical_package_id returns -1 for a cpu from perf_cpu_map.
+        */
+       if (!strncmp(session->header.env.arch, "powerpc", 7)) {
+               if (cpu__get_socket_id(perf_cpu_map__cpu(map, 0)) == -1)
+                       return TEST_SKIP;
+       }
+
        TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu);
 
        for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
index e4c641b240df4dcaeb52d8d822f9243a64b777f9..82cc396ef516c45c75163538502ecdcba94b7d74 100644 (file)
@@ -2047,6 +2047,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
        objdump_process.argv = objdump_argv;
        objdump_process.out = -1;
        objdump_process.err = -1;
+       objdump_process.no_stderr = 1;
        if (start_command(&objdump_process)) {
                pr_err("Failure starting to run %s\n", command);
                err = -1;
index d2b64e3f588b29563f1b7d30b0c5031933bacaa8..1a80151baed96733d078c31e1166da3f6d82a179 100644 (file)
@@ -1033,10 +1033,11 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
        memset(&attr, 0, sizeof(struct perf_event_attr));
        attr.size = sizeof(struct perf_event_attr);
        attr.type = PERF_TYPE_HARDWARE;
-       attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+       attr.sample_type = evsel->core.attr.sample_type &
+                               (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
        attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
                            PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
-                           PERF_SAMPLE_WEIGHT;
+                           PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
        if (spe->timeless_decoding)
                attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
        else
index 2242a885fbd73387ffd347deba3bceb4ac74a95a..4940be4a0569cd441cee2c265433a2a06181c8fa 100644 (file)
@@ -53,7 +53,7 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr
                sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0;
        }
 
-       ret = unwind__get_entries(add_entry, &entries, thread, sample, 2);
+       ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true);
        sample->user_regs = old_regs;
 
        if (ret || entries.length != 2)
index 94624733af7e225a91456d52420f51baf600ab94..8271ab764eb56c7bf5df40324c72d00a0b85430d 100644 (file)
@@ -22,7 +22,8 @@
 #include "record.h"
 #include "util/synthetic-events.h"
 
-struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
+#ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+struct btf *btf__load_from_kernel_by_id(__u32 id)
 {
        struct btf *btf;
 #pragma GCC diagnostic push
@@ -32,6 +33,7 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
 
        return err ? ERR_PTR(err) : btf;
 }
+#endif
 
 int __weak bpf_prog_load(enum bpf_prog_type prog_type,
                         const char *prog_name __maybe_unused,
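
Replacing the __weak fallback with an explicit HAVE_* guard sidesteps link-order surprises, where a __weak definition can shadow the real libbpf symbol depending on how objects and archives are ordered. A generic sketch of the pattern, with hypothetical names:

/* The build system compiles a feature test and defines
 * HAVE_LIBFOO_FOO_BAR only when the installed libfoo already
 * provides foo_bar(); otherwise this local fallback is built. */
#ifndef HAVE_LIBFOO_FOO_BAR
int foo_bar(int x)
{
	return x + 1;	/* stand-in behavior for the sketch */
}
#endif
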
index df7b18fb6b6e625dc6c33da85c08b76028b2635c..1aad7d6d34aaa639ceb9eaf260ef88c264179c12 100644 (file)
 #include "llvm/Option/Option.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/ManagedStatic.h"
+#if CLANG_VERSION_MAJOR >= 14
+#include "llvm/MC/TargetRegistry.h"
+#else
 #include "llvm/Support/TargetRegistry.h"
+#endif
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
index d546ff724dbe250c78784758959c3bfe630597ee..a27132e5a5efee55a2936ecb14b0044a13d57c7b 100644 (file)
@@ -983,6 +983,57 @@ static int write_dir_format(struct feat_fd *ff,
        return do_write(ff, &data->dir.version, sizeof(data->dir.version));
 }
 
+/*
+ * Check whether a CPU is online
+ *
+ * Returns:
+ *     1 -> if CPU is online
+ *     0 -> if CPU is offline
+ *    -1 -> error case
+ */
+int is_cpu_online(unsigned int cpu)
+{
+       char *str;
+       size_t strlen;
+       char buf[256];
+       int status = -1;
+       struct stat statbuf;
+
+       snprintf(buf, sizeof(buf),
+               "/sys/devices/system/cpu/cpu%d", cpu);
+       if (stat(buf, &statbuf) != 0)
+               return 0;
+
+       /*
+        * Check if the /sys/devices/system/cpu/cpuX/online file
+        * exists. In some cases cpu0 won't have an online file since
+        * it is generally not expected to be turned off.
+        * In kernels built without CONFIG_HOTPLUG_CPU, this
+        * file won't exist either.
+        */
+       snprintf(buf, sizeof(buf),
+               "/sys/devices/system/cpu/cpu%d/online", cpu);
+       if (stat(buf, &statbuf) != 0)
+               return 1;
+
+       /*
+        * Read the online file using sysfs__read_str.
+        * If the open or read fails, return -1.
+        * If it succeeds, return the value from the file,
+        * which sysfs__read_str stores in "str".
+        */
+       snprintf(buf, sizeof(buf),
+               "devices/system/cpu/cpu%d/online", cpu);
+
+       if (sysfs__read_str(buf, &str, &strlen) < 0)
+               return status;
+
+       status = atoi(str);
+
+       free(str);
+       return status;
+}
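
A hedged usage sketch (hypothetical caller, not part of this patch; is_cpu_online() is declared in util/header.h) showing how the tri-state return is meant to be consumed:

#include "util/header.h"

/* Count CPUs that are definitely online; for this sketch, treat the
 * -1 error case the same as offline. */
static int count_online_cpus(const unsigned int *cpus, int n)
{
	int i, online = 0;

	for (i = 0; i < n; i++)
		if (is_cpu_online(cpus[i]) == 1)
			online++;
	return online;
}
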
+
 #ifdef HAVE_LIBBPF_SUPPORT
 static int write_bpf_prog_info(struct feat_fd *ff,
                               struct evlist *evlist __maybe_unused)
index c9e3265832d92c1d44a1833e4e2880d4a1bf8c60..0eb4bc29a5a46c2862f49ec4058712a644f3565b 100644 (file)
@@ -158,6 +158,7 @@ int do_write(struct feat_fd *fd, const void *buf, size_t size);
 int write_padded(struct feat_fd *fd, const void *bf,
                 size_t count, size_t count_aligned);
 
+int is_cpu_online(unsigned int cpu);
 /*
  * arch specific callback
  */
index b80048546451359291fc886b6322d4f8c35cb600..95391236f5f6a9103a233d0e8feffb30b49e995a 100644 (file)
@@ -2987,7 +2987,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
                return 0;
 
        return unwind__get_entries(unwind_entry, cursor,
-                                  thread, sample, max_stack);
+                                  thread, sample, max_stack, false);
 }
 
 int thread__resolve_callchain(struct thread *thread,
index 24997925ae00d66a95c58b2afdac3d54894661f3..dd84fed698a3bedd087a61ea9aa478ee15995fff 100644 (file)
@@ -1523,7 +1523,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
        bool use_uncore_alias;
        LIST_HEAD(config_terms);
 
-       if (verbose > 1) {
+       pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
+
+       if (verbose > 1 && !(pmu && pmu->selectable)) {
                fprintf(stderr, "Attempting to add event pmu '%s' with '",
                        name);
                if (head_config) {
@@ -1536,7 +1538,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
                fprintf(stderr, "' that may result in non-fatal errors\n");
        }
 
-       pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
        if (!pmu) {
                char *err_str;
 
index 3b8dfe603e50b1a51f8b58a1e8c23716728aee48..a7f93f5a1ac81968fd4d2cc4eef37c0806b9493b 100644 (file)
@@ -1151,9 +1151,20 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
        struct branch_entry *entries = perf_sample__branch_entries(sample);
        uint64_t i;
 
-       printf("%s: nr:%" PRIu64 "\n",
-               !callstack ? "... branch stack" : "... branch callstack",
-               sample->branch_stack->nr);
+       if (!callstack) {
+               printf("%s: nr:%" PRIu64 "\n", "... branch stack", sample->branch_stack->nr);
+       } else {
+               /* We add 1 to nr because expanding the branch stack
+                * generates nr + 1 callstack records. e.g., given
+                *         B()->C()
+                *         A()->B()
+                * the final callstack should be:
+                *         C()
+                *         B()
+                *         A()
+                */
+               printf("%s: nr:%" PRIu64 "\n", "... branch callstack", sample->branch_stack->nr+1);
+       }
 
        for (i = 0; i < sample->branch_stack->nr; i++) {
                struct branch_entry *e = &entries[i];
@@ -1169,8 +1180,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
                                (unsigned)e->flags.reserved,
                                e->flags.type ? branch_type_name(e->flags.type) : "");
                } else {
-                       printf("..... %2"PRIu64": %016" PRIx64 "\n",
-                               i, i > 0 ? e->from : e->to);
+                       if (i == 0) {
+                               printf("..... %2"PRIu64": %016" PRIx64 "\n"
+                                      "..... %2"PRIu64": %016" PRIx64 "\n",
+                                               i, e->to, i+1, e->from);
+                       } else {
+                               printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from);
+                       }
                }
        }
 }
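
The nr + 1 arithmetic follows from how a branch stack expands into a callstack: the newest entry contributes both its to and from addresses, every older entry only its from. A sketch under that assumption (entries ordered newest-first, simplified types):

#include <stdint.h>

struct be { uint64_t from, to; };	/* stand-in for branch_entry */

/* Expand nr branch records into nr + 1 callstack addresses,
 * innermost frame first. */
static uint64_t expand_callstack(const struct be *e, uint64_t nr,
				 uint64_t *ips)
{
	uint64_t i, n = 0;

	ips[n++] = e[0].to;		/* C() from B()->C() */
	for (i = 0; i < nr; i++)
		ips[n++] = e[i].from;	/* B(), then A() */
	return n;			/* always nr + 1 */
}
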
@@ -2095,6 +2111,7 @@ prefetch_event(char *buf, u64 head, size_t mmap_size,
               bool needs_swap, union perf_event *error)
 {
        union perf_event *event;
+       u16 event_size;
 
        /*
         * Ensure we have enough space remaining to read
@@ -2107,15 +2124,23 @@ prefetch_event(char *buf, u64 head, size_t mmap_size,
        if (needs_swap)
                perf_event_header__bswap(&event->header);
 
-       if (head + event->header.size <= mmap_size)
+       event_size = event->header.size;
+       if (head + event_size <= mmap_size)
                return event;
 
        /* We're not fetching the event so swap back again */
        if (needs_swap)
                perf_event_header__bswap(&event->header);
 
-       pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:"
-                " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size);
+       /* Check if the event fits into the next mmapped buf. */
+       if (event_size <= mmap_size - head % page_size) {
+               /* Remap buf and fetch again. */
+               return NULL;
+       }
+
+       /* Invalid input. Event size should never exceed mmap_size. */
+       pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:"
+                " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size);
 
        return error;
 }
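
A worked example of the three outcomes, with hypothetical sizes (in the real code page_size is a global, and the remap is done by the caller when NULL comes back):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t page_size = 0x1000, mmap_size = 0x4000;
	const uint64_t head = 0x3ff0;	/* near the end of the window */
	const uint16_t event_size = 0x30;

	if (head + event_size <= mmap_size)
		puts("fetch from the current window");
	else if (event_size <= mmap_size - head % page_size)
		puts("NULL: remap at the page containing head and retry");
	else
		puts("error: fuzzed or compressed perf.data?");
	/* here: 0x3ff0 + 0x30 > 0x4000, but 0x30 fits after a remap,
	 * so the middle branch fires */
	return 0;
}
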
@@ -2567,7 +2592,7 @@ int perf_session__process_events(struct perf_session *session)
        if (perf_data__is_pipe(session->data))
                return __perf_session__process_pipe_events(session);
 
-       if (perf_data__is_dir(session->data))
+       if (perf_data__is_dir(session->data) && session->data->dir.nr)
                return __perf_session__process_dir_events(session);
 
        return __perf_session__process_events(session);
index 483f05004e682081be7b87c8dcb20c4e244d9c68..c255a2c90cd672b1ecfa36eaa0883045f34175f6 100644 (file)
@@ -1,12 +1,14 @@
-from os import getenv
+from os import getenv, path
 from subprocess import Popen, PIPE
 from re import sub
 
 cc = getenv("CC")
 cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline()
+src_feature_tests  = getenv('srctree') + '/tools/build/feature'
 
 def clang_has_option(option):
-    return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
+    cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
+    return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ]
 
 if cc_is_clang:
     from distutils.sysconfig import get_config_vars
@@ -23,6 +25,8 @@ if cc_is_clang:
             vars[var] = sub("-fstack-protector-strong", "", vars[var])
         if not clang_has_option("-fno-semantic-interposition"):
             vars[var] = sub("-fno-semantic-interposition", "", vars[var])
+        if not clang_has_option("-ffat-lto-objects"):
+            vars[var] = sub("-ffat-lto-objects", "", vars[var])
 
 from distutils.core import setup, Extension
 
index ee6f0348121514220df185ec7e19c4afb548850e..c1af37e11f9898522d48ca5a79ce1faf19f7f7c6 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <errno.h>
+#include <linux/err.h>
 #include <inttypes.h>
 #include <math.h>
 #include <string.h>
@@ -311,7 +312,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
 
        if (!mask) {
                mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL);
-               if (!mask)
+               if (IS_ERR(mask))
                        return -ENOMEM;
 
                counter->per_pkg_mask = mask;
@@ -471,9 +472,10 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 int perf_event__process_stat_event(struct perf_session *session,
                                   union perf_event *event)
 {
-       struct perf_counts_values count;
+       struct perf_counts_values count, *ptr;
        struct perf_record_stat *st = &event->stat;
        struct evsel *counter;
+       int cpu_map_idx;
 
        count.val = st->val;
        count.ena = st->ena;
@@ -484,8 +486,18 @@ int perf_event__process_stat_event(struct perf_session *session,
                pr_err("Failed to resolve counter for stat event.\n");
                return -EINVAL;
        }
-
-       *perf_counts(counter->counts, st->cpu, st->thread) = count;
+       cpu_map_idx = perf_cpu_map__idx(evsel__cpus(counter), (struct perf_cpu){.cpu = st->cpu});
+       if (cpu_map_idx == -1) {
+               pr_err("Invalid CPU %d for event %s.\n", st->cpu, evsel__name(counter));
+               return -EINVAL;
+       }
+       ptr = perf_counts(counter->counts, cpu_map_idx, st->thread);
+       if (ptr == NULL) {
+               pr_err("Failed to find perf count for CPU %d thread %d on event %s.\n",
+                       st->cpu, st->thread, evsel__name(counter));
+               return -EINVAL;
+       }
+       *ptr = count;
        counter->supported = true;
        return 0;
 }
index 31cd59a2b66e6cc2223ad6fb18bfe90d40e9d4b2..ecd377938eea8e5e5346c45e84f22f56aa607b96 100644 (file)
@@ -1290,7 +1290,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
         * For misannotated, zeroed, ASM function sizes.
         */
        if (nr > 0) {
-               symbols__fixup_end(&dso->symbols);
+               symbols__fixup_end(&dso->symbols, false);
                symbols__fixup_duplicate(&dso->symbols);
                if (kmap) {
                        /*
index dea0fc495185daac9db744e9832895171e3206b7..f72baf636724ce32a8533f9236f1a91cb138fb48 100644 (file)
@@ -101,11 +101,6 @@ static int prefix_underscores_count(const char *str)
        return tail - str;
 }
 
-void __weak arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
-       p->end = c->start;
-}
-
 const char * __weak arch__normalize_symbol_name(const char *name)
 {
        return name;
@@ -217,7 +212,8 @@ again:
        }
 }
 
-void symbols__fixup_end(struct rb_root_cached *symbols)
+/* Update zero-sized symbols using the address of the next symbol */
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
 {
        struct rb_node *nd, *prevnd = rb_first_cached(symbols);
        struct symbol *curr, *prev;
@@ -231,8 +227,29 @@ void symbols__fixup_end(struct rb_root_cached *symbols)
                prev = curr;
                curr = rb_entry(nd, struct symbol, rb_node);
 
-               if (prev->end == prev->start || prev->end != curr->start)
-                       arch__symbols__fixup_end(prev, curr);
+               /*
+                * On some architectures the kernel text segment starts at a
+                * low memory address, while modules are located at high
+                * memory addresses (or vice versa).  The gap between the end
+                * of the kernel text segment and the beginning of the first
+                * module's text segment is very big.  Therefore do not fill
+                * this gap and do not assign it to the kernel dso map
+                * (kallsyms).
+                *
+                * Module symbols are identified in kallsyms by the '['
+                * character in their names, as in:
+                *   ffffffffc1937000 T hdmi_driver_init  [snd_hda_codec_hdmi]
+                */
+               if (prev->end == prev->start) {
+                       /* Last kernel/module symbol mapped to end of page */
+                       if (is_kallsyms && (!strchr(prev->name, '[') !=
+                                           !strchr(curr->name, '[')))
+                               prev->end = roundup(prev->end + 4096, 4096);
+                       else
+                               prev->end = curr->start;
+
+                       pr_debug4("%s sym:%s end:%#" PRIx64 "\n",
+                                 __func__, prev->name, prev->end);
+               }
        }
 
        /* Last entry */
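
The '[' test above reduces to an exclusive-or on the two symbol names; a standalone sketch of the boundary decision, assuming kallsyms-style naming:

#include <stdbool.h>
#include <string.h>

/* kallsyms marks module symbols with a "[module]" suffix, so exactly
 * one of the two names containing '[' means prev and curr sit on
 * opposite sides of the kernel/module boundary. */
static bool crosses_kernel_module_boundary(const char *prev,
					   const char *curr)
{
	return !strchr(prev, '[') != !strchr(curr, '[');
}
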
@@ -1467,7 +1484,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
        if (kallsyms__delta(kmap, filename, &delta))
                return -1;
 
-       symbols__fixup_end(&dso->symbols);
+       symbols__fixup_end(&dso->symbols, true);
        symbols__fixup_duplicate(&dso->symbols);
 
        if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
@@ -1659,7 +1676,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
 #undef bfd_asymbol_section
 #endif
 
-       symbols__fixup_end(&dso->symbols);
+       symbols__fixup_end(&dso->symbols, false);
        symbols__fixup_duplicate(&dso->symbols);
        dso->adjust_symbols = 1;
 
index fbf866d82dccdd0658345cb320ed9b59b45e363c..0b893dcc8ea6807262642e7aac5661df60238c3f 100644 (file)
@@ -203,7 +203,7 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
                       bool kernel);
 void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
 void symbols__fixup_duplicate(struct rb_root_cached *symbols);
-void symbols__fixup_end(struct rb_root_cached *symbols);
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
 void maps__fixup_end(struct maps *maps);
 
 typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
@@ -241,7 +241,6 @@ const char *arch__normalize_symbol_name(const char *name);
 #define SYMBOL_A 0
 #define SYMBOL_B 1
 
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c);
 int arch__compare_symbol_names(const char *namea, const char *nameb);
 int arch__compare_symbol_names_n(const char *namea, const char *nameb,
                                 unsigned int n);
index a74b517f74974dc9d1ecef4c1306915ca6c91850..94aa40f6e3482fc13fe13c70be640a19004f8fdd 100644 (file)
@@ -200,7 +200,8 @@ frame_callback(Dwfl_Frame *state, void *arg)
        bool isactivation;
 
        if (!dwfl_frame_pc(state, &pc, NULL)) {
-               pr_err("%s", dwfl_errmsg(-1));
+               if (!ui->best_effort)
+                       pr_err("%s", dwfl_errmsg(-1));
                return DWARF_CB_ABORT;
        }
 
@@ -208,7 +209,8 @@ frame_callback(Dwfl_Frame *state, void *arg)
        report_module(pc, ui);
 
        if (!dwfl_frame_pc(state, &pc, &isactivation)) {
-               pr_err("%s", dwfl_errmsg(-1));
+               if (!ui->best_effort)
+                       pr_err("%s", dwfl_errmsg(-1));
                return DWARF_CB_ABORT;
        }
 
@@ -222,7 +224,8 @@ frame_callback(Dwfl_Frame *state, void *arg)
 int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
                        struct thread *thread,
                        struct perf_sample *data,
-                       int max_stack)
+                       int max_stack,
+                       bool best_effort)
 {
        struct unwind_info *ui, ui_buf = {
                .sample         = data,
@@ -231,6 +234,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
                .cb             = cb,
                .arg            = arg,
                .max_stack      = max_stack,
+               .best_effort    = best_effort
        };
        Dwarf_Word ip;
        int err = -EINVAL, i;
index 0cbd2650e280e52635f7e0d9058467d633239c5f..8c88bc4f2304b59561ffd482469ca63ce0f531f0 100644 (file)
@@ -20,6 +20,7 @@ struct unwind_info {
        void                    *arg;
        int                     max_stack;
        int                     idx;
+       bool                    best_effort;
        struct unwind_entry     entries[];
 };
 
index 71a3533491815749bd1ec84e659d659cba09957c..41e29fc7648ae9f79fe258d731b5cd1172c61cd6 100644 (file)
@@ -96,6 +96,7 @@ struct unwind_info {
        struct perf_sample      *sample;
        struct machine          *machine;
        struct thread           *thread;
+       bool                     best_effort;
 };
 
 #define dw_read(ptr, type, end) ({     \
@@ -553,7 +554,8 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
 
        ret = perf_reg_value(&val, &ui->sample->user_regs, id);
        if (ret) {
-               pr_err("unwind: can't read reg %d\n", regnum);
+               if (!ui->best_effort)
+                       pr_err("unwind: can't read reg %d\n", regnum);
                return ret;
        }
 
@@ -666,7 +668,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
                        return -1;
 
                ret = unw_init_remote(&c, addr_space, ui);
-               if (ret)
+               if (ret && !ui->best_effort)
                        display_error(ret);
 
                while (!ret && (unw_step(&c) > 0) && i < max_stack) {
@@ -704,12 +706,14 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
 
 static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
                        struct thread *thread,
-                       struct perf_sample *data, int max_stack)
+                       struct perf_sample *data, int max_stack,
+                       bool best_effort)
 {
        struct unwind_info ui = {
                .sample       = data,
                .thread       = thread,
                .machine      = thread->maps->machine,
+               .best_effort  = best_effort
        };
 
        if (!data->user_regs.regs)
index e89a5479b361303e8b323182dd5509a18e5ead1b..509c287ee762808061821a9d2c52301e3b6955e4 100644 (file)
@@ -80,9 +80,11 @@ void unwind__finish_access(struct maps *maps)
 
 int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
                         struct thread *thread,
-                        struct perf_sample *data, int max_stack)
+                        struct perf_sample *data, int max_stack,
+                        bool best_effort)
 {
        if (thread->maps->unwind_libunwind_ops)
-               return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack);
+               return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data,
+                                                                      max_stack, best_effort);
        return 0;
 }
index ab8ad469c8de5aea6c7bf437bfee156f1b0ed2ed..b2a03fa5289b37c2744aab105415e8febe250f0b 100644 (file)
@@ -23,13 +23,19 @@ struct unwind_libunwind_ops {
        void (*finish_access)(struct maps *maps);
        int (*get_entries)(unwind_entry_cb_t cb, void *arg,
                           struct thread *thread,
-                          struct perf_sample *data, int max_stack);
+                          struct perf_sample *data, int max_stack, bool best_effort);
 };
 
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
+/*
+ * When best_effort is set, don't report errors and fail silently. This could
+ * be expanded in the future to be more permissive about things other than
+ * error messages.
+ */
 int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
                        struct thread *thread,
-                       struct perf_sample *data, int max_stack);
+                       struct perf_sample *data, int max_stack,
+                       bool best_effort);
 /* libunwind specific */
 #ifdef HAVE_LIBUNWIND_SUPPORT
 #ifndef LIBUNWIND__ARCH_REG_ID
@@ -65,7 +71,8 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused,
                    void *arg __maybe_unused,
                    struct thread *thread __maybe_unused,
                    struct perf_sample *data __maybe_unused,
-                   int max_stack __maybe_unused)
+                   int max_stack __maybe_unused,
+                   bool best_effort __maybe_unused)
 {
        return 0;
 }
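
A hedged sketch of a best-effort caller, modeled on the get_leaf_frame_caller_aarch64() hunk earlier in this series (perf types come from util/unwind.h and friends; the callback contract, returning non-zero to stop, is assumed from the surrounding code):

#include "util/unwind.h"	/* unwind__get_entries, unwind_entry */
#include "util/thread.h"
#include "util/event.h"		/* struct perf_sample */

static int count_entry(struct unwind_entry *entry __maybe_unused, void *arg)
{
	int *n = arg;

	return ++(*n) >= 2;	/* non-zero return stops the unwind */
}

static int leaf_caller_frames(struct thread *thread,
			      struct perf_sample *sample)
{
	int n = 0;

	/* best_effort = true: partial unwinds are expected, no error spam */
	unwind__get_entries(count_entry, &n, thread, sample, 2, true);
	return n;
}
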
index 846f785e278d7f4c7071ee0ef93c9372796c3b9a..7221f2f55e8bff4572bac925cf4d2e73ebff9d4e 100644 (file)
@@ -42,7 +42,7 @@ ISST_IN := $(OUTPUT)intel-speed-select-in.o
 $(ISST_IN): prepare FORCE
        $(Q)$(MAKE) $(build)=intel-speed-select
 $(OUTPUT)intel-speed-select: $(ISST_IN)
-       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+       $(QUIET_LINK)$(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
 
 clean:
        rm -f $(ALL_PROGRAMS)
index 65dbdda3a0544652f7d266e2b2a51a7f6d0f5361..1da76ccde448f894a81fcc58a315a864d28577bc 100644 (file)
@@ -1842,7 +1842,7 @@ static int nfit_test_dimm_init(struct nfit_test *t)
        return 0;
 }
 
-static void security_init(struct nfit_test *t)
+static void nfit_security_init(struct nfit_test *t)
 {
        int i;
 
@@ -1938,7 +1938,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
        if (nfit_test_dimm_init(t))
                return -ENOMEM;
        smart_init(t);
-       security_init(t);
+       nfit_security_init(t);
        return ars_state_init(&t->pdev.dev, &t->ars_state);
 }
 
index 81539f5439546868dc2d8d9bfeed7374aa8ef8f3..d5c1bcba86fe00ffa379a6d5bb38095c084f3ce2 100644 (file)
@@ -25,7 +25,8 @@ struct kmem_cache {
        void (*ctor)(void *);
 };
 
-void *kmem_cache_alloc(struct kmem_cache *cachep, int gfp)
+void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
+               int gfp)
 {
        void *p;
 
index 5aa52cc31dc2e36f2bc2d5b439f96ace5e881188..c11832657d2bb44cf4d4d3fd81ffb5c167914b4e 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (C) 2021. Huawei Technologies Co., Ltd */
 #include <test_progs.h>
 #include "dummy_st_ops.skel.h"
+#include "trace_dummy_st_ops.skel.h"
 
 /* Need to keep consistent with definition in include/linux/bpf.h */
 struct bpf_dummy_ops_state {
@@ -56,6 +57,7 @@ static void test_dummy_init_ptr_arg(void)
                .ctx_in = args,
                .ctx_size_in = sizeof(args),
        );
+       struct trace_dummy_st_ops *trace_skel;
        struct dummy_st_ops *skel;
        int fd, err;
 
@@ -64,12 +66,33 @@ static void test_dummy_init_ptr_arg(void)
                return;
 
        fd = bpf_program__fd(skel->progs.test_1);
+
+       trace_skel = trace_dummy_st_ops__open();
+       if (!ASSERT_OK_PTR(trace_skel, "trace_dummy_st_ops__open"))
+               goto done;
+
+       err = bpf_program__set_attach_target(trace_skel->progs.fentry_test_1,
+                                            fd, "test_1");
+       if (!ASSERT_OK(err, "set_attach_target(fentry_test_1)"))
+               goto done;
+
+       err = trace_dummy_st_ops__load(trace_skel);
+       if (!ASSERT_OK(err, "load(trace_skel)"))
+               goto done;
+
+       err = trace_dummy_st_ops__attach(trace_skel);
+       if (!ASSERT_OK(err, "attach(trace_skel)"))
+               goto done;
+
        err = bpf_prog_test_run_opts(fd, &attr);
        ASSERT_OK(err, "test_run");
        ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret");
        ASSERT_EQ(attr.retval, exp_retval, "test_ret");
+       ASSERT_EQ(trace_skel->bss->val, exp_retval, "fentry_val");
 
+done:
        dummy_st_ops__destroy(skel);
+       trace_dummy_st_ops__destroy(trace_skel);
 }
 
 static void test_dummy_multiple_args(void)
index b64df94ec4762ca7503c09c2cea1a63f474eb8a2..db388f593d0a2463cce35235b8aca82c50d89687 100644 (file)
@@ -367,7 +367,7 @@ static inline int check_array_of_maps(void)
 
        VERIFY(check_default(&array_of_maps->map, map));
        inner_map = bpf_map_lookup_elem(array_of_maps, &key);
-       VERIFY(inner_map != 0);
+       VERIFY(inner_map != NULL);
        VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES);
 
        return 1;
@@ -394,7 +394,7 @@ static inline int check_hash_of_maps(void)
 
        VERIFY(check_default(&hash_of_maps->map, map));
        inner_map = bpf_map_lookup_elem(hash_of_maps, &key);
-       VERIFY(inner_map != 0);
+       VERIFY(inner_map != NULL);
        VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES);
 
        return 1;
diff --git a/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c b/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c
new file mode 100644 (file)
index 0000000..00a4be9
--- /dev/null
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int val = 0;
+
+SEC("fentry/test_1")
+int BPF_PROG(fentry_test_1, __u64 *st_ops_ctx)
+{
+       __u64 state;
+
+       /* Read the traced st_ops arg1 which is a pointer */
+       bpf_probe_read_kernel(&state, sizeof(__u64), (void *)st_ops_ctx);
+       /* Read state->val */
+       bpf_probe_read_kernel(&val, sizeof(__u32), (void *)state);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index b9e991d431556ce84064e479bb05e35ac7eea4a7..e7775d3bbe0877377e8ba1a12d5dd8090107adbf 100644 (file)
@@ -18,8 +18,9 @@
 #include "bpf_rlimit.h"
 #include "cgroup_helpers.h"
 
-static int start_server(const struct sockaddr *addr, socklen_t len)
+static int start_server(const struct sockaddr *addr, socklen_t len, bool dual)
 {
+       int mode = !dual;
        int fd;
 
        fd = socket(addr->sa_family, SOCK_STREAM, 0);
@@ -28,6 +29,14 @@ static int start_server(const struct sockaddr *addr, socklen_t len)
                goto out;
        }
 
+       if (addr->sa_family == AF_INET6) {
+               if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&mode,
+                              sizeof(mode)) == -1) {
+                       log_err("Failed to set the dual-stack mode");
+                       goto close_out;
+               }
+       }
+
        if (bind(fd, addr, len) == -1) {
                log_err("Failed to bind server socket");
                goto close_out;
@@ -47,24 +56,17 @@ out:
        return fd;
 }
 
-static int connect_to_server(int server_fd)
+static int connect_to_server(const struct sockaddr *addr, socklen_t len)
 {
-       struct sockaddr_storage addr;
-       socklen_t len = sizeof(addr);
        int fd = -1;
 
-       if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
-               log_err("Failed to get server addr");
-               goto out;
-       }
-
-       fd = socket(addr.ss_family, SOCK_STREAM, 0);
+       fd = socket(addr->sa_family, SOCK_STREAM, 0);
        if (fd == -1) {
                log_err("Failed to create client socket");
                goto out;
        }
 
-       if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
+       if (connect(fd, (const struct sockaddr *)addr, len) == -1) {
                log_err("Fail to connect to server");
                goto close_out;
        }
@@ -116,7 +118,8 @@ err:
        return map_fd;
 }
 
-static int run_test(int server_fd, int results_fd, bool xdp)
+static int run_test(int server_fd, int results_fd, bool xdp,
+                   const struct sockaddr *addr, socklen_t len)
 {
        int client = -1, srv_client = -1;
        int ret = 0;
@@ -142,7 +145,7 @@ static int run_test(int server_fd, int results_fd, bool xdp)
                goto err;
        }
 
-       client = connect_to_server(server_fd);
+       client = connect_to_server(addr, len);
        if (client == -1)
                goto err;
 
@@ -199,12 +202,30 @@ out:
        return ret;
 }
 
+static bool get_port(int server_fd, in_port_t *port)
+{
+       struct sockaddr_in addr;
+       socklen_t len = sizeof(addr);
+
+       if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+               log_err("Failed to get server addr");
+               return false;
+       }
+
+       /* sin_port and sin6_port are located at the same offset. */
+       *port = addr.sin_port;
+       return true;
+}
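
The offset claim in get_port() can be verified at compile time; a small standalone sketch:

#include <netinet/in.h>
#include <stddef.h>

/* Both port fields directly follow the 16-bit address-family field,
 * so reading sin_port through a sockaddr_in works for AF_INET6
 * sockets too. */
_Static_assert(offsetof(struct sockaddr_in, sin_port) ==
	       offsetof(struct sockaddr_in6, sin6_port),
	       "sin_port and sin6_port must share an offset");
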
+
 int main(int argc, char **argv)
 {
        struct sockaddr_in addr4;
        struct sockaddr_in6 addr6;
+       struct sockaddr_in addr4dual;
+       struct sockaddr_in6 addr6dual;
        int server = -1;
        int server_v6 = -1;
+       int server_dual = -1;
        int results = -1;
        int err = 0;
        bool xdp;
@@ -224,25 +245,43 @@ int main(int argc, char **argv)
        addr4.sin_family = AF_INET;
        addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
        addr4.sin_port = 0;
+       memcpy(&addr4dual, &addr4, sizeof(addr4dual));
 
        memset(&addr6, 0, sizeof(addr6));
        addr6.sin6_family = AF_INET6;
        addr6.sin6_addr = in6addr_loopback;
        addr6.sin6_port = 0;
 
-       server = start_server((const struct sockaddr *)&addr4, sizeof(addr4));
-       if (server == -1)
+       memset(&addr6dual, 0, sizeof(addr6dual));
+       addr6dual.sin6_family = AF_INET6;
+       addr6dual.sin6_addr = in6addr_any;
+       addr6dual.sin6_port = 0;
+
+       server = start_server((const struct sockaddr *)&addr4, sizeof(addr4),
+                             false);
+       if (server == -1 || !get_port(server, &addr4.sin_port))
                goto err;
 
        server_v6 = start_server((const struct sockaddr *)&addr6,
-                                sizeof(addr6));
-       if (server_v6 == -1)
+                                sizeof(addr6), false);
+       if (server_v6 == -1 || !get_port(server_v6, &addr6.sin6_port))
+               goto err;
+
+       server_dual = start_server((const struct sockaddr *)&addr6dual,
+                                  sizeof(addr6dual), true);
+       if (server_dual == -1 || !get_port(server_dual, &addr4dual.sin_port))
+               goto err;
+
+       if (run_test(server, results, xdp,
+                    (const struct sockaddr *)&addr4, sizeof(addr4)))
                goto err;
 
-       if (run_test(server, results, xdp))
+       if (run_test(server_v6, results, xdp,
+                    (const struct sockaddr *)&addr6, sizeof(addr6)))
                goto err;
 
-       if (run_test(server_v6, results, xdp))
+       if (run_test(server_dual, results, xdp,
+                    (const struct sockaddr *)&addr4dual, sizeof(addr4dual)))
                goto err;
 
        printf("ok\n");
@@ -252,6 +291,7 @@ err:
 out:
        close(server);
        close(server_v6);
+       close(server_dual);
        close(results);
        return err;
 }
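
A self-contained sketch of the dual-stack trick the test now exercises: clearing IPV6_V6ONLY on an AF_INET6 socket bound to in6addr_any lets one listener accept both IPv4 and IPv6 clients (function name and error handling here are illustrative):

#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

static int listen_dual_stack(in_port_t port)
{
	struct sockaddr_in6 a6 = {
		.sin6_family = AF_INET6,
		.sin6_addr   = in6addr_any,
		.sin6_port   = port,
	};
	int off = 0;	/* 0 = dual-stack, 1 = IPv6-only */
	int fd = socket(AF_INET6, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off)) ||
	    bind(fd, (struct sockaddr *)&a6, sizeof(a6)) ||
	    listen(fd, 1)) {
		close(fd);
		return -1;
	}
	return fd;
}
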
index 429f7ee735cf4f1554ddd5bb2807aa60cebece41..fd23c80eba315a17d9d1ffdb38e6b0e9c2f70c91 100755 (executable)
@@ -159,6 +159,17 @@ flooding_remotes_add()
        local lsb
        local i
 
+       # Prevent unwanted packets from entering the bridge and interfering
+       # with the test.
+       tc qdisc add dev br0 clsact
+       tc filter add dev br0 egress protocol all pref 1 handle 1 \
+               matchall skip_hw action drop
+       tc qdisc add dev $h1 clsact
+       tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+               flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+       tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+               matchall skip_hw action drop
+
        for i in $(eval echo {1..$num_remotes}); do
                lsb=$((i + 1))
 
@@ -195,6 +206,12 @@ flooding_filters_del()
        done
 
        tc qdisc del dev $rp2 clsact
+
+       tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+       tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+       tc qdisc del dev $h1 clsact
+       tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+       tc qdisc del dev br0 clsact
 }
 
 flooding_check_packets()
index fedcb7b35af9f3f2f412ba6a1cadb56b3e35d71d..af5ea50ed5c0ecac41b22d844979f1df59ca106a 100755 (executable)
@@ -172,6 +172,17 @@ flooding_filters_add()
        local lsb
        local i
 
+       # Prevent unwanted packets from entering the bridge and interfering
+       # with the test.
+       tc qdisc add dev br0 clsact
+       tc filter add dev br0 egress protocol all pref 1 handle 1 \
+               matchall skip_hw action drop
+       tc qdisc add dev $h1 clsact
+       tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+               flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+       tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+               matchall skip_hw action drop
+
        tc qdisc add dev $rp2 clsact
 
        for i in $(eval echo {1..$num_remotes}); do
@@ -194,6 +205,12 @@ flooding_filters_del()
        done
 
        tc qdisc del dev $rp2 clsact
+
+       tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+       tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+       tc qdisc del dev $h1 clsact
+       tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+       tc qdisc del dev br0 clsact
 }
 
 flooding_check_packets()
index eaf8a04a7ca5f18633871b79eed97e9b3d9f3f40..10e54bcca7a938fbd2e6cd8d04339e0ad0bc71ac 100755 (executable)
@@ -190,7 +190,7 @@ setup_prepare()
 
        tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \
                protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \
-               action police rate 50mbit burst 64k \
+               action police rate 50mbit burst 64k conform-exceed drop/pipe \
                action goto chain $(IS2 1 0)
 }
 
index 11779405dc804d34ef8ac767ec5e22b6d2b5c0ba..25f4d54067c0ee243114bdf6733390a761d26b5b 100644 (file)
@@ -64,6 +64,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
+#include <setjmp.h>
 
 #include "kselftest.h"
 
                struct __test_metadata *_metadata, \
                struct __fixture_variant_metadata *variant) \
        { \
-               test_name(_metadata); \
+               _metadata->setup_completed = true; \
+               if (setjmp(_metadata->env) == 0) \
+                       test_name(_metadata); \
+               __test_check_assert(_metadata); \
        } \
        static struct __test_metadata _##test_name##_object = \
                { .name = #test_name, \
 #define FIXTURE_TEARDOWN(fixture_name) \
        void fixture_name##_teardown( \
                struct __test_metadata __attribute__((unused)) *_metadata, \
-               FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+               FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+               const FIXTURE_VARIANT(fixture_name) \
+                       __attribute__((unused)) *variant)
 
 /**
  * FIXTURE_VARIANT() - Optionally called once per fixture
  *       ...
  *     };
  *
- * Defines type of constant parameters provided to FIXTURE_SETUP() and TEST_F()
- * as *variant*. Variants allow the same tests to be run with different
- * arguments.
+ * Defines the type of constant parameters provided to FIXTURE_SETUP(),
+ * TEST_F() and FIXTURE_TEARDOWN() as *variant*. Variants allow the same
+ * tests to be run with different arguments.
  */
 #define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name
 
  * Defines a test that depends on a fixture (e.g., is part of a test case).
  * Very similar to TEST() except that *self* is the setup instance of fixture's
  * datatype exposed for use by the implementation.
- *
- * Warning: use of ASSERT_* here will skip TEARDOWN.
  */
-/* TODO(wad) register fixtures on dedicated test lists. */
 #define TEST_F(fixture_name, test_name) \
        __TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT)
 
                /* fixture data is alloced, setup, and torn down per call. */ \
                FIXTURE_DATA(fixture_name) self; \
                memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
-               fixture_name##_setup(_metadata, &self, variant->data); \
-               /* Let setup failure terminate early. */ \
-               if (!_metadata->passed) \
-                       return; \
-               fixture_name##_##test_name(_metadata, &self, variant->data); \
-               fixture_name##_teardown(_metadata, &self); \
+               if (setjmp(_metadata->env) == 0) { \
+                       fixture_name##_setup(_metadata, &self, variant->data); \
+                       /* Let setup failure terminate early. */ \
+                       if (!_metadata->passed) \
+                               return; \
+                       _metadata->setup_completed = true; \
+                       fixture_name##_##test_name(_metadata, &self, variant->data); \
+               } \
+               if (_metadata->setup_completed) \
+                       fixture_name##_teardown(_metadata, &self, variant->data); \
+               __test_check_assert(_metadata); \
        } \
        static struct __test_metadata \
                      _##fixture_name##_##test_name##_object = { \
  */
 #define OPTIONAL_HANDLER(_assert) \
        for (; _metadata->trigger; _metadata->trigger = \
-                       __bail(_assert, _metadata->no_print, _metadata->step))
+                       __bail(_assert, _metadata))
 
 #define __INC_STEP(_metadata) \
        /* Keep "step" below 255 (which is used for "SKIP" reporting). */       \
@@ -830,6 +838,9 @@ struct __test_metadata {
        bool timed_out; /* did this test timeout instead of exiting? */
        __u8 step;
        bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
+       bool aborted;   /* stopped test due to failed ASSERT */
+       bool setup_completed; /* did setup finish? */
+       jmp_buf env;    /* for exiting out of test early */
        struct __test_results *results;
        struct __test_metadata *prev, *next;
 };
@@ -848,16 +859,26 @@ static inline void __register_test(struct __test_metadata *t)
        __LIST_APPEND(t->fixture->tests, t);
 }
 
-static inline int __bail(int for_realz, bool no_print, __u8 step)
+static inline int __bail(int for_realz, struct __test_metadata *t)
 {
+       /* if this is an ASSERT, longjmp out of the test immediately. */
        if (for_realz) {
-               if (no_print)
-                       _exit(step);
-               abort();
+               t->aborted = true;
+               longjmp(t->env, 1);
        }
+       /* otherwise, end the for loop and continue. */
        return 0;
 }
 
+static inline void __test_check_assert(struct __test_metadata *t)
+{
+       if (t->aborted) {
+               if (t->no_print)
+                       _exit(t->step);
+               abort();
+       }
+}
+
 struct __test_metadata *__active_test;
 static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
 {
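
The heart of this harness change is replacing abort()-on-ASSERT with setjmp()/longjmp(), so teardown still runs before the failure is reported. A self-contained sketch of that control flow:

#include <setjmp.h>
#include <stdbool.h>
#include <stdio.h>

static jmp_buf env;
static bool aborted;

static void failing_assert(void)	/* what an ASSERT_* now does */
{
	aborted = true;
	longjmp(env, 1);		/* unwind out of the test body */
}

static void test_body(void)
{
	failing_assert();
	puts("never reached");
}

int main(void)
{
	if (setjmp(env) == 0)
		test_body();
	puts("teardown still runs");	/* unlike the old abort() path */
	return aborted ? 1 : 0;
}
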
index d1e8f523746976fac6b217ef0ef1a0ef61c7bd51..0b0e4402bba6ae13e66cc812b64f7a128a1ca4ab 100644 (file)
@@ -3,6 +3,7 @@
 /aarch64/debug-exceptions
 /aarch64/get-reg-list
 /aarch64/psci_cpu_on_test
+/aarch64/vcpu_width_config
 /aarch64/vgic_init
 /aarch64/vgic_irq
 /s390x/memop
@@ -33,6 +34,7 @@
 /x86_64/state_test
 /x86_64/svm_vmcall_test
 /x86_64/svm_int_ctl_test
+/x86_64/tsc_scaling_sync
 /x86_64/sync_regs_test
 /x86_64/tsc_msrs_test
 /x86_64/userspace_io_test
index 21c2dbd21a81cfff52576f32049c717c7ac63da6..681b173aa87c17e5d467071f59d23764b2023759 100644 (file)
@@ -106,6 +106,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
+TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
 TEST_GEN_PROGS_aarch64 += demand_paging_test
index b08d30bf71c513f443d665d5e30f368842d8d275..3b940a101bc0790cd27c5f18edd152cb5ff2b481 100644 (file)
@@ -362,11 +362,12 @@ static void test_init_timer_irq(struct kvm_vm *vm)
        pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
 }
 
+static int gic_fd;
+
 static struct kvm_vm *test_vm_create(void)
 {
        struct kvm_vm *vm;
        unsigned int i;
-       int ret;
        int nr_vcpus = test_args.nr_vcpus;
 
        vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
@@ -383,8 +384,8 @@ static struct kvm_vm *test_vm_create(void)
 
        ucall_init(vm, NULL);
        test_init_timer_irq(vm);
-       ret = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
-       if (ret < 0) {
+       gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+       if (gic_fd < 0) {
                print_skip("Failed to create vgic-v3");
                exit(KSFT_SKIP);
        }
@@ -395,6 +396,12 @@ static struct kvm_vm *test_vm_create(void)
        return vm;
 }
 
+static void test_vm_cleanup(struct kvm_vm *vm)
+{
+       close(gic_fd);
+       kvm_vm_free(vm);
+}
+
 static void test_print_help(char *name)
 {
        pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n",
@@ -478,7 +485,7 @@ int main(int argc, char *argv[])
 
        vm = test_vm_create();
        test_run(vm);
-       kvm_vm_free(vm);
+       test_vm_cleanup(vm);
 
        return 0;
 }
index f12147c43464e5c2a9cd7561e46c4459d48dd17d..0b571f3fe64ce8a36880e4a7460009efa1e4703f 100644 (file)
@@ -503,8 +503,13 @@ static void run_test(struct vcpu_config *c)
                ++missing_regs;
 
        if (new_regs || missing_regs) {
+               n = 0;
+               for_each_reg_filtered(i)
+                       ++n;
+
                printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
-               printf("%s: Number registers:         %5lld\n", config_name(c), reg_list->n);
+               printf("%s: Number registers:         %5lld (includes %lld filtered registers)\n",
+                      config_name(c), reg_list->n, reg_list->n - n);
        }
 
        if (new_regs) {
@@ -683,9 +688,10 @@ static __u64 base_regs[] = {
        KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
        KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
        KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
-       KVM_REG_ARM_FW_REG(0),
-       KVM_REG_ARM_FW_REG(1),
-       KVM_REG_ARM_FW_REG(2),
+       KVM_REG_ARM_FW_REG(0),          /* KVM_REG_ARM_PSCI_VERSION */
+       KVM_REG_ARM_FW_REG(1),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */
+       KVM_REG_ARM_FW_REG(2),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */
+       KVM_REG_ARM_FW_REG(3),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */
        ARM64_SYS_REG(3, 3, 14, 3, 1),  /* CNTV_CTL_EL0 */
        ARM64_SYS_REG(3, 3, 14, 3, 2),  /* CNTV_CVAL_EL0 */
        ARM64_SYS_REG(3, 3, 14, 0, 2),
diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
new file mode 100644 (file)
index 0000000..6e94026
--- /dev/null
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT.
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs
+ * or all 32bit vCPUs) can be configured and mixed-width vCPUs cannot be
+ * configured.
+ */
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+
+/*
+ * Add a vCPU, run KVM_ARM_VCPU_INIT with @init1, and then
+ * add another vCPU, and run KVM_ARM_VCPU_INIT with @init2.
+ */
+static int add_init_2vcpus(struct kvm_vcpu_init *init1,
+                          struct kvm_vcpu_init *init2)
+{
+       struct kvm_vm *vm;
+       int ret;
+
+       vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+
+       vm_vcpu_add(vm, 0);
+       ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1);
+       if (ret)
+               goto free_exit;
+
+       vm_vcpu_add(vm, 1);
+       ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2);
+
+free_exit:
+       kvm_vm_free(vm);
+       return ret;
+}
+
+/*
+ * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init1,
+ * and run KVM_ARM_VCPU_INIT for another vCPU with @init2.
+ */
+static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init1,
+                                 struct kvm_vcpu_init *init2)
+{
+       struct kvm_vm *vm;
+       int ret;
+
+       vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+
+       vm_vcpu_add(vm, 0);
+       vm_vcpu_add(vm, 1);
+
+       ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1);
+       if (ret)
+               goto free_exit;
+
+       ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2);
+
+free_exit:
+       kvm_vm_free(vm);
+       return ret;
+}
+
+/*
+ * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be
+ * configured, and two mixed-width vCPUs cannot be configured.
+ * For each of those three cases, vCPUs are configured in two different
+ * orders: one runs KVM_CREATE_VCPU for both vCPUs and then
+ * KVM_ARM_VCPU_INIT for them; the other runs KVM_CREATE_VCPU and
+ * KVM_ARM_VCPU_INIT for one vCPU, and then runs those commands for the
+ * other vCPU.
+ */
+int main(void)
+{
+       struct kvm_vcpu_init init1, init2;
+       struct kvm_vm *vm;
+       int ret;
+
+       if (!kvm_check_cap(KVM_CAP_ARM_EL1_32BIT)) {
+               print_skip("KVM_CAP_ARM_EL1_32BIT is not supported");
+               exit(KSFT_SKIP);
+       }
+
+       /* Get the preferred target type and copy that to init2 for later use */
+       vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+       vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init1);
+       kvm_vm_free(vm);
+       init2 = init1;
+
+       /* Test with 64bit vCPUs */
+       ret = add_init_2vcpus(&init1, &init1);
+       TEST_ASSERT(ret == 0,
+                   "Configuring 64bit EL1 vCPUs failed unexpectedly");
+       ret = add_2vcpus_init_2vcpus(&init1, &init1);
+       TEST_ASSERT(ret == 0,
+                   "Configuring 64bit EL1 vCPUs failed unexpectedly");
+
+       /* Test with 32bit vCPUs */
+       init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+       ret = add_init_2vcpus(&init1, &init1);
+       TEST_ASSERT(ret == 0,
+                   "Configuring 32bit EL1 vCPUs failed unexpectedly");
+       ret = add_2vcpus_init_2vcpus(&init1, &init1);
+       TEST_ASSERT(ret == 0,
+                   "Configuring 32bit EL1 vCPUs failed unexpectedly");
+
+       /* Test with mixed-width vCPUs  */
+       init1.features[0] = 0;
+       init2.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+       ret = add_init_2vcpus(&init1, &init2);
+       TEST_ASSERT(ret != 0,
+                   "Configuring mixed-width vCPUs worked unexpectedly");
+       ret = add_2vcpus_init_2vcpus(&init1, &init2);
+       TEST_ASSERT(ret != 0,
+                   "Configuring mixed-width vCPUs worked unexpectedly");
+
+       return 0;
+}
index c9d9e513ca04457a80f14c6ad2d039dbe677ae12..7b47ae4f952e68a5003092321814c3265d3bc08f 100644 (file)
 #include "test_util.h"
 #include "perf_test_util.h"
 #include "guest_modes.h"
+
 #ifdef __aarch64__
 #include "aarch64/vgic.h"
 
 #define GICD_BASE_GPA                  0x8000000ULL
 #define GICR_BASE_GPA                  0x80A0000ULL
+
+static int gic_fd;
+
+static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+       /*
+        * The test can still run even if hardware does not support GICv3, as it
+        * is only an optimization to reduce guest exits.
+        */
+       gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+}
+
+static void arch_cleanup_vm(struct kvm_vm *vm)
+{
+       if (gic_fd > 0)
+               close(gic_fd);
+}
+
+#else /* __aarch64__ */
+
+static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+}
+
+static void arch_cleanup_vm(struct kvm_vm *vm)
+{
+}
+
 #endif
 
 /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
@@ -206,9 +235,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
                vm_enable_cap(vm, &cap);
        }
 
-#ifdef __aarch64__
-       vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
-#endif
+       arch_setup_vm(vm, nr_vcpus);
 
        /* Start the iterations */
        iteration = 0;
@@ -302,6 +329,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
        }
 
        free_bitmaps(bitmaps, p->slots);
+       arch_cleanup_vm(vm);
        perf_test_destroy_vm(vm);
 }
 
index dc284c6bdbc3714e47d99cda508f5f1f820fc37d..eca5c622efd25b09732523b2b2962e488fb4ad3d 100644 (file)
@@ -101,7 +101,9 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id,
 #define PGTBL_PTE_WRITE_SHIFT                  2
 #define PGTBL_PTE_READ_MASK                    0x0000000000000002ULL
 #define PGTBL_PTE_READ_SHIFT                   1
-#define PGTBL_PTE_PERM_MASK                    (PGTBL_PTE_EXECUTE_MASK | \
+#define PGTBL_PTE_PERM_MASK                    (PGTBL_PTE_ACCESSED_MASK | \
+                                                PGTBL_PTE_DIRTY_MASK | \
+                                                PGTBL_PTE_EXECUTE_MASK | \
                                                 PGTBL_PTE_WRITE_MASK | \
                                                 PGTBL_PTE_READ_MASK)
 #define PGTBL_PTE_VALID_MASK                   0x0000000000000001ULL
index 37db341d4cc5c976e2796d4bed8cc37f2cf99b20..d0d51adec76eb88f12564fa38e52d0df2ba535f2 100644 (file)
 /* CPUID.0x8000_0001.EDX */
 #define CPUID_GBPAGES          (1ul << 26)
 
+/* Page table bitfield declarations */
+#define PTE_PRESENT_MASK        BIT_ULL(0)
+#define PTE_WRITABLE_MASK       BIT_ULL(1)
+#define PTE_USER_MASK           BIT_ULL(2)
+#define PTE_ACCESSED_MASK       BIT_ULL(5)
+#define PTE_DIRTY_MASK          BIT_ULL(6)
+#define PTE_LARGE_MASK          BIT_ULL(7)
+#define PTE_GLOBAL_MASK         BIT_ULL(8)
+#define PTE_NX_MASK             BIT_ULL(63)
+
+#define PAGE_SHIFT             12
+#define PAGE_SIZE              (1ULL << PAGE_SHIFT)
+#define PAGE_MASK              (~(PAGE_SIZE-1))
+
+#define PHYSICAL_PAGE_MASK      GENMASK_ULL(51, 12)
+#define PTE_GET_PFN(pte)        (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
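These flat masks take over from the bitfield structs removed later in this series, so a PTE is now composed and decomposed with plain integer operations. A small illustration using only the macros above, for a page-aligned paddr:

        uint64_t pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK |
                       (paddr & PHYSICAL_PAGE_MASK);
        uint64_t pfn = PTE_GET_PFN(pte);        /* paddr >> PAGE_SHIFT */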
+
 /* General Registers in 64-Bit Mode */
 struct gpr64_regs {
        u64 rax;
index ba1fdc3dcf4a90319f1a9d7cd8dd9bbeaaa5f5b4..2c4a7563a4f8adf4416f77c92c3fcf328711e51f 100644 (file)
@@ -278,7 +278,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
        else
                guest_test_phys_mem = p->phys_offset;
 #ifdef __s390x__
-       alignment = max(0x100000, alignment);
+       alignment = max(0x100000UL, alignment);
 #endif
        guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
 
index d377f2603d98a0b7be7fc1304a478dbcd8209164..3961487a4870db7b3f85e41e7dd44f681ad4d08e 100644 (file)
@@ -268,7 +268,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
                core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6);
 }
 
-static void guest_hang(void)
+static void __aligned(16) guest_hang(void)
 {
        while (1)
                ;
index 9f000dfb55949d04f547caa4ef4d13b44bd60d4c..33ea5e9955d9bddbe844a36cdbf886d6637f8a53 100644 (file)
 
 vm_vaddr_t exception_handlers;
 
-/* Virtual translation table structure declarations */
-struct pageUpperEntry {
-       uint64_t present:1;
-       uint64_t writable:1;
-       uint64_t user:1;
-       uint64_t write_through:1;
-       uint64_t cache_disable:1;
-       uint64_t accessed:1;
-       uint64_t ignored_06:1;
-       uint64_t page_size:1;
-       uint64_t ignored_11_08:4;
-       uint64_t pfn:40;
-       uint64_t ignored_62_52:11;
-       uint64_t execute_disable:1;
-};
-
-struct pageTableEntry {
-       uint64_t present:1;
-       uint64_t writable:1;
-       uint64_t user:1;
-       uint64_t write_through:1;
-       uint64_t cache_disable:1;
-       uint64_t accessed:1;
-       uint64_t dirty:1;
-       uint64_t reserved_07:1;
-       uint64_t global:1;
-       uint64_t ignored_11_09:3;
-       uint64_t pfn:40;
-       uint64_t ignored_62_52:11;
-       uint64_t execute_disable:1;
-};
-
 void regs_dump(FILE *stream, struct kvm_regs *regs,
               uint8_t indent)
 {
@@ -195,23 +163,21 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
        return &page_table[index];
 }
 
-static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
-                                                   uint64_t pt_pfn,
-                                                   uint64_t vaddr,
-                                                   uint64_t paddr,
-                                                   int level,
-                                                   enum x86_page_size page_size)
+static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+                                      uint64_t pt_pfn,
+                                      uint64_t vaddr,
+                                      uint64_t paddr,
+                                      int level,
+                                      enum x86_page_size page_size)
 {
-       struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
-
-       if (!pte->present) {
-               pte->writable = true;
-               pte->present = true;
-               pte->page_size = (level == page_size);
-               if (pte->page_size)
-                       pte->pfn = paddr >> vm->page_shift;
+       uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+
+       if (!(*pte & PTE_PRESENT_MASK)) {
+               *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+               if (level == page_size)
+                       *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
                else
-                       pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+                       *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
        } else {
                /*
                 * Entry already present.  Assert that the caller doesn't want
@@ -221,7 +187,7 @@ static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
                TEST_ASSERT(level != page_size,
                            "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
                            page_size, vaddr);
-               TEST_ASSERT(!pte->page_size,
+               TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
                            "Cannot create page table at level: %u, vaddr: 0x%lx\n",
                            level, vaddr);
        }
@@ -232,8 +198,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
                   enum x86_page_size page_size)
 {
        const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
-       struct pageUpperEntry *pml4e, *pdpe, *pde;
-       struct pageTableEntry *pte;
+       uint64_t *pml4e, *pdpe, *pde;
+       uint64_t *pte;
 
        TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
                    "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -257,24 +223,22 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
         */
        pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
                                      vaddr, paddr, 3, page_size);
-       if (pml4e->page_size)
+       if (*pml4e & PTE_LARGE_MASK)
                return;
 
-       pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
-       if (pdpe->page_size)
+       pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
+       if (*pdpe & PTE_LARGE_MASK)
                return;
 
-       pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
-       if (pde->page_size)
+       pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
+       if (*pde & PTE_LARGE_MASK)
                return;
 
        /* Fill in page table entry. */
-       pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
-       TEST_ASSERT(!pte->present,
+       pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
+       TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
                    "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
-       pte->pfn = paddr >> vm->page_shift;
-       pte->writable = true;
-       pte->present = 1;
+       *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
 }
 
 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -282,22 +246,22 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
        __virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
 }
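For reference, the pg_size expression works because the x86_page_size enum encodes the leaf level (4K/2M/1G as 0/1/2), so each level up the hierarchy scales the mapping by 512, i.e. 9 address bits:

        /* 1ull << (page_size * 9 + 12):
         *   X86_PAGE_SIZE_4K (0)  ->  1 << 12  =  4 KiB
         *   X86_PAGE_SIZE_2M (1)  ->  1 << 21  =  2 MiB
         *   X86_PAGE_SIZE_1G (2)  ->  1 << 30  =  1 GiB
         */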
 
-static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
+static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
                                                       uint64_t vaddr)
 {
        uint16_t index[4];
-       struct pageUpperEntry *pml4e, *pdpe, *pde;
-       struct pageTableEntry *pte;
+       uint64_t *pml4e, *pdpe, *pde;
+       uint64_t *pte;
        struct kvm_cpuid_entry2 *entry;
        struct kvm_sregs sregs;
        int max_phy_addr;
-       /* Set the bottom 52 bits. */
-       uint64_t rsvd_mask = 0x000fffffffffffff;
+       uint64_t rsvd_mask = 0;
 
        entry = kvm_get_supported_cpuid_index(0x80000008, 0);
        max_phy_addr = entry->eax & 0x000000ff;
-       /* Clear the bottom bits of the reserved mask. */
-       rsvd_mask = (rsvd_mask >> max_phy_addr) << max_phy_addr;
+       /* Set the high bits in the reserved mask. */
+       if (max_phy_addr < 52)
+               rsvd_mask = GENMASK_ULL(51, max_phy_addr);
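With CPUID 0x80000008.EAX[7:0] reporting the guest's MAXPHYADDR, every physical-address bit from that width up through bit 51 is reserved in a PTE, and GENMASK_ULL states that directly instead of the old shift-up/shift-down dance. For example, a guest reporting MAXPHYADDR = 36 gets:

        rsvd_mask = GENMASK_ULL(51, 36);        /* 0x000ffff000000000 */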
 
        /*
         * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
@@ -307,7 +271,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
         */
        vcpu_sregs_get(vm, vcpuid, &sregs);
        if ((sregs.efer & EFER_NX) == 0) {
-               rsvd_mask |= (1ull << 63);
+               rsvd_mask |= PTE_NX_MASK;
        }
 
        TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
@@ -329,30 +293,29 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
        index[3] = (vaddr >> 39) & 0x1ffu;
 
        pml4e = addr_gpa2hva(vm, vm->pgd);
-       TEST_ASSERT(pml4e[index[3]].present,
+       TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
                "Expected pml4e to be present for gva: 0x%08lx", vaddr);
-       TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
-               (rsvd_mask | (1ull << 7))) == 0,
+       TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0,
                "Unexpected reserved bits set.");
 
-       pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
-       TEST_ASSERT(pdpe[index[2]].present,
+       pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
+       TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
                "Expected pdpe to be present for gva: 0x%08lx", vaddr);
-       TEST_ASSERT(pdpe[index[2]].page_size == 0,
+       TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
                "Expected pdpe to map a pde not a 1-GByte page.");
-       TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
+       TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0,
                "Unexpected reserved bits set.");
 
-       pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
-       TEST_ASSERT(pde[index[1]].present,
+       pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
+       TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
                "Expected pde to be present for gva: 0x%08lx", vaddr);
-       TEST_ASSERT(pde[index[1]].page_size == 0,
+       TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
                "Expected pde to map a pte not a 2-MByte page.");
-       TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
+       TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0,
                "Unexpected reserved bits set.");
 
-       pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
-       TEST_ASSERT(pte[index[0]].present,
+       pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
+       TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
                "Expected pte to be present for gva: 0x%08lx", vaddr);
 
        return &pte[index[0]];
@@ -360,7 +323,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
 
 uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
 {
-       struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
+       uint64_t *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
 
        return *(uint64_t *)pte;
 }
@@ -368,18 +331,17 @@ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
 void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
                             uint64_t pte)
 {
-       struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
-                                                                 vaddr);
+       uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
 
        *(uint64_t *)new_pte = pte;
 }
 
 void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 {
-       struct pageUpperEntry *pml4e, *pml4e_start;
-       struct pageUpperEntry *pdpe, *pdpe_start;
-       struct pageUpperEntry *pde, *pde_start;
-       struct pageTableEntry *pte, *pte_start;
+       uint64_t *pml4e, *pml4e_start;
+       uint64_t *pdpe, *pdpe_start;
+       uint64_t *pde, *pde_start;
+       uint64_t *pte, *pte_start;
 
        if (!vm->pgd_created)
                return;
@@ -389,58 +351,58 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
        fprintf(stream, "%*s      index hvaddr         gpaddr         "
                "addr         w exec dirty\n",
                indent, "");
-       pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
+       pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
        for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
                pml4e = &pml4e_start[n1];
-               if (!pml4e->present)
+               if (!(*pml4e & PTE_PRESENT_MASK))
                        continue;
-               fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+               fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
                        " %u\n",
                        indent, "",
                        pml4e - pml4e_start, pml4e,
-                       addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
-                       pml4e->writable, pml4e->execute_disable);
+                       addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
+                       !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
 
-               pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
+               pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
                for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
                        pdpe = &pdpe_start[n2];
-                       if (!pdpe->present)
+                       if (!(*pdpe & PTE_PRESENT_MASK))
                                continue;
-                       fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
+                       fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10llx "
                                "%u  %u\n",
                                indent, "",
                                pdpe - pdpe_start, pdpe,
                                addr_hva2gpa(vm, pdpe),
-                               (uint64_t) pdpe->pfn, pdpe->writable,
-                               pdpe->execute_disable);
+                               PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
+                               !!(*pdpe & PTE_NX_MASK));
 
-                       pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
+                       pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
                        for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
                                pde = &pde_start[n3];
-                               if (!pde->present)
+                               if (!(*pde & PTE_PRESENT_MASK))
                                        continue;
                                fprintf(stream, "%*spde   0x%-3zx %p "
-                                       "0x%-12lx 0x%-10lx %u  %u\n",
+                                       "0x%-12lx 0x%-10llx %u  %u\n",
                                        indent, "", pde - pde_start, pde,
                                        addr_hva2gpa(vm, pde),
-                                       (uint64_t) pde->pfn, pde->writable,
-                                       pde->execute_disable);
+                                       PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
+                                       !!(*pde & PTE_NX_MASK));
 
-                               pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
+                               pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
                                for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
                                        pte = &pte_start[n4];
-                                       if (!pte->present)
+                                       if (!(*pte & PTE_PRESENT_MASK))
                                                continue;
                                        fprintf(stream, "%*spte   0x%-3zx %p "
-                                               "0x%-12lx 0x%-10lx %u  %u "
+                                               "0x%-12lx 0x%-10llx %u  %u "
                                                "    %u    0x%-10lx\n",
                                                indent, "",
                                                pte - pte_start, pte,
                                                addr_hva2gpa(vm, pte),
-                                               (uint64_t) pte->pfn,
-                                               pte->writable,
-                                               pte->execute_disable,
-                                               pte->dirty,
+                                               PTE_GET_PFN(*pte),
+                                               !!(*pte & PTE_WRITABLE_MASK),
+                                               !!(*pte & PTE_NX_MASK),
+                                               !!(*pte & PTE_DIRTY_MASK),
                                                ((uint64_t) n1 << 27)
                                                        | ((uint64_t) n2 << 18)
                                                        | ((uint64_t) n3 << 9)
@@ -558,8 +520,8 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
 vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 {
        uint16_t index[4];
-       struct pageUpperEntry *pml4e, *pdpe, *pde;
-       struct pageTableEntry *pte;
+       uint64_t *pml4e, *pdpe, *pde;
+       uint64_t *pte;
 
        TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -572,22 +534,22 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
        if (!vm->pgd_created)
                goto unmapped_gva;
        pml4e = addr_gpa2hva(vm, vm->pgd);
-       if (!pml4e[index[3]].present)
+       if (!(pml4e[index[3]] & PTE_PRESENT_MASK))
                goto unmapped_gva;
 
-       pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
-       if (!pdpe[index[2]].present)
+       pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
+       if (!(pdpe[index[2]] & PTE_PRESENT_MASK))
                goto unmapped_gva;
 
-       pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
-       if (!pde[index[1]].present)
+       pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
+       if (!(pde[index[1]] & PTE_PRESENT_MASK))
                goto unmapped_gva;
 
-       pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
-       if (!pte[index[0]].present)
+       pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
+       if (!(pte[index[0]] & PTE_PRESENT_MASK))
                goto unmapped_gva;
 
-       return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
+       return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK);
 
 unmapped_gva:
        TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
index 52a3ef6629e80610c2d9776f659869fd757c1341..76f65c22796f2e734f243e6d063e2036253c71df 100644 (file)
@@ -29,7 +29,6 @@
 #define X86_FEATURE_XSAVE              (1 << 26)
 #define X86_FEATURE_OSXSAVE            (1 << 27)
 
-#define PAGE_SIZE                      (1 << 12)
 #define NUM_TILES                      8
 #define TILE_SIZE                      1024
 #define XSAVE_SIZE                     ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
index f070ff0224fa3f88f1247273eb0a83c3e3e61613..aeb3850f81bd10a6d8474de9b5bfcdb14afb9528 100644 (file)
@@ -12,7 +12,6 @@
 #include "vmx.h"
 
 #define VCPU_ID           1
-#define PAGE_SIZE  4096
 #define MAXPHYADDR 36
 
 #define MEM_REGION_GVA 0x0000123456789000
index 0d06ffa95d9d4cdba158decf53bd17de479092f4..93d77574b255d863f9fc84a4702a10b6c7179b47 100644 (file)
@@ -208,7 +208,7 @@ static bool sanity_check_pmu(struct kvm_vm *vm)
        return success;
 }
 
-static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
+static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
 {
        struct kvm_pmu_event_filter *f;
        int size = sizeof(*f) + nevents * sizeof(f->events[0]);
@@ -220,19 +220,29 @@ static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
        return f;
 }
 
-static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+
+static struct kvm_pmu_event_filter *
+create_pmu_event_filter(const uint64_t event_list[],
+                       int nevents, uint32_t action)
 {
        struct kvm_pmu_event_filter *f;
        int i;
 
-       f = make_pmu_event_filter(ARRAY_SIZE(event_list));
+       f = alloc_pmu_event_filter(nevents);
        f->action = action;
-       for (i = 0; i < ARRAY_SIZE(event_list); i++)
+       for (i = 0; i < nevents; i++)
                f->events[i] = event_list[i];
 
        return f;
 }
 
+static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+{
+       return create_pmu_event_filter(event_list,
+                                      ARRAY_SIZE(event_list),
+                                      action);
+}
+
 /*
  * Remove the first occurrence of 'event' (if any) from the filter's
  * event list.
@@ -271,6 +281,22 @@ static uint64_t test_with_filter(struct kvm_vm *vm,
        return run_vm_to_sync(vm);
 }
 
+static void test_amd_deny_list(struct kvm_vm *vm)
+{
+       uint64_t event = EVENT(0x1C2, 0);
+       struct kvm_pmu_event_filter *f;
+       uint64_t count;
+
+       f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY);
+       count = test_with_filter(vm, f);
+
+       free(f);
+       if (count != NUM_BRANCHES)
+               pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+                       __func__, count, NUM_BRANCHES);
+       TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
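The deny-listed event is chosen to alias the measured branch event in its low byte. AMD event selects are 12 bits wide, so, assuming the test's EVENT(select, umask) encoding carries select[11:8] in the architectural high event-select field:

        /* 0x0C2 -> retired branch instructions (what the guest executes)
         * 0x1C2 -> a distinct event that collides with 0xC2 if KVM were to
         *          match only the low 8 select bits */

The assertion therefore checks that denying 0x1C2 does not stop the 0xC2 branch counter from counting.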
+
 static void test_member_deny_list(struct kvm_vm *vm)
 {
        struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
@@ -453,6 +479,9 @@ int main(int argc, char *argv[])
                exit(KSFT_SKIP);
        }
 
+       if (use_amd_pmu())
+               test_amd_deny_list(vm);
+
        test_without_filter(vm);
        test_member_deny_list(vm);
        test_member_allow_list(vm);
index a626d40fdb48940be29857b01d80820da447ff7d..b4e0c860769e456e44a3fcc01ffe45fc8af551e4 100644 (file)
@@ -21,8 +21,6 @@
 
 #define VCPU_ID              1
 
-#define PAGE_SIZE  4096
-
 #define SMRAM_SIZE 65536
 #define SMRAM_MEMSLOT ((1 << 16) | 1)
 #define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
index e683d0ac3e45e1841281e87078bfda7edc91b6b6..19b35c607dc66c4b9969f6816ca77fc2aafa1538 100644 (file)
@@ -32,7 +32,6 @@
 #define MSR_IA32_TSC_ADJUST 0x3b
 #endif
 
-#define PAGE_SIZE      4096
 #define VCPU_ID                5
 
 #define TSC_ADJUST_VALUE (1ll << 32)
index 865e17146815a6585d801a31abf4a63c853d1dff..bcd3708278593dd14585d32510d5cda20a98c677 100644 (file)
@@ -23,7 +23,6 @@
 #define SHINFO_REGION_GVA      0xc0000000ULL
 #define SHINFO_REGION_GPA      0xc0000000ULL
 #define SHINFO_REGION_SLOT     10
-#define PAGE_SIZE              4096
 
 #define DUMMY_REGION_GPA       (SHINFO_REGION_GPA + (2 * PAGE_SIZE))
 #define DUMMY_REGION_SLOT      11
index adc94452b57c6cda757213da12f6b27cec420ad5..b30fe9de1d4f6a31c286f8499356639639ca933e 100644 (file)
@@ -15,7 +15,6 @@
 
 #define HCALL_REGION_GPA       0xc0000000ULL
 #define HCALL_REGION_SLOT      10
-#define PAGE_SIZE              4096
 
 static struct kvm_vm *vm;
 
index b019e0b8221c7c0bf565d163e141d9e69d37014f..84fda3b490735faa7d3daeb05e58deca9f865f5f 100644 (file)
@@ -180,6 +180,9 @@ void shutdown(int exit_val, char *err_cause, int line_no)
        if (in_shutdown++)
                return;
 
+       /* Free the cpu_set allocated using CPU_ALLOC in the main function */
+       CPU_FREE(cpu_set);
+
        for (i = 0; i < num_cpus_to_pin; i++)
                if (cpu_threads[i]) {
                        pthread_kill(cpu_threads[i], SIGUSR1);
@@ -551,6 +554,12 @@ int main(int argc, char *argv[])
                perror("sysconf(_SC_NPROCESSORS_ONLN)");
                exit(1);
        }
+
+       if (getuid() != 0)
+               ksft_exit_skip("Not running as root, but almost all tests "
+                       "require root in order to modify\nsystem settings.  "
+                       "Exiting.\n");
+
        cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
        cpu_set = CPU_ALLOC(cpus_online);
        if (cpu_set == NULL) {
@@ -589,7 +598,7 @@ int main(int argc, char *argv[])
                                                cpu_set)) {
                                        fprintf(stderr, "Any given CPU may "
                                                "only be given once.\n");
-                                       exit(1);
+                                       goto err_code;
                                } else
                                        CPU_SET_S(cpus_to_pin[cpu],
                                                  cpu_set_size, cpu_set);
@@ -607,7 +616,7 @@ int main(int argc, char *argv[])
                                queue_path = malloc(strlen(option) + 2);
                                if (!queue_path) {
                                        perror("malloc()");
-                                       exit(1);
+                                       goto err_code;
                                }
                                queue_path[0] = '/';
                                queue_path[1] = 0;
@@ -622,17 +631,12 @@ int main(int argc, char *argv[])
                fprintf(stderr, "Must pass at least one CPU to continuous "
                        "mode.\n");
                poptPrintUsage(popt_context, stderr, 0);
-               exit(1);
+               goto err_code;
        } else if (!continuous_mode) {
                num_cpus_to_pin = 1;
                cpus_to_pin[0] = cpus_online - 1;
        }
 
-       if (getuid() != 0)
-               ksft_exit_skip("Not running as root, but almost all tests "
-                       "require root in order to modify\nsystem settings.  "
-                       "Exiting.\n");
-
        max_msgs = fopen(MAX_MSGS, "r+");
        max_msgsize = fopen(MAX_MSGSIZE, "r+");
        if (!max_msgs)
@@ -740,4 +744,9 @@ int main(int argc, char *argv[])
                        sleep(1);
        }
        shutdown(0, "", 0);
+
+err_code:
+       CPU_FREE(cpu_set);
+       exit(1);
+
 }
index 3fe2515aa616e915af630847c57207de1656c540..e1f998defd10745a916a125ced0a405d5e0f074c 100644 (file)
@@ -25,12 +25,13 @@ TEST_PROGS += bareudp.sh
 TEST_PROGS += amt.sh
 TEST_PROGS += unicast_extensions.sh
 TEST_PROGS += udpgro_fwd.sh
+TEST_PROGS += udpgro_frglist.sh
 TEST_PROGS += veth.sh
 TEST_PROGS += ioam6.sh
 TEST_PROGS += gro.sh
 TEST_PROGS += gre_gso.sh
 TEST_PROGS += cmsg_so_mark.sh
-TEST_PROGS += cmsg_time.sh
+TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
 TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -54,12 +55,15 @@ TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 TEST_GEN_FILES += toeplitz
 TEST_GEN_FILES += cmsg_sender
+TEST_PROGS += test_vxlan_vnifiltering.sh
 
 TEST_FILES := settings
 
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
+include bpf/Makefile
+
 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
 $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
 $(OUTPUT)/tcp_inq: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile
new file mode 100644 (file)
index 0000000..f91bf14
--- /dev/null
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CLANG ?= clang
+CCINCLUDE += -I../../bpf
+CCINCLUDE += -I../../../../../usr/include/
+
+TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o
+all: $(TEST_CUSTOM_PROGS)
+
+$(OUTPUT)/%.o: %.c
+       $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@
+
+clean:
+       rm -f $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/net/bpf/nat6to4.c b/tools/testing/selftests/net/bpf/nat6to4.c
new file mode 100644 (file)
index 0000000..ac54c36
--- /dev/null
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This code is taken from the Android Open Source Project and the author
+ * (Maciej Żenczykowski) has given permission to relicense it under the
+ * GPLv2. Therefore this program is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License version 2 as published by the Free Software
+ * Foundation.
+ *
+ * The original headers, including the original license headers, are
+ * included below for completeness.
+ *
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <linux/bpf.h>
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/swab.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IP_DF 0x4000  // Flag: "Don't Fragment"
+
+SEC("schedcls/ingress6/nat_6")
+int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb)
+{
+       const int l2_header_size =  sizeof(struct ethhdr);
+       void *data = (void *)(long)skb->data;
+       const void *data_end = (void *)(long)skb->data_end;
+       const struct ethhdr * const eth = data;  // used iff is_ethernet
+       const struct ipv6hdr * const ip6 =  (void *)(eth + 1);
+
+       // Require ethernet dst mac address to be our unicast address.
+       if  (skb->pkt_type != PACKET_HOST)
+               return TC_ACT_OK;
+
+       // Must be meta-ethernet IPv6 frame
+       if (skb->protocol != bpf_htons(ETH_P_IPV6))
+               return TC_ACT_OK;
+
+       // Must have (ethernet and) ipv6 header
+       if (data + l2_header_size + sizeof(*ip6) > data_end)
+               return TC_ACT_OK;
+
+       // Ethertype - if present - must be IPv6
+       if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+               return TC_ACT_OK;
+
+       // IP version must be 6
+       if (ip6->version != 6)
+               return TC_ACT_OK;
+       // Maximum IPv6 payload length that can be translated to IPv4
+       if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr))
+               return TC_ACT_OK;
+       switch (ip6->nexthdr) {
+       case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
+       case IPPROTO_UDP:  // address means there is no need to update their checksums.
+       case IPPROTO_GRE:  // We do not need to bother looking at GRE/ESP headers,
+       case IPPROTO_ESP:  // since there is never a checksum to update.
+               break;
+       default:  // do not know how to handle anything else
+               return TC_ACT_OK;
+       }
+
+       struct ethhdr eth2;  // used iff is_ethernet
+
+       eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
+       eth2.h_proto = bpf_htons(ETH_P_IP);  // But replace the ethertype
+
+       struct iphdr ip = {
+               .version = 4,                                                      // u4
+               .ihl = sizeof(struct iphdr) / sizeof(__u32),                       // u4
+               .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4),             // u8
+               .tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)),  // u16
+               .id = 0,                                                           // u16
+               .frag_off = bpf_htons(IP_DF),                                          // u16
+               .ttl = ip6->hop_limit,                                             // u8
+               .protocol = ip6->nexthdr,                                          // u8
+               .check = 0,                                                        // u16
+               .saddr = 0x0201a8c0,                 // u32: 192.168.1.2 (little-endian host)
+               .daddr = 0x0101a8c0,                 // u32: 192.168.1.1 (little-endian host)
+       };
+
+       // Calculate the IPv4 one's complement checksum of the IPv4 header.
+       __wsum sum4 = 0;
+
+       for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i)
+               sum4 += ((__u16 *)&ip)[i];
+
+       // Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4
+       sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
+       sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
+       ip.check = (__u16)~sum4;                // sum4 cannot be zero, so this is never 0xFFFF
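A worked instance of the two folds, assuming the accumulator ended up as sum4 = 0x2FFFE:

        // fold 1: (0x2FFFE & 0xFFFF) + (0x2FFFE >> 16) = 0xFFFE + 0x2 = 0x10000
        // fold 2: (0x10000 & 0xFFFF) + (0x10000 >> 16) = 0x0000 + 0x1 = 0x1
        // ip.check = (__u16)~0x1 = 0xFFFE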
+
+       // Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header.
+       __wsum sum6 = 0;
+       // We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits)
+       for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i)
+               sum6 += ~((__u16 *)ip6)[i];  // note the bitwise negation
+
+       // Note that there is no L4 checksum update: we are relying on the checksum neutrality
+       // of the ipv6 address chosen by netd's ClatdController.
+
+       // Packet mutations begin - point of no return, but if this first modification fails
+       // the packet is probably still pristine, so let clatd handle it.
+       if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0))
+               return TC_ACT_OK;
+       bpf_csum_update(skb, sum6);
+
+       data = (void *)(long)skb->data;
+       data_end = (void *)(long)skb->data_end;
+       if (data + l2_header_size + sizeof(struct iphdr) > data_end)
+               return TC_ACT_SHOT;
+
+       struct ethhdr *new_eth = data;
+
+       // Copy over the updated ethernet header
+       *new_eth = eth2;
+
+       // Copy over the new ipv4 header.
+       *(struct iphdr *)(new_eth + 1) = ip;
+       return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+SEC("schedcls/egress4/snat4")
+int sched_cls_egress4_snat4_prog(struct __sk_buff *skb)
+{
+       const int l2_header_size =  sizeof(struct ethhdr);
+       void *data = (void *)(long)skb->data;
+       const void *data_end = (void *)(long)skb->data_end;
+       const struct ethhdr *const eth = data;  // used iff is_ethernet
+       const struct iphdr *const ip4 = (void *)(eth + 1);
+
+       // Must be meta-ethernet IPv4 frame
+       if (skb->protocol != bpf_htons(ETH_P_IP))
+               return TC_ACT_OK;
+
+       // Must have ipv4 header
+       if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end)
+               return TC_ACT_OK;
+
+       // Ethertype - if present - must be IPv4
+       if (eth->h_proto != bpf_htons(ETH_P_IP))
+               return TC_ACT_OK;
+
+       // IP version must be 4
+       if (ip4->version != 4)
+               return TC_ACT_OK;
+
+       // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
+       if (ip4->ihl != 5)
+               return TC_ACT_OK;
+
+       // Maximum IPv4 total length that can still be translated to IPv6
+       if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr))
+               return TC_ACT_OK;
+
+       // Calculate the IPv4 one's complement checksum of the IPv4 header.
+       __wsum sum4 = 0;
+
+       for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i)
+               sum4 += ((__u16 *)ip4)[i];
+
+       // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4
+       sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
+       sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
+       // for a correct checksum we should get *a* zero, but sum4 must be positive, i.e. 0xFFFF
+       if (sum4 != 0xFFFF)
+               return TC_ACT_OK;
+
+       // Minimum IPv4 total length is the size of the header
+       if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4))
+               return TC_ACT_OK;
+
+       // We are incapable of dealing with IPv4 fragments
+       if (ip4->frag_off & ~bpf_htons(IP_DF))
+               return TC_ACT_OK;
+
+       switch (ip4->protocol) {
+       case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
+       case IPPROTO_GRE:  // address means there is no need to update their checksums.
+       case IPPROTO_ESP:  // We do not need to bother looking at GRE/ESP headers,
+               break;         // since there is never a checksum to update.
+
+       case IPPROTO_UDP:  // See above comment, but must also have UDP header...
+               if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end)
+                       return TC_ACT_OK;
+               const struct udphdr *uh = (const struct udphdr *)(ip4 + 1);
+               // If IPv4/UDP checksum is 0 then fall back to clatd so it can calculate the
+               // checksum.  Otherwise the network or more likely the NAT64 gateway might
+               // drop the packet because in most cases IPv6/UDP packets with a zero checksum
+               // are invalid. See RFC 6935.  TODO: calculate checksum via bpf_csum_diff()
+               if (!uh->check)
+                       return TC_ACT_OK;
+               break;
+
+       default:  // do not know how to handle anything else
+               return TC_ACT_OK;
+       }
+       struct ethhdr eth2;  // used iff is_ethernet
+
+       eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
+       eth2.h_proto = bpf_htons(ETH_P_IPV6);  // But replace the ethertype
+
+       struct ipv6hdr ip6 = {
+               .version = 6,                                    // __u8:4
+               .priority = ip4->tos >> 4,                       // __u8:4
+               .flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0},       // __u8[3]
+               .payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20),  // __be16
+               .nexthdr = ip4->protocol,                        // __u8
+               .hop_limit = ip4->ttl,                           // __u8
+       };
+       ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+       ip6.saddr.in6_u.u6_addr32[1] = 0;
+       ip6.saddr.in6_u.u6_addr32[2] = 0;
+       ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1);
+       ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+       ip6.daddr.in6_u.u6_addr32[1] = 0;
+       ip6.daddr.in6_u.u6_addr32[2] = 0;
+       ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2);
+
+       // Calculate the IPv6 16-bit one's complement checksum of the IPv6 header.
+       __wsum sum6 = 0;
+       // We'll end up with a non-zero sum due to ip6.version == 6
+       for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i)
+               sum6 += ((__u16 *)&ip6)[i];
+
+       // Packet mutations begin - point of no return, but if this first modification fails
+       // the packet is probably still pristine, so let clatd handle it.
+       if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
+               return TC_ACT_OK;
+
+       // This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet.
+       // In such a case, skb->csum is a 16-bit one's complement sum of the entire payload,
+       // thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum.
+       // However, we've already verified the ipv4 checksum is correct and thus 0.
+       // Thus we only need to add the ipv6 header's sum.
+       //
+       // bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error
+       // (-ENOTSUPP) if it isn't.  So we just ignore the return code (see above for more details).
+       bpf_csum_update(skb, sum6);
+
+       // bpf_skb_change_proto() invalidates all pointers - reload them.
+       data = (void *)(long)skb->data;
+       data_end = (void *)(long)skb->data_end;
+
+       // I cannot think of any valid way for this error condition to trigger;
+       // however, I do believe the explicit check is required to keep the
+       // in-kernel eBPF verifier happy.
+       if (data + l2_header_size + sizeof(ip6) > data_end)
+               return TC_ACT_SHOT;
+
+       struct ethhdr *new_eth = data;
+
+       // Copy over the updated ethernet header
+       *new_eth = eth2;
+       // Copy over the new ipv6 header.
+       *(struct ipv6hdr *)(new_eth + 1) = ip6;
+       return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = ("GPL");
index 47c4d4b4a44af6b549536a94328da30cdd2a4bb1..54701c8b0cd7062716317bd803f238e26d72fb21 100755 (executable)
@@ -810,10 +810,16 @@ ipv4_ping()
        setup
        set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
        ipv4_ping_novrf
+       setup
+       set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+       ipv4_ping_novrf
 
        log_subsection "With VRF"
        setup "yes"
        ipv4_ping_vrf
+       setup "yes"
+       set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+       ipv4_ping_vrf
 }
 
 ################################################################################
@@ -2348,10 +2354,16 @@ ipv6_ping()
        log_subsection "No VRF"
        setup
        ipv6_ping_novrf
+       setup
+       set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+       ipv6_ping_novrf
 
        log_subsection "With VRF"
        setup "yes"
        ipv6_ping_vrf
+       setup "yes"
+       set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+       ipv6_ping_vrf
 }
 
 ################################################################################
index d444ee6aa3cb0f45725434fa03b248c01f311872..b3bf5319bb0e505b471e6c7798e85962411e920b 100755 (executable)
@@ -1208,6 +1208,20 @@ ipv4_fcnal()
        set +e
        check_nexthop "dev veth1" ""
        log_test $? 0 "Nexthops removed on admin down"
+
+       # nexthop route delete warning: route add with nhid and delete
+       # using device
+       run_cmd "$IP li set dev veth1 up"
+       run_cmd "$IP nexthop add id 12 via 172.16.1.3 dev veth1"
+       out1=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l`
+       run_cmd "$IP route add 172.16.101.1/32 nhid 12"
+       run_cmd "$IP route delete 172.16.101.1/32 dev veth1"
+       out2=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l`
+       [ $out1 -eq $out2 ]
+       rc=$?
+       log_test $rc 0 "Delete nexthop route warning"
+       run_cmd "$IP route delete 172.16.101.1/32 nhid 12"
+       run_cmd "$IP nexthop del id 12"
 }
 
 ipv4_grp_fcnal()
index 8fa97ae9af9eecdd8053aceb120dcd1cdffbcc3e..e811090f77483d1dd9370f28cd296e8c74b3db71 100644 (file)
@@ -2,15 +2,31 @@
 
 TEST_PROGS = bridge_igmp.sh \
        bridge_locked_port.sh \
+       bridge_mld.sh \
        bridge_port_isolation.sh \
        bridge_sticky_fdb.sh \
        bridge_vlan_aware.sh \
+       bridge_vlan_mcast.sh \
        bridge_vlan_unaware.sh \
+       custom_multipath_hash.sh \
+       dual_vxlan_bridge.sh \
+       ethtool_extended_state.sh \
        ethtool.sh \
+       gre_custom_multipath_hash.sh \
        gre_inner_v4_multipath.sh \
        gre_inner_v6_multipath.sh \
+       gre_multipath_nh_res.sh \
+       gre_multipath_nh.sh \
        gre_multipath.sh \
+       hw_stats_l3.sh \
        ip6_forward_instats_vrf.sh \
+       ip6gre_custom_multipath_hash.sh \
+       ip6gre_flat_key.sh \
+       ip6gre_flat_keys.sh \
+       ip6gre_flat.sh \
+       ip6gre_hier_key.sh \
+       ip6gre_hier_keys.sh \
+       ip6gre_hier.sh \
        ip6gre_inner_v4_multipath.sh \
        ip6gre_inner_v6_multipath.sh \
        ipip_flat_gre_key.sh \
@@ -34,36 +50,53 @@ TEST_PROGS = bridge_igmp.sh \
        mirror_gre_vlan_bridge_1q.sh \
        mirror_gre_vlan.sh \
        mirror_vlan.sh \
+       pedit_dsfield.sh \
+       pedit_ip.sh \
+       pedit_l4port.sh \
+       q_in_vni_ipv6.sh \
+       q_in_vni.sh \
        router_bridge.sh \
        router_bridge_vlan.sh \
        router_broadcast.sh \
+       router_mpath_nh_res.sh \
        router_mpath_nh.sh \
        router_multicast.sh \
        router_multipath.sh \
+       router_nh.sh \
        router.sh \
        router_vid_1.sh \
        sch_ets.sh \
+       sch_red.sh \
        sch_tbf_ets.sh \
        sch_tbf_prio.sh \
        sch_tbf_root.sh \
+       skbedit_priority.sh \
        tc_actions.sh \
        tc_chains.sh \
        tc_flower_router.sh \
        tc_flower.sh \
        tc_mpls_l2vpn.sh \
+       tc_police.sh \
        tc_shblocks.sh \
        tc_vlan_modify.sh \
+       vxlan_asymmetric_ipv6.sh \
        vxlan_asymmetric.sh \
+       vxlan_bridge_1d_ipv6.sh \
+       vxlan_bridge_1d_port_8472_ipv6.sh \
        vxlan_bridge_1d_port_8472.sh \
        vxlan_bridge_1d.sh \
+       vxlan_bridge_1q_ipv6.sh \
+       vxlan_bridge_1q_port_8472_ipv6.sh \
        vxlan_bridge_1q_port_8472.sh \
        vxlan_bridge_1q.sh \
+       vxlan_symmetric_ipv6.sh \
        vxlan_symmetric.sh
 
 TEST_PROGS_EXTENDED := devlink_lib.sh \
        ethtool_lib.sh \
        fib_offload_lib.sh \
        forwarding.config.sample \
+       ip6gre_lib.sh \
        ipip_lib.sh \
        lib.sh \
        mirror_gre_lib.sh \
index a3402cd8d5b68d6445ce078651f0f4884ed69d72..9ff22f28032ddd8b2786f319e3638f000f42da4a 100755 (executable)
@@ -61,9 +61,12 @@ setup_prepare()
 
        vrf_prepare
        mirror_gre_topo_create
+       # Avoid changing br1's PVID while it is operational as a L3 interface.
+       ip link set dev br1 down
 
        ip link set dev $swp3 master br1
        bridge vlan add dev br1 vid 555 pvid untagged self
+       ip link set dev br1 up
        ip address add dev br1 192.0.2.129/28
        ip address add dev br1 2001:db8:2::1/64
 
index 7314257d248a73cb61035416d740cfeba1ef29a7..48ef112f42c2e7a92d8ed45f394b6dac481153c9 100755 (executable)
@@ -1444,6 +1444,33 @@ chk_prio_nr()
        [ "${dump_stats}" = 1 ] && dump_stats
 }
 
+chk_subflow_nr()
+{
+       local need_title="$1"
+       local msg="$2"
+       local subflow_nr=$3
+       local cnt1
+       local cnt2
+
+       if [ -n "${need_title}" ]; then
+               printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}"
+       else
+               printf "%-${nr_blank}s %s" " " "${msg}"
+       fi
+
+       cnt1=$(ss -N $ns1 -tOni | grep -c token)
+       cnt2=$(ss -N $ns2 -tOni | grep -c token)
+       if [ "$cnt1" != "$subflow_nr" -o "$cnt2" != "$subflow_nr" ]; then
+               echo "[fail] got $cnt1:$cnt2 subflows expected $subflow_nr"
+               fail_test
+               dump_stats=1
+       else
+               echo "[ ok ]"
+       fi
+
+       [ "${dump_stats}" = 1 ] && ( ss -N $ns1 -tOni ; ss -N $ns1 -tOni | grep token; ip -n $ns1 mptcp endpoint )
+}
+
 chk_link_usage()
 {
        local ns=$1
@@ -2556,7 +2583,7 @@ fastclose_tests()
        fi
 }
 
-implicit_tests()
+endpoint_tests()
 {
        # userspace pm type prevents add_addr
        if reset "implicit EP"; then
@@ -2578,6 +2605,23 @@ implicit_tests()
                        $ns2 10.0.2.2 id 1 flags signal
                wait
        fi
+
+       if reset "delete and re-add"; then
+               pm_nl_set_limits $ns1 1 1
+               pm_nl_set_limits $ns2 1 1
+               pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
+               run_tests $ns1 $ns2 10.0.1.1 4 0 0 slow &
+
+               wait_mpj $ns2
+               pm_nl_del_endpoint $ns2 2 10.0.2.2
+               sleep 0.5
+               chk_subflow_nr needtitle "after delete" 1
+
+               pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+               wait_mpj $ns2
+               chk_subflow_nr "" "after re-add" 2
+               wait
+       fi
 }
 
 # [$1: error message]
@@ -2624,7 +2668,7 @@ all_tests_sorted=(
        d@deny_join_id0_tests
        m@fullmesh_tests
        z@fastclose_tests
-       I@implicit_tests
+       I@endpoint_tests
 )
 
 all_tests_args=""
index 59067f64b775384161a4586ba97eb435b4d88bcd..2672ac0b6d1f38c6112cee59da859b9a259ef165 100644 (file)
@@ -421,7 +421,7 @@ static void usage(const char *progname)
                        "Options:\n"
                        "  -4            only IPv4\n"
                        "  -6            only IPv6\n"
-                       "  -c <clock>    monotonic (default) or tai\n"
+                       "  -c <clock>    monotonic or tai (default)\n"
                        "  -D <addr>     destination IP address (server)\n"
                        "  -S <addr>     source IP address (client)\n"
                        "  -r            run rx mode\n"
@@ -475,7 +475,7 @@ static void parse_opts(int argc, char **argv)
                        cfg_rx = true;
                        break;
                case 't':
-                       cfg_start_time_ns = strtol(optarg, NULL, 0);
+                       cfg_start_time_ns = strtoll(optarg, NULL, 0);
                        break;
                case 'm':
                        cfg_mark = strtol(optarg, NULL, 0);
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
new file mode 100755 (executable)
index 0000000..807b74c
--- /dev/null
@@ -0,0 +1,101 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgro benchmarks
+
+readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+
+cleanup() {
+       local -r jobs="$(jobs -p)"
+       local -r ns="$(ip netns list|grep $PEER_NS)"
+
+       [ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null
+       [ -n "$ns" ] && ip netns del $ns 2>/dev/null
+}
+trap cleanup EXIT
+
+run_one() {
+       # use 'rx' as separator between sender args and receiver args
+       local -r all="$@"
+       local -r tx_args=${all%rx*}
+       local rx_args=${all#*rx}
+
+       ip netns add "${PEER_NS}"
+       ip -netns "${PEER_NS}" link set lo up
+       ip link add type veth
+       ip link set dev veth0 up
+       ip addr add dev veth0 192.168.1.2/24
+       ip addr add dev veth0 2001:db8::2/64 nodad
+
+       ip link set dev veth1 netns "${PEER_NS}"
+       ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
+       ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
+       ip -netns "${PEER_NS}" link set dev veth1 up
+       ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on
+
+       ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+       tc -n "${PEER_NS}" qdisc add dev veth1 clsact
+       tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6  direct-action
+       tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action
+       echo ${rx_args}
+       ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
+
+       # Hack: let bg programs complete the startup
+       sleep 0.1
+       ./udpgso_bench_tx ${tx_args}
+}
+
+run_in_netns() {
+       local -r args=$@
+       echo ${args}
+       ./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+       local -r args=$@
+
+       echo "udp gso - over veth touching data"
+       run_in_netns ${args} -u -S 0 rx -4 -v
+
+       echo "udp gso and gro - over veth touching data"
+       run_in_netns ${args} -S 0 rx -4 -G
+}
+
+run_tcp() {
+       local -r args=$@
+
+       echo "tcp - over veth touching data"
+       run_in_netns ${args} -t rx -4 -t
+}
+
+run_all() {
+       local -r core_args="-l 4"
+       local -r ipv4_args="${core_args} -4  -D 192.168.1.1"
+       local -r ipv6_args="${core_args} -6  -D 2001:db8::1"
+
+       echo "ipv6"
+       run_tcp "${ipv6_args}"
+       run_udp "${ipv6_args}"
+}
+
+if [ ! -f ../bpf/xdp_dummy.o ]; then
+       echo "Missing xdp_dummy helper. Build bpf selftest first"
+       exit -1
+fi
+
+if [ ! -f bpf/nat6to4.o ]; then
+       echo "Missing nat6to4 helper. Build bpf/nat6to4.o selftest first"
+       exit -1
+fi
+
+if [[ $# -eq 0 ]]; then
+       run_all
+elif [[ $1 == "__subprocess" ]]; then
+       shift
+       run_one $@
+else
+       run_in_netns $@
+fi
index dcaefa224ca01f724d0f5549507a75cf0d55bcaf..edafaca1aeb39a8ac19aeca05040d1c6ebae3c95 100644 (file)
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS += -g -I../../../../usr/include/
 
-TEST_GEN_PROGS := regression_enomem
+TEST_GEN_PROGS = regression_enomem
 
-include ../lib.mk
+LOCAL_HDRS += $(selfdir)/pidfd/pidfd.h
 
-$(OUTPUT)/regression_enomem: regression_enomem.c ../pidfd/pidfd.h
+include ../lib.mk
index 17999e082aa715525f013f194b278e1bdf6786ad..070c1c876df15146d59ebab18353e62ad5c87362 100644 (file)
@@ -95,7 +95,6 @@ TEST(wait_states)
                .flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
                .exit_signal = SIGCHLD,
        };
-       int ret;
        pid_t pid;
        siginfo_t info = {
                .si_signo = 0,
index 18a3bde8bc961eaf36c222cee19f00973c011640..28604c9f805c757bd108b1ef0b269e907539d5b1 100644 (file)
@@ -46,6 +46,8 @@
 #include <sys/time.h>
 #include <sys/resource.h>
 
+#include "../kselftest.h"
+
 static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
 {
        return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);
@@ -368,7 +370,7 @@ int main(void)
                };
                int i;
 
-               for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) {
+               for (i = 0; i < ARRAY_SIZE(S); i++) {
                        assert(memmem(buf, rv, S[i], strlen(S[i])));
                }
 
@@ -417,7 +419,7 @@ int main(void)
                };
                int i;
 
-               for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) {
+               for (i = 0; i < ARRAY_SIZE(S); i++) {
                        assert(memmem(buf, rv, S[i], strlen(S[i])));
                }
        }
index c35ba24f994c36b82b94e7536d8715fc677b599c..66d0414d8e4bc3e9fba3ddb394697e825cce9624 100644 (file)
@@ -301,7 +301,7 @@ specify_qemu_cpus () {
                        echo $2 -smp $3
                        ;;
                qemu-system-ppc64)
-                       nt="`lscpu | grep '^NUMA node0' | sed -e 's/^[^,]*,\([0-9]*\),.*$/\1/'`"
+                       nt="`lscpu | sed -n 's/^Thread(s) per core:\s*//p'`"
                        echo $2 -smp cores=`expr \( $3 + $nt - 1 \) / $nt`,threads=$nt
                        ;;
                esac
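The replacement derives the SMT width directly from lscpu and sizes the guest in whole cores with a ceiling division: for example, requesting $3 = 5 CPUs on a host with nt = 2 threads per core gives expr (5 + 2 - 1) / 2 = 3, i.e. "-smp cores=3,threads=2" and thus 6 vCPUs.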
index 5f682fc892dd490570d5746e0aea4381be36aa36..88983cba795636cc41b9c872f1ee4f7ba7ad2505 100755 (executable)
@@ -36,7 +36,7 @@ do
        then
                egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
                files="$files $i.diags $i"
-       elif ! test -f ${scenariobasedir}/vmlinux
+       elif ! test -f ${scenariobasedir}/vmlinux && ! test -f "${rundir}/re-run"
        then
                echo No ${scenariobasedir}/vmlinux file > $i.diags
                files="$files $i.diags $i"
index 0a5419982ab3ead8b0986d787b356ddbd07ac169..0789c5606d2abb347e8f93efbeefc88f60c04b3c 100755 (executable)
@@ -33,7 +33,12 @@ do
                TORTURE_SUITE="`cat $i/../torture_suite`"
                configfile=`echo $i | sed -e 's,^.*/,,'`
                rm -f $i/console.log.*.diags
-               kvm-recheck-${TORTURE_SUITE}.sh $i
+               case "${TORTURE_SUITE}" in
+               X*)
+                       ;;
+               *)
+                       kvm-recheck-${TORTURE_SUITE}.sh $i
+               esac
                if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137
                then
                        echo QEMU error, output:
index 8c4c1e4792d02c9714ad802aabe59d9dae971f37..0ff59bd8b640df201d4a3ffa8b031e61a3ffe53c 100755 (executable)
@@ -138,14 +138,14 @@ chmod +x $T/bin/kvm-remote-*.sh
 # Check first to avoid the need for cleanup for system-name typos
 for i in $systems
 do
-       ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
-       echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
+       ncpus="`ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
        ret=$?
        if test "$ret" -ne 0
        then
                echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
                exit 4
        fi
+       echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
 done
 
 # Download and expand the tarball on all systems.
@@ -153,14 +153,14 @@ echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log"
 for i in $systems
 do
        echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
-       cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
+       cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
        ret=$?
        tries=0
        while test "$ret" -ne 0
        do
                echo Unable to download $T/binres.tgz to system $i, waiting and then retrying.  $tries prior retries. | tee -a "$oldrun/remote-log"
                sleep 60
-               cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
+               cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
                ret=$?
                if test "$ret" -ne 0
                then
@@ -185,7 +185,7 @@ checkremotefile () {
 
        while :
        do
-               ssh $1 "test -f \"$2\""
+               ssh -o BatchMode=yes $1 "test -f \"$2\""
                ret=$?
                if test "$ret" -eq 255
                then
@@ -228,7 +228,7 @@ startbatches () {
                then
                        continue # System still running last test, skip.
                fi
-               ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
+               ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
                ret=$?
                if test "$ret" -ne 0
                then
@@ -267,7 +267,7 @@ do
                sleep 30
        done
        echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
-       ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
+       ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
 done
 
 ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
index 55b2c153328274bf8a1d469ad978060ad7c621a0..263e16aeca0e4e7b745dda2020716c5460cbc517 100755 (executable)
@@ -44,6 +44,7 @@ TORTURE_KCONFIG_KASAN_ARG=""
 TORTURE_KCONFIG_KCSAN_ARG=""
 TORTURE_KMAKE_ARG=""
 TORTURE_QEMU_MEM=512
+torture_qemu_mem_default=1
 TORTURE_REMOTE=
 TORTURE_SHUTDOWN_GRACE=180
 TORTURE_SUITE=rcu
@@ -86,7 +87,7 @@ usage () {
        echo "       --remote"
        echo "       --results absolute-pathname"
        echo "       --shutdown-grace seconds"
-       echo "       --torture lock|rcu|rcuscale|refscale|scf"
+       echo "       --torture lock|rcu|rcuscale|refscale|scf|X*"
        echo "       --trust-make"
        exit 1
 }
@@ -180,6 +181,10 @@ do
                ;;
        --kasan)
                TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+               if test -n "$torture_qemu_mem_default"
+               then
+                       TORTURE_QEMU_MEM=2G
+               fi
                ;;
        --kconfig|--kconfigs)
                checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
@@ -202,6 +207,7 @@ do
        --memory)
                checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error
                TORTURE_QEMU_MEM=$2
+               torture_qemu_mem_default=
                shift
                ;;
        --no-initrd)
@@ -231,7 +237,7 @@ do
                shift
                ;;
        --torture)
-               checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--'
+               checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\|X.*\)$' '^--'
                TORTURE_SUITE=$2
                TORTURE_MOD="`echo $TORTURE_SUITE | sed -e 's/^\(lock\|rcu\|scf\)$/\1torture/'`"
                shift
index bfe09e2829c8d0f723e2455eca6faa3f2b19c117..d477618e7261df5a5ad430e5fea7e4f3bd73c48d 100755 (executable)
@@ -54,6 +54,7 @@ do_kvfree=yes
 do_kasan=yes
 do_kcsan=no
 do_clocksourcewd=yes
+do_rt=yes
 
 # doyesno - Helper function for yes/no arguments
 function doyesno () {
@@ -82,6 +83,7 @@ usage () {
        echo "       --do-rcuscale / --do-no-rcuscale"
        echo "       --do-rcutorture / --do-no-rcutorture"
        echo "       --do-refscale / --do-no-refscale"
+       echo "       --do-rt / --do-no-rt"
        echo "       --do-scftorture / --do-no-scftorture"
        echo "       --duration [ <minutes> | <hours>h | <days>d ]"
        echo "       --kcsan-kmake-arg kernel-make-arguments"
@@ -118,6 +120,7 @@ do
                do_scftorture=yes
                do_rcuscale=yes
                do_refscale=yes
+               do_rt=yes
                do_kvfree=yes
                do_kasan=yes
                do_kcsan=yes
@@ -148,6 +151,7 @@ do
                do_scftorture=no
                do_rcuscale=no
                do_refscale=no
+               do_rt=no
                do_kvfree=no
                do_kasan=no
                do_kcsan=no
@@ -162,6 +166,9 @@ do
        --do-refscale|--do-no-refscale)
                do_refscale=`doyesno "$1" --do-refscale`
                ;;
+       --do-rt|--do-no-rt)
+               do_rt=`doyesno "$1" --do-rt`
+               ;;
        --do-scftorture|--do-no-scftorture)
                do_scftorture=`doyesno "$1" --do-scftorture`
                ;;
@@ -322,6 +329,7 @@ then
        echo " --- make clean" > "$amcdir/Make.out" 2>&1
        make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1
        echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1
+       cp .config $amcdir
        make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1
        echo " --- make " >> "$amcdir/Make.out" 2>&1
        make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1
@@ -350,8 +358,19 @@ fi
 
 if test "$do_scftorture" = "yes"
 then
-       torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot"
-       torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
+       torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
+       torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
+fi
+
+if test "$do_rt" = "yes"
+then
+       # With all post-boot grace periods forced to normal.
+       torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1"
+       torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
+
+       # With all post-boot grace periods forced to expedited.
+       torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1"
+       torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
 fi
 
 if test "$do_refscale" = yes
@@ -363,7 +382,7 @@ fi
 for prim in $primlist
 do
        torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
-       torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
+       torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
 done
 
 if test "$do_rcuscale" = yes
@@ -375,13 +394,13 @@ fi
 for prim in $primlist
 do
        torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot"
-       torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+       torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
 done
 
 if test "$do_kvfree" = "yes"
 then
        torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot"
-       torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
+       torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
 fi
 
 if test "$do_clocksourcewd" = "yes"
index 7093422050f66a43e160e59d9f6dfc147f6789a2..6fd6acb94518270503ea55f37526611ab6312d1c 100644 (file)
@@ -8,3 +8,5 @@ CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
 CONFIG_RCU_EXPERT=y
+CONFIG_FORCE_TASKS_RUDE_RCU=y
+#CHECK#CONFIG_TASKS_RUDE_RCU=y
index 2da8b49589a0330332d7e064e995e176dc765b2c..07f5e0a70ae705cdef00bcb97a9dc0e35077b17a 100644 (file)
@@ -6,3 +6,5 @@ CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
 #CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
index 3ca112444ce7791caf82c50dd592b896e428583c..d84801b9a7aed77a0ee538868d429ecc61fc77cf 100644 (file)
@@ -7,4 +7,5 @@ CONFIG_PREEMPT=y
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
+CONFIG_TASKS_RCU=y
 CONFIG_RCU_EXPERT=y
index ad2be91e5ee7624e95df63885f70dbbc833afb64..2f9fcffff5ae3c4b48939bcad0bb7ed4e3090717 100644 (file)
@@ -2,3 +2,7 @@ CONFIG_SMP=n
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_RCU=y
+CONFIG_RCU_EXPERT=y
index cd2a188eeb6d986034e40d91359341944a1d2716..b9b6d67cbc5f6af55b8a6758f686c406369a99b4 100644 (file)
@@ -1 +1,2 @@
 rcutorture.torture_type=tasks
+rcutorture.stat_interval=60
index dc02083803ce574a769e93d230e87b078e583f87..dea26c5686784953650b6a233358d8b1958c7e55 100644 (file)
@@ -7,3 +7,5 @@ CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
 #CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_TASKS_RCU=y
+CONFIG_RCU_EXPERT=y
index e4d74e5fc1d09bae1622a4f966458f253d14c034..85b407467454a2e98072ff234d9396e450ec01d1 100644 (file)
@@ -4,8 +4,11 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_PROVE_LOCKING=n
 #CHECK#CONFIG_PROVE_RCU=n
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
 CONFIG_TASKS_TRACE_RCU_READ_MB=y
 CONFIG_RCU_EXPERT=y
index 77541eeb4e9fa1c6f8a955609c73b6ef4068fd24..093ea6e8e65cdf924ecaea7abb69f12d510ec4e8 100644 (file)
@@ -7,5 +7,7 @@ CONFIG_PREEMPT=y
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
 CONFIG_TASKS_TRACE_RCU_READ_MB=n
 CONFIG_RCU_EXPERT=y
index 22ad0261728d0158ec3a4c5e18027ebe91fae4db..ae395981b5e5e7c03499bd3d9140c6de8b034690 100644 (file)
@@ -1,8 +1,9 @@
 CONFIG_SMP=y
 CONFIG_NR_CPUS=8
-CONFIG_PREEMPT_NONE=y
-CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 #CHECK#CONFIG_TREE_RCU=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
index 2789b47e4ecd7da1b6f37d368595b693a5ad2773..d30922d8c88323ce6e4175dbc67c761a0af36238 100644 (file)
@@ -3,6 +3,7 @@ CONFIG_NR_CPUS=16
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 #CHECK#CONFIG_TREE_RCU=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
index 8523a7515cbf817659a2a023d9da9578e09cf82b..fc45645bb5f421c1c0ca122fa66af430e5d5c8e5 100644 (file)
@@ -13,3 +13,5 @@ CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 #CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
index 4a00539bfdd713cacfc3c14769a640efe16254ec..a323d8948b7cf13bcf5fbf489995f308f0369256 100644 (file)
@@ -3,6 +3,7 @@ CONFIG_NR_CPUS=56
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 #CHECK#CONFIG_TREE_RCU=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=y
index effa415f9b9282880083d83a2ecadf996711b331..e2bc99c785e75a9da38e2a11511c0a99424e4372 100644 (file)
@@ -9,7 +9,7 @@
 
 # rcutorture_param_n_barrier_cbs bootparam-string
 #
-# Adds n_barrier_cbs rcutorture module parameter to kernels having it.
+# Adds n_barrier_cbs rcutorture module parameter if not already specified.
 rcutorture_param_n_barrier_cbs () {
        if echo $1 | grep -q "rcutorture\.n_barrier_cbs"
        then
@@ -30,13 +30,25 @@ rcutorture_param_onoff () {
        fi
 }
 
+# rcutorture_param_stat_interval bootparam-string
+#
+# Adds stat_interval rcutorture module parameter if not already specified.
+rcutorture_param_stat_interval () {
+       if echo $1 | grep -q "rcutorture\.stat_interval"
+       then
+               :
+       else
+               echo rcutorture.stat_interval=15
+       fi
+}
+
 # per_version_boot_params bootparam-string config-file seconds
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
        echo $1 `rcutorture_param_onoff "$1" "$2"` \
                `rcutorture_param_n_barrier_cbs "$1"` \
-               rcutorture.stat_interval=15 \
+               `rcutorture_param_stat_interval "$1"` \
                rcutorture.shutdown_secs=$3 \
                rcutorture.test_no_idle_hz=1 \
                rcutorture.verbose=1
index 90942bb5bebc505940461a38e32cfdb0122c295f..6a00157bee5b1755a39f7cf7a6d49c10b5114aa3 100644 (file)
@@ -1,5 +1,6 @@
 CONFIG_RCU_SCALE_TEST=y
 CONFIG_PRINTK_TIME=y
-CONFIG_TASKS_RCU_GENERIC=y
-CONFIG_TASKS_RCU=y
-CONFIG_TASKS_TRACE_RCU=y
+CONFIG_FORCE_TASKS_RCU=y
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
index f110d9ffbe4cb1e41db98b25cca92fc1d336cf17..b10706fd03a45089993133eb3ec2aafc04128c28 100644 (file)
@@ -16,3 +16,5 @@ CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
 CONFIG_RCU_TRACE=y
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
index a98b58b54bb11d5fe2297bb6e96fd6bd39a2eb28..fbea3b13baba414d11fbb4514cb20b397d42b523 100644 (file)
@@ -1,2 +1,6 @@
 CONFIG_RCU_REF_SCALE_TEST=y
 CONFIG_PRINTK_TIME=y
+CONFIG_FORCE_TASKS_RCU=y
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
index 7f06838a91e6103acbb362dfa6d9b22a1df7b1d0..ef2b501a697101e03e8f70146cbc1492e31d1b6d 100644 (file)
@@ -15,3 +15,5 @@ CONFIG_PROVE_LOCKING=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
index b8429d6c6ebc320c0428ced7e6115ef9f5d5ec01..3a59346b3de78f8d10caab213d10df38d672c5fb 100644 (file)
@@ -7,3 +7,5 @@ CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_PROVE_LOCKING=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
index ae4992b141b06d7fcc0fa9e116ca606d3b0797ce..cb37e08037d6e5121313a5323f4d240ef9e47093 100644 (file)
@@ -7,3 +7,4 @@ CONFIG_NO_HZ_IDLE=y
 CONFIG_NO_HZ_FULL=n
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
+CONFIG_RCU_EXPERT=y
index d3d9e35d3d55a88460046fe3f7bee12f87cfeddd..2d949e58f5a5dfd2e7b267be5110686f2c869570 100644 (file)
@@ -25,6 +25,5 @@ per_version_boot_params () {
        echo $1 `scftorture_param_onoff "$1" "$2"` \
                scftorture.stat_interval=15 \
                scftorture.shutdown_secs=$3 \
-               scftorture.verbose=1 \
-               scf
+               scftorture.verbose=1
 }
index 9d126d7fabdb7fe2862ea34118470b7eaa89a24b..313bb0cbfb1eb96479148b7fc94461c745a01299 100644 (file)
@@ -955,7 +955,7 @@ TEST(ERRNO_valid)
        ASSERT_EQ(0, ret);
 
        EXPECT_EQ(parent, syscall(__NR_getppid));
-       EXPECT_EQ(-1, read(0, NULL, 0));
+       EXPECT_EQ(-1, read(-1, NULL, 0));
        EXPECT_EQ(E2BIG, errno);
 }
 
@@ -974,7 +974,7 @@ TEST(ERRNO_zero)
 
        EXPECT_EQ(parent, syscall(__NR_getppid));
        /* "errno" of 0 is ok. */
-       EXPECT_EQ(0, read(0, NULL, 0));
+       EXPECT_EQ(0, read(-1, NULL, 0));
 }
 
 /*
@@ -995,7 +995,7 @@ TEST(ERRNO_capped)
        ASSERT_EQ(0, ret);
 
        EXPECT_EQ(parent, syscall(__NR_getppid));
-       EXPECT_EQ(-1, read(0, NULL, 0));
+       EXPECT_EQ(-1, read(-1, NULL, 0));
        EXPECT_EQ(4095, errno);
 }
 
@@ -1026,7 +1026,7 @@ TEST(ERRNO_order)
        ASSERT_EQ(0, ret);
 
        EXPECT_EQ(parent, syscall(__NR_getppid));
-       EXPECT_EQ(-1, read(0, NULL, 0));
+       EXPECT_EQ(-1, read(-1, NULL, 0));
        EXPECT_EQ(12, errno);
 }
 
@@ -2623,7 +2623,7 @@ void *tsync_sibling(void *data)
        ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
        if (!ret)
                return (void *)SIBLING_EXIT_NEWPRIVS;
-       read(0, NULL, 0);
+       read(-1, NULL, 0);
        return (void *)SIBLING_EXIT_UNKILLED;
 }
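The seccomp hunks above change every probe syscall from read(0, NULL, 0) to read(-1, NULL, 0). File descriptor 0 may be open (and a zero-length read on a valid fd legitimately returns 0), so reading stdin made the expected result depend on the test's environment; fd -1 is never valid, so the unfiltered syscall fails deterministically, while a SECCOMP_RET_ERRNO filter still intercepts it before fd validation and returns the filter-chosen errno. A standalone sketch of the unfiltered behavior:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            ssize_t r = read(-1, NULL, 0); /* fd -1 is never a valid descriptor */
            /* Expected: read=-1 errno=Bad file descriptor (EBADF) */
            printf("read=%zd errno=%s\n", r, strerror(errno));
            return 0;
    }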
 
index c4aea794725a7e502b1334efdfdc512446b40c52..e691a3cf14911237190c048f658830078dc90c3b 100644 (file)
@@ -20,6 +20,7 @@
 #include <limits.h>
 
 #include "vdso_config.h"
+#include "../kselftest.h"
 
 static const char **name;
 
@@ -306,10 +307,8 @@ static void test_clock_gettime(void)
                return;
        }
 
-       for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
-            clock++) {
+       for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++)
                test_one_clock_gettime(clock, clocknames[clock]);
-       }
 
        /* Also test some invalid clock ids */
        test_one_clock_gettime(-1, "invalid");
@@ -370,10 +369,8 @@ static void test_clock_gettime64(void)
                return;
        }
 
-       for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
-            clock++) {
+       for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++)
                test_one_clock_gettime64(clock, clocknames[clock]);
-       }
 
        /* Also test some invalid clock ids */
        test_one_clock_gettime64(-1, "invalid");
index 04a49e876a46c346d01b4d1be0a78042551a8937..5b1ecd00695b362983e953495520051c8f8a7d65 100644 (file)
@@ -57,9 +57,9 @@ CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_prog
 CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
 CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
 
-TARGETS := protection_keys
-BINARIES_32 := $(TARGETS:%=%_32)
-BINARIES_64 := $(TARGETS:%=%_64)
+VMTARGETS := protection_keys
+BINARIES_32 := $(VMTARGETS:%=%_32)
+BINARIES_64 := $(VMTARGETS:%=%_64)
 
 ifeq ($(CAN_BUILD_WITH_NOPIE),1)
 CFLAGS += -no-pie
@@ -112,7 +112,7 @@ $(BINARIES_32): CFLAGS += -m32 -mxsave
 $(BINARIES_32): LDLIBS += -lrt -ldl -lm
 $(BINARIES_32): $(OUTPUT)/%_32: %.c
        $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t))))
 endif
 
 ifeq ($(CAN_BUILD_X86_64),1)
@@ -120,7 +120,7 @@ $(BINARIES_64): CFLAGS += -m64 -mxsave
 $(BINARIES_64): LDLIBS += -lrt -ldl
 $(BINARIES_64): $(OUTPUT)/%_64: %.c
        $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t))))
 endif
 
 # x86_64 users should be encouraged to install 32-bit libraries
index 7c0b0617b9f85554d6831f15ce5691b9e0082ff7..db0270127aeb041ab4e53f5f4dcdf7c372a22422 100644 (file)
@@ -6,9 +6,11 @@
 
 #include <errno.h>
 #include <stdlib.h>
+#include <stdio.h>
 #include <string.h>
 #include <sys/mman.h>
 #include <time.h>
+#include <stdbool.h>
 
 #include "../kselftest.h"
 
@@ -63,6 +65,59 @@ enum {
        .expect_failure = should_fail                           \
 }
 
+/*
+ * Returns false if the requested remap region overlaps with an
+ * existing mapping (e.g., text, stack); otherwise returns true.
+ */
+static bool is_remap_region_valid(void *addr, unsigned long long size)
+{
+       void *remap_addr = NULL;
+       bool ret = true;
+
+       /* Use MAP_FIXED_NOREPLACE flag to ensure region is not mapped */
+       remap_addr = mmap(addr, size, PROT_READ | PROT_WRITE,
+                                        MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+                                        -1, 0);
+
+       if (remap_addr == MAP_FAILED) {
+               if (errno == EEXIST)
+                       ret = false;
+       } else {
+               munmap(remap_addr, size);
+       }
+
+       return ret;
+}
+
+/* Returns mmap_min_addr sysctl tunable from procfs */
+static unsigned long long get_mmap_min_addr(void)
+{
+       FILE *fp;
+       int n_matched;
+       static unsigned long long addr;
+
+       if (addr)
+               return addr;
+
+       fp = fopen("/proc/sys/vm/mmap_min_addr", "r");
+       if (fp == NULL) {
+               ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n",
+                       strerror(errno));
+               exit(KSFT_SKIP);
+       }
+
+       n_matched = fscanf(fp, "%llu", &addr);
+       if (n_matched != 1) {
+               ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n",
+                       strerror(errno));
+               fclose(fp);
+               exit(KSFT_SKIP);
+       }
+
+       fclose(fp);
+       return addr;
+}
+
 /*
  * Returns the start address of the mapping on success, else returns
  * NULL on failure.
@@ -71,11 +126,18 @@ static void *get_source_mapping(struct config c)
 {
        unsigned long long addr = 0ULL;
        void *src_addr = NULL;
+       unsigned long long mmap_min_addr;
+
+       mmap_min_addr = get_mmap_min_addr();
+
 retry:
        addr += c.src_alignment;
+       if (addr < mmap_min_addr)
+               goto retry;
+
        src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
-                       MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
-                       -1, 0);
+                                       MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+                                       -1, 0);
        if (src_addr == MAP_FAILED) {
                if (errno == EPERM || errno == EEXIST)
                        goto retry;
@@ -90,8 +152,10 @@ retry:
         * alignment in the tests.
         */
        if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
-                       !((unsigned long long) src_addr & c.src_alignment))
+                       !((unsigned long long) src_addr & c.src_alignment)) {
+               munmap(src_addr, c.region_size);
                goto retry;
+       }
 
        if (!src_addr)
                goto error;
@@ -140,9 +204,20 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
        if (!((unsigned long long) addr & c.dest_alignment))
                addr = (void *) ((unsigned long long) addr | c.dest_alignment);
 
+       /* Don't destroy existing mappings unless expected to overlap */
+       while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) {
+               /* Check for unsigned overflow */
+               if (addr + c.dest_alignment < addr) {
+                       ksft_print_msg("Couldn't find a valid region to remap to\n");
+                       ret = -1;
+                       goto out;
+               }
+               addr += c.dest_alignment;
+       }
+
        clock_gettime(CLOCK_MONOTONIC, &t_start);
        dest_addr = mremap(src_addr, c.region_size, c.region_size,
-                       MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
+                                         MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
        clock_gettime(CLOCK_MONOTONIC, &t_end);
 
        if (dest_addr == MAP_FAILED) {
@@ -193,7 +268,7 @@ static void run_mremap_test_case(struct test test_case, int *failures,
 
        if (remap_time < 0) {
                if (test_case.expect_failure)
-                       ksft_test_result_pass("%s\n\tExpected mremap failure\n",
+                       ksft_test_result_xfail("%s\n\tExpected mremap failure\n",
                                              test_case.name);
                else {
                        ksft_test_result_fail("%s\n", test_case.name);
index 3b265f140c25c205130208f416659e84eaf2f937..352ba00cf26b03dc5abb9494ff463c15530549d1 100755 (executable)
@@ -291,11 +291,16 @@ echo "-------------------"
 echo "running mremap_test"
 echo "-------------------"
 ./mremap_test
-if [ $? -ne 0 ]; then
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+       echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+        echo "[SKIP]"
+        exitcode=$ksft_skip
+else
        echo "[FAIL]"
        exitcode=1
-else
-       echo "[PASS]"
 fi
 
 echo "-----------------"
index 8a9461aa0878a0b6ea74fc9ec48f370846e39397..69c7796c7ca92e80943210f3c4972f813000dbd9 100755 (executable)
 # interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further
 # details on how this is accomplished.
 set -e
+shopt -s extglob
 
 exec 3>&1
 export LANG=C
 export WG_HIDE_KEYS=never
+NPROC=( /sys/devices/system/cpu/cpu+([0-9]) ); NPROC=${#NPROC[@]}
 netns0="wg-test-$$-0"
 netns1="wg-test-$$-1"
 netns2="wg-test-$$-2"
@@ -143,17 +145,15 @@ tests() {
        n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
 
        # TCP over IPv4, in parallel
-       for max in 4 5 50; do
-               local pids=( )
-               for ((i=0; i < max; ++i)) do
-                       n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
-                       pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
-               done
-               for ((i=0; i < max; ++i)) do
-                       n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
-               done
-               wait "${pids[@]}"
+       local pids=( ) i
+       for ((i=0; i < NPROC; ++i)) do
+               n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
+               pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
        done
+       for ((i=0; i < NPROC; ++i)) do
+               n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
+       done
+       wait "${pids[@]}"
 }
 
 [[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
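The hunk above sizes the parallel iperf3 run to the machine's CPU count, derived by globbing /sys/devices/system/cpu/cpu+([0-9]) with extglob enabled. For reference, the same figure is available in C via sysconf (a sketch; _SC_NPROCESSORS_CONF counts configured CPUs like the sysfs glob, _SC_NPROCESSORS_ONLN only online ones):

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            long nproc = sysconf(_SC_NPROCESSORS_CONF); /* configured CPUs */
            printf("NPROC=%ld\n", nproc);
            return 0;
    }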
@@ -280,7 +280,19 @@ read _ _ tx_bytes_before < <(n0 wg show wg1 transfer)
 ! n0 ping -W 1 -c 10 -f 192.168.241.2 || false
 sleep 1
 read _ _ tx_bytes_after < <(n0 wg show wg1 transfer)
-(( tx_bytes_after - tx_bytes_before < 70000 ))
+if ! (( tx_bytes_after - tx_bytes_before < 70000 )); then
+       errstart=$'\x1b[37m\x1b[41m\x1b[1m'
+       errend=$'\x1b[0m'
+       echo "${errstart}                                                ${errend}"
+       echo "${errstart}                   E  R  R  O  R                ${errend}"
+       echo "${errstart}                                                ${errend}"
+       echo "${errstart} This architecture does not do the right thing  ${errend}"
+       echo "${errstart} with cross-namespace routing loops. This test  ${errend}"
+       echo "${errstart} has thus technically failed but, as this issue ${errend}"
+       echo "${errstart} is as yet unsolved, these tests will continue  ${errend}"
+       echo "${errstart} onward. :(                                     ${errend}"
+       echo "${errstart}                                                ${errend}"
+fi
 
 ip0 link del wg1
 ip1 link del wg0
index bfa15e6feb2ff07455145ced03c8a4918fe9221e..42ab9d72b37b301083fcff237d1acf8173aaebaa 100644 (file)
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 build/
 distfiles/
+ccache/
index 4bdd6c1a19d355b7d41c0a6e12282eafa4f955c5..bca07b93eeb0769dc993de5b0a980c40fdd77385 100644 (file)
@@ -4,26 +4,24 @@
 
 PWD := $(shell pwd)
 
-CHOST := $(shell gcc -dumpmachine)
-HOST_ARCH := $(firstword $(subst -, ,$(CHOST)))
-ifneq (,$(ARCH))
-CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))
-ifeq (,$(CBUILD))
-$(error The toolchain for $(ARCH) is not installed)
-endif
-else
-CBUILD := $(CHOST)
-ARCH := $(firstword $(subst -, ,$(CBUILD)))
-endif
-
 # Set these from the environment to override
 KERNEL_PATH ?= $(PWD)/../../../../..
 BUILD_PATH ?= $(PWD)/build/$(ARCH)
 DISTFILES_PATH ?= $(PWD)/distfiles
 NR_CPUS ?= 4
+ARCH ?=
+CBUILD := $(shell gcc -dumpmachine)
+HOST_ARCH := $(firstword $(subst -, ,$(CBUILD)))
+ifeq ($(ARCH),)
+ARCH := $(HOST_ARCH)
+endif
 
 MIRROR := https://download.wireguard.com/qemu-test/distfiles/
 
+KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
+rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
+WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
+
 default: qemu
 
 # variable name, tarball project name, version, tarball extension, default URI base
@@ -36,42 +34,33 @@ $(call file_download,$$($(1)_NAME)$(4),$(5),$(6))
 endef
 
 define file_download =
-$(DISTFILES_PATH)/$(1):
+$(DISTFILES_PATH)/$(1): | $(4)
        mkdir -p $(DISTFILES_PATH)
-       flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3)  $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
+       flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if ([ -n "$(4)" ] && sed -n "s#^\([a-f0-9]\{64\}\)  \($(1)\)\$$$$#\1  $(DISTFILES_PATH)/\2.tmp#p" "$(4)" || echo "$(3)  $$@.tmp") | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
 endef
 
-$(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8))
-$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
-$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
-$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692))
-$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
-$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
+$(eval $(call tar_download,IPERF,iperf,3.11,.tar.gz,https://downloads.es.net/pub/iperf/,de8cb409fad61a0574f4cb07eb19ce1159707403ac2dc01b5d175e91240b7e5f))
+$(eval $(call tar_download,BASH,bash,5.1.16,.tar.gz,https://ftp.gnu.org/gnu/bash/,5bac17218d3911834520dad13cd1f85ab944e1c09ae1aba55906be1f8192f558))
+$(eval $(call tar_download,IPROUTE2,iproute2,5.17.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,bda331d5c4606138892f23a565d78fca18919b4d508a0b7ca8391c2da2db68b9))
+$(eval $(call tar_download,IPTABLES,iptables,1.8.7,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,c109c96bb04998cd44156622d36f8e04b140701ec60531a10668cfdff5e8d8f0))
+$(eval $(call tar_download,NMAP,nmap,7.92,.tgz,https://nmap.org/dist/,064183ea642dc4c12b1ab3b5358ce1cef7d2e7e11ffa2849f16d339f5b717117))
 $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
-$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64))
-
-KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
-rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
-WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
-
-export CFLAGS ?= -O3 -pipe
-export LDFLAGS ?=
-export CPPFLAGS := -I$(BUILD_PATH)/include
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20210914,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,97ff31489217bb265b7ae850d3d0f335ab07d2652ba1feec88b734bc96bd05ac))
 
+export CFLAGS := -O3 -pipe
 ifeq ($(HOST_ARCH),$(ARCH))
-CROSS_COMPILE_FLAG := --host=$(CHOST)
 CFLAGS += -march=native
-STRIP := strip
-else
-$(info Cross compilation: building for $(CBUILD) using $(CHOST))
-CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
-export CROSS_COMPILE=$(CBUILD)-
-STRIP := $(CBUILD)-strip
 endif
+export LDFLAGS :=
+export CPPFLAGS :=
+
+QEMU_VPORT_RESULT :=
 ifeq ($(ARCH),aarch64)
+CHOST := aarch64-linux-musl
 QEMU_ARCH := aarch64
 KERNEL_ARCH := arm64
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
 else
@@ -79,9 +68,11 @@ QEMU_MACHINE := -cpu cortex-a53 -machine virt
 CFLAGS += -march=armv8-a -mtune=cortex-a53
 endif
 else ifeq ($(ARCH),aarch64_be)
+CHOST := aarch64_be-linux-musl
 QEMU_ARCH := aarch64
 KERNEL_ARCH := arm64
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
 else
@@ -89,9 +80,11 @@ QEMU_MACHINE := -cpu cortex-a53 -machine virt
 CFLAGS += -march=armv8-a -mtune=cortex-a53
 endif
 else ifeq ($(ARCH),arm)
+CHOST := arm-linux-musleabi
 QEMU_ARCH := arm
 KERNEL_ARCH := arm
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+QEMU_VPORT_RESULT := virtio-serial-device
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
 else
@@ -99,9 +92,11 @@ QEMU_MACHINE := -cpu cortex-a15 -machine virt
 CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux
 endif
 else ifeq ($(ARCH),armeb)
+CHOST := armeb-linux-musleabi
 QEMU_ARCH := arm
 KERNEL_ARCH := arm
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+QEMU_VPORT_RESULT := virtio-serial-device
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
 else
@@ -110,6 +105,7 @@ CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due
 LDFLAGS += -Wl,--be8
 endif
 else ifeq ($(ARCH),x86_64)
+CHOST := x86_64-linux-musl
 QEMU_ARCH := x86_64
 KERNEL_ARCH := x86_64
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
@@ -120,6 +116,7 @@ QEMU_MACHINE := -cpu Skylake-Server -machine q35
 CFLAGS += -march=skylake-avx512
 endif
 else ifeq ($(ARCH),i686)
+CHOST := i686-linux-musl
 QEMU_ARCH := i386
 KERNEL_ARCH := x86
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
@@ -130,6 +127,7 @@ QEMU_MACHINE := -cpu coreduo -machine q35
 CFLAGS += -march=prescott
 endif
 else ifeq ($(ARCH),mips64)
+CHOST := mips64-linux-musl
 QEMU_ARCH := mips64
 KERNEL_ARCH := mips
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -141,6 +139,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
 CFLAGS += -march=mips64r2 -EB
 endif
 else ifeq ($(ARCH),mips64el)
+CHOST := mips64el-linux-musl
 QEMU_ARCH := mips64el
 KERNEL_ARCH := mips
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -152,6 +151,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
 CFLAGS += -march=mips64r2 -EL
 endif
 else ifeq ($(ARCH),mips)
+CHOST := mips-linux-musl
 QEMU_ARCH := mips
 KERNEL_ARCH := mips
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -163,6 +163,7 @@ QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
 CFLAGS += -march=mips32r2 -EB
 endif
 else ifeq ($(ARCH),mipsel)
+CHOST := mipsel-linux-musl
 QEMU_ARCH := mipsel
 KERNEL_ARCH := mips
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -173,7 +174,18 @@ else
 QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
 CFLAGS += -march=mips32r2 -EL
 endif
+else ifeq ($(ARCH),powerpc64)
+CHOST := powerpc64-linux-musl
+QEMU_ARCH := ppc64
+KERNEL_ARCH := powerpc
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
+else
+QEMU_MACHINE := -machine pseries
+endif
 else ifeq ($(ARCH),powerpc64le)
+CHOST := powerpc64le-linux-musl
 QEMU_ARCH := ppc64
 KERNEL_ARCH := powerpc
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -182,8 +194,8 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
 else
 QEMU_MACHINE := -machine pseries
 endif
-CFLAGS += -mcpu=powerpc64le -mlong-double-64
 else ifeq ($(ARCH),powerpc)
+CHOST := powerpc-linux-musl
 QEMU_ARCH := ppc
 KERNEL_ARCH := powerpc
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage
@@ -192,26 +204,79 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500
 else
 QEMU_MACHINE := -machine ppce500
 endif
-CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt
 else ifeq ($(ARCH),m68k)
+CHOST := m68k-linux-musl
 QEMU_ARCH := m68k
 KERNEL_ARCH := m68k
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
 KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config)
 ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
+QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -append $(KERNEL_CMDLINE)
 else
 QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
 endif
+else ifeq ($(ARCH),riscv64)
+CHOST := riscv64-linux-musl
+QEMU_ARCH := riscv64
+KERNEL_ARCH := riscv
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine virt
+else
+QEMU_MACHINE := -cpu rv64 -machine virt
+endif
+else ifeq ($(ARCH),riscv32)
+CHOST := riscv32-linux-musl
+QEMU_ARCH := riscv32
+KERNEL_ARCH := riscv
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine virt
+else
+QEMU_MACHINE := -cpu rv32 -machine virt
+endif
+else ifeq ($(ARCH),s390x)
+CHOST := s390x-linux-musl
+QEMU_ARCH := s390x
+KERNEL_ARCH := s390
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/s390/boot/bzImage
+KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/s390x.config)
+QEMU_VPORT_RESULT := virtio-serial-ccw
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
+else
+QEMU_MACHINE := -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
+endif
 else
-$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k)
+$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x)
+endif
+
+TOOLCHAIN_FILENAME := $(CHOST)-cross.tgz
+TOOLCHAIN_TAR := $(DISTFILES_PATH)/$(TOOLCHAIN_FILENAME)
+TOOLCHAIN_PATH := $(BUILD_PATH)/$(CHOST)-cross
+TOOLCHAIN_DIR := https://download.wireguard.com/qemu-test/toolchains/20211123/
+$(eval $(call file_download,toolchain-sha256sums-20211123,$(TOOLCHAIN_DIR)SHA256SUMS#,83da033fd8c798df476c21d9612da2dfb896ec62fbed4ceec5eefc0e56b3f0c8))
+$(eval $(call file_download,$(TOOLCHAIN_FILENAME),$(TOOLCHAIN_DIR),,$(DISTFILES_PATH)/toolchain-sha256sums-20211123))
+
+STRIP := $(CHOST)-strip
+CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
+$(info Building for $(CHOST) using $(CBUILD))
+export CROSS_COMPILE := $(CHOST)-
+export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH)
+export CC := $(CHOST)-gcc
+CCACHE_PATH := $(shell which ccache 2>/dev/null)
+ifneq ($(CCACHE_PATH),)
+export KBUILD_BUILD_TIMESTAMP := Fri Jun  5 15:58:00 CEST 2015
+export PATH := $(TOOLCHAIN_PATH)/bin/ccache:$(PATH)
+export CCACHE_SLOPPINESS := file_macro,time_macros
+export CCACHE_DIR ?= $(PWD)/ccache
 endif
 
-REAL_CC := $(CBUILD)-gcc
-MUSL_CC := $(BUILD_PATH)/musl-gcc
-export CC := $(MUSL_CC)
-USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed
+USERSPACE_DEPS := $(TOOLCHAIN_PATH)/.installed $(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed
 
+comma := ,
 build: $(KERNEL_BZIMAGE)
 qemu: $(KERNEL_BZIMAGE)
        rm -f $(BUILD_PATH)/result
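The ccache stanza in the hunk above pins KBUILD_BUILD_TIMESTAMP to a fixed date and sets CCACHE_SLOPPINESS=file_macro,time_macros; without that, compile-time macros such as __DATE__, __TIME__, and __FILE__ expand differently on every build (or from a different build path) and defeat the cache. A sketch of the macros in question:

    #include <stdio.h>

    int main(void)
    {
            /* All three expand at compile time, so otherwise-identical
             * sources produce different objects from build to build. */
            printf("built %s %s from %s\n", __DATE__, __TIME__, __FILE__);
            return 0;
    }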
@@ -222,13 +287,14 @@ qemu: $(KERNEL_BZIMAGE)
                $(QEMU_MACHINE) \
                -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \
                -serial stdio \
-               -serial file:$(BUILD_PATH)/result \
+               -chardev file,path=$(BUILD_PATH)/result,id=result \
+               $(if $(QEMU_VPORT_RESULT),-device $(QEMU_VPORT_RESULT) -device virtserialport$(comma)chardev=result,-serial chardev:result) \
                -no-reboot \
                -monitor none \
                -kernel $<
        grep -Fq success $(BUILD_PATH)/result
 
-$(BUILD_PATH)/init-cpio-spec.txt:
+$(BUILD_PATH)/init-cpio-spec.txt: $(TOOLCHAIN_PATH)/.installed $(BUILD_PATH)/init
        mkdir -p $(BUILD_PATH)
        echo "file /init $(BUILD_PATH)/init 755 0 0" > $@
        echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@
@@ -246,10 +312,10 @@ $(BUILD_PATH)/init-cpio-spec.txt:
        echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@
        echo "slink /bin/ping6 ping 777 0 0" >> $@
        echo "dir /lib 755 0 0" >> $@
-       echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@
-       echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@
+       echo "file /lib/libc.so $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so 755 0 0" >> $@
+       echo "slink $$($(CHOST)-readelf -p .interp '$(BUILD_PATH)/init'| grep -o '/lib/.*') libc.so 777 0 0" >> $@
 
-$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
+$(KERNEL_BUILD_PATH)/.config: $(TOOLCHAIN_PATH)/.installed kernel.config arch/$(ARCH).config
        mkdir -p $(KERNEL_BUILD_PATH)
        cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config
        printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config
@@ -258,29 +324,24 @@ $(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
        cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
        $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
 
-$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
+$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
        $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
 
-$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config
-       $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
+$(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config $(TOOLCHAIN_PATH)/.installed
+       rm -rf $(TOOLCHAIN_PATH)/$(CHOST)/include/linux
+       $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(TOOLCHAIN_PATH)/$(CHOST) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
        touch $@
 
-$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR)
+$(TOOLCHAIN_PATH)/.installed: $(TOOLCHAIN_TAR)
        mkdir -p $(BUILD_PATH)
        flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
-       cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD)
-       $(MAKE) -C $(MUSL_PATH)
-       $(STRIP) -s $@
-
-$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so
-       $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers
+       $(STRIP) -s $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so
+ifneq ($(CCACHE_PATH),)
+       mkdir -p $(TOOLCHAIN_PATH)/bin/ccache
+       ln -s $(CCACHE_PATH) $(TOOLCHAIN_PATH)/bin/ccache/$(CC)
+endif
        touch $@
 
-$(MUSL_CC): $(MUSL_PATH)/lib/libc.so
-       sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs
-       printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc
-       chmod +x $(BUILD_PATH)/musl-gcc
-
 $(IPERF_PATH)/.installed: $(IPERF_TAR)
        mkdir -p $(BUILD_PATH)
        flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
@@ -289,6 +350,7 @@ $(IPERF_PATH)/.installed: $(IPERF_TAR)
        touch $@
 
 $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
+       cd $(IPERF_PATH) && autoreconf -fi
        cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no
        $(MAKE) -C $(IPERF_PATH)
        $(STRIP) -s $@
@@ -304,7 +366,7 @@ $(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE
 
 $(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
        mkdir -p $(BUILD_PATH)
-       $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
+       $(CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
        $(STRIP) -s $@
 
 $(IPUTILS_PATH)/.installed: $(IPUTILS_TAR)
@@ -323,15 +385,15 @@ $(BASH_PATH)/.installed: $(BASH_TAR)
        touch $@
 
 $(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
-       cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble
+       cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-progcomp --disable-readline --disable-mem-scramble
        $(MAKE) -C $(BASH_PATH)
        $(STRIP) -s $@
 
 $(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
        mkdir -p $(BUILD_PATH)
        flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
-       printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk
-       printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
+       printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_HANDLE_AT\n' > $(IPROUTE2_PATH)/config.mk
+       printf 'libutil.a.done:\n\tflock -x $$@.lock $$(MAKE) -C lib\n\ttouch $$@\nip/ip: libutil.a.done\n\t$$(MAKE) -C ip ip\nmisc/ss: libutil.a.done\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
        touch $@
 
 $(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
@@ -370,8 +432,13 @@ clean:
 distclean: clean
        rm -rf $(DISTFILES_PATH)
 
+cacheclean: clean
+ifneq ($(CCACHE_DIR),)
+       rm -rf $(CCACHE_DIR)
+endif
+
 menuconfig: $(KERNEL_BUILD_PATH)/.config
        $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig
 
-.PHONY: qemu build clean distclean menuconfig
+.PHONY: qemu build clean distclean cacheclean menuconfig
 .DELETE_ON_ERROR:
index 3d063bb247bbee383d53abbae19a5ae6990cf3d5..09016880ce035ad391c5b98d7058bcb2410acc9d 100644 (file)
@@ -1,5 +1,8 @@
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1280
index dbdc7e406a7ba86577fec2aec4ed3ab5bbe4ee2d..19ff66e4c602e270a3cf88ab5c119e91e1736bfe 100644 (file)
@@ -1,6 +1,9 @@
 CONFIG_CPU_BIG_ENDIAN=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1280
index 148f4990541826def42543a028937c462db018a2..fc7959bef9c252dc430da09283c1914692129784 100644 (file)
@@ -4,6 +4,9 @@ CONFIG_ARCH_VIRT=y
 CONFIG_THUMB2_KERNEL=n
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1024
index bd76b07d00a2cc801a4bb3fc36e95f0cae851f35..f3066be81c1998e67f1827d784ec1fd2133d9794 100644 (file)
@@ -4,7 +4,10 @@ CONFIG_ARCH_VIRT=y
 CONFIG_THUMB2_KERNEL=n
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
 CONFIG_CPU_BIG_ENDIAN=y
 CONFIG_FRAME_WARN=1024
index a85025d7206eec0b349d4aef5373dcda03dc0f8c..6d90892a85a242040f71cb62a1aec559b2a88581 100644 (file)
@@ -1,5 +1,6 @@
+CONFIG_ACPI=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1024
index 62a15bdb877e7c999a2872f68a2312b328e17cbf..82c925e49beb7dd2794c6b726d6ee5f08f4d146d 100644 (file)
@@ -5,5 +5,5 @@ CONFIG_MAC=y
 CONFIG_SERIAL_PMACZILOG=y
 CONFIG_SERIAL_PMACZILOG_TTYS=y
 CONFIG_SERIAL_PMACZILOG_CONSOLE=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1024
index df71d6b95546fb50a58021e5bdf36d27e8e25e85..d7ec63c17b30e664ccec04a01d6ab879e89df528 100644 (file)
@@ -7,5 +7,5 @@ CONFIG_POWER_RESET_SYSCON=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1024
index 90c783f725c4d6f04af74834cd1807679e3035f1..0994947e3392561de553f0e1a2371da048b89092 100644 (file)
@@ -10,5 +10,5 @@ CONFIG_POWER_RESET_SYSCON=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1280
index 435b0b43e00cbce391b0804912673511bad86364..591184342f4715025500d01d24b569bd4555b637 100644 (file)
@@ -11,5 +11,5 @@ CONFIG_POWER_RESET_SYSCON=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1280
index 62bb50c4a85fc89321b08038c8e6ac1eedb7e8ea..18a498293737637708824f28a360a6a104c6181c 100644 (file)
@@ -8,5 +8,5 @@ CONFIG_POWER_RESET_SYSCON=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1024
index 57957093b71b84223631829f5d0a1d953954fe76..5e04882e8e35b3f8d0b8fb2732460a3511193fea 100644 (file)
@@ -6,5 +6,5 @@ CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_MATH_EMULATION=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
new file mode 100644 (file)
index 0000000..737194b
--- /dev/null
@@ -0,0 +1,13 @@
+CONFIG_PPC64=y
+CONFIG_PPC_PSERIES=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
+CONFIG_PPC_RADIX_MMU=y
+CONFIG_HVC_CONSOLE=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_FRAME_WARN=1280
+CONFIG_THREAD_SHIFT=14
index f52f1e2bc7f64dba3c4d24d276fde663fed03909..8148b9d1220a494e99a41574cbf71161c184a9c2 100644 (file)
@@ -7,7 +7,7 @@ CONFIG_PPC_RADIX_MMU=y
 CONFIG_HVC_CONSOLE=y
 CONFIG_CPU_LITTLE_ENDIAN=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
 CONFIG_SECTION_MISMATCH_WARN_ONLY=y
 CONFIG_FRAME_WARN=1280
 CONFIG_THREAD_SHIFT=14
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
new file mode 100644 (file)
index 0000000..0bd0e72
--- /dev/null
@@ -0,0 +1,12 @@
+CONFIG_ARCH_RV32I=y
+CONFIG_MMU=y
+CONFIG_FPU=y
+CONFIG_SOC_VIRT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1"
+CONFIG_CMDLINE_FORCE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
new file mode 100644 (file)
index 0000000..dc266f3
--- /dev/null
@@ -0,0 +1,12 @@
+CONFIG_ARCH_RV64I=y
+CONFIG_MMU=y
+CONFIG_FPU=y
+CONFIG_SOC_VIRT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1"
+CONFIG_CMDLINE_FORCE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/s390x.config b/tools/testing/selftests/wireguard/qemu/arch/s390x.config
new file mode 100644 (file)
index 0000000..a7b44dc
--- /dev/null
@@ -0,0 +1,6 @@
+CONFIG_SCLP_VT220_TTY=y
+CONFIG_SCLP_VT220_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_S390_GUEST=y
+CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1 panic_on_warn=1"
index 00a1ef4869d58ae86774cf5ba6c51363d903fb95..efa00693e08bfac6992b70230ff92732413dfec5 100644 (file)
@@ -1,5 +1,6 @@
+CONFIG_ACPI=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1280
index 0b45055d9de0e1d19a1d84d7e116d3c15670ffdd..2a0f48fac925ac139a3dcac93e24dfcd5c83a735 100644 (file)
@@ -110,12 +110,6 @@ static void enable_logging(void)
                        panic("write(exception-trace)");
                close(fd);
        }
-       fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY);
-       if (fd >= 0) {
-               if (write(fd, "1\n", 2) != 2)
-                       panic("write(panic_on_warn)");
-               close(fd);
-       }
 }
 
 static void kmod_selftests(void)
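
With panic_on_warn=1 now baked into every CONFIG_CMDLINE above, the runtime toggle removed here had become redundant; the deleted block boiled down to this userspace helper (error handling condensed):

#include <fcntl.h>
#include <unistd.h>

/* What the deleted block in enable_logging() did: flip panic_on_warn
 * via procfs once the selftest init was running.  The command-line
 * flag takes effect earlier, so this is no longer needed.
 */
static void set_panic_on_warn(void)
{
	int fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY);

	if (fd < 0)
		return;
	if (write(fd, "1\n", 2) != 2)
		/* the original called panic("write(panic_on_warn)") here */;
	close(fd);
}
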
index 53df7d3893d31cfc3c60ebd2d612dc2546cdbc07..0388c4d60af0e34f192b8869e21cf76cb61aa8a5 100644 (file)
@@ -92,6 +92,10 @@ warn_32bit_failure:
        echo "If you are using a Fedora-like distribution, try:";       \
        echo "";                                                        \
        echo "  yum install glibc-devel.*i686";                         \
+       echo "";                                                        \
+       echo "If you are using a SUSE-like distribution, try:";         \
+       echo "";                                                        \
+       echo "  zypper install gcc-32bit glibc-devel-static-32bit";     \
        exit 0;
 endif
 
index 222ecc81d7df2d879eaee12a70e93f18d362a789..f4c2a6eb1666b99f2e1bf98be6edb874e956ba95 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * KVM dirty ring implementation
  *
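
This hunk, together with the mirror-image change to virt/kvm/mm.h at the end of this section, brings both files in line with the kernel's SPDX comment-style rule (Documentation/process/license-rules.rst): C sources carry the tag in a C++-style comment on the first line, headers in a C-style comment:

/* First line of a header such as virt/kvm/mm.h: */
/* SPDX-License-Identifier: GPL-2.0-only */

/* First line of a C source file such as virt/kvm/dirty_ring.c: */
// SPDX-License-Identifier: GPL-2.0-only
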
index 59b1dd4a549ee041767628b4a3c1f2d7992e7abb..2a3ed401ce4653377d55de9b02ccfa9e6086f3f3 100644 (file)
@@ -77,7 +77,8 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 
        idx = srcu_read_lock(&kvm->irq_srcu);
 
-       list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
+       list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link,
+           srcu_read_lock_held(&kvm->irq_srcu))
                eventfd_signal(irqfd->resamplefd, 1);
 
        srcu_read_unlock(&kvm->irq_srcu, idx);
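
The resampler list is walked under kvm->irq_srcu, not plain RCU, so list_for_each_entry_srcu() with an explicit srcu_read_lock_held() condition lets lockdep verify the correct read-side lock is held; list_for_each_entry_rcu() can only check for classic RCU readers. A minimal sketch of the pattern, with illustrative types:

#include <linux/rculist.h>
#include <linux/srcu.h>

struct node {
	struct list_head link;	/* lives on an SRCU-protected list */
};

static void process(struct node *n)
{
	/* reader-side work on n */
}

static void walk(struct srcu_struct *sp, struct list_head *head)
{
	struct node *n;
	int idx;

	idx = srcu_read_lock(sp);
	/* the fourth argument is the lockdep condition proving the
	 * SRCU read side is held */
	list_for_each_entry_srcu(n, head, link, srcu_read_lock_held(sp))
		process(n);
	srcu_read_unlock(sp, idx);
}
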
index 70e05af5ebead64cc03218fa60f2f83d7d907941..5ab12214e18dd86869f66b1c5af89f3d3891fa5d 100644 (file)
@@ -164,6 +164,10 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 {
 }
 
+__weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
+{
+}
+
 bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
 {
        /*
@@ -357,6 +361,12 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 #endif
 
+static void kvm_flush_shadow_all(struct kvm *kvm)
+{
+       kvm_arch_flush_shadow_all(kvm);
+       kvm_arch_guest_memory_reclaimed(kvm);
+}
+
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
                                               gfp_t gfp_flags)
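
The new hook follows the usual __weak idiom: generic KVM provides an empty default, and an architecture that needs the notification links in a strong override (on x86 this series uses it so SEV can flush reclaimed guest memory). kvm_flush_shadow_all() then bundles the shadow flush and the hook so no call site can forget one. In sketch form, with the arch body as a placeholder:

/* Generic code (as in this hunk): an empty weak default. */
__weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
{
}

/* Arch code: a strong definition that the linker prefers over the
 * weak one.  The body is a placeholder for whatever flush the
 * architecture requires.
 */
void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
{
	/* e.g. flush caches for reclaimed encrypted guest pages */
}
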
@@ -434,8 +444,8 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 
 static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
-       kvm_dirty_ring_free(&vcpu->dirty_ring);
        kvm_arch_vcpu_destroy(vcpu);
+       kvm_dirty_ring_free(&vcpu->dirty_ring);
 
        /*
         * No need for rcu_read_lock as VCPU_RUN is the only place that changes
@@ -485,12 +495,15 @@ typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
 typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
                             unsigned long end);
 
+typedef void (*on_unlock_fn_t)(struct kvm *kvm);
+
 struct kvm_hva_range {
        unsigned long start;
        unsigned long end;
        pte_t pte;
        hva_handler_t handler;
        on_lock_fn_t on_lock;
+       on_unlock_fn_t on_unlock;
        bool flush_on_ret;
        bool may_block;
 };
@@ -578,8 +591,11 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
        if (range->flush_on_ret && ret)
                kvm_flush_remote_tlbs(kvm);
 
-       if (locked)
+       if (locked) {
                KVM_MMU_UNLOCK(kvm);
+               if (!IS_KVM_NULL_FN(range->on_unlock))
+                       range->on_unlock(kvm);
+       }
 
        srcu_read_unlock(&kvm->srcu, idx);
 
@@ -600,6 +616,7 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
                .pte            = pte,
                .handler        = handler,
                .on_lock        = (void *)kvm_null_fn,
+               .on_unlock      = (void *)kvm_null_fn,
                .flush_on_ret   = true,
                .may_block      = false,
        };
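
Optional hooks in struct kvm_hva_range are never left NULL: unused slots point at the shared kvm_null_fn and callers test IS_KVM_NULL_FN() before invoking, which is why each initializer here fills in on_unlock explicitly. Reduced to its essentials (the real helpers live in virt/kvm/kvm_main.c; the names below are simplified):

typedef void (*on_unlock_fn_t)(void *ctx);

static void null_fn(void)
{
}
#define IS_NULL_FN(fn)	((void *)(fn) == (void *)null_fn)

struct range_ops {
	on_unlock_fn_t on_unlock;	/* (void *)null_fn when unused */
};

static void finish_range(struct range_ops *ops, void *ctx)
{
	/* the lock has been dropped; run the hook outside of it */
	if (!IS_NULL_FN(ops->on_unlock))
		ops->on_unlock(ctx);
}
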
@@ -619,6 +636,7 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
                .pte            = __pte(0),
                .handler        = handler,
                .on_lock        = (void *)kvm_null_fn,
+               .on_unlock      = (void *)kvm_null_fn,
                .flush_on_ret   = false,
                .may_block      = false,
        };
@@ -662,7 +680,7 @@ void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
                kvm->mmu_notifier_range_end = end;
        } else {
                /*
-                * Fully tracking multiple concurrent ranges has dimishing
+                * Fully tracking multiple concurrent ranges has diminishing
                 * returns. Keep things simple and just find the minimal range
                 * which includes the current and new ranges. As there won't be
                 * enough information to subtract a range after its invalidate
@@ -687,6 +705,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
                .pte            = __pte(0),
                .handler        = kvm_unmap_gfn_range,
                .on_lock        = kvm_inc_notifier_count,
+               .on_unlock      = kvm_arch_guest_memory_reclaimed,
                .flush_on_ret   = true,
                .may_block      = mmu_notifier_range_blockable(range),
        };
@@ -741,6 +760,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
                .pte            = __pte(0),
                .handler        = (void *)kvm_null_fn,
                .on_lock        = kvm_dec_notifier_count,
+               .on_unlock      = (void *)kvm_null_fn,
                .flush_on_ret   = false,
                .may_block      = mmu_notifier_range_blockable(range),
        };
@@ -813,7 +833,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
        int idx;
 
        idx = srcu_read_lock(&kvm->srcu);
-       kvm_arch_flush_shadow_all(kvm);
+       kvm_flush_shadow_all(kvm);
        srcu_read_unlock(&kvm->srcu, idx);
 }
 
@@ -932,7 +952,7 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm)
        int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
                                      kvm_vcpu_stats_header.num_desc;
 
-       if (!kvm->debugfs_dentry)
+       if (IS_ERR(kvm->debugfs_dentry))
                return;
 
        debugfs_remove_recursive(kvm->debugfs_dentry);
@@ -1075,6 +1095,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
 
        BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
 
+       /*
+        * Force subsequent debugfs file creations to fail if the VM directory
+        * is not created (by kvm_create_vm_debugfs()).
+        */
+       kvm->debugfs_dentry = ERR_PTR(-ENOENT);
+
        if (init_srcu_struct(&kvm->srcu))
                goto out_err_no_srcu;
        if (init_srcu_struct(&kvm->irq_srcu))
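
Poisoning debugfs_dentry with ERR_PTR(-ENOENT) makes "never created" and "creation failed" look the same to callers: the IS_ERR() checks added above cover both, and any premature debugfs file creation against the poisoned parent fails instead of silently landing in the debugfs root. The idiom in isolation:

#include <linux/err.h>

struct dentry;			/* opaque; only the pointer matters here */

struct vm_stub {
	struct dentry *debugfs_dentry;
};

static void vm_stub_init(struct vm_stub *vm)
{
	/* not a valid address; IS_ERR() is true, PTR_ERR() gives -ENOENT */
	vm->debugfs_dentry = ERR_PTR(-ENOENT);
}

static bool vm_debugfs_ready(const struct vm_stub *vm)
{
	/* true only once a real dentry has been stored */
	return !IS_ERR(vm->debugfs_dentry);
}
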
@@ -1219,7 +1245,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
        WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait));
        kvm->mn_active_invalidate_count = 0;
 #else
-       kvm_arch_flush_shadow_all(kvm);
+       kvm_flush_shadow_all(kvm);
 #endif
        kvm_arch_destroy_vm(kvm);
        kvm_destroy_devices(kvm);
@@ -1534,7 +1560,7 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
        r = kvm_arch_prepare_memory_region(kvm, old, new, change);
 
        /* Free the bitmap on failure if it was allocated above. */
-       if (r && new && new->dirty_bitmap && old && !old->dirty_bitmap)
+       if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap))
                kvm_destroy_dirty_bitmap(new);
 
        return r;
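
The old test required old to be non-NULL, but a brand-new slot has no old counterpart at all, so a failing kvm_arch_prepare_memory_region() leaked the bitmap that was just allocated. The corrected clause frees the bitmap exactly when this call allocated it, which the predicate below spells out:

#include <linux/kvm_host.h>

/* The bitmap was allocated by this call iff the new slot carries one
 * and it was not inherited from a pre-existing slot.
 */
static bool bitmap_allocated_here(const struct kvm_memory_slot *old,
				  const struct kvm_memory_slot *new)
{
	return new && new->dirty_bitmap && (!old || !old->dirty_bitmap);
}
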
@@ -1646,6 +1672,7 @@ static void kvm_invalidate_memslot(struct kvm *kvm,
         *      - kvm_is_visible_gfn (mmu_check_root)
         */
        kvm_arch_flush_shadow_memslot(kvm, old);
+       kvm_arch_guest_memory_reclaimed(kvm);
 
        /* Was released by kvm_swap_active_memslots, reacquire. */
        mutex_lock(&kvm->slots_arch_lock);
@@ -1793,7 +1820,7 @@ static int kvm_set_memslot(struct kvm *kvm,
 
        /*
         * No need to refresh new->arch, changes after dropping slots_arch_lock
-        * will directly hit the final, active memsot.  Architectures are
+        * will directly hit the final, active memslot.  Architectures are
         * responsible for knowing that new->arch may be stale.
         */
        kvm_commit_memory_region(kvm, old, new, change);
@@ -4327,6 +4354,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
                return 0;
 #endif
        case KVM_CAP_BINARY_STATS_FD:
+       case KVM_CAP_SYSTEM_EVENT_DATA:
                return 1;
        default:
                break;
@@ -5479,7 +5507,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
        }
        add_uevent_var(env, "PID=%d", kvm->userspace_pid);
 
-       if (kvm->debugfs_dentry) {
+       if (!IS_ERR(kvm->debugfs_dentry)) {
                char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
 
                if (p) {
index 34ca40823260da674376f9d06fbcfdf903ed18aa..41da467d99c95ef31412ac82d00111b88bb4f40a 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-only
+/* SPDX-License-Identifier: GPL-2.0-only */
 
 #ifndef __KVM_MM_H__
 #define __KVM_MM_H__ 1