Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
authorJakub Kicinski <kuba@kernel.org>
Sat, 11 Mar 2023 05:45:03 +0000 (21:45 -0800)
committerJakub Kicinski <kuba@kernel.org>
Sat, 11 Mar 2023 05:45:03 +0000 (21:45 -0800)
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) nft_parse_register_load() gets an incorrect datatype size
   as input, from Jeremy Sowden.

2) incorrect maximum netlink attribute in nft_redir, also
   from Jeremy.

* git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nft_redir: correct value of inet type `.maxattrs`
  netfilter: nft_redir: correct length for loading protocol registers
  netfilter: nft_masq: correct length for loading protocol registers
  netfilter: nft_nat: correct length for loading protocol registers
====================

Link: https://lore.kernel.org/r/20230309174655.69816-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
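
The root cause in the three "correct length" patches is easy to see in
miniature: the protocol (port) fields are two bytes wide, while the address
union next to them is sixteen bytes wide for IPv6, so taking the size of the
wrong member over-states the length handed to nft_parse_register_load().
Below is a minimal userspace sketch of that class of mistake; the struct
layout and member names are simplified stand-ins for the kernel's
struct nf_nat_range, not the actual patch.

    /* Simplified model of the bug; names are illustrative stand-ins. */
    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    #define sizeof_field(TYPE, MEMBER) sizeof(((TYPE *)0)->MEMBER)

    struct nat_range_model {
            uint8_t  min_addr[16];  /* like union nf_inet_addr (IPv6-sized) */
            uint16_t min_proto;     /* like nf_conntrack_man_proto .all     */
    };

    int main(void)
    {
            /* buggy: address width used as the protocol register length */
            size_t wrong = sizeof_field(struct nat_range_model, min_addr);
            /* fixed: the two-byte protocol (port) width                 */
            size_t right = sizeof_field(struct nat_range_model, min_proto);

            printf("buggy len = %zu, fixed len = %zu\n", wrong, right);
            return 0;
    }
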
686 files changed:
.clang-format
.mailmap
Documentation/ABI/testing/sysfs-class-power
Documentation/ABI/testing/sysfs-class-watchdog
Documentation/ABI/testing/sysfs-fs-f2fs
Documentation/admin-guide/hw-vuln/spectre.rst
Documentation/devicetree/bindings/power/supply/richtek,rt9467-charger.yaml [deleted file]
Documentation/devicetree/bindings/power/supply/richtek,rt9467.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/pwm/mediatek,mt2712-pwm.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/pwm/pwm-mediatek.txt [deleted file]
Documentation/devicetree/bindings/pwm/snps,dw-apb-timers-pwm2.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/amlogic,meson-vrtc.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/brcm,brcmstb-waketimer.yaml
Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml
Documentation/devicetree/bindings/rtc/microcrystal,rv3028.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/moxa,moxart-rtc.txt
Documentation/devicetree/bindings/rtc/nxp,pcf2127.yaml
Documentation/devicetree/bindings/rtc/nxp,pcf85363.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/nxp,pcf8563.yaml
Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
Documentation/devicetree/bindings/rtc/rtc-meson-vrtc.txt [deleted file]
Documentation/devicetree/bindings/rtc/trivial-rtc.yaml
Documentation/devicetree/bindings/sound/apple,mca.yaml
Documentation/devicetree/bindings/sound/microchip,sama7g5-pdmc.yaml
Documentation/devicetree/bindings/timer/qcom,msm-timer.txt [deleted file]
Documentation/devicetree/bindings/watchdog/amlogic,meson6-wdt.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.yaml
Documentation/devicetree/bindings/watchdog/gpio-wdt.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/mediatek,mt7621-wdt.yaml
Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml
Documentation/devicetree/bindings/watchdog/meson-wdt.txt [deleted file]
Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml
Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml
Documentation/devicetree/bindings/watchdog/watchdog.yaml
Documentation/filesystems/f2fs.rst
Documentation/index.rst
Documentation/locking/locktorture.rst
Documentation/netlink/specs/netdev.yaml
Documentation/process/5.Posting.rst
Documentation/process/submitting-patches.rst
Documentation/rust/arch-support.rst
Documentation/translations/sp_SP/process/index.rst
Documentation/translations/sp_SP/process/programming-language.rst [new file with mode: 0644]
MAINTAINERS
Makefile
arch/alpha/mm/fault.c
arch/arm64/Kconfig
arch/arm64/include/asm/memory.h
arch/arm64/kernel/acpi.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/kaslr.c
arch/arm64/kernel/traps.c
arch/arm64/mm/copypage.c
arch/hexagon/mm/vm_fault.c
arch/ia64/include/uapi/asm/cmpxchg.h
arch/ia64/include/uapi/asm/intel_intrin.h [deleted file]
arch/ia64/include/uapi/asm/intrinsics.h
arch/ia64/kernel/acpi.c
arch/ia64/mm/fault.c
arch/loongarch/Kconfig
arch/loongarch/Makefile
arch/loongarch/configs/loongson3_defconfig
arch/loongarch/include/asm/addrspace.h
arch/loongarch/include/asm/asm.h
arch/loongarch/include/asm/asmmacro.h
arch/loongarch/include/asm/cpu.h
arch/loongarch/include/asm/hw_breakpoint.h [new file with mode: 0644]
arch/loongarch/include/asm/inst.h
arch/loongarch/include/asm/kprobes.h [new file with mode: 0644]
arch/loongarch/include/asm/loongarch.h
arch/loongarch/include/asm/processor.h
arch/loongarch/include/asm/ptrace.h
arch/loongarch/include/asm/setup.h
arch/loongarch/include/asm/stackframe.h
arch/loongarch/include/asm/switch_to.h
arch/loongarch/include/asm/uaccess.h
arch/loongarch/include/uapi/asm/ptrace.h
arch/loongarch/kernel/Makefile
arch/loongarch/kernel/entry.S
arch/loongarch/kernel/ftrace_dyn.c
arch/loongarch/kernel/genex.S
arch/loongarch/kernel/head.S
arch/loongarch/kernel/hw_breakpoint.c [new file with mode: 0644]
arch/loongarch/kernel/inst.c
arch/loongarch/kernel/kprobes.c [new file with mode: 0644]
arch/loongarch/kernel/kprobes_trampoline.S [new file with mode: 0644]
arch/loongarch/kernel/process.c
arch/loongarch/kernel/ptrace.c
arch/loongarch/kernel/relocate.c [new file with mode: 0644]
arch/loongarch/kernel/setup.c
arch/loongarch/kernel/time.c
arch/loongarch/kernel/traps.c
arch/loongarch/kernel/vmlinux.lds.S
arch/loongarch/lib/memcpy.S
arch/loongarch/lib/memmove.S
arch/loongarch/lib/memset.S
arch/loongarch/mm/fault.c
arch/loongarch/mm/tlbex.S
arch/loongarch/power/suspend_asm.S
arch/m68k/kernel/setup_mm.c
arch/m68k/kernel/traps.c
arch/m68k/mm/fault.c
arch/m68k/mm/motorola.c
arch/microblaze/mm/fault.c
arch/mips/Kconfig
arch/mips/boot/dts/ralink/mt7621.dtsi
arch/mips/configs/mtx1_defconfig
arch/mips/include/asm/asm.h
arch/mips/include/asm/smp-cps.h
arch/mips/kernel/cevt-r4k.c
arch/mips/kernel/cps-vec.S
arch/mips/kernel/smp-cps.c
arch/mips/ralink/Kconfig
arch/nios2/mm/fault.c
arch/openrisc/mm/fault.c
arch/parisc/mm/fault.c
arch/powerpc/Kconfig
arch/powerpc/configs/ppc6xx_defconfig
arch/powerpc/kernel/vmlinux.lds.S
arch/powerpc/xmon/xmon.c
arch/riscv/include/uapi/asm/setup.h [new file with mode: 0644]
arch/riscv/lib/strcmp.S
arch/riscv/lib/strlen.S
arch/riscv/lib/strncmp.S
arch/riscv/mm/fault.c
arch/s390/Kconfig
arch/s390/boot/Makefile
arch/s390/include/asm/ap.h
arch/s390/include/asm/nmi.h
arch/s390/include/asm/rwonce.h [new file with mode: 0644]
arch/s390/kernel/early.c
arch/s390/kernel/entry.S
arch/s390/kernel/kprobes.c
arch/s390/kernel/nmi.c
arch/s390/kernel/perf_cpum_sf.c
arch/s390/kernel/smp.c
arch/s390/mm/extmem.c
arch/sh/include/asm/processor_32.h
arch/sh/kernel/signal_32.c
arch/sparc/mm/fault_32.c
arch/sparc/mm/fault_64.c
arch/um/Kconfig
arch/um/Makefile
arch/um/drivers/Kconfig
arch/um/drivers/pcap_kern.c
arch/um/drivers/vector_kern.c
arch/um/drivers/vector_user.h
arch/um/drivers/virt-pci.c
arch/um/drivers/virtio_uml.c
arch/um/include/asm/processor-generic.h
arch/um/kernel/exec.c
arch/um/kernel/tlb.c
arch/um/kernel/um_arch.c
arch/um/kernel/vmlinux.lds.S
arch/um/os-Linux/irq.c
arch/um/os-Linux/skas/mem.c
arch/um/os-Linux/skas/process.c
arch/x86/Makefile.um
arch/x86/entry/entry_64.S
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/orc_types.h
arch/x86/include/asm/resctrl.h
arch/x86/include/asm/string_64.h
arch/x86/include/asm/unwind_hints.h
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/resctrl/rdtgroup.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/unwind_orc.c
arch/x86/um/vdso/Makefile
arch/x86/um/vdso/um_vdso.c
block/bio.c
block/blk-core.c
block/blk-iocost.c
block/blk-merge.c
block/blk-mq.c
block/blk-zoned.c
block/blk.h
block/genhd.c
block/ioctl.c
block/sed-opal.c
drivers/acpi/resource.c
drivers/acpi/x86/s2idle.c
drivers/acpi/x86/utils.c
drivers/ata/ahci.c
drivers/auxdisplay/hd44780.c
drivers/base/bus.c
drivers/base/core.c
drivers/base/platform-msi.c
drivers/block/loop.c
drivers/block/rbd.c
drivers/block/ublk_drv.c
drivers/char/random.c
drivers/cpufreq/amd-pstate.c
drivers/cpufreq/apple-soc-cpufreq.c
drivers/cpufreq/intel_pstate.c
drivers/crypto/caam/caamalg.c
drivers/crypto/caam/caamalg_qi.c
drivers/crypto/caam/qi.c
drivers/gpu/drm/amd/amdgpu/Kconfig
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_events.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
drivers/gpu/drm/amd/display/Kconfig
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h
drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
drivers/gpu/drm/drm_gem_shmem_helper.c
drivers/gpu/drm/i915/Kconfig
drivers/gpu/drm/i915/display/intel_quirks.c
drivers/gpu/drm/i915/gt/intel_gt_mcr.c
drivers/gpu/drm/i915/gt/intel_ring.c
drivers/gpu/drm/i915/gvt/debugfs.c
drivers/gpu/drm/i915/gvt/firmware.c
drivers/gpu/drm/i915/gvt/kvmgt.c
drivers/gpu/drm/i915/gvt/vgpu.c
drivers/gpu/drm/msm/msm_fbdev.c
drivers/gpu/drm/omapdrm/omap_fbdev.c
drivers/gpu/drm/radeon/atombios_encoders.c
drivers/hid/hid-core.c
drivers/hid/hid-cp2112.c
drivers/hid/hid-logitech-hidpp.c
drivers/hid/intel-ish-hid/ipc/ipc.c
drivers/hid/uhid.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/i2c-gxp.c
drivers/i3c/master.c
drivers/i3c/master/dw-i3c-master.c
drivers/mtd/ubi/block.c
drivers/mtd/ubi/build.c
drivers/mtd/ubi/debug.c
drivers/mtd/ubi/eba.c
drivers/mtd/ubi/fastmap-wl.c
drivers/mtd/ubi/fastmap.c
drivers/mtd/ubi/kapi.c
drivers/mtd/ubi/misc.c
drivers/mtd/ubi/vmt.c
drivers/mtd/ubi/wl.c
drivers/net/dsa/mt7530.c
drivers/net/ethernet/Kconfig
drivers/net/ethernet/Makefile
drivers/net/ethernet/amazon/ena/ena_ethtool.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/fealnx.c [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/ice/ice_dcb.c
drivers/net/ethernet/intel/ice/ice_lib.c
drivers/net/ethernet/intel/ice/ice_tc_lib.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/marvell/octeontx2/af/rvu.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c
drivers/net/ipvlan/ipvlan_l3s.c
drivers/net/phy/nxp-c45-tja11xx.c
drivers/net/veth.c
drivers/net/virtio_net.c
drivers/net/wireguard/queueing.h
drivers/nfc/pn533/usb.c
drivers/nvme/host/auth.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/tcp.c
drivers/pci/msi/api.c
drivers/perf/riscv_pmu_sbi.c
drivers/platform/mellanox/Kconfig
drivers/platform/x86/Kconfig
drivers/platform/x86/amd/pmc.c
drivers/platform/x86/dell/dell-wmi-ddv.c
drivers/platform/x86/intel/int3472/tps68470_board_data.c
drivers/platform/x86/intel/speed_select_if/isst_if_common.c
drivers/platform/x86/intel/speed_select_if/isst_if_common.h
drivers/platform/x86/intel/tpmi.c
drivers/platform/x86/mlx-platform.c
drivers/power/supply/power_supply_core.c
drivers/power/supply/qcom_battmgr.c
drivers/powercap/intel_rapl_msr.c
drivers/powercap/powercap_sys.c
drivers/pwm/pwm-ab8500.c
drivers/pwm/pwm-dwc.c
drivers/pwm/pwm-iqs620a.c
drivers/pwm/pwm-lp3943.c
drivers/pwm/pwm-sifive.c
drivers/pwm/pwm-stm32-lp.c
drivers/regulator/core.c
drivers/regulator/max597x-regulator.c
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/interface.c
drivers/rtc/rtc-ab-eoz9.c
drivers/rtc/rtc-abx80x.c
drivers/rtc/rtc-brcmstb-waketimer.c
drivers/rtc/rtc-ds1307.c
drivers/rtc/rtc-efi.c
drivers/rtc/rtc-hym8563.c
drivers/rtc/rtc-isl12022.c
drivers/rtc/rtc-jz4740.c
drivers/rtc/rtc-m41t80.c
drivers/rtc/rtc-max8907.c
drivers/rtc/rtc-moxart.c
drivers/rtc/rtc-nxp-bbnsm.c [new file with mode: 0644]
drivers/rtc/rtc-pcf2123.c
drivers/rtc/rtc-pcf85063.c
drivers/rtc/rtc-pcf8523.c
drivers/rtc/rtc-pcf85363.c
drivers/rtc/rtc-pcf8563.c
drivers/rtc/rtc-pm8xxx.c
drivers/rtc/rtc-rv3028.c
drivers/rtc/rtc-rv3029c2.c
drivers/rtc/rtc-rv3032.c
drivers/rtc/rtc-rv8803.c
drivers/rtc/rtc-rx6110.c
drivers/rtc/rtc-rx8010.c
drivers/rtc/rtc-sun6i.c
drivers/s390/crypto/ap_queue.c
drivers/s390/crypto/vfio_ap_ops.c
drivers/s390/scsi/zfcp_dbf.c
drivers/s390/scsi/zfcp_def.h
drivers/s390/scsi/zfcp_ext.h
drivers/s390/scsi/zfcp_fsf.c
drivers/s390/scsi/zfcp_qdio.h
drivers/s390/scsi/zfcp_reqlist.h
drivers/s390/scsi/zfcp_scsi.c
drivers/scsi/cxgbi/libcxgbi.h
drivers/scsi/hosts.c
drivers/scsi/ipr.c
drivers/scsi/lpfc/lpfc_attr.c
drivers/scsi/lpfc/lpfc_els.c
drivers/scsi/lpfc/lpfc_hbadisc.c
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_mbox.c
drivers/scsi/lpfc/lpfc_nvmet.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/mpi3mr/mpi3mr.h
drivers/scsi/mpi3mr/mpi3mr_app.c
drivers/scsi/mpi3mr/mpi3mr_fw.c
drivers/scsi/mpi3mr/mpi3mr_os.c
drivers/scsi/mpi3mr/mpi3mr_transport.c
drivers/scsi/qedi/qedi_dbg.h
drivers/scsi/qla2xxx/qla_isr.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_transport_fc.c
drivers/scsi/sd.c
drivers/scsi/sd_dif.c
drivers/scsi/ses.c
drivers/sh/clk/core.c
drivers/spi/spi-cadence-quadspi.c
drivers/spi/spi-sn-f-ospi.c
drivers/spi/spi-tegra210-quad.c
drivers/thermal/intel/Kconfig
drivers/thermal/intel/intel_quark_dts_thermal.c
drivers/tty/vt/vc_screen.c
drivers/ufs/core/ufshcd.c
drivers/ufs/host/Kconfig
drivers/ufs/host/ufs-mediatek.c
drivers/virt/coco/sev-guest/sev-guest.c
drivers/watchdog/Kconfig
drivers/watchdog/apple_wdt.c
drivers/watchdog/armada_37xx_wdt.c
drivers/watchdog/aspeed_wdt.c
drivers/watchdog/at91rm9200_wdt.c
drivers/watchdog/at91sam9_wdt.c
drivers/watchdog/bcm7038_wdt.c
drivers/watchdog/cadence_wdt.c
drivers/watchdog/da9062_wdt.c
drivers/watchdog/da9063_wdt.c
drivers/watchdog/davinci_wdt.c
drivers/watchdog/dw_wdt.c
drivers/watchdog/iTCO_wdt.c
drivers/watchdog/imgpdc_wdt.c
drivers/watchdog/imx2_wdt.c
drivers/watchdog/imx7ulp_wdt.c
drivers/watchdog/lpc18xx_wdt.c
drivers/watchdog/meson_gxbb_wdt.c
drivers/watchdog/mt7621_wdt.c
drivers/watchdog/mtk_wdt.c
drivers/watchdog/of_xilinx_wdt.c
drivers/watchdog/pcwd_usb.c
drivers/watchdog/pic32-dmt.c
drivers/watchdog/pic32-wdt.c
drivers/watchdog/pnx4008_wdt.c
drivers/watchdog/qcom-wdt.c
drivers/watchdog/realtek_otto_wdt.c
drivers/watchdog/rtd119x_wdt.c
drivers/watchdog/rzg2l_wdt.c
drivers/watchdog/rzn1_wdt.c
drivers/watchdog/sbsa_gwdt.c
drivers/watchdog/visconti_wdt.c
drivers/watchdog/watchdog_dev.c
drivers/watchdog/wdat_wdt.c
drivers/watchdog/ziirave_wdt.c
fs/9p/v9fs.c
fs/9p/vfs_addr.c
fs/9p/vfs_dir.c
fs/9p/vfs_file.c
fs/9p/vfs_inode.c
fs/9p/vfs_inode_dotl.c
fs/ceph/file.c
fs/cifs/cifsproto.h
fs/cifs/cifssmb.c
fs/cifs/connect.c
fs/cifs/file.c
fs/cifs/misc.c
fs/cifs/smb2inode.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smbdirect.c
fs/cramfs/inode.c
fs/exfat/dir.c
fs/exfat/exfat_fs.h
fs/exfat/exfat_raw.h
fs/exfat/fatent.c
fs/exfat/file.c
fs/exfat/inode.c
fs/exfat/namei.c
fs/exfat/super.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/fast_commit.c
fs/ext4/file.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/namei.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/f2fs/checkpoint.c
fs/f2fs/compress.c
fs/f2fs/data.c
fs/f2fs/debug.c
fs/f2fs/dir.c
fs/f2fs/extent_cache.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/gc.c
fs/f2fs/gc.h
fs/f2fs/inline.c
fs/f2fs/inode.c
fs/f2fs/iostat.c
fs/f2fs/iostat.h
fs/f2fs/namei.c
fs/f2fs/node.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
fs/f2fs/sysfs.c
fs/f2fs/verity.c
fs/hfsplus/super.c
fs/hostfs/hostfs_kern.c
fs/jbd2/transaction.c
fs/jffs2/compr.c
fs/jffs2/compr.h
fs/jffs2/file.c
fs/jffs2/fs.c
fs/jfs/jfs_dmap.c
fs/netfs/iterator.c
fs/ocfs2/move_extents.c
fs/open.c
fs/proc/array.c
fs/ubifs/budget.c
fs/ubifs/dir.c
fs/ubifs/file.c
fs/ubifs/io.c
fs/ubifs/journal.c
fs/ubifs/super.c
fs/ubifs/sysfs.c
fs/ubifs/tnc.c
fs/ubifs/ubifs.h
fs/udf/inode.c
fs/xfs/libxfs/xfs_ag.c
fs/xfs/libxfs/xfs_ag.h
fs/xfs/libxfs/xfs_ag_resv.c
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_alloc.h
fs/xfs/libxfs/xfs_alloc_btree.c
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_bmap.h
fs/xfs/libxfs/xfs_bmap_btree.c
fs/xfs/libxfs/xfs_btree.c
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_ialloc.h
fs/xfs/libxfs/xfs_ialloc_btree.c
fs/xfs/libxfs/xfs_ialloc_btree.h
fs/xfs/libxfs/xfs_refcount_btree.c
fs/xfs/libxfs/xfs_rmap_btree.c
fs/xfs/libxfs/xfs_sb.c
fs/xfs/scrub/agheader_repair.c
fs/xfs/scrub/bmap.c
fs/xfs/scrub/common.c
fs/xfs/scrub/fscounters.c
fs/xfs/scrub/repair.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_discard.c
fs/xfs/xfs_filestream.c
fs/xfs/xfs_filestream.h
fs/xfs/xfs_fsmap.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_iwalk.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_reflink.c
fs/xfs/xfs_super.c
fs/xfs/xfs_trace.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h
include/acpi/platform/acenv.h
include/acpi/platform/acenvex.h
include/acpi/platform/acintel.h [deleted file]
include/linux/bcd.h
include/linux/blk-mq.h
include/linux/blkdev.h
include/linux/capability.h
include/linux/compiler-intel.h [deleted file]
include/linux/compiler_attributes.h
include/linux/compiler_types.h
include/linux/cpumask.h
include/linux/f2fs_fs.h
include/linux/hid.h
include/linux/io_uring_types.h
include/linux/msi.h
include/linux/mtd/ubi.h
include/linux/objtool.h
include/linux/pwm.h
include/linux/rmap.h
include/linux/sh_intc.h
include/net/xdp.h
include/scsi/scsi_device.h
include/scsi/scsi_transport_fc.h
include/sound/soc-component.h
include/trace/events/f2fs.h
include/uapi/drm/amdgpu_drm.h
include/uapi/linux/elf.h
include/uapi/linux/netdev.h
include/uapi/linux/sed-opal.h
include/uapi/scsi/scsi_bsg_mpi3mr.h
include/ufs/ufshcd.h
io_uring/fdinfo.c
io_uring/io_uring.c
io_uring/kbuf.c
io_uring/net.c
io_uring/poll.c
io_uring/poll.h
io_uring/rsrc.c
io_uring/slist.h
io_uring/tctx.c
kernel/auditsc.c
kernel/capability.c
kernel/irq/ipi.c
kernel/irq/irqdesc.c
kernel/irq/irqdomain.c
kernel/irq/msi.c
kernel/panic.c
kernel/sched/cpufreq_schedutil.c
kernel/trace/blktrace.c
kernel/umh.c
lib/Kconfig.kasan
lib/Makefile
lib/cpumask_kunit.c
lib/kunit/Makefile
lib/parser.c
lib/zlib_deflate/defutil.h
mm/damon/paddr.c
mm/kasan/Makefile
mm/kasan/kasan.h
mm/kasan/kasan_test.c
mm/kasan/shadow.c
mm/memory-failure.c
mm/migrate.c
mm/mmap.c
mm/rmap.c
net/9p/client.c
net/9p/trans_rdma.c
net/9p/trans_xen.c
net/core/xdp.c
net/ipv4/tcp_output.c
net/mac80211/cfg.c
net/mptcp/pm_netlink.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/mptcp/subflow.c
net/smc/af_smc.c
net/socket.c
net/sunrpc/auth_gss/gss_krb5_test.c
net/unix/af_unix.c
net/wireless/nl80211.c
rust/bindgen_parameters
samples/kprobes/kprobe_example.c
scripts/Makefile.kasan
scripts/cc-version.sh
scripts/coccicheck
scripts/coccinelle/api/atomic_as_refcounter.cocci
scripts/min-tool-version.sh
security/apparmor/policy_unpack.c
security/commoncap.c
sound/pci/hda/patch_realtek.c
sound/pci/ice1712/aureon.c
sound/soc/amd/yc/acp6x-mach.c
sound/soc/apple/mca.c
sound/soc/atmel/mchp-pdmc.c
sound/soc/atmel/sam9g20_wm8731.c
sound/soc/codecs/Kconfig
sound/soc/codecs/adau7118.c
sound/soc/codecs/da7219-aad.c
sound/soc/codecs/da7219-aad.h
sound/soc/codecs/mt6358.c
sound/soc/codecs/sma1303.c
sound/soc/intel/boards/sof_rt5682.c
sound/soc/intel/common/soc-acpi-intel-mtl-match.c
sound/soc/mediatek/mt8183/mt8183-dai-i2s.c
sound/soc/mediatek/mt8188/mt8188-dai-etdm.c
sound/soc/mediatek/mt8192/mt8192-dai-adda.c
sound/soc/mediatek/mt8195/mt8195-dai-etdm.c
sound/soc/sh/rcar/adg.c
sound/soc/soc-pcm.c
tools/arch/loongarch/include/uapi/asm/bitsperlong.h [new file with mode: 0644]
tools/arch/x86/include/asm/orc_types.h
tools/include/linux/objtool.h
tools/include/uapi/linux/netdev.h
tools/net/ynl/lib/__init__.py
tools/net/ynl/lib/nlspec.py
tools/net/ynl/lib/ynl.py
tools/net/ynl/ynl-gen-c.py
tools/objtool/.gitignore
tools/objtool/Build
tools/objtool/Documentation/objtool.txt
tools/objtool/Makefile
tools/objtool/arch/powerpc/decode.c
tools/objtool/arch/x86/decode.c
tools/objtool/builtin-check.c
tools/objtool/check.c
tools/objtool/elf.c
tools/objtool/include/objtool/arch.h
tools/objtool/include/objtool/builtin.h
tools/objtool/include/objtool/cfi.h
tools/objtool/include/objtool/check.h
tools/objtool/include/objtool/elf.h
tools/objtool/include/objtool/objtool.h
tools/objtool/include/objtool/special.h
tools/objtool/objtool.c
tools/objtool/orc_dump.c
tools/objtool/orc_gen.c
tools/objtool/special.c
tools/scripts/Makefile.arch
tools/testing/selftests/bpf/progs/test_deny_namespace.c
tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
tools/testing/selftests/hid/config
tools/testing/selftests/net/mptcp/userspace_pm.sh
tools/testing/selftests/seccomp/seccomp_bpf.c

index 2c61b4553374b994be3a795cd8600b930d5e9213..d988e9fa9b265324df3f8d637f7d21fbd6f08776 100644 (file)
@@ -226,7 +226,6 @@ ForEachMacros:
   - 'for_each_console_srcu'
   - 'for_each_cpu'
   - 'for_each_cpu_and'
-  - 'for_each_cpu_not'
   - 'for_each_cpu_wrap'
   - 'for_each_dapm_widgets'
   - 'for_each_dedup_cand'
index 4f37ff13a3464294680e59b9011d325c58753201..438d79c2771729fba67d38f48cf88195fdcbbbed 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -121,6 +121,7 @@ Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@gmail.com>
 Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@imgtec.com>
 Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@mips.com>
 <dev.kurt@vandijck-laurijssen.be> <kurt.van.dijck@eia.be>
+Dikshita Agarwal <dikshita@qti.qualcomm.com> <dikshita@codeaurora.org>
 Dmitry Baryshkov <dbaryshkov@gmail.com>
 Dmitry Baryshkov <dbaryshkov@gmail.com> <[dbaryshkov@gmail.com]>
 Dmitry Baryshkov <dbaryshkov@gmail.com> <dmitry_baryshkov@mentor.com>
@@ -150,6 +151,7 @@ Gao Xiang <xiang@kernel.org> <gaoxiang25@huawei.com>
 Gao Xiang <xiang@kernel.org> <hsiangkao@aol.com>
 Gao Xiang <xiang@kernel.org> <hsiangkao@linux.alibaba.com>
 Gao Xiang <xiang@kernel.org> <hsiangkao@redhat.com>
+Georgi Djakov <djakov@kernel.org> <georgi.djakov@linaro.org>
 Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@de.ibm.com>
 Gerald Schaefer <gerald.schaefer@linux.ibm.com> <gerald.schaefer@de.ibm.com>
 Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@linux.vnet.ibm.com>
@@ -208,6 +210,9 @@ Jens Axboe <axboe@suse.de>
 Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
 Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net>
 Jessica Zhang <quic_jesszhan@quicinc.com> <jesszhan@codeaurora.org>
+Jiri Pirko <jiri@resnulli.us> <jiri@nvidia.com>
+Jiri Pirko <jiri@resnulli.us> <jiri@mellanox.com>
+Jiri Pirko <jiri@resnulli.us> <jpirko@redhat.com>
 Jiri Slaby <jirislaby@kernel.org> <jirislaby@gmail.com>
 Jiri Slaby <jirislaby@kernel.org> <jslaby@novell.com>
 Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.com>
@@ -446,6 +451,7 @@ Vasily Averin <vasily.averin@linux.dev> <vvs@openvz.org>
 Vasily Averin <vasily.averin@linux.dev> <vvs@parallels.com>
 Vasily Averin <vasily.averin@linux.dev> <vvs@sw.ru>
 Valentin Schneider <vschneid@redhat.com> <valentin.schneider@arm.com>
+Vikash Garodia <quic_vgarodia@quicinc.com> <vgarodia@codeaurora.org>
 Vinod Koul <vkoul@kernel.org> <vinod.koul@intel.com>
 Vinod Koul <vkoul@kernel.org> <vinod.koul@linux.intel.com>
 Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org>
index e434fc523291d85b10503c45d394032d7359d086..7c81f0a25a48727a477de3d712155535cca92728 100644 (file)
@@ -437,7 +437,8 @@ What:               /sys/class/power_supply/<supply_name>/present
 Date:          May 2007
 Contact:       linux-pm@vger.kernel.org
 Description:
-               Reports whether a battery is present or not in the system.
+               Reports whether a battery is present or not in the system. If the
+               property does not exist, the battery is considered to be present.
 
                Access: Read
 
index 585caecda3a5fc3a533360eea7ac95589b9c2545..94fb7461595123733b524b7c43ace4c31ab4c339 100644 (file)
@@ -6,6 +6,19 @@ Description:
                device at boot. It is equivalent to WDIOC_GETBOOTSTATUS of
                ioctl interface.
 
+What:          /sys/class/watchdog/watchdogn/options
+Date:          April 2023
+Contact:       Thomas Weißschuh
+Description:
+               It is a read-only file. It contains the options of the watchdog device.
+
+What:          /sys/class/watchdog/watchdogn/fw_version
+Date:          April 2023
+Contact:       Thomas Weißschuh
+Description:
+               It is a read-only file. It contains the firmware version of
+               the watchdog device.
+
 What:          /sys/class/watchdog/watchdogn/identity
 Date:          August 2015
 Contact:       Wim Van Sebroeck <wim@iguana.be>
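
A userspace consumer can read the two new attributes above (options and
fw_version) like any other sysfs text file. A minimal sketch in C, assuming
a device named watchdog0:

    #include <stdio.h>

    static void dump_attr(const char *name)
    {
            char path[128], buf[64];

            snprintf(path, sizeof(path),
                     "/sys/class/watchdog/watchdog0/%s", name);
            FILE *f = fopen(path, "r");
            if (f) {
                    if (fgets(buf, sizeof(buf), f))
                            printf("%s: %s", name, buf);
                    fclose(f);
            }
    }

    int main(void)
    {
            dump_attr("options");
            dump_attr("fw_version");
            return 0;
    }
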
index 9e3756625a8194d92761ee3e286c84aa273e5fa5..94132745ecbe7709f32ef7c19093413e3053bc29 100644 (file)
@@ -49,16 +49,23 @@ Contact:    "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
 Description:   Controls the policy of in-place updates in f2fs.
                User can set:
 
-               ====  =================
-               0x01  F2FS_IPU_FORCE
-               0x02  F2FS_IPU_SSR
-               0x04  F2FS_IPU_UTIL
-               0x08  F2FS_IPU_SSR_UTIL
-               0x10  F2FS_IPU_FSYNC
-               0x20  F2FS_IPU_ASYNC
-               0x40  F2FS_IPU_NOCACHE
-               0x80  F2FS_IPU_HONOR_OPU_WRITE
-               ====  =================
+               ===== =============== ===================================================
+               value policy          description
+               0x00  DISABLE         disable IPU (=default option in LFS mode)
+               0x01  FORCE           all the time
+               0x02  SSR             if SSR mode is activated
+               0x04  UTIL            if FS utilization is over threshold
+               0x08  SSR_UTIL        if SSR mode is activated and FS utilization is over
+                                     threshold
+               0x10  FSYNC           activated in fsync path only for high-performance
+                                     flash storage. IPU will be triggered only if the
+                                     # of dirty pages exceeds min_fsync_blocks.
+                                     (=default option)
+               0x20  ASYNC           do IPU given by asynchronous write requests
+               0x40  NOCACHE         disable IPU bio cache
+               0x80  HONOR_OPU_WRITE use OPU write prior to IPU write if inode has
+                                     FI_OPU_WRITE flag
+               ===== =============== ===================================================
 
                Refer segment.h for details.
 
@@ -669,3 +676,56 @@ Contact:   "Ping Xiong" <xiongping1@xiaomi.com>
 Description:   When DATA SEPARATION is on, it controls the age threshold used to
                mark the data blocks as warm. By default it is initialized to
                2621440 blocks (equal to 10GB).
+
+What:          /sys/fs/f2fs/<disk>/fault_rate
+Date:          May 2016
+Contact:       "Sheng Yong" <shengyong@oppo.com>
+Contact:       "Chao Yu" <chao@kernel.org>
+Description:   Enables fault injection in all supported types with the
+               specified injection rate.
+
+What:          /sys/fs/f2fs/<disk>/fault_type
+Date:          May 2016
+Contact:       "Sheng Yong" <shengyong@oppo.com>
+Contact:       "Chao Yu" <chao@kernel.org>
+Description:   Supports configuring the fault injection type. It should be
+               enabled with the fault_injection option. The fault type values
+               are shown below; single or combined types are supported (a
+               short worked example follows this hunk).
+
+               ===================      ===========
+               Type_Name                Type_Value
+               ===================      ===========
+               FAULT_KMALLOC            0x000000001
+               FAULT_KVMALLOC           0x000000002
+               FAULT_PAGE_ALLOC         0x000000004
+               FAULT_PAGE_GET           0x000000008
+               FAULT_ALLOC_BIO          0x000000010 (obsolete)
+               FAULT_ALLOC_NID          0x000000020
+               FAULT_ORPHAN             0x000000040
+               FAULT_BLOCK              0x000000080
+               FAULT_DIR_DEPTH          0x000000100
+               FAULT_EVICT_INODE        0x000000200
+               FAULT_TRUNCATE           0x000000400
+               FAULT_READ_IO            0x000000800
+               FAULT_CHECKPOINT         0x000001000
+               FAULT_DISCARD            0x000002000
+               FAULT_WRITE_IO           0x000004000
+               FAULT_SLAB_ALLOC         0x000008000
+               FAULT_DQUOT_INIT         0x000010000
+               FAULT_LOCK_OP            0x000020000
+               FAULT_BLKADDR            0x000040000
+               ===================      ===========
+
+What:          /sys/fs/f2fs/<disk>/discard_io_aware_gran
+Date:          January 2023
+Contact:       "Yangtao Li" <frank.li@vivo.com>
+Description:   Controls the background discard granularity of the inner discard
+               thread when it is not idle. The inner thread will not issue
+               discards smaller than this granularity. The unit size is one
+               block (4KB), and only values in the range [0, 512] are supported.
+               Default: 512
+
+What:          /sys/fs/f2fs/<disk>/last_age_weight
+Date:          January 2023
+Contact:       "Ping Xiong" <xiongping1@xiaomi.com>
+Description:   When DATA SEPARATION is on, it controls the weight of last data block age.
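
As the fault_type entry above notes, single or combined types are accepted.
A minimal sketch of injecting kmalloc and page-allocation failures together;
the disk name and the decimal radix are assumptions:

    #include <stdio.h>

    #define FAULT_KMALLOC     0x000000001UL
    #define FAULT_PAGE_ALLOC  0x000000004UL

    int main(void)
    {
            unsigned long mask = FAULT_KMALLOC | FAULT_PAGE_ALLOC;  /* 0x5 */
            /* disk name is an assumption */
            FILE *f = fopen("/sys/fs/f2fs/sda/fault_type", "w");

            if (f) {
                    fprintf(f, "%lu\n", mask);  /* radix assumed decimal */
                    fclose(f);
            }
            return 0;
    }
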
index 3fe6511c54050dfc128fb5a9bc1aa01bc03aa1be..4d186f599d90f722978b33998df5e27e149aead5 100644 (file)
@@ -479,8 +479,16 @@ Spectre variant 2
    On Intel Skylake-era systems the mitigation covers most, but not all,
    cases. See :ref:`[3] <spec_ref3>` for more details.
 
-   On CPUs with hardware mitigation for Spectre variant 2 (e.g. Enhanced
-   IBRS on x86), retpoline is automatically disabled at run time.
+   On CPUs with hardware mitigation for Spectre variant 2 (e.g. IBRS
+   or enhanced IBRS on x86), retpoline is automatically disabled at run time.
+
+   Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
+   boot, by setting the IBRS bit, and they're automatically protected against
+   Spectre v2 variant attacks, including cross-thread branch target injections
+   on SMT systems (STIBP). In other words, eIBRS enables STIBP too.
+
+   Legacy IBRS systems clear the IBRS bit on exit to userspace and
+   therefore explicitly enable STIBP for userspace protection.
 
    The retpoline mitigation is turned on by default on vulnerable
    CPUs. It can be forced on or off by the administrator
@@ -504,9 +512,12 @@ Spectre variant 2
    For Spectre variant 2 mitigation, individual user programs
    can be compiled with return trampolines for indirect branches.
    This protects them from consuming poisoned entries in the branch
-   target buffer left by malicious software.  Alternatively, the
-   programs can disable their indirect branch speculation via prctl()
-   (See :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
+   target buffer left by malicious software.
+
+   On legacy IBRS systems, at return to userspace, implicit STIBP is disabled
+   because the kernel clears the IBRS bit. In this case, the userspace programs
+   can disable indirect branch speculation via prctl() (See
+   :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
    On x86, this will turn on STIBP to guard against attacks from the
    sibling thread when the user program is running, and use IBPB to
    flush the branch target buffer when switching to/from the program.
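
The prctl() interface referenced above is documented in
Documentation/userspace-api/spec_ctrl.rst. A minimal example of a process
opting out of indirect branch speculation, which turns on STIBP for it on
x86:

    #include <stdio.h>
    #include <sys/prctl.h>
    #include <linux/prctl.h>

    int main(void)
    {
            /* opt this task out of indirect branch speculation */
            if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
                      PR_SPEC_DISABLE, 0, 0) != 0)
                    perror("PR_SET_SPECULATION_CTRL");

            long state = prctl(PR_GET_SPECULATION_CTRL,
                               PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
            printf("indirect branch speculation state: %ld\n", state);
            return 0;
    }
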
diff --git a/Documentation/devicetree/bindings/power/supply/richtek,rt9467-charger.yaml b/Documentation/devicetree/bindings/power/supply/richtek,rt9467-charger.yaml
deleted file mode 100644 (file)
index 92c5706..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/power/supply/richtek,rt9467-charger.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: Richtek RT9467 Switching Battery Charger with Power Path Management
-
-maintainers:
-  - ChiYuan Huang <cy_huang@richtek.com>
-  - ChiaEn Wu <chiaen_wu@richtek.com>
-
-description: |
-  RT9467 is a switch-mode single cell Li-Ion/Li-Polymer battery charger for
-  portable applications. It integrates a synchronous PWM controller, power
-  MOSFETs, input current sensing and regulation, high-accuracy voltage
-  regulation, and charge termination. The charge current is regulated through
-  integrated sensing resistors.
-
-  The RT9467 also features USB On-The-Go (OTG) support. It also integrates
-  D+/D- pin for USB host/charging port detection.
-
-  Datasheet is available at
-  https://www.richtek.com/assets/product_file/RT9467/DS9467-01.pdf
-
-properties:
-  compatible:
-    const: richtek,rt9467-charger
-
-  reg:
-    maxItems: 1
-
-  wakeup-source: true
-
-  interrupts:
-    maxItems: 1
-
-  charge-enable-gpios:
-    description: GPIO is used to turn on and off charging.
-    maxItems: 1
-
-  usb-otg-vbus-regulator:
-    type: object
-    description: OTG boost regulator.
-    unevaluatedProperties: false
-    $ref: /schemas/regulator/regulator.yaml#
-
-    properties:
-      enable-gpios: true
-
-required:
-  - compatible
-  - reg
-  - wakeup-source
-  - interrupts
-
-additionalProperties: false
-
-examples:
-  - |
-    #include <dt-bindings/interrupt-controller/irq.h>
-    #include <dt-bindings/gpio/gpio.h>
-    i2c {
-      #address-cells = <1>;
-      #size-cells = <0>;
-
-      charger@5b {
-        compatible = "richtek,rt9467-charger";
-        reg = <0x5b>;
-        wakeup-source;
-        interrupts-extended = <&gpio_intc 32 IRQ_TYPE_LEVEL_LOW>;
-        charge-enable-gpios = <&gpio26 1 GPIO_ACTIVE_LOW>;
-
-        rt9467_otg_vbus: usb-otg-vbus-regulator {
-          regulator-name = "rt9467-usb-otg-vbus";
-          regulator-min-microvolt = <4425000>;
-          regulator-max-microvolt = <5825000>;
-          regulator-min-microamp = <500000>;
-          regulator-max-microamp = <3000000>;
-        };
-      };
-    };
diff --git a/Documentation/devicetree/bindings/power/supply/richtek,rt9467.yaml b/Documentation/devicetree/bindings/power/supply/richtek,rt9467.yaml
new file mode 100644 (file)
index 0000000..3723717
--- /dev/null
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/supply/richtek,rt9467.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Richtek RT9467 Switching Battery Charger with Power Path Management
+
+maintainers:
+  - ChiYuan Huang <cy_huang@richtek.com>
+  - ChiaEn Wu <chiaen_wu@richtek.com>
+
+description: |
+  RT9467 is a switch-mode single cell Li-Ion/Li-Polymer battery charger for
+  portable applications. It integrates a synchronous PWM controller, power
+  MOSFETs, input current sensing and regulation, high-accuracy voltage
+  regulation, and charge termination. The charge current is regulated through
+  integrated sensing resistors.
+
+  The RT9467 also features USB On-The-Go (OTG) support. It also integrates
+  D+/D- pin for USB host/charging port detection.
+
+  Datasheet is available at
+  https://www.richtek.com/assets/product_file/RT9467/DS9467-01.pdf
+
+properties:
+  compatible:
+    const: richtek,rt9467
+
+  reg:
+    maxItems: 1
+
+  wakeup-source: true
+
+  interrupts:
+    maxItems: 1
+
+  charge-enable-gpios:
+    description: GPIO is used to turn on and off charging.
+    maxItems: 1
+
+  usb-otg-vbus-regulator:
+    type: object
+    description: OTG boost regulator.
+    unevaluatedProperties: false
+    $ref: /schemas/regulator/regulator.yaml#
+
+    properties:
+      enable-gpios: true
+
+required:
+  - compatible
+  - reg
+  - wakeup-source
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/gpio/gpio.h>
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      charger@5b {
+        compatible = "richtek,rt9467";
+        reg = <0x5b>;
+        wakeup-source;
+        interrupts-extended = <&gpio_intc 32 IRQ_TYPE_LEVEL_LOW>;
+        charge-enable-gpios = <&gpio26 1 GPIO_ACTIVE_LOW>;
+
+        rt9467_otg_vbus: usb-otg-vbus-regulator {
+          regulator-name = "rt9467-usb-otg-vbus";
+          regulator-min-microvolt = <4425000>;
+          regulator-max-microvolt = <5825000>;
+          regulator-min-microamp = <500000>;
+          regulator-max-microamp = <3000000>;
+        };
+      };
+    };
diff --git a/Documentation/devicetree/bindings/pwm/mediatek,mt2712-pwm.yaml b/Documentation/devicetree/bindings/pwm/mediatek,mt2712-pwm.yaml
new file mode 100644 (file)
index 0000000..dbc974b
--- /dev/null
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/mediatek,mt2712-pwm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek PWM Controller
+
+maintainers:
+  - John Crispin <john@phrozen.org>
+
+allOf:
+  - $ref: pwm.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - mediatek,mt2712-pwm
+          - mediatek,mt6795-pwm
+          - mediatek,mt7622-pwm
+          - mediatek,mt7623-pwm
+          - mediatek,mt7628-pwm
+          - mediatek,mt7629-pwm
+          - mediatek,mt8183-pwm
+          - mediatek,mt8365-pwm
+          - mediatek,mt8516-pwm
+      - items:
+          - enum:
+              - mediatek,mt8195-pwm
+          - const: mediatek,mt8183-pwm
+
+  reg:
+    maxItems: 1
+
+  "#pwm-cells":
+    const: 2
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 2
+    maxItems: 10
+
+  clock-names:
+    description:
+      This controller needs two input clocks for its core and one
+      clock for each PWM output.
+    minItems: 2
+    items:
+      - const: top
+      - const: main
+      - const: pwm1
+      - const: pwm2
+      - const: pwm3
+      - const: pwm4
+      - const: pwm5
+      - const: pwm6
+      - const: pwm7
+      - const: pwm8
+
+required:
+  - compatible
+  - reg
+  - "#pwm-cells"
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/mt2712-clk.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    pwm0: pwm@11006000 {
+        compatible = "mediatek,mt2712-pwm";
+        reg = <0x11006000 0x1000>;
+        #pwm-cells = <2>;
+        interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_LOW>;
+        clocks = <&topckgen CLK_TOP_PWM_SEL>, <&pericfg CLK_PERI_PWM>,
+                 <&pericfg CLK_PERI_PWM0>, <&pericfg CLK_PERI_PWM1>,
+                 <&pericfg CLK_PERI_PWM2>, <&pericfg CLK_PERI_PWM3>,
+                 <&pericfg CLK_PERI_PWM4>, <&pericfg CLK_PERI_PWM5>,
+                 <&pericfg CLK_PERI_PWM6>, <&pericfg CLK_PERI_PWM7>;
+        clock-names = "top", "main",
+                      "pwm1", "pwm2",
+                      "pwm3", "pwm4",
+                      "pwm5", "pwm6",
+                      "pwm7", "pwm8";
+    };
diff --git a/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt
deleted file mode 100644 (file)
index 554c96b..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-MediaTek PWM controller
-
-Required properties:
- - compatible: should be "mediatek,<name>-pwm":
-   - "mediatek,mt2712-pwm": found on mt2712 SoC.
-   - "mediatek,mt6795-pwm": found on mt6795 SoC.
-   - "mediatek,mt7622-pwm": found on mt7622 SoC.
-   - "mediatek,mt7623-pwm": found on mt7623 SoC.
-   - "mediatek,mt7628-pwm": found on mt7628 SoC.
-   - "mediatek,mt7629-pwm": found on mt7629 SoC.
-   - "mediatek,mt8183-pwm": found on mt8183 SoC.
-   - "mediatek,mt8195-pwm", "mediatek,mt8183-pwm": found on mt8195 SoC.
-   - "mediatek,mt8365-pwm": found on mt8365 SoC.
-   - "mediatek,mt8516-pwm": found on mt8516 SoC.
- - reg: physical base address and length of the controller's registers.
- - #pwm-cells: must be 2. See pwm.yaml in this directory for a description of
-   the cell format.
- - clocks: phandle and clock specifier of the PWM reference clock.
- - clock-names: must contain the following, except for MT7628 which
-                has no clocks
-   - "top": the top clock generator
-   - "main": clock used by the PWM core
-   - "pwm1-3": the three per PWM clocks for mt8365
-   - "pwm1-8": the eight per PWM clocks for mt2712
-   - "pwm1-6": the six per PWM clocks for mt7622
-   - "pwm1-5": the five per PWM clocks for mt7623
-   - "pwm1"  : the PWM1 clock for mt7629
- - pinctrl-names: Must contain a "default" entry.
- - pinctrl-0: One property must exist for each entry in pinctrl-names.
-   See pinctrl/pinctrl-bindings.txt for details of the property values.
-
-Optional properties:
-- assigned-clocks: Reference to the PWM clock entries.
-- assigned-clock-parents: The phandle of the parent clock of PWM clock.
-
-Example:
-       pwm0: pwm@11006000 {
-               compatible = "mediatek,mt7623-pwm";
-               reg = <0 0x11006000 0 0x1000>;
-               #pwm-cells = <2>;
-               clocks = <&topckgen CLK_TOP_PWM_SEL>,
-                        <&pericfg CLK_PERI_PWM>,
-                        <&pericfg CLK_PERI_PWM1>,
-                        <&pericfg CLK_PERI_PWM2>,
-                        <&pericfg CLK_PERI_PWM3>,
-                        <&pericfg CLK_PERI_PWM4>,
-                        <&pericfg CLK_PERI_PWM5>;
-               clock-names = "top", "main", "pwm1", "pwm2",
-                             "pwm3", "pwm4", "pwm5";
-               pinctrl-names = "default";
-               pinctrl-0 = <&pwm0_pins>;
-       };
diff --git a/Documentation/devicetree/bindings/pwm/snps,dw-apb-timers-pwm2.yaml b/Documentation/devicetree/bindings/pwm/snps,dw-apb-timers-pwm2.yaml
new file mode 100644 (file)
index 0000000..9aabdb3
--- /dev/null
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2022 SiFive, Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/snps,dw-apb-timers-pwm2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DW-APB timers PWM controller
+
+maintainers:
+  - Ben Dooks <ben.dooks@sifive.com>
+
+description:
+  This describes the DesignWare APB timers module when used in the PWM
+  mode. The IP core can be generated with various options which can
+  control the functionality, the number of PWMs available and other
+  internal controls the designer requires.
+
+  The IP block has a version register, so this can be used for detection
+  instead of having to encode the IP version number in the device tree
+  compatible.
+
+allOf:
+  - $ref: pwm.yaml#
+
+properties:
+  compatible:
+    const: snps,dw-apb-timers-pwm2
+
+  reg:
+    maxItems: 1
+
+  "#pwm-cells":
+    const: 3
+
+  clocks:
+    items:
+      - description: Interface bus clock
+      - description: PWM reference clock
+
+  clock-names:
+    items:
+      - const: bus
+      - const: timer
+
+  snps,pwm-number:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: The number of PWM channels configured for this instance
+    enum: [1, 2, 3, 4, 5, 6, 7, 8]
+
+required:
+  - compatible
+  - reg
+  - "#pwm-cells"
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    pwm: pwm@180000 {
+      compatible = "snps,dw-apb-timers-pwm2";
+      reg = <0x180000 0x200>;
+      #pwm-cells = <3>;
+      clocks = <&bus>, <&timer>;
+      clock-names = "bus", "timer";
+    };
diff --git a/Documentation/devicetree/bindings/rtc/amlogic,meson-vrtc.yaml b/Documentation/devicetree/bindings/rtc/amlogic,meson-vrtc.yaml
new file mode 100644 (file)
index 0000000..a89865f
--- /dev/null
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/amlogic,meson-vrtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic Virtual RTC (VRTC)
+
+maintainers:
+  - Neil Armstrong <neil.armstrong@linaro.org>
+
+description: |
+  This is a Linux interface to an RTC managed by firmware, hence it's
+  virtual from a Linux perspective.  The interface is 1 register where
+  an alarm time (in seconds) is to be written.
+  The alarm register is a simple scratch register shared between the
+  application processors (AP) and the secure co-processor (SCP.)  When
+  the AP suspends, the SCP will use the value of this register to
+  program an always-on timer before going to sleep. When the timer expires,
+  the SCP will wake up and will then wake the AP.
+
+allOf:
+  - $ref: rtc.yaml#
+
+properties:
+  compatible:
+    enum:
+      - amlogic,meson-vrtc
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    rtc@a8 {
+      compatible = "amlogic,meson-vrtc";
+      reg = <0x000a8 0x4>;
+    };
index 9fe079917a986e1bab5b0be6d61cb786c8309efb..c6c57636c729e305b230de91bf7c1416a80fbda5 100644 (file)
@@ -11,7 +11,8 @@ maintainers:
 
 description:
   The Broadcom STB wake-up timer provides a 27MHz resolution timer, with the
-  ability to wake up the system from low-power suspend/standby modes.
+  ability to wake up the system from low-power suspend/standby modes and
+  optionally generate RTC alarm interrupts.
 
 allOf:
   - $ref: "rtc.yaml#"
@@ -24,8 +25,14 @@ properties:
     maxItems: 1
 
   interrupts:
-    description: the TIMER interrupt
-    maxItems: 1
+    minItems: 1
+    items:
+      - description: the TIMER interrupt
+      - description: the ALARM interrupt
+    description:
+      The TIMER interrupt wakes the system from low-power suspend/standby modes.
+      An ALARM interrupt may be specified to interrupt the CPU when an RTC alarm
+      is enabled.
 
   clocks:
     description: clock reference in the 27MHz domain
@@ -35,10 +42,10 @@ additionalProperties: false
 
 examples:
   - |
-    rtc@f0411580 {
+    rtc@f041a080 {
         compatible = "brcm,brcmstb-waketimer";
-        reg = <0xf0411580 0x14>;
-        interrupts = <0x3>;
-        interrupt-parent = <&aon_pm_l2_intc>;
+        reg = <0xf041a080 0x14>;
+        interrupts-extended = <&aon_pm_l2_intc 0x04>,
+                              <&upg_aux_aon_intr2_intc 0x08>;
         clocks = <&upg_fixed>;
     };
index af78b67b3da4d859cfc4e3ccf09c2e5e4e6cf5c4..de9879bdb3175a7e0f24304b5a084a8faa233c46 100644 (file)
@@ -11,6 +11,17 @@ maintainers:
 
 allOf:
   - $ref: rtc.yaml#
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              enum:
+                - ingenic,jz4770-rtc
+                - ingenic,jz4780-rtc
+    then:
+      properties:
+        "#clock-cells": false
 
 properties:
   compatible:
@@ -39,6 +50,9 @@ properties:
   clock-names:
     const: rtc
 
+  "#clock-cells":
+    const: 0
+
   system-power-controller:
     description: |
       Indicates that the RTC is responsible for powering OFF
@@ -83,3 +97,18 @@ examples:
       clocks = <&cgu JZ4740_CLK_RTC>;
       clock-names = "rtc";
     };
+
+  - |
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
+    rtc: rtc@10003000 {
+      compatible = "ingenic,jz4780-rtc", "ingenic,jz4760-rtc";
+      reg = <0x10003000 0x4c>;
+
+      interrupt-parent = <&intc>;
+      interrupts = <32>;
+
+      clocks = <&cgu JZ4780_CLK_RTCLK>;
+      clock-names = "rtc";
+
+      #clock-cells = <0>;
+    };
diff --git a/Documentation/devicetree/bindings/rtc/microcrystal,rv3028.yaml b/Documentation/devicetree/bindings/rtc/microcrystal,rv3028.yaml
new file mode 100644 (file)
index 0000000..5ade5df
--- /dev/null
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/microcrystal,rv3028.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip RV-3028 RTC
+
+allOf:
+  - $ref: rtc.yaml#
+
+maintainers:
+  - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+properties:
+  compatible:
+    const: microcrystal,rv3028
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  trickle-resistor-ohms:
+    enum:
+      - 3000
+      - 5000
+      - 9000
+      - 15000
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        rtc@51 {
+            compatible = "microcrystal,rv3028";
+            reg = <0x51>;
+            pinctrl-0 = <&rtc_nint_pins>;
+            interrupts-extended = <&gpio1 16 IRQ_TYPE_LEVEL_HIGH>;
+            trickle-resistor-ohms = <3000>;
+        };
+    };
+
+...
index c9d3ac1477feb84ccbad62a3527779bb78e05e8f..1374df7bf9d691b8e2856f64f9709e6f5b01f265 100644 (file)
@@ -3,15 +3,15 @@ MOXA ART real-time clock
 Required properties:
 
 - compatible : Should be "moxa,moxart-rtc"
-- gpio-rtc-sclk : RTC sclk gpio, with zero flags
-- gpio-rtc-data : RTC data gpio, with zero flags
-- gpio-rtc-reset : RTC reset gpio, with zero flags
+- rtc-sclk-gpios : RTC sclk gpio, with zero flags
+- rtc-data-gpios : RTC data gpio, with zero flags
+- rtc-reset-gpios : RTC reset gpio, with zero flags
 
 Example:
 
        rtc: rtc {
                compatible = "moxa,moxart-rtc";
-               gpio-rtc-sclk = <&gpio 5 0>;
-               gpio-rtc-data = <&gpio 6 0>;
-               gpio-rtc-reset = <&gpio 7 0>;
+               rtc-sclk-gpios = <&gpio 5 0>;
+               rtc-data-gpios = <&gpio 6 0>;
+               rtc-reset-gpios = <&gpio 7 0>;
        };
index cde7b1675ead4c50038d54e9465338f41f2f3682..a1148eb22c2450e9091e301eb9f1a42a2deefdc8 100644 (file)
@@ -14,7 +14,10 @@ maintainers:
 
 properties:
   compatible:
-    const: nxp,pcf2127
+    enum:
+      - nxp,pca2129
+      - nxp,pcf2127
+      - nxp,pcf2129
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/rtc/nxp,pcf85363.yaml b/Documentation/devicetree/bindings/rtc/nxp,pcf85363.yaml
new file mode 100644 (file)
index 0000000..52aa3e2
--- /dev/null
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/nxp,pcf85363.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Philips PCF85263/PCF85363 Real Time Clock
+
+maintainers:
+  - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+allOf:
+  - $ref: rtc.yaml#
+
+properties:
+  compatible:
+    enum:
+      - nxp,pcf85263
+      - nxp,pcf85363
+
+  reg:
+    maxItems: 1
+
+  "#clock-cells":
+    const: 0
+
+  clock-output-names:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  quartz-load-femtofarads:
+    description:
+      The capacitive load of the quartz (x-tal).
+    enum: [6000, 7000, 12500]
+    default: 7000
+
+  start-year: true
+  wakeup-source: true
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        rtc@51 {
+            compatible = "nxp,pcf85363";
+            reg = <0x51>;
+            #clock-cells = <0>;
+            quartz-load-femtofarads = <12500>;
+        };
+    };
index a98b72752349fa6bdb42bf2f9ad739f39a2d353f..22909a96123e58c8b8c78effb138a8c2cc6a5b41 100644 (file)
@@ -19,8 +19,6 @@ properties:
       - microcrystal,rv8564
       - nxp,pca8565
       - nxp,pcf8563
-      - nxp,pcf85263
-      - nxp,pcf85363
 
   reg:
     maxItems: 1
index 21c8ea08ff0a226c5d6a5d71dc4f3c64eb95f5b7..b95a69cc9ae0fef4e4111bade35faa4a7ddb339f 100644 (file)
@@ -40,6 +40,16 @@ properties:
     description:
       Indicates that the setting of RTC time is allowed by the host CPU.
 
+  nvmem-cells:
+    items:
+      - description:
+          four-byte nvmem cell holding a little-endian offset from the Unix
+          epoch representing the time when the RTC timer was last reset
+
+  nvmem-cell-names:
+    items:
+      - const: offset
+
   wakeup-source: true
 
 required:
@@ -69,6 +79,8 @@ examples:
           compatible = "qcom,pm8921-rtc";
           reg = <0x11d>;
           interrupts = <0x27 0>;
+          nvmem-cells = <&rtc_offset>;
+          nvmem-cell-names = "offset";
         };
       };
     };
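
The offset cell above supports RTCs whose hardware counter cannot be set
directly: wall-clock time is reconstructed as counter plus offset, and
setting the time only rewrites the offset. A minimal model of that
arithmetic in C (a sketch of the idea, not the pm8xxx driver itself):

    #include <stdint.h>
    #include <time.h>

    static uint32_t hw_counter;    /* seconds since the RTC last reset  */
    static uint32_t nvmem_offset;  /* the 4-byte "offset" cell (epoch)  */

    static time_t rtc_read_time(void)
    {
            return (time_t)nvmem_offset + hw_counter;
    }

    static void rtc_set_time(time_t t)
    {
            /* the counter can't be written; move the offset instead */
            nvmem_offset = (uint32_t)(t - hw_counter);
    }

    int main(void)
    {
            hw_counter = 1000;  /* pretend the RTC has run for 1000s */
            rtc_set_time(1700000000);
            return rtc_read_time() == 1700000000 ? 0 : 1;
    }
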
diff --git a/Documentation/devicetree/bindings/rtc/rtc-meson-vrtc.txt b/Documentation/devicetree/bindings/rtc/rtc-meson-vrtc.txt
deleted file mode 100644 (file)
index c014f54..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-* Amlogic Virtual RTC (VRTC)
-
-This is a Linux interface to an RTC managed by firmware, hence it's
-virtual from a Linux perspective.  The interface is 1 register where
-an alarm time (in seconds) is to be written.
-
-Required properties:
-- compatible: should be "amlogic,meson-vrtc"
-- reg: physical address for the alarm register
-
-The alarm register is a simple scratch register shared between the
-application processors (AP) and the secure co-processor (SCP.)  When
-the AP suspends, the SCP will use the value of this register to
-program an always-on timer before going sleep. When the timer expires,
-the SCP will wake up and will then wake the AP.
-
-Example:
-
-       vrtc: rtc@0a8 {
-               compatible = "amlogic,meson-vrtc";
-               reg = <0x0 0x000a8 0x0 0x4>;
-       };
index d9fc120c61cc30406e3f5a23473334efea2868c4..eb75861c28c32d44e8c1c729e3185f1dbc99a5b8 100644 (file)
@@ -47,14 +47,12 @@ properties:
       - isil,isl1218
       # Intersil ISL12022 Real-time Clock
       - isil,isl12022
-      # Real Time Clock Module with I2C-Bus
-      - microcrystal,rv3028
+      # Loongson-2K Socs/LS7A bridge Real-time Clock
+      - loongson,ls2x-rtc
       # Real Time Clock Module with I2C-Bus
       - microcrystal,rv3029
       # Real Time Clock
       - microcrystal,rv8523
-      - nxp,pca2129
-      - nxp,pcf2129
       # Real-time Clock Module
       - pericom,pt7c4338
       # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
index 40e3a202f443413ec50c102fbda2d4c3aaf2e65b..5c6ec08c7d247c88f0fcceb352a545f6a95f89fc 100644 (file)
@@ -23,6 +23,7 @@ properties:
       - enum:
           - apple,t6000-mca
           - apple,t8103-mca
+          - apple,t8112-mca
       - const: apple,mca
 
   reg:
index c4cf1e5ab84b03baeef34f706d5170eb79e9061f..9b40268537cb2a56f33f36e6e64ba282b71e2082 100644 (file)
@@ -67,6 +67,12 @@ properties:
     maxItems: 4
     uniqueItems: true
 
+  microchip,startup-delay-us:
+    description: |
+      Specifies the delay in microseconds that needs to be applied after
+      enabling the PDMC microphones to avoid unwanted noise due to microphones
+      not being ready.
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/timer/qcom,msm-timer.txt b/Documentation/devicetree/bindings/timer/qcom,msm-timer.txt
deleted file mode 100644 (file)
index 5e10c34..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-* MSM Timer
-
-Properties:
-
-- compatible : Should at least contain "qcom,msm-timer". More specific
-               properties specify which subsystem the timers are paired with.
-
-               "qcom,kpss-timer" - krait subsystem
-               "qcom,scss-timer" - scorpion subsystem
-
-- interrupts : Interrupts for the debug timer, the first general purpose
-               timer, and optionally a second general purpose timer, and
-               optionally as well, 2 watchdog interrupts, in that order.
-
-- reg : Specifies the base address of the timer registers.
-
-- clocks: Reference to the parent clocks, one per output clock. The parents
-          must appear in the same order as the clock names.
-
-- clock-names: The name of the clocks as free-form strings. They should be in
-               the same order as the clocks.
-
-- clock-frequency : The frequency of the debug timer and the general purpose
-                    timer(s) in Hz in that order.
-
-Optional:
-
-- cpu-offset : per-cpu offset used when the timer is accessed without the
-               CPU remapping facilities. The offset is
-               cpu-offset + (0x10000 * cpu-nr).
-
-Example:
-
-       timer@200a000 {
-               compatible = "qcom,scss-timer", "qcom,msm-timer";
-               interrupts = <1 1 0x301>,
-                            <1 2 0x301>,
-                            <1 3 0x301>,
-                            <1 4 0x301>,
-                            <1 5 0x301>;
-               reg = <0x0200a000 0x100>;
-               clock-frequency = <19200000>,
-                                 <32768>;
-               clocks = <&sleep_clk>;
-               clock-names = "sleep";
-               cpu-offset = <0x40000>;
-       };
diff --git a/Documentation/devicetree/bindings/watchdog/amlogic,meson6-wdt.yaml b/Documentation/devicetree/bindings/watchdog/amlogic,meson6-wdt.yaml
new file mode 100644 (file)
index 0000000..84732cb
--- /dev/null
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/amlogic,meson6-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic Meson6 SoCs Watchdog timer
+
+maintainers:
+  - Neil Armstrong <neil.armstrong@linaro.org>
+  - Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+allOf:
+  - $ref: watchdog.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - amlogic,meson6-wdt
+          - amlogic,meson8-wdt
+          - amlogic,meson8b-wdt
+      - items:
+          - const: amlogic,meson8m2-wdt
+          - const: amlogic,meson8b-wdt
+
+  interrupts:
+    maxItems: 1
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - interrupts
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    wdt: watchdog@c1109900 {
+        compatible = "amlogic,meson6-wdt";
+        reg = <0xc1109900 0x8>;
+        interrupts = <GIC_SPI 0 IRQ_TYPE_EDGE_RISING>;
+        timeout-sec = <10>;
+    };
index fb7695515be1a16bb7f9fce32b96e01a01de0ecf..181f0cc5b5bde2dfb5426169dc6d424d31032162 100644 (file)
@@ -9,9 +9,6 @@ title: Freescale i.MX Watchdog Timer (WDT) Controller
 maintainers:
   - Anson Huang <Anson.Huang@nxp.com>
 
-allOf:
-  - $ref: "watchdog.yaml#"
-
 properties:
   compatible:
     oneOf:
@@ -55,11 +52,45 @@ properties:
       If present, the watchdog device is configured to assert its
       external reset (WDOG_B) instead of issuing a software reset.
 
+  fsl,suspend-in-wait:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      If present, the watchdog device is suspended in WAIT mode
+      (Suspend-to-Idle). Only supported on certain devices.
+
 required:
   - compatible
   - interrupts
   - reg
 
+allOf:
+  - $ref: watchdog.yaml#
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              enum:
+                - fsl,imx25-wdt
+                - fsl,imx35-wdt
+                - fsl,imx50-wdt
+                - fsl,imx51-wdt
+                - fsl,imx53-wdt
+                - fsl,imx6q-wdt
+                - fsl,imx6sl-wdt
+                - fsl,imx6sll-wdt
+                - fsl,imx6sx-wdt
+                - fsl,imx6ul-wdt
+                - fsl,imx7d-wdt
+                - fsl,imx8mm-wdt
+                - fsl,imx8mn-wdt
+                - fsl,imx8mp-wdt
+                - fsl,imx8mq-wdt
+                - fsl,vf610-wdt
+    then:
+      properties:
+        fsl,suspend-in-wait: false
+
 unevaluatedProperties: false
 
 examples:
diff --git a/Documentation/devicetree/bindings/watchdog/gpio-wdt.yaml b/Documentation/devicetree/bindings/watchdog/gpio-wdt.yaml
new file mode 100644 (file)
index 0000000..155dc79
--- /dev/null
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/gpio-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: GPIO controlled watchdog
+
+maintainers:
+  - Robert Marko <robert.marko@sartura.hr>
+
+properties:
+  compatible:
+    const: linux,wdt-gpio
+
+  gpios:
+    maxItems: 1
+    description: GPIO connected to the WDT reset pin
+
+  hw_algo:
+    $ref: /schemas/types.yaml#/definitions/string
+    description: Algorithm used by the driver
+    oneOf:
+      - description:
+          Either a high-to-low or a low-to-high transition clears the WDT counter.
+          The watchdog timer is disabled when GPIO is left floating or connected
+          to a three-state buffer.
+        const: toggle
+      - description:
+          Low or high level starts counting WDT timeout, the opposite level
+          disables the WDT.
+          Active level is determined by the GPIO flags.
+        const: level
+
+  hw_margin_ms:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Maximum time to reset watchdog circuit (in milliseconds)
+    minimum: 2
+    maximum: 65535
+
+  always-running:
+    type: boolean
+    description:
+      If the watchdog timer cannot be disabled, add this flag to have the driver
+      keep toggling the signal without a client.
+      It will only cease to toggle the signal when the device is open and the
+      timeout elapsed.
+
+required:
+  - compatible
+  - gpios
+  - hw_algo
+  - hw_margin_ms
+
+unevaluatedProperties: false
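
As a sketch of the two hw_algo variants described above (assuming gpiolib helpers; this is illustrative, not the actual gpio_wdt driver): "toggle" pings by inverting the line, while "level" pings with a short pulse before returning to the counting level.

	#include <linux/gpio/consumer.h>
	#include <linux/delay.h>

	static void gpio_wdt_ping(struct gpio_desc *gpiod, bool toggle, bool *state)
	{
		if (toggle) {
			/* Either edge clears the WDT counter. */
			*state = !*state;
			gpiod_set_value_cansleep(gpiod, *state);
		} else {
			/* Brief pulse of the opposite level restarts the timeout. */
			gpiod_set_value_cansleep(gpiod, 1);
			udelay(1);
			gpiod_set_value_cansleep(gpiod, 0);
		}
	}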
index b2b17fdf4e3981400d1ff724c3c577888e52e9e5..a668d0c2f14b88d2066e7f2748bb5ab09a56cb43 100644 (file)
@@ -19,6 +19,12 @@ properties:
   reg:
     maxItems: 1
 
+  mediatek,sysctl:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      phandle to system controller 'sysc' syscon node which
+      controls system registers
+
 required:
   - compatible
   - reg
@@ -30,4 +36,5 @@ examples:
     watchdog@100 {
       compatible = "mediatek,mt7621-wdt";
       reg = <0x100 0x100>;
+      mediatek,sysctl = <&sysc>;
     };
index b3605608410c63e4ef5eb21ea02b47facab91680..55b34461df1bb49f9a8a851dbc14b989cd9c9113 100644 (file)
@@ -52,6 +52,12 @@ properties:
     description: Disable sending output reset signal
     type: boolean
 
+  mediatek,reset-by-toprgu:
+    description: The Top Reset Generation Unit (TOPRGU) generates reset signals
+      and distributes them to each IP. If present, the watchdog timer will be
+      reset by TOPRGU once system resets.
+    type: boolean
+
   '#reset-cells':
     const: 1
 
diff --git a/Documentation/devicetree/bindings/watchdog/meson-wdt.txt b/Documentation/devicetree/bindings/watchdog/meson-wdt.txt
deleted file mode 100644 (file)
index 7588cc3..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-Meson SoCs Watchdog timer
-
-Required properties:
-
-- compatible : depending on the SoC this should be one of:
-       "amlogic,meson6-wdt" on Meson6 SoCs
-       "amlogic,meson8-wdt" and "amlogic,meson6-wdt" on Meson8 SoCs
-       "amlogic,meson8b-wdt" on Meson8b SoCs
-       "amlogic,meson8m2-wdt" and "amlogic,meson8b-wdt" on Meson8m2 SoCs
-- reg : Specifies base physical address and size of the registers.
-
-Optional properties:
-- timeout-sec: contains the watchdog timeout in seconds.
-
-Example:
-
-wdt: watchdog@c1109900 {
-       compatible = "amlogic,meson6-wdt";
-       reg = <0xc1109900 0x8>;
-       timeout-sec = <10>;
-};
index d8ac0be36e6c809e7308192113e113951cdba9f5..6448b633c970238894f9cccf6d925b44c40161d2 100644 (file)
@@ -9,15 +9,18 @@ title: Qualcomm Krait Processor Sub-system (KPSS) Watchdog timer
 maintainers:
   - Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
 
-allOf:
-  - $ref: watchdog.yaml#
-
 properties:
+  $nodename:
+    pattern: "^(watchdog|timer)@[0-9a-f]+$"
+
   compatible:
     oneOf:
       - items:
           - enum:
+              - qcom,kpss-wdt-ipq4019
+              - qcom,apss-wdt-msm8994
               - qcom,apss-wdt-qcs404
+              - qcom,apss-wdt-sa8775p
               - qcom,apss-wdt-sc7180
               - qcom,apss-wdt-sc7280
               - qcom,apss-wdt-sc8180x
@@ -29,15 +32,19 @@ properties:
               - qcom,apss-wdt-sm8150
               - qcom,apss-wdt-sm8250
           - const: qcom,kpss-wdt
+      - const: qcom,kpss-wdt
+        deprecated: true
+      - items:
+          - const: qcom,scss-timer
+          - const: qcom,msm-timer
       - items:
           - enum:
-              - qcom,kpss-wdt
-              - qcom,kpss-timer
               - qcom,kpss-wdt-apq8064
-              - qcom,kpss-wdt-ipq4019
               - qcom,kpss-wdt-ipq8064
+              - qcom,kpss-wdt-mdm9615
               - qcom,kpss-wdt-msm8960
-              - qcom,scss-timer
+          - const: qcom,kpss-timer
+          - const: qcom,msm-timer
 
   reg:
     maxItems: 1
@@ -45,18 +52,87 @@ properties:
   clocks:
     maxItems: 1
 
+  clock-names:
+    items:
+      - const: sleep
+
+  clock-frequency:
+    description:
+      The frequency of the general purpose timer in Hz.
+
+  cpu-offset:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Per-CPU offset used when the timer is accessed without the CPU remapping
+      facilities. The offset is cpu-offset + (0x10000 * cpu-nr).
+
+  interrupts:
+    minItems: 1
+    maxItems: 5
+
 required:
   - compatible
   - reg
   - clocks
 
+allOf:
+  - $ref: watchdog.yaml#
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: qcom,kpss-wdt
+    then:
+      properties:
+        clock-frequency: false
+        cpu-offset: false
+        interrupts:
+          minItems: 1
+          items:
+            - description: Bark
+            - description: Bite
+
+    else:
+      properties:
+        interrupts:
+          minItems: 3
+          items:
+            - description: Debug
+            - description: First general purpose timer
+            - description: Second general purpose timer
+            - description: First watchdog
+            - description: Second watchdog
+      required:
+        - clock-frequency
+
 unevaluatedProperties: false
 
 examples:
   - |
-    watchdog@208a038 {
-      compatible = "qcom,kpss-wdt-ipq8064";
-      reg = <0x0208a038 0x40>;
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    watchdog@17c10000 {
+      compatible = "qcom,apss-wdt-sm8150", "qcom,kpss-wdt";
+      reg = <0x17c10000 0x1000>;
       clocks = <&sleep_clk>;
+      interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>;
       timeout-sec = <10>;
     };
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    watchdog@200a000 {
+      compatible = "qcom,kpss-wdt-ipq8064", "qcom,kpss-timer", "qcom,msm-timer";
+      interrupts = <GIC_PPI 1 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>,
+                   <GIC_PPI 2 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>,
+                   <GIC_PPI 3 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>,
+                   <GIC_PPI 4 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>,
+                   <GIC_PPI 5 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>;
+      reg = <0x0200a000 0x100>;
+      clock-frequency = <25000000>;
+      clocks = <&sleep_clk>;
+      clock-names = "sleep";
+      cpu-offset = <0x80000>;
+    };
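
The cpu-offset arithmetic documented above reduces to a fixed per-CPU stride; a minimal sketch, assuming a hypothetical helper name:

	/*
	 * Offset of a CPU's timer bank when the CPU remapping facilities
	 * are not used: cpu-offset + (0x10000 * cpu-nr).
	 */
	static inline unsigned long msm_timer_percpu_offset(u32 cpu_offset,
							    unsigned int cpu)
	{
		return cpu_offset + 0x10000UL * cpu;
	}

With the second example's cpu-offset of 0x80000, CPU 1's bank would start at offset 0x90000.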
index 26b1815a6753a81a1de6ab194f22002d0eca0f29..e2c9bf1aec38026fa0cdaf0ce1c95358dfc6ea32 100644 (file)
@@ -26,7 +26,7 @@ properties:
 
       - items:
           - enum:
-              - renesas,r9a07g043-wdt    # RZ/G2UL
+              - renesas,r9a07g043-wdt    # RZ/G2UL and RZ/Five
               - renesas,r9a07g044-wdt    # RZ/G2{L,LC}
               - renesas,r9a07g054-wdt    # RZ/V2L
           - const: renesas,rzg2l-wdt
index fccae0d00110342e7c3ca58332f00b2bed9ee9f1..519b48889eb148a131674c85ff79c2cc10fe940b 100644 (file)
@@ -14,9 +14,14 @@ description: |
   This document describes generic bindings which can be used to
   describe watchdog devices in a device tree.
 
+select:
+  properties:
+    $nodename:
+      pattern: "^watchdog(@.*|-[0-9a-f])?$"
+
 properties:
   $nodename:
-    pattern: "^watchdog(@.*|-[0-9a-f])?$"
+    pattern: "^(timer|watchdog)(@.*|-[0-9a-f])?$"
 
   timeout-sec:
     description:
index 220f3e0d3f559f47ab61b46b6e44c7754471b941..2055e72871fe6b1c51d45fbd93daad764e15d634 100644 (file)
@@ -158,7 +158,7 @@ nobarrier            This option can be used if underlying storage guarantees
                         If this option is set, no cache_flush commands are issued
                         but f2fs still guarantees the write ordering of all the
                         data writes.
-barrier                 If this option is set, cache_flush commands are allowed to be
+barrier                         If this option is set, cache_flush commands are allowed to be
                         issued.
 fastboot                This option is used when a system wants to reduce mount
                         time as much as possible, even though normal performance
index bf6aa681c960863c2e61db4253e2447d7eecf792..76d1a3ec9be3de914aee1c301add8a43293d441e 100644 (file)
@@ -2,6 +2,7 @@
 
 .. _linux_doc:
 
+==============================
 The Linux Kernel documentation
 ==============================
 
@@ -13,7 +14,7 @@ documentation are welcome; join the linux-doc list at vger.kernel.org if
 you want to help out.
 
 Working with the development community
---------------------------------------
+======================================
 
 The essential guides for interacting with the kernel's development
 community and getting your work upstream.
@@ -29,7 +30,7 @@ community and getting your work upstream.
 
 
 Internal API manuals
---------------------
+====================
 
 Manuals for use by developers working to interface with the rest of the
 kernel.
@@ -43,7 +44,7 @@ kernel.
    Locking in the kernel <locking/index>
 
 Development tools and processes
--------------------------------
+===============================
 
 Various other manuals with useful information for all kernel developers.
 
@@ -62,7 +63,7 @@ Various other manuals with useful information for all kernel developers.
 
 
 User-oriented documentation
----------------------------
+===========================
 
 The following manuals are written for *users* of the kernel — those who are
 trying to get it to work optimally on a given system and application
@@ -81,7 +82,7 @@ See also: the `Linux man pages <https://www.kernel.org/doc/man-pages/>`_,
 which are kept separately from the kernel's own documentation.
 
 Firmware-related documentation
-------------------------------
+==============================
 The following holds information on the kernel's expectations regarding the
 platform firmwares.
 
@@ -93,7 +94,7 @@ platform firmwares.
 
 
 Architecture-specific documentation
------------------------------------
+===================================
 
 .. toctree::
    :maxdepth: 2
@@ -102,7 +103,7 @@ Architecture-specific documentation
 
 
 Other documentation
--------------------
+===================
 
 There are several unsorted documents that don't seem to fit on other parts
 of the documentation body, or may require some adjustments and/or conversion
@@ -115,7 +116,7 @@ to ReStructured Text format, or are simply too old.
 
 
 Translations
-------------
+============
 
 .. toctree::
    :maxdepth: 2
index dfaf9fc883f44a2c12ab0d4f71348efc40968361..7f56fc0d7c31688d012dfea7d072675817e31814 100644 (file)
@@ -5,7 +5,7 @@ Kernel Lock Torture Test Operation
 CONFIG_LOCK_TORTURE_TEST
 ========================
 
-The CONFIG LOCK_TORTURE_TEST config option provides a kernel module
+The CONFIG_LOCK_TORTURE_TEST config option provides a kernel module
 that runs torture tests on core kernel locking primitives. The kernel
 module, 'locktorture', may be built after the fact on the running
 kernel to be tested, if desired. The tests periodically output status
@@ -67,7 +67,7 @@ torture_type
 
                     - "rtmutex_lock":
                                rtmutex_lock() and rtmutex_unlock() pairs.
-                               Kernel must have CONFIG_RT_MUTEX=y.
+                               Kernel must have CONFIG_RT_MUTEXES=y.
 
                     - "rwsem_lock":
                                read/write down() and up() semaphore pairs.
index 24de747b53443158fc8adab3816219dcd3e82ec7..753e5914a8b7cb5b9fbfe87f6c892c5e33f99086 100644 (file)
@@ -9,6 +9,7 @@ definitions:
   -
     type: flags
     name: xdp-act
+    render-max: true
     entries:
       -
         name: basic
index d87f1fee4cbc5f706ba9ef3b37e0c0427fbcb807..7a670a075ab6f77c1a1b30df387d0a6be25b4ae6 100644 (file)
@@ -251,7 +251,8 @@ The tags in common use are:
  - Reported-by: names a user who reported a problem which is fixed by this
    patch; this tag is used to give credit to the (often underappreciated)
    people who test our code and let us know when things do not work
-   correctly.
+   correctly. Note, this tag should be followed by a Link: tag pointing to the
+   report, unless the report is not available on the web.
 
  - Cc: the named person received a copy of the patch and had the
    opportunity to comment on it.
index fab44ae732e3f859b5f7e2414b276e77515c59d9..eac7167dce83d0fd7665a874aceaa4c9fa5d9e83 100644 (file)
@@ -496,10 +496,11 @@ Using Reported-by:, Tested-by:, Reviewed-by:, Suggested-by: and Fixes:
 ----------------------------------------------------------------------
 
 The Reported-by tag gives credit to people who find bugs and report them and it
-hopefully inspires them to help us again in the future.  Please note that if
-the bug was reported in private, then ask for permission first before using the
-Reported-by tag. The tag is intended for bugs; please do not use it to credit
-feature requests.
+hopefully inspires them to help us again in the future. The tag is intended for
+bugs; please do not use it to credit feature requests. The tag should be
+followed by a Link: tag pointing to the report, unless the report is not
+available on the web. Please note that if the bug was reported in private, then
+ask for permission first before using the Reported-by tag.
 
 A Tested-by: tag indicates that the patch has been successfully tested (in
 some environment) by the person named.  This tag informs maintainers that
index 6982b63775da583735b8e5d52e4a784de719293c..ed7f4f5b3cf157d12484a289f2efa2d3b9f228f8 100644 (file)
@@ -16,4 +16,6 @@ support corresponds to ``S`` values in the ``MAINTAINERS`` file.
 Architecture  Level of support  Constraints
 ============  ================  ==============================================
 ``x86``       Maintained        ``x86_64`` only.
+``um``        Maintained        ``x86_64`` only.
 ============  ================  ==============================================
+
index c978a8132ce103beee66ccd67fdb6706b247f3b9..0f1e131b3bb14d049aedc1e33be046ae5bdcbdb6 100644 (file)
@@ -17,3 +17,4 @@
    kernel-enforcement-statement
    email-clients
    magic-number
+   programming-language
diff --git a/Documentation/translations/sp_SP/process/programming-language.rst b/Documentation/translations/sp_SP/process/programming-language.rst
new file mode 100644 (file)
index 0000000..301f525
--- /dev/null
@@ -0,0 +1,53 @@
+.. include:: ../disclaimer-sp.rst
+
+:Original: :ref:`Documentation/process/programming-language.rst <programming_language>`
+:Translator: Carlos Bilbao <carlos.bilbao@amd.com>
+
+.. _sp_programming_language:
+
+Lenguaje de programación
+========================
+
+El kernel está escrito en el lenguaje de programación C [sp-c-language]_.
+Más concretamente, el kernel normalmente se compila con ``gcc`` [sp-gcc]_
+bajo ``-std=gnu11`` [sp-gcc-c-dialect-options]_: el dialecto GNU de ISO C11.
+``clang`` [sp-clang]_ también es compatible, consulte los documentos en
+:ref:`Building Linux with Clang/LLVM <kbuild_llvm>`.
+
+Este dialecto contiene muchas extensiones del lenguaje [sp-gnu-extensions]_,
+y muchos de ellos se usan dentro del kernel de forma habitual.
+
+Hay algo de soporte para compilar el núcleo con ``icc`` [sp-icc]_ para varias
+de las arquitecturas, aunque en el momento de escribir este texto, eso no
+está terminado y requiere parches de terceros.
+
+Atributos
+---------
+
+Una de las comunes extensiones utilizadas en todo el kernel son los atributos
+[sp-gcc-attribute-syntax]_. Los atributos permiten introducir semántica
+definida por la implementación a las entidades del lenguaje (como variables,
+funciones o tipos) sin tener que hacer cambios sintácticos significativos
+al idioma (por ejemplo, agregar una nueva palabra clave) [sp-n2049]_.
+
+En algunos casos, los atributos son opcionales (es decir, hay compiladores
+que no los admiten pero de todos modos deben producir el código adecuado,
+incluso si es más lento o no realiza tantas comprobaciones/diagnósticos en
+tiempo de compilación).
+
+El kernel define pseudo-palabras clave (por ejemplo, ``__pure``) en lugar
+de usar directamente la sintaxis del atributo GNU (por ejemplo,
+``__attribute__((__pure__))``) con el fin de detectar cuáles se pueden
+utilizar y/o acortar el código.
+
+Por favor consulte ``include/linux/compiler_attributes.h`` para obtener
+más información.
+
+.. [sp-c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
+.. [sp-gcc] https://gcc.gnu.org
+.. [sp-clang] https://clang.llvm.org
+.. [sp-icc] https://software.intel.com/en-us/c-compilers
+.. [sp-gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
+.. [sp-gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
+.. [sp-gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
+.. [sp-n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
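
A one-line illustration of the pseudo-keyword indirection described above; the function is a made-up example, not kernel code.

	#include <linux/compiler_attributes.h>

	/* __pure expands to __attribute__((__pure__)) on supported compilers. */
	static int __pure answer_scaled(int x)
	{
		return x * 42;	/* no side effects; result depends only on x */
	}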
index edd3d562beeeb5bee8f5225727f53031343807a7..d86c7807aa2017da828d1fde4a744d7eeb52b36c 100644 (file)
@@ -5971,7 +5971,7 @@ F:        include/linux/dm-*.h
 F:     include/uapi/linux/dm-*.h
 
 DEVLINK
-M:     Jiri Pirko <jiri@nvidia.com>
+M:     Jiri Pirko <jiri@resnulli.us>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     Documentation/networking/devlink
@@ -7795,6 +7795,7 @@ M:        Chao Yu <chao@kernel.org>
 L:     linux-f2fs-devel@lists.sourceforge.net
 S:     Maintained
 W:     https://f2fs.wiki.kernel.org/
+Q:     https://patchwork.kernel.org/project/f2fs/list/
 B:     https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
 F:     Documentation/ABI/testing/sysfs-fs-f2fs
@@ -7910,6 +7911,7 @@ F:        include/trace/events/fs_dax.h
 
 FILESYSTEMS (VFS and infrastructure)
 M:     Alexander Viro <viro@zeniv.linux.org.uk>
+M:     Christian Brauner <brauner@kernel.org>
 L:     linux-fsdevel@vger.kernel.org
 S:     Maintained
 F:     fs/*
@@ -9789,13 +9791,6 @@ L:       netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/ibm/ibmvnic.*
 
-IBM Power Virtual Accelerator Switchboard
-L:     linuxppc-dev@lists.ozlabs.org
-S:     Supported
-F:     arch/powerpc/include/asm/vas.h
-F:     arch/powerpc/platforms/powernv/copy-paste.h
-F:     arch/powerpc/platforms/powernv/vas*
-
 IBM Power Virtual Ethernet Device Driver
 M:     Nick Child <nnac123@linux.ibm.com>
 L:     netdev@vger.kernel.org
@@ -15084,7 +15079,7 @@ F:      Documentation/hwmon/nzxt-smart2.rst
 F:     drivers/hwmon/nzxt-smart2.c
 
 OBJAGG
-M:     Jiri Pirko <jiri@nvidia.com>
+M:     Jiri Pirko <jiri@resnulli.us>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     include/linux/objagg.h
@@ -15858,7 +15853,7 @@ F:      drivers/video/logo/logo_parisc*
 F:     include/linux/hp_sdc.h
 
 PARMAN
-M:     Jiri Pirko <jiri@nvidia.com>
+M:     Jiri Pirko <jiri@resnulli.us>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     include/linux/parman.h
index aa26f34a9b522f35f5506655de7573db939bb264..d7bd0eb9b34633f22d70649e245575f49dfd1e41 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 6
-PATCHLEVEL = 2
+PATCHLEVEL = 3
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*
index ef427a6bdd1ab91be0445b0261d38bb49bb2be93..7b01ae4f3bc6c7ab53dfb0c219c6fd75e2ffdf36 100644 (file)
@@ -152,8 +152,11 @@ retry:
           the fault.  */
        fault = handle_mm_fault(vma, address, flags, regs);
 
-       if (fault_signal_pending(fault, regs))
+       if (fault_signal_pending(fault, regs)) {
+               if (!user_mode(regs))
+                       goto no_context;
                return;
+       }
 
        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
index 27b2592698b0ae4748e09907adc3ed5f1bf41b2f..1023e896d46b89698c69c84fcf0779f407c390da 100644 (file)
@@ -100,7 +100,6 @@ config ARM64
        select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
        select ARCH_WANT_FRAME_POINTERS
        select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
-       select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
        select ARCH_WANT_LD_ORPHAN_WARN
        select ARCH_WANTS_NO_INSTR
        select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
@@ -187,7 +186,8 @@ config ARM64
        select HAVE_DMA_CONTIGUOUS
        select HAVE_DYNAMIC_FTRACE
        select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \
-               if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG)
+               if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG && \
+                   !CC_OPTIMIZE_FOR_SIZE)
        select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
                if DYNAMIC_FTRACE_WITH_ARGS
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
index 9dd08cd339c3f0286c6d361bae0569f0035c9a8e..78e5163836a0ab95148c5101405fd872edc40198 100644 (file)
 #include <linux/compiler.h>
 #include <linux/mmdebug.h>
 #include <linux/types.h>
+#include <asm/boot.h>
 #include <asm/bug.h>
 
 #if VA_BITS > 48
@@ -203,6 +204,16 @@ static inline unsigned long kaslr_offset(void)
        return kimage_vaddr - KIMAGE_VADDR;
 }
 
+static inline bool kaslr_enabled(void)
+{
+       /*
+        * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical
+        * placement of the image rather than from the seed, so a displacement
+        * of less than MIN_KIMG_ALIGN means that no seed was provided.
+        */
+       return kaslr_offset() >= MIN_KIMG_ALIGN;
+}
+
 /*
  * Allow all memory at the discovery stage. We will clip it later.
  */
index 378453faa87e18986f008c44387f5e76b3736e2c..dba8fcec7f33d6581848bbfd61b9e65f3413bcaa 100644 (file)
@@ -435,10 +435,6 @@ int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt)
        enum arm_smccc_conduit conduit;
        struct acpi_ffh_data *ffh_ctxt;
 
-       ffh_ctxt = kzalloc(sizeof(*ffh_ctxt), GFP_KERNEL);
-       if (!ffh_ctxt)
-               return -ENOMEM;
-
        if (arm_smccc_get_version() < ARM_SMCCC_VERSION_1_2)
                return -EOPNOTSUPP;
 
@@ -448,6 +444,10 @@ int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt)
                return -EOPNOTSUPP;
        }
 
+       ffh_ctxt = kzalloc(sizeof(*ffh_ctxt), GFP_KERNEL);
+       if (!ffh_ctxt)
+               return -ENOMEM;
+
        if (conduit == SMCCC_CONDUIT_SMC) {
                ffh_ctxt->invoke_ffh_fn = __arm_smccc_smc;
                ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_smc;
index 87687e99fee3c8dd82eb43d4fcd9754f1b4ef4c3..2e3e5513977733b7a324c4263218be2bfbeef580 100644 (file)
@@ -1633,7 +1633,7 @@ bool kaslr_requires_kpti(void)
                        return false;
        }
 
-       return kaslr_offset() > 0;
+       return kaslr_enabled();
 }
 
 static bool __meltdown_safe = true;
index 692dfefbe0ed2597458e251b2b0da2a1ad50f5ba..9e7e50a0fd76d10b72e92570a0bb0688559935be 100644 (file)
@@ -2122,9 +2122,6 @@ static int __init fpsimd_init(void)
                pr_notice("Advanced SIMD is not implemented\n");
 
 
-       if (cpu_have_named_feature(SME) && !cpu_have_named_feature(SVE))
-               pr_notice("SME is implemented but not SVE\n");
-
        sve_sysctl_init();
        sme_sysctl_init();
 
index 325455d16dbcb31a1808768374376a2cbe941665..e7477f21a4c9d062ee4909d6bcc72cdf555ab02a 100644 (file)
@@ -41,7 +41,7 @@ static int __init kaslr_init(void)
                return 0;
        }
 
-       if (!kaslr_offset()) {
+       if (!kaslr_enabled()) {
                pr_warn("KASLR disabled due to lack of seed\n");
                return 0;
        }
index 901dfd9bf04c325c0d878caf34869ed7635c2161..4a79ba10079983f52348b4d0a4d7ca6a95db7a54 100644 (file)
@@ -997,7 +997,7 @@ static int cfi_handler(struct pt_regs *regs, unsigned long esr)
 
        switch (report_cfi_failure(regs, regs->pc, &target, type)) {
        case BUG_TRAP_TYPE_BUG:
-               die("Oops - CFI", regs, 0);
+               die("Oops - CFI", regs, esr);
                break;
 
        case BUG_TRAP_TYPE_WARN:
index 8dd5a8fe64b4f6688889ace395aedc83a735ae9f..4aadcfb017545dbaef8fd7fb321ab2bdd9d565bd 100644 (file)
@@ -22,7 +22,8 @@ void copy_highpage(struct page *to, struct page *from)
        copy_page(kto, kfrom);
 
        if (system_supports_mte() && page_mte_tagged(from)) {
-               page_kasan_tag_reset(to);
+               if (kasan_hw_tags_enabled())
+                       page_kasan_tag_reset(to);
                /* It's a new page, shouldn't have been tagged yet */
                WARN_ON_ONCE(!try_page_mte_tagging(to));
                mte_copy_page_tags(kto, kfrom);
index f73c7cbfe32603c425269f80af6e767bd212ad68..4b578d02fd01a9eb1513d34b708a84f6e5af31b5 100644 (file)
@@ -93,8 +93,11 @@ good_area:
 
        fault = handle_mm_fault(vma, address, flags, regs);
 
-       if (fault_signal_pending(fault, regs))
+       if (fault_signal_pending(fault, regs)) {
+               if (!user_mode(regs))
+                       goto no_context;
                return;
+       }
 
        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
index ca2e02685343844b01837131db4989de37a0262c..259ae57570bfedd36a90b676a0beb15057e5901a 100644 (file)
 #include <linux/types.h>
 /* include compiler specific intrinsics */
 #include <asm/ia64regs.h>
-#ifdef __INTEL_COMPILER
-# include <asm/intel_intrin.h>
-#else
-# include <asm/gcc_intrin.h>
-#endif
+#include <asm/gcc_intrin.h>
 
 /*
  * This function doesn't exist, so you'll get a linker error if
diff --git a/arch/ia64/include/uapi/asm/intel_intrin.h b/arch/ia64/include/uapi/asm/intel_intrin.h
deleted file mode 100644 (file)
index dc1884d..0000000
+++ /dev/null
@@ -1,162 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_IA64_INTEL_INTRIN_H
-#define _ASM_IA64_INTEL_INTRIN_H
-/*
- * Intel Compiler Intrinsics
- *
- * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com>
- * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
- * Copyright (C) 2005,2006 Hongjiu Lu <hongjiu.lu@intel.com>
- *
- */
-#include <ia64intrin.h>
-
-#define ia64_barrier()         __memory_barrier()
-
-#define ia64_stop()    /* Nothing: As of now stop bit is generated for each
-                        * intrinsic
-                        */
-
-#define ia64_getreg            __getReg
-#define ia64_setreg            __setReg
-
-#define ia64_hint              __hint
-#define ia64_hint_pause                __hint_pause
-
-#define ia64_mux1_brcst                _m64_mux1_brcst
-#define ia64_mux1_mix          _m64_mux1_mix
-#define ia64_mux1_shuf         _m64_mux1_shuf
-#define ia64_mux1_alt          _m64_mux1_alt
-#define ia64_mux1_rev          _m64_mux1_rev
-
-#define ia64_mux1(x,v)         _m_to_int64(_m64_mux1(_m_from_int64(x), (v)))
-#define ia64_popcnt            _m64_popcnt
-#define ia64_getf_exp          __getf_exp
-#define ia64_shrp              _m64_shrp
-
-#define ia64_tpa               __tpa
-#define ia64_invala            __invala
-#define ia64_invala_gr         __invala_gr
-#define ia64_invala_fr         __invala_fr
-#define ia64_nop               __nop
-#define ia64_sum               __sum
-#define ia64_ssm               __ssm
-#define ia64_rum               __rum
-#define ia64_rsm               __rsm
-#define ia64_fc                        __fc
-
-#define ia64_ldfs              __ldfs
-#define ia64_ldfd              __ldfd
-#define ia64_ldfe              __ldfe
-#define ia64_ldf8              __ldf8
-#define ia64_ldf_fill          __ldf_fill
-
-#define ia64_stfs              __stfs
-#define ia64_stfd              __stfd
-#define ia64_stfe              __stfe
-#define ia64_stf8              __stf8
-#define ia64_stf_spill         __stf_spill
-
-#define ia64_mf                        __mf
-#define ia64_mfa               __mfa
-
-#define ia64_fetchadd4_acq     __fetchadd4_acq
-#define ia64_fetchadd4_rel     __fetchadd4_rel
-#define ia64_fetchadd8_acq     __fetchadd8_acq
-#define ia64_fetchadd8_rel     __fetchadd8_rel
-
-#define ia64_xchg1             _InterlockedExchange8
-#define ia64_xchg2             _InterlockedExchange16
-#define ia64_xchg4             _InterlockedExchange
-#define ia64_xchg8             _InterlockedExchange64
-
-#define ia64_cmpxchg1_rel      _InterlockedCompareExchange8_rel
-#define ia64_cmpxchg1_acq      _InterlockedCompareExchange8_acq
-#define ia64_cmpxchg2_rel      _InterlockedCompareExchange16_rel
-#define ia64_cmpxchg2_acq      _InterlockedCompareExchange16_acq
-#define ia64_cmpxchg4_rel      _InterlockedCompareExchange_rel
-#define ia64_cmpxchg4_acq      _InterlockedCompareExchange_acq
-#define ia64_cmpxchg8_rel      _InterlockedCompareExchange64_rel
-#define ia64_cmpxchg8_acq      _InterlockedCompareExchange64_acq
-
-#define __ia64_set_dbr(index, val)     \
-               __setIndReg(_IA64_REG_INDR_DBR, index, val)
-#define ia64_set_ibr(index, val)       \
-               __setIndReg(_IA64_REG_INDR_IBR, index, val)
-#define ia64_set_pkr(index, val)       \
-               __setIndReg(_IA64_REG_INDR_PKR, index, val)
-#define ia64_set_pmc(index, val)       \
-               __setIndReg(_IA64_REG_INDR_PMC, index, val)
-#define ia64_set_pmd(index, val)       \
-               __setIndReg(_IA64_REG_INDR_PMD, index, val)
-#define ia64_set_rr(index, val)                \
-               __setIndReg(_IA64_REG_INDR_RR, index, val)
-
-#define ia64_get_cpuid(index)  \
-               __getIndReg(_IA64_REG_INDR_CPUID, index)
-#define __ia64_get_dbr(index)          __getIndReg(_IA64_REG_INDR_DBR, index)
-#define ia64_get_ibr(index)            __getIndReg(_IA64_REG_INDR_IBR, index)
-#define ia64_get_pkr(index)            __getIndReg(_IA64_REG_INDR_PKR, index)
-#define ia64_get_pmc(index)            __getIndReg(_IA64_REG_INDR_PMC, index)
-#define ia64_get_pmd(index)            __getIndReg(_IA64_REG_INDR_PMD, index)
-#define ia64_get_rr(index)             __getIndReg(_IA64_REG_INDR_RR, index)
-
-#define ia64_srlz_d            __dsrlz
-#define ia64_srlz_i            __isrlz
-
-#define ia64_dv_serialize_data()
-#define ia64_dv_serialize_instruction()
-
-#define ia64_st1_rel           __st1_rel
-#define ia64_st2_rel           __st2_rel
-#define ia64_st4_rel           __st4_rel
-#define ia64_st8_rel           __st8_rel
-
-/* FIXME: need st4.rel.nta intrinsic */
-#define ia64_st4_rel_nta       __st4_rel
-
-#define ia64_ld1_acq           __ld1_acq
-#define ia64_ld2_acq           __ld2_acq
-#define ia64_ld4_acq           __ld4_acq
-#define ia64_ld8_acq           __ld8_acq
-
-#define ia64_sync_i            __synci
-#define ia64_thash             __thash
-#define ia64_ttag              __ttag
-#define ia64_itcd              __itcd
-#define ia64_itci              __itci
-#define ia64_itrd              __itrd
-#define ia64_itri              __itri
-#define ia64_ptce              __ptce
-#define ia64_ptcl              __ptcl
-#define ia64_ptcg              __ptcg
-#define ia64_ptcga             __ptcga
-#define ia64_ptri              __ptri
-#define ia64_ptrd              __ptrd
-#define ia64_dep_mi            _m64_dep_mi
-
-/* Values for lfhint in __lfetch and __lfetch_fault */
-
-#define ia64_lfhint_none       __lfhint_none
-#define ia64_lfhint_nt1                __lfhint_nt1
-#define ia64_lfhint_nt2                __lfhint_nt2
-#define ia64_lfhint_nta                __lfhint_nta
-
-#define ia64_lfetch            __lfetch
-#define ia64_lfetch_excl       __lfetch_excl
-#define ia64_lfetch_fault      __lfetch_fault
-#define ia64_lfetch_fault_excl __lfetch_fault_excl
-
-#define ia64_intrin_local_irq_restore(x)               \
-do {                                                   \
-       if ((x) != 0) {                                 \
-               ia64_ssm(IA64_PSR_I);                   \
-               ia64_srlz_d();                          \
-       } else {                                        \
-               ia64_rsm(IA64_PSR_I);                   \
-       }                                               \
-} while (0)
-
-#define __builtin_trap()       __break(0);
-
-#endif /* _ASM_IA64_INTEL_INTRIN_H */
index a0e0a064f5b120caa1b5f6a81dcc12fa9578bd66..63f27c4ec739f0066b05875690287bac035e2f7a 100644 (file)
 #include <linux/types.h>
 /* include compiler specific intrinsics */
 #include <asm/ia64regs.h>
-#ifdef __INTEL_COMPILER
-# include <asm/intel_intrin.h>
-#else
-# include <asm/gcc_intrin.h>
-#endif
+#include <asm/gcc_intrin.h>
 #include <asm/cmpxchg.h>
 
 #define ia64_set_rr0_to_rr4(val0, val1, val2, val3, val4)              \
index 96d13cb7c19f085cc62395011a9499cdfa2515a6..15f6cfddcc080b47693f2b6212d4ca5fab6bf3e0 100644 (file)
@@ -783,11 +783,9 @@ __init void prefill_possible_map(void)
 
 static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
 {
-       cpumask_t tmp_map;
        int cpu;
 
-       cpumask_complement(&tmp_map, cpu_present_mask);
-       cpu = cpumask_first(&tmp_map);
+       cpu = cpumask_first_zero(cpu_present_mask);
        if (cpu >= nr_cpu_ids)
                return -EINVAL;
 
index ef78c2d66cdde243b8bb1887054faab51b13d838..85c4d9ac8686d805399b926929a59ed19dc2c506 100644 (file)
@@ -136,8 +136,11 @@ retry:
         */
        fault = handle_mm_fault(vma, address, flags, regs);
 
-       if (fault_signal_pending(fault, regs))
+       if (fault_signal_pending(fault, regs)) {
+               if (!user_mode(regs))
+                       goto no_context;
                return;
+       }
 
        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
index 9cc8b84f7eb03d856900f2fb42f412d25ec5b0d5..7fd51257e0ed41b0460e584683805c2ed0f3714f 100644 (file)
@@ -94,15 +94,21 @@ config LOONGARCH
        select HAVE_DYNAMIC_FTRACE_WITH_ARGS
        select HAVE_DYNAMIC_FTRACE_WITH_REGS
        select HAVE_EBPF_JIT
+       select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
        select HAVE_EXIT_THREAD
        select HAVE_FAST_GUP
        select HAVE_FTRACE_MCOUNT_RECORD
+       select HAVE_FUNCTION_ARG_ACCESS_API
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER
        select HAVE_GENERIC_VDSO
+       select HAVE_HW_BREAKPOINT if PERF_EVENTS
        select HAVE_IOREMAP_PROT
        select HAVE_IRQ_EXIT_ON_IRQ_STACK
        select HAVE_IRQ_TIME_ACCOUNTING
+       select HAVE_KPROBES
+       select HAVE_KPROBES_ON_FTRACE
+       select HAVE_KRETPROBES
        select HAVE_MOD_ARCH_SPECIFIC
        select HAVE_NMI
        select HAVE_PCI
@@ -441,6 +447,24 @@ config ARCH_IOREMAP
          protection support. However, you can enable LoongArch DMW-based
          ioremap() for better performance.
 
+config ARCH_STRICT_ALIGN
+       bool "Enable -mstrict-align to prevent unaligned accesses" if EXPERT
+       default y
+       help
+         Not all LoongArch cores support h/w unaligned access; we can use
+         the -mstrict-align build parameter to prevent unaligned accesses.
+
+         CPUs with h/w unaligned access support:
+         Loongson-2K2000/2K3000/3A5000/3C5000/3D5000.
+
+         CPUs without h/w unaligned access support:
+         Loongson-2K500/2K1000.
+
+         This option is enabled by default so that the kernel can run on all
+         LoongArch systems. You can disable it manually if you only run the
+         kernel on systems with h/w unaligned access support, in order to
+         optimise for performance.
+
 config KEXEC
        bool "Kexec system call"
        select KEXEC_CORE
@@ -454,6 +478,7 @@ config KEXEC
 
 config CRASH_DUMP
        bool "Build kdump crash kernel"
+       select RELOCATABLE
        help
          Generate crash dump after being started by kexec. This should
          be normally only set in special crash dump kernels which are
@@ -463,16 +488,38 @@ config CRASH_DUMP
 
          For more details see Documentation/admin-guide/kdump/kdump.rst
 
-config PHYSICAL_START
-       hex "Physical address where the kernel is loaded"
-       default "0x90000000a0000000"
-       depends on CRASH_DUMP
+config RELOCATABLE
+       bool "Relocatable kernel"
        help
-         This gives the XKPRANGE address where the kernel is loaded.
-         If you plan to use kernel for capturing the crash dump change
-         this value to start of the reserved region (the "X" value as
-         specified in the "crashkernel=YM@XM" command line boot parameter
-         passed to the panic-ed kernel).
+         This builds the kernel as a Position Independent Executable (PIE),
+         which retains all relocation metadata required, so as to relocate
+         the kernel binary at runtime to a different virtual address from
+         its link address.
+
+config RANDOMIZE_BASE
+       bool "Randomize the address of the kernel (KASLR)"
+       depends on RELOCATABLE
+       help
+          Randomizes the physical and virtual address at which the
+          kernel image is loaded, as a security feature that
+          deters exploit attempts relying on knowledge of the location
+          of kernel internals.
+
+          The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET.
+
+          If unsure, say N.
+
+config RANDOMIZE_BASE_MAX_OFFSET
+       hex "Maximum KASLR offset" if EXPERT
+       depends on RANDOMIZE_BASE
+       range 0x0 0x10000000
+       default "0x01000000"
+       help
+         When KASLR is active, this provides the maximum offset that will
+         be applied to the kernel image. It should be set according to the
+         amount of physical RAM available in the target system.
+
+         This is limited by the size of the lower address memory, 256MB.
 
 config SECCOMP
        bool "Enable seccomp to safely compute untrusted bytecode"
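
To make concrete what -mstrict-align protects against: on cores without h/w unaligned access, a plain load through a misaligned pointer traps, so portable code goes through byte-wise accessors. A small sketch using the generic unaligned helpers (not LoongArch-specific code):

	#include <linux/types.h>
	#include <asm/unaligned.h>

	static u32 read_be32_field(const u8 *buf, size_t off)
	{
		/*
		 * Compiles to byte loads under -mstrict-align, and to a
		 * single 32-bit load where unaligned access is safe.
		 */
		return get_unaligned_be32(buf + off);
	}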
index 4402387d27551a3601c7a5ea49b6bf42f1377376..f71edf5741011e7c2d1e17d4400308b297947253 100644 (file)
@@ -71,14 +71,15 @@ KBUILD_AFLAGS_MODULE                += -Wa,-mla-global-with-abs
 KBUILD_CFLAGS_MODULE           += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
 endif
 
+ifeq ($(CONFIG_RELOCATABLE),y)
+KBUILD_CFLAGS_KERNEL           += -fPIE
+LDFLAGS_vmlinux                        += -static -pie --no-dynamic-linker -z notext
+endif
+
 cflags-y += -ffreestanding
 cflags-y += $(call cc-option, -mno-check-zero-division)
 
-ifndef CONFIG_PHYSICAL_START
 load-y         = 0x9000000000200000
-else
-load-y         = $(CONFIG_PHYSICAL_START)
-endif
 bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
 
 drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
@@ -91,10 +92,15 @@ KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y)
 # instead of .eh_frame so we don't discard them.
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
+ifdef CONFIG_ARCH_STRICT_ALIGN
 # Don't emit unaligned accesses.
 # Not all LoongArch cores support unaligned access, and as the kernel we can't
 # rely on others to provide emulation for these accesses.
 KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
+else
+# Optimise for performance on hardware that supports unaligned access.
+KBUILD_CFLAGS += $(call cc-option,-mno-strict-align)
+endif
 
 KBUILD_CFLAGS += -isystem $(shell $(CC) -print-file-name=include)
 
index eb84cae642e5874cb8c09f3b06d9b8044bf6c423..e18213f01cc478e6466addbfcb72174626ab8718 100644 (file)
@@ -48,6 +48,7 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_NR_CPUS=64
 CONFIG_NUMA=y
 CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
 CONFIG_SUSPEND=y
 CONFIG_HIBERNATION=y
 CONFIG_ACPI=y
index d342935e5a72d1de92c496087ba3f2e96e46d352..8fb699b4d40afb0ac7ccdeaf30a41e9783e30d26 100644 (file)
@@ -125,4 +125,6 @@ extern unsigned long vm_map_base;
 #define ISA_IOSIZE     SZ_16K
 #define IO_SPACE_LIMIT (PCI_IOSIZE - 1)
 
+#define PHYS_LINK_KADDR        PHYSADDR(VMLINUX_LOAD_ADDRESS)
+
 #endif /* _ASM_ADDRSPACE_H */
index 40eea6aa469e17c0ca5847da98046171cb590762..f591b3245def69d3deff2895e6c2d9f064225cb9 100644 (file)
 #define PTRLOG         3
 #endif
 
+/* Annotate a function as being unsuitable for kprobes. */
+#ifdef CONFIG_KPROBES
+#define _ASM_NOKPROBE(name)                            \
+       .pushsection "_kprobe_blacklist", "aw";         \
+       .quad   name;                                   \
+       .popsection
+#else
+#define _ASM_NOKPROBE(name)
+#endif
+
 #endif /* __ASM_ASM_H */
index be037a40580d4b017725564e59ec0c511373228a..c51a1b43acb4458da6bddb6a5952b3448d036c4b 100644 (file)
        nor     \dst, \src, zero
 .endm
 
+.macro la_abs reg, sym
+#ifndef CONFIG_RELOCATABLE
+       la.abs  \reg, \sym
+#else
+       766:
+       lu12i.w \reg, 0
+       ori     \reg, \reg, 0
+       lu32i.d \reg, 0
+       lu52i.d \reg, \reg, 0
+       .pushsection ".la_abs", "aw", %progbits
+       768:
+       .dword  768b-766b
+       .dword  \sym
+       .popsection
+#endif
+.endm
+
 #endif /* _ASM_ASMMACRO_H */
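
The records emitted into .la_abs above imply a boot-time pass that rewrites each four-instruction sequence with the relocated symbol value. A hedged sketch of such a pass (structure and symbol names are assumptions and may differ from the kernel's relocate code; the reg*_format unions are those declared in asm/inst.h):

	struct la_abs_entry {
		long offset;	/* entry address minus patch site (768b - 766b) */
		long symvalue;	/* link-time value of \sym */
	};

	extern struct la_abs_entry __la_abs_begin[], __la_abs_end[];

	static void __init relocate_la_abs(long reloc_offset)
	{
		struct la_abs_entry *p;

		for (p = __la_abs_begin; p < __la_abs_end; p++) {
			long v = p->symvalue + reloc_offset;
			union loongarch_instruction *insn = (void *)p - p->offset;

			/*
			 * Patch lu12i.w / ori / lu32i.d / lu52i.d with the
			 * 20/12/20/12-bit slices of v.
			 */
			insn[0].reg1i20_format.immediate = (v >> 12) & 0xfffff;
			insn[1].reg2i12_format.immediate = v & 0xfff;
			insn[2].reg1i20_format.immediate = (v >> 32) & 0xfffff;
			insn[3].reg2i12_format.immediate = (v >> 52) & 0xfff;
		}
	}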
index 754f2850679134ee81ec9a9966ea170d88d85a7f..c3da91759472841c0f15e71b5f03603077ae6d0d 100644 (file)
@@ -36,7 +36,7 @@
 
 #define PRID_SERIES_LA132      0x8000  /* Loongson 32bit */
 #define PRID_SERIES_LA264      0xa000  /* Loongson 64bit, 2-issue */
-#define PRID_SERIES_LA364      0xb000  /* Loongson 64bit,3-issue */
+#define PRID_SERIES_LA364      0xb000  /* Loongson 64bit, 3-issue */
 #define PRID_SERIES_LA464      0xc000  /* Loongson 64bit, 4-issue */
 #define PRID_SERIES_LA664      0xd000  /* Loongson 64bit, 6-issue */
 
diff --git a/arch/loongarch/include/asm/hw_breakpoint.h b/arch/loongarch/include/asm/hw_breakpoint.h
new file mode 100644 (file)
index 0000000..21447fb
--- /dev/null
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2023 Loongson Technology Corporation Limited
+ */
+#ifndef __ASM_HW_BREAKPOINT_H
+#define __ASM_HW_BREAKPOINT_H
+
+#include <asm/loongarch.h>
+
+#ifdef __KERNEL__
+
+/* Breakpoint */
+#define LOONGARCH_BREAKPOINT_EXECUTE           (0 << 0)
+
+/* Watchpoints */
+#define LOONGARCH_BREAKPOINT_LOAD              (1 << 0)
+#define LOONGARCH_BREAKPOINT_STORE             (1 << 1)
+
+struct arch_hw_breakpoint_ctrl {
+       u32 __reserved  : 28,
+       len             : 2,
+       type            : 2;
+};
+
+struct arch_hw_breakpoint {
+       u64 address;
+       u64 mask;
+       struct arch_hw_breakpoint_ctrl ctrl;
+};
+
+/* Lengths */
+#define LOONGARCH_BREAKPOINT_LEN_1    0b11
+#define LOONGARCH_BREAKPOINT_LEN_2    0b10
+#define LOONGARCH_BREAKPOINT_LEN_4    0b01
+#define LOONGARCH_BREAKPOINT_LEN_8    0b00
+
+/*
+ * Limits.
+ * Changing these will require modifications to the register accessors.
+ */
+#define LOONGARCH_MAX_BRP              8
+#define LOONGARCH_MAX_WRP              8
+
+/* Virtual debug register bases. */
+#define CSR_CFG_ADDR   0
+#define CSR_CFG_MASK   (CSR_CFG_ADDR + LOONGARCH_MAX_BRP)
+#define CSR_CFG_CTRL   (CSR_CFG_MASK + LOONGARCH_MAX_BRP)
+#define CSR_CFG_ASID   (CSR_CFG_CTRL + LOONGARCH_MAX_WRP)
+
+/* Debug register names. */
+#define LOONGARCH_CSR_NAME_ADDR        ADDR
+#define LOONGARCH_CSR_NAME_MASK        MASK
+#define LOONGARCH_CSR_NAME_CTRL        CTRL
+#define LOONGARCH_CSR_NAME_ASID        ASID
+
+/* Accessor macros for the debug registers. */
+#define LOONGARCH_CSR_WATCH_READ(N, REG, T, VAL)                       \
+do {                                                           \
+       if (T == 0)                                             \
+               VAL = csr_read64(LOONGARCH_CSR_##IB##N##REG);   \
+       else                                                    \
+               VAL = csr_read64(LOONGARCH_CSR_##DB##N##REG);   \
+} while (0)
+
+#define LOONGARCH_CSR_WATCH_WRITE(N, REG, T, VAL)                      \
+do {                                                           \
+       if (T == 0)                                             \
+               csr_write64(VAL, LOONGARCH_CSR_##IB##N##REG);   \
+       else                                                    \
+               csr_write64(VAL, LOONGARCH_CSR_##DB##N##REG);   \
+} while (0)
+
+/* Exact number */
+#define CSR_FWPC_NUM           0x3f
+#define CSR_MWPC_NUM           0x3f
+
+#define CTRL_PLV_ENABLE                0x1e
+
+#define MWPnCFG3_LoadEn                8
+#define MWPnCFG3_StoreEn       9
+
+#define MWPnCFG3_Type_mask     0x3
+#define MWPnCFG3_Size_mask     0x3
+
+static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl)
+{
+       return (ctrl.len << 10) | (ctrl.type << 8);
+}
+
+static inline void decode_ctrl_reg(u32 reg, struct arch_hw_breakpoint_ctrl *ctrl)
+{
+       reg >>= 8;
+       ctrl->type = reg & MWPnCFG3_Type_mask;
+       reg >>= 2;
+       ctrl->len  = reg & MWPnCFG3_Size_mask;
+}
+
+struct task_struct;
+struct notifier_block;
+struct perf_event;
+struct perf_event_attr;
+
+extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
+                                 int *gen_len, int *gen_type, int *offset);
+extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+                                   const struct perf_event_attr *attr,
+                                   struct arch_hw_breakpoint *hw);
+extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+                                          unsigned long val, void *data);
+
+extern int arch_install_hw_breakpoint(struct perf_event *bp);
+extern void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+extern int hw_breakpoint_slots(int type);
+extern void hw_breakpoint_pmu_read(struct perf_event *bp);
+
+void breakpoint_handler(struct pt_regs *regs);
+void watchpoint_handler(struct pt_regs *regs);
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+extern void ptrace_hw_copy_thread(struct task_struct *task);
+extern void hw_breakpoint_thread_switch(struct task_struct *next);
+#else
+static inline void ptrace_hw_copy_thread(struct task_struct *task)
+{
+}
+static inline void hw_breakpoint_thread_switch(struct task_struct *next)
+{
+}
+#endif
+
+/* Determine number of BRP registers available. */
+static inline int get_num_brps(void)
+{
+       return csr_read64(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM;
+}
+
+/* Determine number of WRP registers available. */
+static inline int get_num_wrps(void)
+{
+       return csr_read64(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM;
+}
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_HW_BREAKPOINT_H */
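
A quick worked check that encode_ctrl_reg() and decode_ctrl_reg() above round-trip (an illustration, not kernel code):

	#include <asm/hw_breakpoint.h>

	static void ctrl_roundtrip_demo(void)
	{
		struct arch_hw_breakpoint_ctrl c = {
			.len  = LOONGARCH_BREAKPOINT_LEN_4,	/* 0b01 */
			.type = LOONGARCH_BREAKPOINT_LOAD,	/* 0b01 */
		};
		u32 reg = encode_ctrl_reg(c);	/* (1 << 10) | (1 << 8) = 0x500 */
		struct arch_hw_breakpoint_ctrl out;

		decode_ctrl_reg(reg, &out);	/* out.type == 1, out.len == 1 */
	}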
index 7eedd83fd0d72127fc570cd87dc759f1d8b6b12f..a04fe755d71932d298189b608f8d786fd3c6edef 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/types.h>
 #include <asm/asm.h>
+#include <asm/ptrace.h>
 
 #define INSN_NOP               0x03400000
 #define INSN_BREAK             0x002a0000
 
 #define ADDR_IMM(addr, INSN)   ((addr & ADDR_IMMMASK_##INSN) >> ADDR_IMMSHIFT_##INSN)
 
+enum reg0i15_op {
+       break_op        = 0x54,
+};
+
 enum reg0i26_op {
        b_op            = 0x14,
        bl_op           = 0x15,
@@ -32,6 +37,7 @@ enum reg1i20_op {
        lu12iw_op       = 0x0a,
        lu32id_op       = 0x0b,
        pcaddi_op       = 0x0c,
+       pcalau12i_op    = 0x0d,
        pcaddu12i_op    = 0x0e,
        pcaddu18i_op    = 0x0f,
 };
@@ -178,6 +184,11 @@ enum reg3sa2_op {
        alsld_op        = 0x16,
 };
 
+struct reg0i15_format {
+       unsigned int immediate : 15;
+       unsigned int opcode : 17;
+};
+
 struct reg0i26_format {
        unsigned int immediate_h : 10;
        unsigned int immediate_l : 16;
@@ -263,6 +274,7 @@ struct reg3sa2_format {
 
 union loongarch_instruction {
        unsigned int word;
+       struct reg0i15_format   reg0i15_format;
        struct reg0i26_format   reg0i26_format;
        struct reg1i20_format   reg1i20_format;
        struct reg1i21_format   reg1i21_format;
@@ -321,6 +333,11 @@ static inline bool is_imm_negative(unsigned long val, unsigned int bit)
        return val & (1UL << (bit - 1));
 }
 
+static inline bool is_break_ins(union loongarch_instruction *ip)
+{
+       return ip->reg0i15_format.opcode == break_op;
+}
+
 static inline bool is_pc_ins(union loongarch_instruction *ip)
 {
        return ip->reg1i20_format.opcode >= pcaddi_op &&
@@ -351,6 +368,47 @@ static inline bool is_stack_alloc_ins(union loongarch_instruction *ip)
                is_imm12_negative(ip->reg2i12_format.immediate);
 }
 
+static inline bool is_self_loop_ins(union loongarch_instruction *ip, struct pt_regs *regs)
+{
+       switch (ip->reg0i26_format.opcode) {
+       case b_op:
+       case bl_op:
+               if (ip->reg0i26_format.immediate_l == 0
+                   && ip->reg0i26_format.immediate_h == 0)
+                       return true;
+       }
+
+       switch (ip->reg1i21_format.opcode) {
+       case beqz_op:
+       case bnez_op:
+       case bceqz_op:
+               if (ip->reg1i21_format.immediate_l == 0
+                   && ip->reg1i21_format.immediate_h == 0)
+                       return true;
+       }
+
+       switch (ip->reg2i16_format.opcode) {
+       case beq_op:
+       case bne_op:
+       case blt_op:
+       case bge_op:
+       case bltu_op:
+       case bgeu_op:
+               if (ip->reg2i16_format.immediate == 0)
+                       return true;
+               break;
+       case jirl_op:
+               if (regs->regs[ip->reg2i16_format.rj] +
+                   ((unsigned long)ip->reg2i16_format.immediate << 2) == (unsigned long)ip)
+                       return true;
+       }
+
+       return false;
+}
+
+void simu_pc(struct pt_regs *regs, union loongarch_instruction insn);
+void simu_branch(struct pt_regs *regs, union loongarch_instruction insn);
+
 int larch_insn_read(void *addr, u32 *insnp);
 int larch_insn_write(void *addr, u32 insn);
 int larch_insn_patch_text(void *addr, u32 insn);
diff --git a/arch/loongarch/include/asm/kprobes.h b/arch/loongarch/include/asm/kprobes.h
new file mode 100644 (file)
index 0000000..798020a
--- /dev/null
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_LOONGARCH_KPROBES_H
+#define __ASM_LOONGARCH_KPROBES_H
+
+#include <asm-generic/kprobes.h>
+
+#ifdef CONFIG_KPROBES
+
+#include <asm/inst.h>
+#include <asm/cacheflush.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE                  2
+
+#define flush_insn_slot(p)                                             \
+do {                                                                   \
+       if (p->addr)                                                    \
+               flush_icache_range((unsigned long)p->addr,              \
+                          (unsigned long)p->addr +                     \
+                          (MAX_INSN_SIZE * sizeof(kprobe_opcode_t)));  \
+} while (0)
+
+#define kretprobe_blacklist_size       0
+
+typedef union loongarch_instruction kprobe_opcode_t;
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+       /* copy of the original instruction */
+       kprobe_opcode_t *insn;
+       /* restore address after simulation */
+       unsigned long restore;
+};
+
+struct prev_kprobe {
+       struct kprobe *kp;
+       unsigned int status;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+       unsigned int kprobe_status;
+       unsigned long saved_status;
+       struct prev_kprobe prev_kprobe;
+};
+
+void arch_remove_kprobe(struct kprobe *p);
+bool kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+bool kprobe_breakpoint_handler(struct pt_regs *regs);
+bool kprobe_singlestep_handler(struct pt_regs *regs);
+
+void __kretprobe_trampoline(void);
+void *trampoline_probe_handler(struct pt_regs *regs);
+
+#else /* !CONFIG_KPROBES */
+
+static inline bool kprobe_breakpoint_handler(struct pt_regs *regs) { return false; }
+static inline bool kprobe_singlestep_handler(struct pt_regs *regs) { return false; }
+
+#endif /* CONFIG_KPROBES */
+#endif /* __ASM_LOONGARCH_KPROBES_H */
index 7f8d57a61c8bdd1c7cd9d2d77fdbe23bc10e89e3..65b7dcdea16d0f4f59bfed5cbee2444767327a6c 100644 (file)
@@ -970,42 +970,42 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
 
 #define LOONGARCH_CSR_DB0ADDR          0x310   /* data breakpoint 0 address */
 #define LOONGARCH_CSR_DB0MASK          0x311   /* data breakpoint 0 mask */
-#define LOONGARCH_CSR_DB0CT          0x312   /* data breakpoint 0 control */
+#define LOONGARCH_CSR_DB0CTRL          0x312   /* data breakpoint 0 control */
 #define LOONGARCH_CSR_DB0ASID          0x313   /* data breakpoint 0 asid */
 
 #define LOONGARCH_CSR_DB1ADDR          0x318   /* data breakpoint 1 address */
 #define LOONGARCH_CSR_DB1MASK          0x319   /* data breakpoint 1 mask */
-#define LOONGARCH_CSR_DB1CT          0x31a   /* data breakpoint 1 control */
+#define LOONGARCH_CSR_DB1CTRL          0x31a   /* data breakpoint 1 control */
 #define LOONGARCH_CSR_DB1ASID          0x31b   /* data breakpoint 1 asid */
 
 #define LOONGARCH_CSR_DB2ADDR          0x320   /* data breakpoint 2 address */
 #define LOONGARCH_CSR_DB2MASK          0x321   /* data breakpoint 2 mask */
-#define LOONGARCH_CSR_DB2CT          0x322   /* data breakpoint 2 control */
+#define LOONGARCH_CSR_DB2CTRL          0x322   /* data breakpoint 2 control */
 #define LOONGARCH_CSR_DB2ASID          0x323   /* data breakpoint 2 asid */
 
 #define LOONGARCH_CSR_DB3ADDR          0x328   /* data breakpoint 3 address */
 #define LOONGARCH_CSR_DB3MASK          0x329   /* data breakpoint 3 mask */
-#define LOONGARCH_CSR_DB3CT          0x32a   /* data breakpoint 3 control */
+#define LOONGARCH_CSR_DB3CTRL          0x32a   /* data breakpoint 3 control */
 #define LOONGARCH_CSR_DB3ASID          0x32b   /* data breakpoint 3 asid */
 
 #define LOONGARCH_CSR_DB4ADDR          0x330   /* data breakpoint 4 address */
 #define LOONGARCH_CSR_DB4MASK          0x331   /* data breakpoint 4 mask */
-#define LOONGARCH_CSR_DB4CT          0x332   /* data breakpoint 4 control */
+#define LOONGARCH_CSR_DB4CTRL          0x332   /* data breakpoint 4 control */
 #define LOONGARCH_CSR_DB4ASID          0x333   /* data breakpoint 4 asid */
 
 #define LOONGARCH_CSR_DB5ADDR          0x338   /* data breakpoint 5 address */
 #define LOONGARCH_CSR_DB5MASK          0x339   /* data breakpoint 5 mask */
-#define LOONGARCH_CSR_DB5CT          0x33a   /* data breakpoint 5 control */
+#define LOONGARCH_CSR_DB5CTRL          0x33a   /* data breakpoint 5 control */
 #define LOONGARCH_CSR_DB5ASID          0x33b   /* data breakpoint 5 asid */
 
 #define LOONGARCH_CSR_DB6ADDR          0x340   /* data breakpoint 6 address */
 #define LOONGARCH_CSR_DB6MASK          0x341   /* data breakpoint 6 mask */
-#define LOONGARCH_CSR_DB6CT          0x342   /* data breakpoint 6 control */
+#define LOONGARCH_CSR_DB6CTRL          0x342   /* data breakpoint 6 control */
 #define LOONGARCH_CSR_DB6ASID          0x343   /* data breakpoint 6 asid */
 
 #define LOONGARCH_CSR_DB7ADDR          0x348   /* data breakpoint 7 address */
 #define LOONGARCH_CSR_DB7MASK          0x349   /* data breakpoint 7 mask */
-#define LOONGARCH_CSR_DB7CT          0x34a   /* data breakpoint 7 control */
+#define LOONGARCH_CSR_DB7CTRL          0x34a   /* data breakpoint 7 control */
 #define LOONGARCH_CSR_DB7ASID          0x34b   /* data breakpoint 7 asid */
 
 #define LOONGARCH_CSR_FWPC             0x380   /* instruction breakpoint config */
@@ -1013,48 +1013,51 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
 
 #define LOONGARCH_CSR_IB0ADDR          0x390   /* inst breakpoint 0 address */
 #define LOONGARCH_CSR_IB0MASK          0x391   /* inst breakpoint 0 mask */
-#define LOONGARCH_CSR_IB0CT          0x392   /* inst breakpoint 0 control */
+#define LOONGARCH_CSR_IB0CTRL          0x392   /* inst breakpoint 0 control */
 #define LOONGARCH_CSR_IB0ASID          0x393   /* inst breakpoint 0 asid */
 
 #define LOONGARCH_CSR_IB1ADDR          0x398   /* inst breakpoint 1 address */
 #define LOONGARCH_CSR_IB1MASK          0x399   /* inst breakpoint 1 mask */
-#define LOONGARCH_CSR_IB1CT          0x39a   /* inst breakpoint 1 control */
+#define LOONGARCH_CSR_IB1CTRL          0x39a   /* inst breakpoint 1 control */
 #define LOONGARCH_CSR_IB1ASID          0x39b   /* inst breakpoint 1 asid */
 
 #define LOONGARCH_CSR_IB2ADDR          0x3a0   /* inst breakpoint 2 address */
 #define LOONGARCH_CSR_IB2MASK          0x3a1   /* inst breakpoint 2 mask */
-#define LOONGARCH_CSR_IB2CT          0x3a2   /* inst breakpoint 2 control */
+#define LOONGARCH_CSR_IB2CTRL          0x3a2   /* inst breakpoint 2 control */
 #define LOONGARCH_CSR_IB2ASID          0x3a3   /* inst breakpoint 2 asid */
 
 #define LOONGARCH_CSR_IB3ADDR          0x3a8   /* inst breakpoint 3 address */
 #define LOONGARCH_CSR_IB3MASK          0x3a9   /* inst breakpoint 3 mask */
-#define LOONGARCH_CSR_IB3CT          0x3aa   /* inst breakpoint 3 control */
+#define LOONGARCH_CSR_IB3CTRL          0x3aa   /* inst breakpoint 3 control */
 #define LOONGARCH_CSR_IB3ASID          0x3ab   /* inst breakpoint 3 asid */
 
 #define LOONGARCH_CSR_IB4ADDR          0x3b0   /* inst breakpoint 4 address */
 #define LOONGARCH_CSR_IB4MASK          0x3b1   /* inst breakpoint 4 mask */
-#define LOONGARCH_CSR_IB4CT          0x3b2   /* inst breakpoint 4 control */
+#define LOONGARCH_CSR_IB4CTRL          0x3b2   /* inst breakpoint 4 control */
 #define LOONGARCH_CSR_IB4ASID          0x3b3   /* inst breakpoint 4 asid */
 
 #define LOONGARCH_CSR_IB5ADDR          0x3b8   /* inst breakpoint 5 address */
 #define LOONGARCH_CSR_IB5MASK          0x3b9   /* inst breakpoint 5 mask */
-#define LOONGARCH_CSR_IB5CT          0x3ba   /* inst breakpoint 5 control */
+#define LOONGARCH_CSR_IB5CTRL          0x3ba   /* inst breakpoint 5 control */
 #define LOONGARCH_CSR_IB5ASID          0x3bb   /* inst breakpoint 5 asid */
 
 #define LOONGARCH_CSR_IB6ADDR          0x3c0   /* inst breakpoint 6 address */
 #define LOONGARCH_CSR_IB6MASK          0x3c1   /* inst breakpoint 6 mask */
-#define LOONGARCH_CSR_IB6CT          0x3c2   /* inst breakpoint 6 control */
+#define LOONGARCH_CSR_IB6CTRL          0x3c2   /* inst breakpoint 6 control */
 #define LOONGARCH_CSR_IB6ASID          0x3c3   /* inst breakpoint 6 asid */
 
 #define LOONGARCH_CSR_IB7ADDR          0x3c8   /* inst breakpoint 7 address */
 #define LOONGARCH_CSR_IB7MASK          0x3c9   /* inst breakpoint 7 mask */
-#define LOONGARCH_CSR_IB7CT          0x3ca   /* inst breakpoint 7 control */
+#define LOONGARCH_CSR_IB7CTRL          0x3ca   /* inst breakpoint 7 control */
 #define LOONGARCH_CSR_IB7ASID          0x3cb   /* inst breakpoint 7 asid */
 
 #define LOONGARCH_CSR_DEBUG            0x500   /* debug config */
 #define LOONGARCH_CSR_DERA             0x501   /* debug era */
 #define LOONGARCH_CSR_DESAVE           0x502   /* debug save */
 
+#define CSR_FWPC_SKIP_SHIFT            16
+#define CSR_FWPC_SKIP                  (_ULCAST_(1) << CSR_FWPC_SKIP_SHIFT)
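+/*
+ * Written to LOONGARCH_CSR_FWPS to skip (once) the instruction
+ * breakpoint that just matched; see hw_breakpoint_thread_switch()
+ * added later in this series.
+ */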
+
 /*
  * CSR_ECFG IM
  */
index 7184f1dc61f2784b399b254c6618f7bde54ddb15..636e1c66398c17c7bfb930b5b225bd241f831a45 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
+#include <asm/hw_breakpoint.h>
 #include <asm/loongarch.h>
 #include <asm/vdso/processor.h>
 #include <uapi/asm/ptrace.h>
@@ -124,13 +125,18 @@ struct thread_struct {
        /* Other stuff associated with the thread. */
        unsigned long trap_nr;
        unsigned long error_code;
+       unsigned long single_step; /* Used by PTRACE_SINGLESTEP */
        struct loongarch_vdso_info *vdso;
 
        /*
-        * FPU & vector registers, must be at last because
-        * they are conditionally copied at fork().
+        * FPU & vector registers, must be at the end of the inherited
+        * context because they are conditionally copied at fork().
         */
        struct loongarch_fpu fpu FPU_ALIGN;
+
+       /* Hardware breakpoints pinned to this task. */
+       struct perf_event *hbp_break[LOONGARCH_MAX_BRP];
+       struct perf_event *hbp_watch[LOONGARCH_MAX_WRP];
 };
 
 #define thread_saved_ra(tsk)   (tsk->thread.sched_ra)
@@ -172,6 +178,8 @@ struct thread_struct {
                .fcc            = 0,                            \
                .fpr            = {{{0,},},},                   \
        },                                                      \
+       .hbp_break              = {0},                          \
+       .hbp_watch              = {0},                          \
 }
 
 struct task_struct;
@@ -184,10 +192,6 @@ extern unsigned long               boot_option_idle_override;
  */
 extern void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp);
 
-static inline void flush_thread(void)
-{
-}
-
 unsigned long __get_wchan(struct task_struct *p);
 
 #define __KSTK_TOS(tsk) ((unsigned long)task_stack_page(tsk) + \
index 59c4608de91db938a6eb805db3da8f7b68948278..d761db943335ce707e9f7059e677abcc05195a16 100644 (file)
@@ -6,6 +6,7 @@
 #define _ASM_PTRACE_H
 
 #include <asm/page.h>
+#include <asm/irqflags.h>
 #include <asm/thread_info.h>
 #include <uapi/asm/ptrace.h>
 
@@ -109,6 +110,40 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsi
 
 struct task_struct;
 
+/**
+ * regs_get_kernel_argument() - get Nth function argument in kernel
+ * @regs:       pt_regs of that context
+ * @n:          function argument number (starting from 0)
+ *
+ * regs_get_kernel_argument() returns the @n-th argument of the function
+ * call. Note that this chooses the most likely register/stack assignment;
+ * in some cases it can be incorrect.
+ * This is expected to be called from kprobes or ftrace with regs,
+ * where the top of the stack is the return address.
+ */
+static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
+                                                    unsigned int n)
+{
+#define NR_REG_ARGUMENTS 8
+       static const unsigned int args[] = {
+               offsetof(struct pt_regs, regs[4]),
+               offsetof(struct pt_regs, regs[5]),
+               offsetof(struct pt_regs, regs[6]),
+               offsetof(struct pt_regs, regs[7]),
+               offsetof(struct pt_regs, regs[8]),
+               offsetof(struct pt_regs, regs[9]),
+               offsetof(struct pt_regs, regs[10]),
+               offsetof(struct pt_regs, regs[11]),
+       };
+
+       if (n < NR_REG_ARGUMENTS)
+               return regs_get_register(regs, args[n]);
+
+       n -= NR_REG_ARGUMENTS;
+       return regs_get_kernel_stack_nth(regs, n);
+}
+
 /*
  * Does the process account for user or for system time?
  */
@@ -149,4 +184,8 @@ static inline void user_stack_pointer_set(struct pt_regs *regs,
        regs->regs[3] = val;
 }
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#define arch_has_single_step()         (1)
+#endif
+
 #endif /* _ASM_PTRACE_H */
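
As a usage sketch (not part of the patch), a kprobe pre-handler can read the probed function's arguments through the new helper; the probed symbol and message below are purely illustrative:

	#include <linux/kprobes.h>

	static int dump_args(struct kprobe *p, struct pt_regs *regs)
	{
		/* a0-a7 map to arguments 0-7; arguments 8+ come from the stack */
		pr_info("%s: arg0=%lx arg1=%lx\n", p->symbol_name,
			regs_get_kernel_argument(regs, 0),
			regs_get_kernel_argument(regs, 1));
		return 0;
	}

	static struct kprobe kp = {
		.symbol_name	= "kernel_clone",	/* illustrative target */
		.pre_handler	= dump_args,
	};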
index 72ead58039f3e159ae13c7e066c4ee9793aa2208..be05c0e706a2e23d1e5c82859f02be62c82ce0b9 100644 (file)
@@ -21,4 +21,20 @@ extern void per_cpu_trap_init(int cpu);
 extern void set_handler(unsigned long offset, void *addr, unsigned long len);
 extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len);
 
+#ifdef CONFIG_RELOCATABLE
+
+struct rela_la_abs {
+       long offset;
+       long symvalue;
+};
+
+extern long __la_abs_begin;
+extern long __la_abs_end;
+extern long __rela_dyn_begin;
+extern long __rela_dyn_end;
+
+extern void * __init relocate_kernel(void);
+
+#endif
+
 #endif /* __SETUP_H */
index 4ca953062b5be2c2a76f5176c7e4977284981e6b..7df80e6ae9d2c8d76af305b7478bd52776ed380a 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/threads.h>
 
+#include <asm/addrspace.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-offsets.h>
        cfi_restore \reg \offset \docfi
        .endm
 
+/* Jump to the runtime virtual address. */
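+/*
+ * \temp2 receives the physical address of the pcaddi itself; OR-ing in
+ * CACHE_BASE yields its DMW-mapped (cached window) alias, and the jirl
+ * offset 0xc (three 4-byte instructions) lands on the first instruction
+ * after the macro, now executing from the runtime virtual address.
+ */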
+       .macro JUMP_VIRT_ADDR temp1 temp2
+       li.d    \temp1, CACHE_BASE
+       pcaddi  \temp2, 0
+       or      \temp1, \temp1, \temp2
+       jirl    zero, \temp1, 0xc
+       .endm
+
        .macro BACKUP_T0T1
        csrwr   t0, EXCEPTION_KS0
        csrwr   t1, EXCEPTION_KS1
@@ -77,7 +86,7 @@
  * new value in sp.
  */
        .macro  get_saved_sp docfi=0
-       la.abs    t1, kernelsp
+       la_abs    t1, kernelsp
 #ifdef CONFIG_SMP
        csrrd     t0, PERCPU_BASE_KS
        LONG_ADD  t1, t1, t0
@@ -90,7 +99,7 @@
        .endm
 
        .macro  set_saved_sp stackp temp temp2
-       la.abs    \temp, kernelsp
+       la.pcrel  \temp, kernelsp
 #ifdef CONFIG_SMP
        LONG_ADD  \temp, \temp, u0
 #endif
index 43a5ab162d38b917781a14acf3e3da92411d8a50..24e3094bebab166c046264280ef9b5929b350377 100644 (file)
@@ -34,6 +34,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev,
 #define switch_to(prev, next, last)                                            \
 do {                                                                           \
        lose_fpu_inatomic(1, prev);                                             \
+       hw_breakpoint_thread_switch(next);                                      \
        (last) = __switch_to(prev, next, task_thread_info(next),                \
                 __builtin_return_address(0), __builtin_frame_address(0));      \
 } while (0)
index 255899d4a7c36a73ad9802b6f71c4bf75a0ae9f6..0d22991ae430d4598743743a07ce6f925a01d4e6 100644 (file)
@@ -22,7 +22,6 @@
 extern u64 __ua_limit;
 
 #define __UA_ADDR      ".dword"
-#define __UA_LA                "la.abs"
 #define __UA_LIMIT     __ua_limit
 
 /*
index 083193f4a5d5d8e1771872b6f90610347f8fde01..cc48ed262021244bcbcc7e0f98b63d8dc7e4b9de 100644 (file)
@@ -46,6 +46,15 @@ struct user_fp_state {
        uint32_t    fcsr;
 };
 
+struct user_watch_state {
+       uint16_t dbg_info;
+       struct {
+               uint64_t    addr;
+               uint64_t    mask;
+               uint32_t    ctrl;
+       } dbg_regs[8];
+};
+
 #define PTRACE_SYSEMU                  0x1f
 #define PTRACE_SYSEMU_SINGLESTEP       0x20
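
A hypothetical userspace sketch of reading this state from a stopped tracee. NT_LOONGARCH_HW_WATCH is the regset note type added elsewhere in this series, and the interpretation of dbg_info is not shown in this hunk, so both are assumptions here:

	#include <elf.h>
	#include <stdio.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/uio.h>

	static void show_watchpoints(pid_t pid)
	{
		struct user_watch_state ws;
		struct iovec iov = { .iov_base = &ws, .iov_len = sizeof(ws) };

		/* NT_LOONGARCH_HW_WATCH: assumed note type for the watch regset */
		if (ptrace(PTRACE_GETREGSET, pid, NT_LOONGARCH_HW_WATCH, &iov) == 0)
			printf("dbg_info=%#x addr0=%#llx ctrl0=%#x\n", ws.dbg_info,
			       (unsigned long long)ws.dbg_regs[0].addr,
			       ws.dbg_regs[0].ctrl);
	}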
 
index c8cfbd562921d63fe2a15aa6fb8a6940ff356fd4..78d4e33843054a98be9827f464e0a666efc1ab6e 100644 (file)
@@ -8,13 +8,15 @@ extra-y               := vmlinux.lds
 obj-y          += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
                   traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \
                   elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \
-                  alternative.o unaligned.o unwind.o
+                  alternative.o unwind.o
 
 obj-$(CONFIG_ACPI)             += acpi.o
 obj-$(CONFIG_EFI)              += efi.o
 
 obj-$(CONFIG_CPU_HAS_FPU)      += fpu.o
 
+obj-$(CONFIG_ARCH_STRICT_ALIGN)        += unaligned.o
+
 ifdef CONFIG_FUNCTION_TRACER
   ifndef CONFIG_DYNAMIC_FTRACE
     obj-y += mcount.o ftrace.o
@@ -39,6 +41,8 @@ obj-$(CONFIG_NUMA)            += numa.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)      += sysrq.o
 
+obj-$(CONFIG_RELOCATABLE)      += relocate.o
+
 obj-$(CONFIG_KEXEC)            += machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_CRASH_DUMP)       += crash_dump.o
 
@@ -46,5 +50,8 @@ obj-$(CONFIG_UNWINDER_GUESS)  += unwind_guess.o
 obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
 
 obj-$(CONFIG_PERF_EVENTS)      += perf_event.o perf_regs.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += hw_breakpoint.o
+
+obj-$(CONFIG_KPROBES)          += kprobes.o kprobes_trampoline.o
 
 CPPFLAGS_vmlinux.lds           := $(KBUILD_CFLAGS)
index d53b631c90227cc4bbaf06192b90404afcc53493..d737e3cf42d3fd8ca882e08113d58d2887db32c1 100644 (file)
        .cfi_sections   .debug_frame
        .align  5
 SYM_FUNC_START(handle_syscall)
-       csrrd   t0, PERCPU_BASE_KS
-       la.abs  t1, kernelsp
-       add.d   t1, t1, t0
-       move    t2, sp
-       ld.d    sp, t1, 0
+       csrrd           t0, PERCPU_BASE_KS
+       la.pcrel        t1, kernelsp
+       add.d           t1, t1, t0
+       move            t2, sp
+       ld.d            sp, t1, 0
 
-       addi.d  sp, sp, -PT_SIZE
-       cfi_st  t2, PT_R3
+       addi.d          sp, sp, -PT_SIZE
+       cfi_st          t2, PT_R3
        cfi_rel_offset  sp, PT_R3
-       st.d    zero, sp, PT_R0
-       csrrd   t2, LOONGARCH_CSR_PRMD
-       st.d    t2, sp, PT_PRMD
-       csrrd   t2, LOONGARCH_CSR_CRMD
-       st.d    t2, sp, PT_CRMD
-       csrrd   t2, LOONGARCH_CSR_EUEN
-       st.d    t2, sp, PT_EUEN
-       csrrd   t2, LOONGARCH_CSR_ECFG
-       st.d    t2, sp, PT_ECFG
-       csrrd   t2, LOONGARCH_CSR_ESTAT
-       st.d    t2, sp, PT_ESTAT
-       cfi_st  ra, PT_R1
-       cfi_st  a0, PT_R4
-       cfi_st  a1, PT_R5
-       cfi_st  a2, PT_R6
-       cfi_st  a3, PT_R7
-       cfi_st  a4, PT_R8
-       cfi_st  a5, PT_R9
-       cfi_st  a6, PT_R10
-       cfi_st  a7, PT_R11
-       csrrd   ra, LOONGARCH_CSR_ERA
-       st.d    ra, sp, PT_ERA
+       st.d            zero, sp, PT_R0
+       csrrd           t2, LOONGARCH_CSR_PRMD
+       st.d            t2, sp, PT_PRMD
+       csrrd           t2, LOONGARCH_CSR_CRMD
+       st.d            t2, sp, PT_CRMD
+       csrrd           t2, LOONGARCH_CSR_EUEN
+       st.d            t2, sp, PT_EUEN
+       csrrd           t2, LOONGARCH_CSR_ECFG
+       st.d            t2, sp, PT_ECFG
+       csrrd           t2, LOONGARCH_CSR_ESTAT
+       st.d            t2, sp, PT_ESTAT
+       cfi_st          ra, PT_R1
+       cfi_st          a0, PT_R4
+       cfi_st          a1, PT_R5
+       cfi_st          a2, PT_R6
+       cfi_st          a3, PT_R7
+       cfi_st          a4, PT_R8
+       cfi_st          a5, PT_R9
+       cfi_st          a6, PT_R10
+       cfi_st          a7, PT_R11
+       csrrd           ra, LOONGARCH_CSR_ERA
+       st.d            ra, sp, PT_ERA
        cfi_rel_offset  ra, PT_ERA
 
-       cfi_st  tp, PT_R2
-       cfi_st  u0, PT_R21
-       cfi_st  fp, PT_R22
+       cfi_st          tp, PT_R2
+       cfi_st          u0, PT_R21
+       cfi_st          fp, PT_R22
 
        SAVE_STATIC
 
-       move    u0, t0
-       li.d    tp, ~_THREAD_MASK
-       and     tp, tp, sp
+       move            u0, t0
+       li.d            tp, ~_THREAD_MASK
+       and             tp, tp, sp
 
-       move    a0, sp
-       bl      do_syscall
+       move            a0, sp
+       bl              do_syscall
 
        RESTORE_ALL_AND_RET
 SYM_FUNC_END(handle_syscall)
+_ASM_NOKPROBE(handle_syscall)
 
 SYM_CODE_START(ret_from_fork)
-       bl      schedule_tail           # a0 = struct task_struct *prev
-       move    a0, sp
-       bl      syscall_exit_to_user_mode
+       bl              schedule_tail           # a0 = struct task_struct *prev
+       move            a0, sp
+       bl              syscall_exit_to_user_mode
        RESTORE_STATIC
        RESTORE_SOME
        RESTORE_SP_AND_RET
 SYM_CODE_END(ret_from_fork)
 
 SYM_CODE_START(ret_from_kernel_thread)
-       bl      schedule_tail           # a0 = struct task_struct *prev
-       move    a0, s1
-       jirl    ra, s0, 0
-       move    a0, sp
-       bl      syscall_exit_to_user_mode
+       bl              schedule_tail           # a0 = struct task_struct *prev
+       move            a0, s1
+       jirl            ra, s0, 0
+       move            a0, sp
+       bl              syscall_exit_to_user_mode
        RESTORE_STATIC
        RESTORE_SOME
        RESTORE_SP_AND_RET
index 0f07591cab3096b6cb620cecc56ec508c7dddc31..4a3ef8516ccc6ff92cb73f7afe2c3361bec1a124 100644 (file)
@@ -6,6 +6,7 @@
  */
 
 #include <linux/ftrace.h>
+#include <linux/kprobes.h>
 #include <linux/uaccess.h>
 
 #include <asm/inst.h>
@@ -271,3 +272,66 @@ int ftrace_disable_ftrace_graph_caller(void)
 }
 #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+/* Ftrace callback handler for kprobes -- called with preemption disabled */
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+                          struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+       int bit;
+       struct pt_regs *regs;
+       struct kprobe *p;
+       struct kprobe_ctlblk *kcb;
+
+       bit = ftrace_test_recursion_trylock(ip, parent_ip);
+       if (bit < 0)
+               return;
+
+       p = get_kprobe((kprobe_opcode_t *)ip);
+       if (unlikely(!p) || kprobe_disabled(p))
+               goto out;
+
+       regs = ftrace_get_regs(fregs);
+       if (!regs)
+               goto out;
+
+       kcb = get_kprobe_ctlblk();
+       if (kprobe_running()) {
+               kprobes_inc_nmissed_count(p);
+       } else {
+               unsigned long orig_ip = instruction_pointer(regs);
+
+               instruction_pointer_set(regs, ip);
+
+               __this_cpu_write(current_kprobe, p);
+               kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+               if (!p->pre_handler || !p->pre_handler(p, regs)) {
+                       /*
+                        * Emulate singlestep (and also recover regs->csr_era)
+                        * as if it were a nop
+                        */
+                       instruction_pointer_set(regs, (unsigned long)p->addr + MCOUNT_INSN_SIZE);
+                       if (unlikely(p->post_handler)) {
+                               kcb->kprobe_status = KPROBE_HIT_SSDONE;
+                               p->post_handler(p, regs, 0);
+                       }
+                       instruction_pointer_set(regs, orig_ip);
+               }
+
+               /*
+                * If pre_handler returns !0, it changes regs->csr_era. We have to
+                * skip emulating post_handler.
+                */
+               __this_cpu_write(current_kprobe, NULL);
+       }
+out:
+       ftrace_test_recursion_unlock(bit);
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+       p->ainsn.insn = NULL;
+       return 0;
+}
+#endif /* CONFIG_KPROBES_ON_FTRACE */
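
With CONFIG_KPROBES_ON_FTRACE, a kprobe placed on a function's ftrace entry site is dispatched through kprobe_ftrace_handler() above rather than via a break instruction, and no out-of-line slot is allocated (arch_prepare_kprobe_ftrace() sets ainsn.insn to NULL). Registration is unchanged; a minimal sketch with an illustrative target:

	#include <linux/kprobes.h>

	static int entry_pre(struct kprobe *p, struct pt_regs *regs)
	{
		pr_info("entered %pS\n", (void *)instruction_pointer(regs));
		return 0;
	}

	static struct kprobe kp = {
		.symbol_name	= "do_sys_openat2",	/* illustrative target */
		.pre_handler	= entry_pre,
	};

	/* register_kprobe(&kp) then takes the ftrace path for entry probes */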
index 7e5c293ed89f70c4872d54dfcad422673dc65112..44ff1ff6426011bb0aaf26f12d9dd62ebe182ed3 100644 (file)
@@ -34,7 +34,7 @@ SYM_FUNC_END(__arch_cpu_idle)
 SYM_FUNC_START(handle_vint)
        BACKUP_T0T1
        SAVE_ALL
-       la.abs  t1, __arch_cpu_idle
+       la_abs  t1, __arch_cpu_idle
        LONG_L  t0, sp, PT_ERA
        /* 32 byte rollback region */
        ori     t0, t0, 0x1f
@@ -43,7 +43,7 @@ SYM_FUNC_START(handle_vint)
        LONG_S  t0, sp, PT_ERA
 1:     move    a0, sp
        move    a1, sp
-       la.abs  t0, do_vint
+       la_abs  t0, do_vint
        jirl    ra, t0, 0
        RESTORE_ALL_AND_RET
 SYM_FUNC_END(handle_vint)
@@ -72,7 +72,7 @@ SYM_FUNC_END(except_vec_cex)
        SAVE_ALL
        build_prep_\prep
        move    a0, sp
-       la.abs  t0, do_\handler
+       la_abs  t0, do_\handler
        jirl    ra, t0, 0
        668:
        RESTORE_ALL_AND_RET
@@ -93,6 +93,6 @@ SYM_FUNC_END(except_vec_cex)
        BUILD_HANDLER reserved reserved none    /* others */
 
 SYM_FUNC_START(handle_sys)
-       la.abs  t0, handle_syscall
+       la_abs  t0, handle_syscall
        jr      t0
 SYM_FUNC_END(handle_sys)
index 57bada6b4e9310983a8cb3b601bbca86e465ae8c..aa64b179744f52ac4332bfe82302490e14a8210a 100644 (file)
@@ -24,7 +24,7 @@ _head:
        .org    0x8
        .dword  kernel_entry            /* Kernel entry point */
        .dword  _end - _text            /* Kernel image effective size */
-       .quad   0                       /* Kernel image load offset from start of RAM */
+       .quad   PHYS_LINK_KADDR         /* Kernel image load offset from start of RAM */
        .org    0x38                    /* 0x20 ~ 0x37 reserved */
        .long   LINUX_PE_MAGIC
        .long   pe_header - _head       /* Offset to the PE header */
@@ -50,11 +50,8 @@ SYM_CODE_START(kernel_entry)                 # kernel entry point
        li.d            t0, CSR_DMW1_INIT       # CA, PLV0, 0x9000 xxxx xxxx xxxx
        csrwr           t0, LOONGARCH_CSR_DMWIN1
 
-       /* We might not get launched at the address the kernel is linked to,
-          so we jump there.  */
-       la.abs          t0, 0f
-       jr              t0
-0:
+       JUMP_VIRT_ADDR  t0, t1
+
        /* Enable PG */
        li.w            t0, 0xb0                # PLV=0, IE=0, PG=1
        csrwr           t0, LOONGARCH_CSR_CRMD
@@ -89,6 +86,23 @@ SYM_CODE_START(kernel_entry)                 # kernel entry point
        PTR_ADD         sp, sp, tp
        set_saved_sp    sp, t0, t1
 
+#ifdef CONFIG_RELOCATABLE
+
+       bl              relocate_kernel
+
+#ifdef CONFIG_RANDOMIZE_BASE
+       /* Repoint the sp into the new kernel */
+       PTR_LI          sp, (_THREAD_SIZE - PT_SIZE)
+       PTR_ADD         sp, sp, tp
+       set_saved_sp    sp, t0, t1
+#endif
+
+       /* relocate_kernel() returns the new kernel entry point */
+       jr              a0
+       ASM_BUG()
+
+#endif
+
        bl              start_kernel
        ASM_BUG()
 
@@ -106,9 +120,8 @@ SYM_CODE_START(smpboot_entry)
        li.d            t0, CSR_DMW1_INIT       # CA, PLV0
        csrwr           t0, LOONGARCH_CSR_DMWIN1
 
-       la.abs          t0, 0f
-       jr              t0
-0:
+       JUMP_VIRT_ADDR  t0, t1
+
        /* Enable PG */
        li.w            t0, 0xb0                # PLV=0, IE=0, PG=1
        csrwr           t0, LOONGARCH_CSR_CRMD
@@ -117,7 +130,7 @@ SYM_CODE_START(smpboot_entry)
        li.w            t0, 0x00                # FPE=0, SXE=0, ASXE=0, BTE=0
        csrwr           t0, LOONGARCH_CSR_EUEN
 
-       la.abs          t0, cpuboot_data
+       la.pcrel        t0, cpuboot_data
        ld.d            sp, t0, CPU_BOOT_STACK
        ld.d            tp, t0, CPU_BOOT_TINFO
 
diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c
new file mode 100644 (file)
index 0000000..2406c95
--- /dev/null
@@ -0,0 +1,548 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2023 Loongson Technology Corporation Limited
+ */
+#define pr_fmt(fmt) "hw-breakpoint: " fmt
+
+#include <linux/hw_breakpoint.h>
+#include <linux/kprobes.h>
+#include <linux/perf_event.h>
+
+#include <asm/hw_breakpoint.h>
+
+/* Breakpoint currently in use for each BRP. */
+static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[LOONGARCH_MAX_BRP]);
+
+/* Watchpoint currently in use for each WRP. */
+static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[LOONGARCH_MAX_WRP]);
+
+int hw_breakpoint_slots(int type)
+{
+       /*
+        * We can be called early, so don't rely on
+        * our static variables being initialised.
+        */
+       switch (type) {
+       case TYPE_INST:
+               return get_num_brps();
+       case TYPE_DATA:
+               return get_num_wrps();
+       default:
+               pr_warn("unknown slot type: %d\n", type);
+               return 0;
+       }
+}
+
+#define READ_WB_REG_CASE(OFF, N, REG, T, VAL)          \
+       case (OFF + N):                                 \
+               LOONGARCH_CSR_WATCH_READ(N, REG, T, VAL);       \
+               break
+
+#define WRITE_WB_REG_CASE(OFF, N, REG, T, VAL)         \
+       case (OFF + N):                                 \
+               LOONGARCH_CSR_WATCH_WRITE(N, REG, T, VAL);      \
+               break
+
+#define GEN_READ_WB_REG_CASES(OFF, REG, T, VAL)                \
+       READ_WB_REG_CASE(OFF, 0, REG, T, VAL);          \
+       READ_WB_REG_CASE(OFF, 1, REG, T, VAL);          \
+       READ_WB_REG_CASE(OFF, 2, REG, T, VAL);          \
+       READ_WB_REG_CASE(OFF, 3, REG, T, VAL);          \
+       READ_WB_REG_CASE(OFF, 4, REG, T, VAL);          \
+       READ_WB_REG_CASE(OFF, 5, REG, T, VAL);          \
+       READ_WB_REG_CASE(OFF, 6, REG, T, VAL);          \
+       READ_WB_REG_CASE(OFF, 7, REG, T, VAL);
+
+#define GEN_WRITE_WB_REG_CASES(OFF, REG, T, VAL)       \
+       WRITE_WB_REG_CASE(OFF, 0, REG, T, VAL);         \
+       WRITE_WB_REG_CASE(OFF, 1, REG, T, VAL);         \
+       WRITE_WB_REG_CASE(OFF, 2, REG, T, VAL);         \
+       WRITE_WB_REG_CASE(OFF, 3, REG, T, VAL);         \
+       WRITE_WB_REG_CASE(OFF, 4, REG, T, VAL);         \
+       WRITE_WB_REG_CASE(OFF, 5, REG, T, VAL);         \
+       WRITE_WB_REG_CASE(OFF, 6, REG, T, VAL);         \
+       WRITE_WB_REG_CASE(OFF, 7, REG, T, VAL);
+
+static u64 read_wb_reg(int reg, int n, int t)
+{
+       u64 val = 0;
+
+       switch (reg + n) {
+       GEN_READ_WB_REG_CASES(CSR_CFG_ADDR, ADDR, t, val);
+       GEN_READ_WB_REG_CASES(CSR_CFG_MASK, MASK, t, val);
+       GEN_READ_WB_REG_CASES(CSR_CFG_CTRL, CTRL, t, val);
+       GEN_READ_WB_REG_CASES(CSR_CFG_ASID, ASID, t, val);
+       default:
+               pr_warn("Attempt to read from unknown breakpoint register %d\n", n);
+       }
+
+       return val;
+}
+NOKPROBE_SYMBOL(read_wb_reg);
+
+static void write_wb_reg(int reg, int n, int t, u64 val)
+{
+       switch (reg + n) {
+       GEN_WRITE_WB_REG_CASES(CSR_CFG_ADDR, ADDR, t, val);
+       GEN_WRITE_WB_REG_CASES(CSR_CFG_MASK, MASK, t, val);
+       GEN_WRITE_WB_REG_CASES(CSR_CFG_CTRL, CTRL, t, val);
+       GEN_WRITE_WB_REG_CASES(CSR_CFG_ASID, ASID, t, val);
+       default:
+               pr_warn("Attempt to write to unknown breakpoint register %d\n", n);
+       }
+}
+NOKPROBE_SYMBOL(write_wb_reg);
+
+enum hw_breakpoint_ops {
+       HW_BREAKPOINT_INSTALL,
+       HW_BREAKPOINT_UNINSTALL,
+};
+
+/*
+ * hw_breakpoint_slot_setup - Find and set up a perf slot according to the operation
+ *
+ * @slots: pointer to array of slots
+ * @max_slots: max number of slots
+ * @bp: perf_event to setup
+ * @ops: operation to be carried out on the slot
+ *
+ * Return:
+ *     slot index on success
+ *     -ENOSPC if no slot is available/matches
+ *     -EINVAL on wrong operations parameter
+ */
+static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots,
+                                   struct perf_event *bp, enum hw_breakpoint_ops ops)
+{
+       int i;
+       struct perf_event **slot;
+
+       for (i = 0; i < max_slots; ++i) {
+               slot = &slots[i];
+               switch (ops) {
+               case HW_BREAKPOINT_INSTALL:
+                       if (!*slot) {
+                               *slot = bp;
+                               return i;
+                       }
+                       break;
+               case HW_BREAKPOINT_UNINSTALL:
+                       if (*slot == bp) {
+                               *slot = NULL;
+                               return i;
+                       }
+                       break;
+               default:
+                       pr_warn_once("Unhandled hw breakpoint ops %d\n", ops);
+                       return -EINVAL;
+               }
+       }
+
+       return -ENOSPC;
+}
+
+void ptrace_hw_copy_thread(struct task_struct *tsk)
+{
+       memset(tsk->thread.hbp_break, 0, sizeof(tsk->thread.hbp_break));
+       memset(tsk->thread.hbp_watch, 0, sizeof(tsk->thread.hbp_watch));
+}
+
+/*
+ * Unregister breakpoints from this task and reset the pointers in the thread_struct.
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+       int i;
+       struct thread_struct *t = &tsk->thread;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (t->hbp_break[i]) {
+                       unregister_hw_breakpoint(t->hbp_break[i]);
+                       t->hbp_break[i] = NULL;
+               }
+       }
+
+       for (i = 0; i < LOONGARCH_MAX_WRP; i++) {
+               if (t->hbp_watch[i]) {
+                       unregister_hw_breakpoint(t->hbp_watch[i]);
+                       t->hbp_watch[i] = NULL;
+               }
+       }
+}
+
+static int hw_breakpoint_control(struct perf_event *bp,
+                                enum hw_breakpoint_ops ops)
+{
+       u32 ctrl;
+       int i, max_slots, enable;
+       struct perf_event **slots;
+       struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+       if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) {
+               /* Breakpoint */
+               slots = this_cpu_ptr(bp_on_reg);
+               max_slots = boot_cpu_data.watch_ireg_count;
+       } else {
+               /* Watchpoint */
+               slots = this_cpu_ptr(wp_on_reg);
+               max_slots = boot_cpu_data.watch_dreg_count;
+       }
+
+       i = hw_breakpoint_slot_setup(slots, max_slots, bp, ops);
+
+       if (WARN_ONCE(i < 0, "Can't find any breakpoint slot"))
+               return i;
+
+       switch (ops) {
+       case HW_BREAKPOINT_INSTALL:
+               /* Set the FWPnCFG/MWPnCFG 1~4 register. */
+               write_wb_reg(CSR_CFG_ADDR, i, 0, info->address);
+               write_wb_reg(CSR_CFG_ADDR, i, 1, info->address);
+               write_wb_reg(CSR_CFG_MASK, i, 0, info->mask);
+               write_wb_reg(CSR_CFG_MASK, i, 1, info->mask);
+               write_wb_reg(CSR_CFG_ASID, i, 0, 0);
+               write_wb_reg(CSR_CFG_ASID, i, 1, 0);
+               if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) {
+                       write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE);
+               } else {
+                       ctrl = encode_ctrl_reg(info->ctrl);
+                       write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE |
+                                    1 << MWPnCFG3_LoadEn | 1 << MWPnCFG3_StoreEn);
+               }
+               enable = csr_read64(LOONGARCH_CSR_CRMD);
+               csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD);
+               break;
+       case HW_BREAKPOINT_UNINSTALL:
+               /* Reset the FWPnCFG/MWPnCFG 1~4 register. */
+               write_wb_reg(CSR_CFG_ADDR, i, 0, 0);
+               write_wb_reg(CSR_CFG_ADDR, i, 1, 0);
+               write_wb_reg(CSR_CFG_MASK, i, 0, 0);
+               write_wb_reg(CSR_CFG_MASK, i, 1, 0);
+               write_wb_reg(CSR_CFG_CTRL, i, 0, 0);
+               write_wb_reg(CSR_CFG_CTRL, i, 1, 0);
+               write_wb_reg(CSR_CFG_ASID, i, 0, 0);
+               write_wb_reg(CSR_CFG_ASID, i, 1, 0);
+               break;
+       }
+
+       return 0;
+}
+
+/*
+ * Install a perf counter breakpoint.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+       return hw_breakpoint_control(bp, HW_BREAKPOINT_INSTALL);
+}
+
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+       hw_breakpoint_control(bp, HW_BREAKPOINT_UNINSTALL);
+}
+
+static int get_hbp_len(u8 hbp_len)
+{
+       unsigned int len_in_bytes = 0;
+
+       switch (hbp_len) {
+       case LOONGARCH_BREAKPOINT_LEN_1:
+               len_in_bytes = 1;
+               break;
+       case LOONGARCH_BREAKPOINT_LEN_2:
+               len_in_bytes = 2;
+               break;
+       case LOONGARCH_BREAKPOINT_LEN_4:
+               len_in_bytes = 4;
+               break;
+       case LOONGARCH_BREAKPOINT_LEN_8:
+               len_in_bytes = 8;
+               break;
+       }
+
+       return len_in_bytes;
+}
+
+/*
+ * Check whether bp virtual address is in kernel space.
+ */
+int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
+{
+       unsigned int len;
+       unsigned long va;
+
+       va = hw->address;
+       len = get_hbp_len(hw->ctrl.len);
+
+       return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
+}
+
+/*
+ * Extract generic type and length encodings from an arch_hw_breakpoint_ctrl.
+ * Hopefully this will disappear when ptrace can bypass the conversion
+ * to generic breakpoint descriptions.
+ */
+int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
+                          int *gen_len, int *gen_type, int *offset)
+{
+       /* Type */
+       switch (ctrl.type) {
+       case LOONGARCH_BREAKPOINT_EXECUTE:
+               *gen_type = HW_BREAKPOINT_X;
+               break;
+       case LOONGARCH_BREAKPOINT_LOAD:
+               *gen_type = HW_BREAKPOINT_R;
+               break;
+       case LOONGARCH_BREAKPOINT_STORE:
+               *gen_type = HW_BREAKPOINT_W;
+               break;
+       case LOONGARCH_BREAKPOINT_LOAD | LOONGARCH_BREAKPOINT_STORE:
+               *gen_type = HW_BREAKPOINT_RW;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (!ctrl.len)
+               return -EINVAL;
+
+       *offset = __ffs(ctrl.len);
+
+       /* Len */
+       switch (ctrl.len) {
+       case LOONGARCH_BREAKPOINT_LEN_1:
+               *gen_len = HW_BREAKPOINT_LEN_1;
+               break;
+       case LOONGARCH_BREAKPOINT_LEN_2:
+               *gen_len = HW_BREAKPOINT_LEN_2;
+               break;
+       case LOONGARCH_BREAKPOINT_LEN_4:
+               *gen_len = HW_BREAKPOINT_LEN_4;
+               break;
+       case LOONGARCH_BREAKPOINT_LEN_8:
+               *gen_len = HW_BREAKPOINT_LEN_8;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Construct an arch_hw_breakpoint from a perf_event.
+ */
+static int arch_build_bp_info(struct perf_event *bp,
+                             const struct perf_event_attr *attr,
+                             struct arch_hw_breakpoint *hw)
+{
+       /* Type */
+       switch (attr->bp_type) {
+       case HW_BREAKPOINT_X:
+               hw->ctrl.type = LOONGARCH_BREAKPOINT_EXECUTE;
+               break;
+       case HW_BREAKPOINT_R:
+               hw->ctrl.type = LOONGARCH_BREAKPOINT_LOAD;
+               break;
+       case HW_BREAKPOINT_W:
+               hw->ctrl.type = LOONGARCH_BREAKPOINT_STORE;
+               break;
+       case HW_BREAKPOINT_RW:
+               hw->ctrl.type = LOONGARCH_BREAKPOINT_LOAD | LOONGARCH_BREAKPOINT_STORE;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* Len */
+       switch (attr->bp_len) {
+       case HW_BREAKPOINT_LEN_1:
+               hw->ctrl.len = LOONGARCH_BREAKPOINT_LEN_1;
+               break;
+       case HW_BREAKPOINT_LEN_2:
+               hw->ctrl.len = LOONGARCH_BREAKPOINT_LEN_2;
+               break;
+       case HW_BREAKPOINT_LEN_4:
+               hw->ctrl.len = LOONGARCH_BREAKPOINT_LEN_4;
+               break;
+       case HW_BREAKPOINT_LEN_8:
+               hw->ctrl.len = LOONGARCH_BREAKPOINT_LEN_8;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* Address */
+       hw->address = attr->bp_addr;
+
+       return 0;
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings.
+ */
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+                            const struct perf_event_attr *attr,
+                            struct arch_hw_breakpoint *hw)
+{
+       int ret;
+       u64 alignment_mask, offset;
+
+       /* Build the arch_hw_breakpoint. */
+       ret = arch_build_bp_info(bp, attr, hw);
+       if (ret)
+               return ret;
+
+       if (hw->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE)
+               alignment_mask = 0x3;
+       else
+               alignment_mask = 0x7;
+       offset = hw->address & alignment_mask;
+
+       hw->address &= ~alignment_mask;
+       hw->ctrl.len <<= offset;
+
+       return 0;
+}
+
+static void update_bp_registers(struct pt_regs *regs, int enable, int type)
+{
+       u32 ctrl;
+       int i, max_slots;
+       struct perf_event **slots;
+       struct arch_hw_breakpoint *info;
+
+       switch (type) {
+       case 0:
+               slots = this_cpu_ptr(bp_on_reg);
+               max_slots = boot_cpu_data.watch_ireg_count;
+               break;
+       case 1:
+               slots = this_cpu_ptr(wp_on_reg);
+               max_slots = boot_cpu_data.watch_dreg_count;
+               break;
+       default:
+               return;
+       }
+
+       for (i = 0; i < max_slots; ++i) {
+               if (!slots[i])
+                       continue;
+
+               info = counter_arch_bp(slots[i]);
+               if (enable) {
+                       if ((info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) && (type == 0)) {
+                               write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE);
+                       } else {
+                               ctrl = read_wb_reg(CSR_CFG_CTRL, i, 1);
+                               if (info->ctrl.type == LOONGARCH_BREAKPOINT_LOAD)
+                                       ctrl |= 0x1 << MWPnCFG3_LoadEn;
+                               if (info->ctrl.type == LOONGARCH_BREAKPOINT_STORE)
+                                       ctrl |= 0x1 << MWPnCFG3_StoreEn;
+                               write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl);
+                       }
+                       regs->csr_prmd |= CSR_PRMD_PWE;
+               } else {
+                       if ((info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) && (type == 0)) {
+                               write_wb_reg(CSR_CFG_CTRL, i, 0, 0);
+                       } else {
+                               ctrl = read_wb_reg(CSR_CFG_CTRL, i, 1);
+                               if (info->ctrl.type == LOONGARCH_BREAKPOINT_LOAD)
+                                       ctrl &= ~0x1 << MWPnCFG3_LoadEn;
+                               if (info->ctrl.type == LOONGARCH_BREAKPOINT_STORE)
+                                       ctrl &= ~0x1 << MWPnCFG3_StoreEn;
+                               write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl);
+                       }
+                       regs->csr_prmd &= ~CSR_PRMD_PWE;
+               }
+       }
+}
+NOKPROBE_SYMBOL(update_bp_registers);
+
+/*
+ * Debug exception handlers.
+ */
+void breakpoint_handler(struct pt_regs *regs)
+{
+       int i;
+       struct perf_event *bp, **slots;
+
+       slots = this_cpu_ptr(bp_on_reg);
+
+       for (i = 0; i < boot_cpu_data.watch_ireg_count; ++i) {
+               bp = slots[i];
+               if (bp == NULL)
+                       continue;
+               perf_bp_event(bp, regs);
+       }
+       update_bp_registers(regs, 0, 0);
+}
+NOKPROBE_SYMBOL(breakpoint_handler);
+
+void watchpoint_handler(struct pt_regs *regs)
+{
+       int i;
+       struct perf_event *wp, **slots;
+
+       slots = this_cpu_ptr(wp_on_reg);
+
+       for (i = 0; i < boot_cpu_data.watch_dreg_count; ++i) {
+               wp = slots[i];
+               if (wp == NULL)
+                       continue;
+               perf_bp_event(wp, regs);
+       }
+       update_bp_registers(regs, 0, 1);
+}
+NOKPROBE_SYMBOL(watchpoint_handler);
+
+static int __init arch_hw_breakpoint_init(void)
+{
+       int cpu;
+
+       boot_cpu_data.watch_ireg_count = get_num_brps();
+       boot_cpu_data.watch_dreg_count = get_num_wrps();
+
+       pr_info("Found %d breakpoint and %d watchpoint registers.\n",
+               boot_cpu_data.watch_ireg_count, boot_cpu_data.watch_dreg_count);
+
+       for (cpu = 1; cpu < NR_CPUS; cpu++) {
+               cpu_data[cpu].watch_ireg_count = boot_cpu_data.watch_ireg_count;
+               cpu_data[cpu].watch_dreg_count = boot_cpu_data.watch_dreg_count;
+       }
+
+       return 0;
+}
+arch_initcall(arch_hw_breakpoint_init);
+
+void hw_breakpoint_thread_switch(struct task_struct *next)
+{
+       u64 addr, mask;
+       struct pt_regs *regs = task_pt_regs(next);
+
+       if (test_tsk_thread_flag(next, TIF_SINGLESTEP)) {
+               addr = read_wb_reg(CSR_CFG_ADDR, 0, 0);
+               mask = read_wb_reg(CSR_CFG_MASK, 0, 0);
+               if (!((regs->csr_era ^ addr) & ~mask))
+                       csr_write32(CSR_FWPC_SKIP, LOONGARCH_CSR_FWPS);
+               regs->csr_prmd |= CSR_PRMD_PWE;
+       } else {
+               /* Update breakpoints */
+               update_bp_registers(regs, 1, 0);
+               /* Update watchpoints */
+               update_bp_registers(regs, 1, 1);
+       }
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+}
+
+/*
+ * Dummy function to register with die_notifier.
+ */
+int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+                                   unsigned long val, void *data)
+{
+       return NOTIFY_DONE;
+}
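
Kernel code normally reaches the arch hooks above through the generic perf interface rather than calling them directly. A minimal write-watchpoint sketch in the style of samples/hw_breakpoint/data_breakpoint.c; the watched symbol is illustrative only (watching jiffies would fire constantly):

	#include <linux/hw_breakpoint.h>
	#include <linux/jiffies.h>
	#include <linux/perf_event.h>

	static struct perf_event * __percpu *wp;

	static void wp_handler(struct perf_event *bp, struct perf_sample_data *data,
			       struct pt_regs *regs)
	{
		pr_info("watched address written, era = %lx\n",
			instruction_pointer(regs));
	}

	static int __init wp_init(void)
	{
		struct perf_event_attr attr;

		hw_breakpoint_init(&attr);
		attr.bp_addr = (unsigned long)&jiffies;	/* illustrative target */
		attr.bp_len  = HW_BREAKPOINT_LEN_8;
		attr.bp_type = HW_BREAKPOINT_W;

		wp = register_wide_hw_breakpoint(&attr, wp_handler, NULL);
		return IS_ERR((void __force *)wp) ? PTR_ERR((void __force *)wp) : 0;
	}

	/* unregister_wide_hw_breakpoint(wp) tears the watchpoint down again */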
index badc590870423433495616c47e53022e4cf96f35..258ef267cd306fd97a27f8aced0931634b518561 100644 (file)
 
 static DEFINE_RAW_SPINLOCK(patch_lock);
 
+void simu_pc(struct pt_regs *regs, union loongarch_instruction insn)
+{
+       unsigned long pc = regs->csr_era;
+       unsigned int rd = insn.reg1i20_format.rd;
+       unsigned int imm = insn.reg1i20_format.immediate;
+
+       if (pc & 3) {
+               pr_warn("%s: invalid pc 0x%lx\n", __func__, pc);
+               return;
+       }
+
+       switch (insn.reg1i20_format.opcode) {
+       case pcaddi_op:
+               regs->regs[rd] = pc + sign_extend64(imm << 2, 21);
+               break;
+       case pcaddu12i_op:
+               regs->regs[rd] = pc + sign_extend64(imm << 12, 31);
+               break;
+       case pcaddu18i_op:
+               regs->regs[rd] = pc + sign_extend64(imm << 18, 37);
+               break;
+       case pcalau12i_op:
+               regs->regs[rd] = pc + sign_extend64(imm << 12, 31);
+               regs->regs[rd] &= ~((1 << 12) - 1);
+               break;
+       default:
+               pr_info("%s: unknown opcode\n", __func__);
+               return;
+       }
+
+       regs->csr_era += LOONGARCH_INSN_SIZE;
+}
+
+void simu_branch(struct pt_regs *regs, union loongarch_instruction insn)
+{
+       unsigned int imm, imm_l, imm_h, rd, rj;
+       unsigned long pc = regs->csr_era;
+
+       if (pc & 3) {
+               pr_warn("%s: invalid pc 0x%lx\n", __func__, pc);
+               return;
+       }
+
+       imm_l = insn.reg0i26_format.immediate_l;
+       imm_h = insn.reg0i26_format.immediate_h;
+       switch (insn.reg0i26_format.opcode) {
+       case b_op:
+               regs->csr_era = pc + sign_extend64((imm_h << 16 | imm_l) << 2, 27);
+               return;
+       case bl_op:
+               regs->csr_era = pc + sign_extend64((imm_h << 16 | imm_l) << 2, 27);
+               regs->regs[1] = pc + LOONGARCH_INSN_SIZE;
+               return;
+       }
+
+       imm_l = insn.reg1i21_format.immediate_l;
+       imm_h = insn.reg1i21_format.immediate_h;
+       rj = insn.reg1i21_format.rj;
+       switch (insn.reg1i21_format.opcode) {
+       case beqz_op:
+               if (regs->regs[rj] == 0)
+                       regs->csr_era = pc + sign_extend64((imm_h << 16 | imm_l) << 2, 22);
+               else
+                       regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+               return;
+       case bnez_op:
+               if (regs->regs[rj] != 0)
+                       regs->csr_era = pc + sign_extend64((imm_h << 16 | imm_l) << 2, 22);
+               else
+                       regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+               return;
+       }
+
+       imm = insn.reg2i16_format.immediate;
+       rj = insn.reg2i16_format.rj;
+       rd = insn.reg2i16_format.rd;
+       switch (insn.reg2i16_format.opcode) {
+       case beq_op:
+               if (regs->regs[rj] == regs->regs[rd])
+                       regs->csr_era = pc + sign_extend64(imm << 2, 17);
+               else
+                       regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+               break;
+       case bne_op:
+               if (regs->regs[rj] != regs->regs[rd])
+                       regs->csr_era = pc + sign_extend64(imm << 2, 17);
+               else
+                       regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+               break;
+       case blt_op:
+               if ((long)regs->regs[rj] < (long)regs->regs[rd])
+                       regs->csr_era = pc + sign_extend64(imm << 2, 17);
+               else
+                       regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+               break;
+       case bge_op:
+               if ((long)regs->regs[rj] >= (long)regs->regs[rd])
+                       regs->csr_era = pc + sign_extend64(imm << 2, 17);
+               else
+                       regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+               break;
+       case bltu_op:
+               if (regs->regs[rj] < regs->regs[rd])
+                       regs->csr_era = pc + sign_extend64(imm << 2, 17);
+               else
+                       regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+               break;
+       case bgeu_op:
+               if (regs->regs[rj] >= regs->regs[rd])
+                       regs->csr_era = pc + sign_extend64(imm << 2, 17);
+               else
+                       regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+               break;
+       case jirl_op:
+               regs->csr_era = regs->regs[rj] + sign_extend64(imm << 2, 17);
+               regs->regs[rd] = pc + LOONGARCH_INSN_SIZE;
+               break;
+       default:
+               pr_info("%s: unknown opcode\n", __func__);
+               return;
+       }
+}
+
 int larch_insn_read(void *addr, u32 *insnp)
 {
        int ret;
diff --git a/arch/loongarch/kernel/kprobes.c b/arch/loongarch/kernel/kprobes.c
new file mode 100644 (file)
index 0000000..56c8c4b
--- /dev/null
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kdebug.h>
+#include <linux/kprobes.h>
+#include <linux/preempt.h>
+#include <asm/break.h>
+
+static const union loongarch_instruction breakpoint_insn = {
+       .reg0i15_format = {
+               .opcode = break_op,
+               .immediate = BRK_KPROBE_BP,
+       }
+};
+
+static const union loongarch_instruction singlestep_insn = {
+       .reg0i15_format = {
+               .opcode = break_op,
+               .immediate = BRK_KPROBE_SSTEPBP,
+       }
+};
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe);
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+static bool insns_not_supported(union loongarch_instruction insn)
+{
+       switch (insn.reg2i14_format.opcode) {
+       case llw_op:
+       case lld_op:
+       case scw_op:
+       case scd_op:
+               pr_notice("kprobe: ll and sc instructions are not supported\n");
+               return true;
+       }
+
+       switch (insn.reg1i21_format.opcode) {
+       case bceqz_op:
+               pr_notice("kprobe: bceqz and bcnez instructions are not supported\n");
+               return true;
+       }
+
+       return false;
+}
+NOKPROBE_SYMBOL(insns_not_supported);
+
+static bool insns_need_simulation(struct kprobe *p)
+{
+       if (is_pc_ins(&p->opcode))
+               return true;
+
+       if (is_branch_ins(&p->opcode))
+               return true;
+
+       return false;
+}
+NOKPROBE_SYMBOL(insns_need_simulation);
+
+static void arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
+{
+       if (is_pc_ins(&p->opcode))
+               simu_pc(regs, p->opcode);
+       else if (is_branch_ins(&p->opcode))
+               simu_branch(regs, p->opcode);
+}
+NOKPROBE_SYMBOL(arch_simulate_insn);
+
+static void arch_prepare_ss_slot(struct kprobe *p)
+{
+       p->ainsn.insn[0] = *p->addr;
+       p->ainsn.insn[1] = singlestep_insn;
+       p->ainsn.restore = (unsigned long)p->addr + LOONGARCH_INSN_SIZE;
+}
+NOKPROBE_SYMBOL(arch_prepare_ss_slot);
+
+static void arch_prepare_simulate(struct kprobe *p)
+{
+       p->ainsn.restore = 0;
+}
+NOKPROBE_SYMBOL(arch_prepare_simulate);
+
+int arch_prepare_kprobe(struct kprobe *p)
+{
+       if ((unsigned long)p->addr & 0x3)
+               return -EILSEQ;
+
+       /* copy instruction */
+       p->opcode = *p->addr;
+
+       /* decode instruction */
+       if (insns_not_supported(p->opcode))
+               return -EINVAL;
+
+       if (insns_need_simulation(p)) {
+               p->ainsn.insn = NULL;
+       } else {
+               p->ainsn.insn = get_insn_slot();
+               if (!p->ainsn.insn)
+                       return -ENOMEM;
+       }
+
+       /* prepare the instruction */
+       if (p->ainsn.insn)
+               arch_prepare_ss_slot(p);
+       else
+               arch_prepare_simulate(p);
+
+       return 0;
+}
+NOKPROBE_SYMBOL(arch_prepare_kprobe);
+
+/* Install breakpoint in text */
+void arch_arm_kprobe(struct kprobe *p)
+{
+       *p->addr = breakpoint_insn;
+       flush_insn_slot(p);
+}
+NOKPROBE_SYMBOL(arch_arm_kprobe);
+
+/* Remove breakpoint from text */
+void arch_disarm_kprobe(struct kprobe *p)
+{
+       *p->addr = p->opcode;
+       flush_insn_slot(p);
+}
+NOKPROBE_SYMBOL(arch_disarm_kprobe);
+
+void arch_remove_kprobe(struct kprobe *p)
+{
+       if (p->ainsn.insn) {
+               free_insn_slot(p->ainsn.insn, 0);
+               p->ainsn.insn = NULL;
+       }
+}
+NOKPROBE_SYMBOL(arch_remove_kprobe);
+
+static void save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+       kcb->prev_kprobe.kp = kprobe_running();
+       kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+NOKPROBE_SYMBOL(save_previous_kprobe);
+
+static void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+       __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+       kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+NOKPROBE_SYMBOL(restore_previous_kprobe);
+
+static void set_current_kprobe(struct kprobe *p)
+{
+       __this_cpu_write(current_kprobe, p);
+}
+NOKPROBE_SYMBOL(set_current_kprobe);
+
+/*
+ * Interrupts need to be disabled before single-step mode is set,
+ * and must not be re-enabled until single-step mode ends.
+ * Without disabling interrupts on the local CPU, there is a chance that
+ * an interrupt arrives between the exception return and the start of
+ * out-of-line single-stepping, which would result in wrongly single-
+ * stepping into the interrupt handler.
+ */
+static void save_local_irqflag(struct kprobe_ctlblk *kcb,
+                              struct pt_regs *regs)
+{
+       kcb->saved_status = regs->csr_prmd;
+       regs->csr_prmd &= ~CSR_PRMD_PIE;
+}
+NOKPROBE_SYMBOL(save_local_irqflag);
+
+static void restore_local_irqflag(struct kprobe_ctlblk *kcb,
+                                 struct pt_regs *regs)
+{
+       regs->csr_prmd = kcb->saved_status;
+}
+NOKPROBE_SYMBOL(restore_local_irqflag);
+
+static void post_kprobe_handler(struct kprobe *cur, struct kprobe_ctlblk *kcb,
+                               struct pt_regs *regs)
+{
+       /* restore the fall-through address for a slot-stepped (non-branching) insn */
+       if (cur->ainsn.restore != 0)
+               instruction_pointer_set(regs, cur->ainsn.restore);
+
+       /* restore back original saved kprobe variables and continue */
+       if (kcb->kprobe_status == KPROBE_REENTER) {
+               restore_previous_kprobe(kcb);
+               preempt_enable_no_resched();
+               return;
+       }
+
+       /*
+        * Update the kcb status even if cur->post_handler is
+        * not set, because reset_current_kprobe() doesn't update kcb.
+        */
+       kcb->kprobe_status = KPROBE_HIT_SSDONE;
+       if (cur->post_handler)
+               cur->post_handler(cur, regs, 0);
+
+       reset_current_kprobe();
+       preempt_enable_no_resched();
+}
+NOKPROBE_SYMBOL(post_kprobe_handler);
+
+static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
+                            struct kprobe_ctlblk *kcb, int reenter)
+{
+       if (reenter) {
+               save_previous_kprobe(kcb);
+               set_current_kprobe(p);
+               kcb->kprobe_status = KPROBE_REENTER;
+       } else {
+               kcb->kprobe_status = KPROBE_HIT_SS;
+       }
+
+       if (p->ainsn.insn) {
+               /* IRQs and single stepping do not mix well */
+               save_local_irqflag(kcb, regs);
+               /* set ip register to prepare for single stepping */
+               regs->csr_era = (unsigned long)p->ainsn.insn;
+       } else {
+               /* simulate single stepping */
+               arch_simulate_insn(p, regs);
+               /* now go for post processing */
+               post_kprobe_handler(p, kcb, regs);
+       }
+}
+NOKPROBE_SYMBOL(setup_singlestep);
+
+static bool reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
+                          struct kprobe_ctlblk *kcb)
+{
+       switch (kcb->kprobe_status) {
+       case KPROBE_HIT_SS:
+       case KPROBE_HIT_SSDONE:
+       case KPROBE_HIT_ACTIVE:
+               kprobes_inc_nmissed_count(p);
+               setup_singlestep(p, regs, kcb, 1);
+               break;
+       case KPROBE_REENTER:
+               pr_warn("Failed to recover from reentered kprobes.\n");
+               dump_kprobe(p);
+               WARN_ON_ONCE(1);
+               break;
+       default:
+               WARN_ON(1);
+               return false;
+       }
+
+       return true;
+}
+NOKPROBE_SYMBOL(reenter_kprobe);
+
+bool kprobe_breakpoint_handler(struct pt_regs *regs)
+{
+       struct kprobe_ctlblk *kcb;
+       struct kprobe *p, *cur_kprobe;
+       kprobe_opcode_t *addr = (kprobe_opcode_t *)regs->csr_era;
+
+       /*
+        * We don't want to be preempted for the entire
+        * duration of kprobe processing.
+        */
+       preempt_disable();
+       kcb = get_kprobe_ctlblk();
+       cur_kprobe = kprobe_running();
+
+       p = get_kprobe(addr);
+       if (p) {
+               if (cur_kprobe) {
+                       if (reenter_kprobe(p, regs, kcb))
+                               return true;
+               } else {
+                       /* Probe hit */
+                       set_current_kprobe(p);
+                       kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+                       /*
+                        * If we have no pre-handler or it returned 0, we
+                        * continue with normal processing.  If we have a
+                        * pre-handler and it returned non-zero, it modified
+                        * the execution path itself and there is no need to
+                        * single step; just reset the current kprobe and exit.
+                        *
+                        * Note that the pre-handler may itself hit a
+                        * breakpoint and single step through it before
+                        * returning.
+                        */
+                       if (!p->pre_handler || !p->pre_handler(p, regs)) {
+                               setup_singlestep(p, regs, kcb, 0);
+                       } else {
+                               reset_current_kprobe();
+                               preempt_enable_no_resched();
+                       }
+                       return true;
+               }
+       }
+
+       if (addr->word != breakpoint_insn.word) {
+               /*
+                * The breakpoint instruction was removed right
+                * after we hit it.  Another cpu has removed
+                * either a probepoint or a debugger breakpoint
+                * at this address.  In either case, no further
+                * handling of this interrupt is appropriate.
+                * Return to the original instruction and continue.
+                */
+               regs->csr_era = (unsigned long)addr;
+               preempt_enable_no_resched();
+               return true;
+       }
+
+       preempt_enable_no_resched();
+       return false;
+}
+NOKPROBE_SYMBOL(kprobe_breakpoint_handler);
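+
+/*
+ * Illustrative sketch, not part of this file: a minimal kprobe whose
+ * pre-handler returns 0, so that the breakpoint handler above goes on
+ * to single step the probed instruction. The probed symbol is only an
+ * example.
+ *
+ *	static int handler_pre(struct kprobe *p, struct pt_regs *regs)
+ *	{
+ *		pr_info("hit at %lx\n", instruction_pointer(regs));
+ *		return 0;	// proceed to setup_singlestep()
+ *	}
+ *
+ *	static struct kprobe kp = {
+ *		.symbol_name	= "kernel_clone",
+ *		.pre_handler	= handler_pre,
+ *	};
+ *	// register_kprobe(&kp);
+ */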
+
+bool kprobe_singlestep_handler(struct pt_regs *regs)
+{
+       struct kprobe *cur = kprobe_running();
+       struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+       unsigned long addr = instruction_pointer(regs);
+
+       if (cur && (kcb->kprobe_status & (KPROBE_HIT_SS | KPROBE_REENTER)) &&
+           ((unsigned long)&cur->ainsn.insn[1] == addr)) {
+               restore_local_irqflag(kcb, regs);
+               post_kprobe_handler(cur, kcb, regs);
+               return true;
+       }
+
+       preempt_enable_no_resched();
+       return false;
+}
+NOKPROBE_SYMBOL(kprobe_singlestep_handler);
+
+bool kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+       struct kprobe *cur = kprobe_running();
+       struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+       switch (kcb->kprobe_status) {
+       case KPROBE_HIT_SS:
+       case KPROBE_REENTER:
+               /*
+                * We are here because the instruction being single
+                * stepped caused a page fault. Reset the current
+                * kprobe, point the ip back to the probe address,
+                * and let the page fault handler continue as for a
+                * normal page fault.
+                */
+               regs->csr_era = (unsigned long)cur->addr;
+               WARN_ON_ONCE(!instruction_pointer(regs));
+
+               if (kcb->kprobe_status == KPROBE_REENTER) {
+                       restore_previous_kprobe(kcb);
+               } else {
+                       restore_local_irqflag(kcb, regs);
+                       reset_current_kprobe();
+               }
+               preempt_enable_no_resched();
+               break;
+       }
+       return false;
+}
+NOKPROBE_SYMBOL(kprobe_fault_handler);
+
+/*
+ * Provide a blacklist of symbols identifying ranges which cannot be kprobed.
+ * This blacklist is exposed to userspace via debugfs (kprobes/blacklist).
+ */
+int __init arch_populate_kprobe_blacklist(void)
+{
+       return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
+                                        (unsigned long)__irqentry_text_end);
+}
+
+int __init arch_init_kprobes(void)
+{
+       return 0;
+}
+
+/* The assembly function that handles kretprobes must not itself be probed */
+NOKPROBE_SYMBOL(__kretprobe_trampoline);
+
+/* Called from __kretprobe_trampoline */
+void __used *trampoline_probe_handler(struct pt_regs *regs)
+{
+       return (void *)kretprobe_trampoline_handler(regs, NULL);
+}
+NOKPROBE_SYMBOL(trampoline_probe_handler);
+
+void arch_prepare_kretprobe(struct kretprobe_instance *ri,
+                           struct pt_regs *regs)
+{
+       ri->ret_addr = (kprobe_opcode_t *)regs->regs[1];
+       ri->fp = NULL;
+
+       /* Replace the return address with the trampoline address */
+       regs->regs[1] = (unsigned long)&__kretprobe_trampoline;
+}
+NOKPROBE_SYMBOL(arch_prepare_kretprobe);
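+
+/*
+ * Illustrative sketch, not part of this file: a minimal kretprobe that
+ * relies on the return-address rewrite above. The probed symbol is only
+ * an example.
+ *
+ *	static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+ *	{
+ *		pr_info("returned %ld\n", regs_return_value(regs));
+ *		return 0;
+ *	}
+ *
+ *	static struct kretprobe rp = {
+ *		.handler	= ret_handler,
+ *		.kp.symbol_name	= "kernel_clone",
+ *	};
+ *	// register_kretprobe(&rp): each entry then swaps ra for __kretprobe_trampoline
+ */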
+
+int arch_trampoline_kprobe(struct kprobe *p)
+{
+       return 0;
+}
+NOKPROBE_SYMBOL(arch_trampoline_kprobe);
diff --git a/arch/loongarch/kernel/kprobes_trampoline.S b/arch/loongarch/kernel/kprobes_trampoline.S
new file mode 100644 (file)
index 0000000..af94b0d
--- /dev/null
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#include <linux/linkage.h>
+#include <asm/stackframe.h>
+
+       .text
+
+       .macro save_all_base_regs
+       cfi_st  ra, PT_R1
+       cfi_st  tp, PT_R2
+       cfi_st  a0, PT_R4
+       cfi_st  a1, PT_R5
+       cfi_st  a2, PT_R6
+       cfi_st  a3, PT_R7
+       cfi_st  a4, PT_R8
+       cfi_st  a5, PT_R9
+       cfi_st  a6, PT_R10
+       cfi_st  a7, PT_R11
+       cfi_st  t0, PT_R12
+       cfi_st  t1, PT_R13
+       cfi_st  t2, PT_R14
+       cfi_st  t3, PT_R15
+       cfi_st  t4, PT_R16
+       cfi_st  t5, PT_R17
+       cfi_st  t6, PT_R18
+       cfi_st  t7, PT_R19
+       cfi_st  t8, PT_R20
+       cfi_st  u0, PT_R21
+       cfi_st  fp, PT_R22
+       cfi_st  s0, PT_R23
+       cfi_st  s1, PT_R24
+       cfi_st  s2, PT_R25
+       cfi_st  s3, PT_R26
+       cfi_st  s4, PT_R27
+       cfi_st  s5, PT_R28
+       cfi_st  s6, PT_R29
+       cfi_st  s7, PT_R30
+       cfi_st  s8, PT_R31
+       csrrd   t0, LOONGARCH_CSR_CRMD
+       andi    t0, t0, 0x7 /* extract bit[1:0] PLV, bit[2] IE */
+       LONG_S  t0, sp, PT_CRMD
+       .endm
+
+       .macro restore_all_base_regs
+       cfi_ld  tp, PT_R2
+       cfi_ld  a0, PT_R4
+       cfi_ld  a1, PT_R5
+       cfi_ld  a2, PT_R6
+       cfi_ld  a3, PT_R7
+       cfi_ld  a4, PT_R8
+       cfi_ld  a5, PT_R9
+       cfi_ld  a6, PT_R10
+       cfi_ld  a7, PT_R11
+       cfi_ld  t0, PT_R12
+       cfi_ld  t1, PT_R13
+       cfi_ld  t2, PT_R14
+       cfi_ld  t3, PT_R15
+       cfi_ld  t4, PT_R16
+       cfi_ld  t5, PT_R17
+       cfi_ld  t6, PT_R18
+       cfi_ld  t7, PT_R19
+       cfi_ld  t8, PT_R20
+       cfi_ld  u0, PT_R21
+       cfi_ld  fp, PT_R22
+       cfi_ld  s0, PT_R23
+       cfi_ld  s1, PT_R24
+       cfi_ld  s2, PT_R25
+       cfi_ld  s3, PT_R26
+       cfi_ld  s4, PT_R27
+       cfi_ld  s5, PT_R28
+       cfi_ld  s6, PT_R29
+       cfi_ld  s7, PT_R30
+       cfi_ld  s8, PT_R31
+       LONG_L  t0, sp, PT_CRMD
+       li.d    t1, 0x7 /* mask bit[1:0] PLV, bit[2] IE */
+       csrxchg t0, t1, LOONGARCH_CSR_CRMD
+       .endm
+
+SYM_CODE_START(__kretprobe_trampoline)
+       addi.d  sp, sp, -PT_SIZE
+       save_all_base_regs
+
+       addi.d  t0, sp, PT_SIZE
+       LONG_S  t0, sp, PT_R3
+
+       move a0, sp /* pt_regs */
+
+       bl trampoline_probe_handler
+
+       /* use the result as the return-address */
+       move ra, a0
+
+       restore_all_base_regs
+       addi.d  sp, sp, PT_SIZE
+
+       jr ra
+SYM_CODE_END(__kretprobe_trampoline)
index edfd220a3737aadad190d43a2ecaf91fdcaf44e5..fa2443c7afb23688ff4b5ca0ba8363720c981282 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/sched/debug.h>
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
+#include <linux/hw_breakpoint.h>
 #include <linux/mm.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
@@ -96,6 +97,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
        regs->regs[3] = sp;
 }
 
+void flush_thread(void)
+{
+       flush_ptrace_hw_breakpoint(current);
+}
+
 void exit_thread(struct task_struct *tsk)
 {
 }
@@ -181,6 +187,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
                childregs->regs[2] = tls;
 
 out:
+       ptrace_hw_copy_thread(p);
        clear_tsk_thread_flag(p, TIF_USEDFPU);
        clear_tsk_thread_flag(p, TIF_USEDSIMD);
        clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
index dc2b82ea894cd26c1c3d662e488145d82d1fba0d..06bceae7d1040c6cfb38fe07acea7f1f765eb1a0 100644 (file)
@@ -20,7 +20,9 @@
 #include <linux/context_tracking.h>
 #include <linux/elf.h>
 #include <linux/errno.h>
+#include <linux/hw_breakpoint.h>
 #include <linux/mm.h>
+#include <linux/nospec.h>
 #include <linux/ptrace.h>
 #include <linux/regset.h>
 #include <linux/sched.h>
@@ -29,6 +31,7 @@
 #include <linux/smp.h>
 #include <linux/stddef.h>
 #include <linux/seccomp.h>
+#include <linux/thread_info.h>
 #include <linux/uaccess.h>
 
 #include <asm/byteorder.h>
@@ -39,6 +42,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
+#include <asm/ptrace.h>
 #include <asm/reg.h>
 #include <asm/syscall.h>
 
@@ -246,6 +250,384 @@ static int cfg_set(struct task_struct *target,
        return 0;
 }
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+
+/*
+ * Handle hitting a HW-breakpoint.
+ */
+static void ptrace_hbptriggered(struct perf_event *bp,
+                               struct perf_sample_data *data,
+                               struct pt_regs *regs)
+{
+       int i;
+       struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp);
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; ++i)
+               if (current->thread.hbp_break[i] == bp)
+                       break;
+
+       for (i = 0; i < LOONGARCH_MAX_WRP; ++i)
+               if (current->thread.hbp_watch[i] == bp)
+                       break;
+
+       force_sig_ptrace_errno_trap(i, (void __user *)bkpt->address);
+}
+
+static struct perf_event *ptrace_hbp_get_event(unsigned int note_type,
+                                              struct task_struct *tsk,
+                                              unsigned long idx)
+{
+       struct perf_event *bp;
+
+       switch (note_type) {
+       case NT_LOONGARCH_HW_BREAK:
+               if (idx >= LOONGARCH_MAX_BRP)
+                       return ERR_PTR(-EINVAL);
+               idx = array_index_nospec(idx, LOONGARCH_MAX_BRP);
+               bp = tsk->thread.hbp_break[idx];
+               break;
+       case NT_LOONGARCH_HW_WATCH:
+               if (idx >= LOONGARCH_MAX_WRP)
+                       return ERR_PTR(-EINVAL);
+               idx = array_index_nospec(idx, LOONGARCH_MAX_WRP);
+               bp = tsk->thread.hbp_watch[idx];
+               break;
+       }
+
+       return bp;
+}
+
+static int ptrace_hbp_set_event(unsigned int note_type,
+                               struct task_struct *tsk,
+                               unsigned long idx,
+                               struct perf_event *bp)
+{
+       switch (note_type) {
+       case NT_LOONGARCH_HW_BREAK:
+               if (idx >= LOONGARCH_MAX_BRP)
+                       return -EINVAL;
+               idx = array_index_nospec(idx, LOONGARCH_MAX_BRP);
+               tsk->thread.hbp_break[idx] = bp;
+               break;
+       case NT_LOONGARCH_HW_WATCH:
+               if (idx >= LOONGARCH_MAX_WRP)
+                       return -EINVAL;
+               idx = array_index_nospec(idx, LOONGARCH_MAX_WRP);
+               tsk->thread.hbp_watch[idx] = bp;
+               break;
+       }
+
+       return 0;
+}
+
+static struct perf_event *ptrace_hbp_create(unsigned int note_type,
+                                           struct task_struct *tsk,
+                                           unsigned long idx)
+{
+       int err, type;
+       struct perf_event *bp;
+       struct perf_event_attr attr;
+
+       switch (note_type) {
+       case NT_LOONGARCH_HW_BREAK:
+               type = HW_BREAKPOINT_X;
+               break;
+       case NT_LOONGARCH_HW_WATCH:
+               type = HW_BREAKPOINT_RW;
+               break;
+       default:
+               return ERR_PTR(-EINVAL);
+       }
+
+       ptrace_breakpoint_init(&attr);
+
+       /*
+        * Initialise fields to sane defaults
+        * (i.e. values that will pass validation).
+        */
+       attr.bp_addr    = 0;
+       attr.bp_len     = HW_BREAKPOINT_LEN_4;
+       attr.bp_type    = type;
+       attr.disabled   = 1;
+
+       bp = register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL, tsk);
+       if (IS_ERR(bp))
+               return bp;
+
+       err = ptrace_hbp_set_event(note_type, tsk, idx, bp);
+       if (err)
+               return ERR_PTR(err);
+
+       return bp;
+}
+
+static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,
+                                    struct arch_hw_breakpoint_ctrl ctrl,
+                                    struct perf_event_attr *attr)
+{
+       int err, len, type, offset;
+
+       err = arch_bp_generic_fields(ctrl, &len, &type, &offset);
+       if (err)
+               return err;
+
+       switch (note_type) {
+       case NT_LOONGARCH_HW_BREAK:
+               if ((type & HW_BREAKPOINT_X) != type)
+                       return -EINVAL;
+               break;
+       case NT_LOONGARCH_HW_WATCH:
+               if ((type & HW_BREAKPOINT_RW) != type)
+                       return -EINVAL;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       attr->bp_len    = len;
+       attr->bp_type   = type;
+       attr->bp_addr   += offset;
+
+       return 0;
+}
+
+static int ptrace_hbp_get_resource_info(unsigned int note_type, u16 *info)
+{
+       u8 num;
+       u16 reg = 0;
+
+       switch (note_type) {
+       case NT_LOONGARCH_HW_BREAK:
+               num = hw_breakpoint_slots(TYPE_INST);
+               break;
+       case NT_LOONGARCH_HW_WATCH:
+               num = hw_breakpoint_slots(TYPE_DATA);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       *info = reg | num;
+
+       return 0;
+}
+
+static struct perf_event *ptrace_hbp_get_initialised_bp(unsigned int note_type,
+                                                       struct task_struct *tsk,
+                                                       unsigned long idx)
+{
+       struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx);
+
+       if (!bp)
+               bp = ptrace_hbp_create(note_type, tsk, idx);
+
+       return bp;
+}
+
+static int ptrace_hbp_get_ctrl(unsigned int note_type,
+                              struct task_struct *tsk,
+                              unsigned long idx, u32 *ctrl)
+{
+       struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx);
+
+       if (IS_ERR(bp))
+               return PTR_ERR(bp);
+
+       *ctrl = bp ? encode_ctrl_reg(counter_arch_bp(bp)->ctrl) : 0;
+
+       return 0;
+}
+
+static int ptrace_hbp_get_mask(unsigned int note_type,
+                              struct task_struct *tsk,
+                              unsigned long idx, u64 *mask)
+{
+       struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx);
+
+       if (IS_ERR(bp))
+               return PTR_ERR(bp);
+
+       *mask = bp ? counter_arch_bp(bp)->mask : 0;
+
+       return 0;
+}
+
+static int ptrace_hbp_get_addr(unsigned int note_type,
+                              struct task_struct *tsk,
+                              unsigned long idx, u64 *addr)
+{
+       struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx);
+
+       if (IS_ERR(bp))
+               return PTR_ERR(bp);
+
+       *addr = bp ? counter_arch_bp(bp)->address : 0;
+
+       return 0;
+}
+
+static int ptrace_hbp_set_ctrl(unsigned int note_type,
+                              struct task_struct *tsk,
+                              unsigned long idx, u32 uctrl)
+{
+       int err;
+       struct perf_event *bp;
+       struct perf_event_attr attr;
+       struct arch_hw_breakpoint_ctrl ctrl;
+
+       bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx);
+       if (IS_ERR(bp))
+               return PTR_ERR(bp);
+
+       attr = bp->attr;
+       decode_ctrl_reg(uctrl, &ctrl);
+       err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr);
+       if (err)
+               return err;
+
+       return modify_user_hw_breakpoint(bp, &attr);
+}
+
+static int ptrace_hbp_set_mask(unsigned int note_type,
+                              struct task_struct *tsk,
+                              unsigned long idx, u64 mask)
+{
+       struct perf_event *bp;
+       struct perf_event_attr attr;
+       struct arch_hw_breakpoint *info;
+
+       bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx);
+       if (IS_ERR(bp))
+               return PTR_ERR(bp);
+
+       attr = bp->attr;
+       info = counter_arch_bp(bp);
+       info->mask = mask;
+
+       return modify_user_hw_breakpoint(bp, &attr);
+}
+
+static int ptrace_hbp_set_addr(unsigned int note_type,
+                              struct task_struct *tsk,
+                              unsigned long idx, u64 addr)
+{
+       struct perf_event *bp;
+       struct perf_event_attr attr;
+
+       bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx);
+       if (IS_ERR(bp))
+               return PTR_ERR(bp);
+
+       attr = bp->attr;
+       attr.bp_addr = addr;
+
+       return modify_user_hw_breakpoint(bp, &attr);
+}
+
+#define PTRACE_HBP_CTRL_SZ     sizeof(u32)
+#define PTRACE_HBP_ADDR_SZ     sizeof(u64)
+#define PTRACE_HBP_MASK_SZ     sizeof(u64)
+
+static int hw_break_get(struct task_struct *target,
+                       const struct user_regset *regset,
+                       struct membuf to)
+{
+       u16 info;
+       u32 ctrl;
+       u64 addr, mask;
+       int ret, idx = 0;
+       unsigned int note_type = regset->core_note_type;
+
+       /* Resource info */
+       ret = ptrace_hbp_get_resource_info(note_type, &info);
+       if (ret)
+               return ret;
+
+       membuf_write(&to, &info, sizeof(info));
+
+       /* (address, mask, ctrl) registers */
+       while (to.left) {
+               ret = ptrace_hbp_get_addr(note_type, target, idx, &addr);
+               if (ret)
+                       return ret;
+
+               ret = ptrace_hbp_get_mask(note_type, target, idx, &mask);
+               if (ret)
+                       return ret;
+
+               ret = ptrace_hbp_get_ctrl(note_type, target, idx, &ctrl);
+               if (ret)
+                       return ret;
+
+               membuf_store(&to, addr);
+               membuf_store(&to, mask);
+               membuf_store(&to, ctrl);
+               idx++;
+       }
+
+       return 0;
+}
+
+static int hw_break_set(struct task_struct *target,
+                       const struct user_regset *regset,
+                       unsigned int pos, unsigned int count,
+                       const void *kbuf, const void __user *ubuf)
+{
+       u32 ctrl;
+       u64 addr, mask;
+       int ret, idx = 0, offset, limit;
+       unsigned int note_type = regset->core_note_type;
+
+       /* Resource info */
+       offset = offsetof(struct user_watch_state, dbg_regs);
+       user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 0, offset);
+
+       /* (address, mask, ctrl) registers */
+       limit = regset->n * regset->size;
+       while (count && offset < limit) {
+               if (count < PTRACE_HBP_ADDR_SZ)
+                       return -EINVAL;
+
+               ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &addr,
+                                        offset, offset + PTRACE_HBP_ADDR_SZ);
+               if (ret)
+                       return ret;
+
+               ret = ptrace_hbp_set_addr(note_type, target, idx, addr);
+               if (ret)
+                       return ret;
+               offset += PTRACE_HBP_ADDR_SZ;
+
+               if (!count)
+                       break;
+
+               ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &mask,
+                                        offset, offset + PTRACE_HBP_MASK_SZ);
+               if (ret)
+                       return ret;
+
+               ret = ptrace_hbp_set_mask(note_type, target, idx, mask);
+               if (ret)
+                       return ret;
+               offset += PTRACE_HBP_MASK_SZ;
+
+               if (!count)
+                       break;
+
+               ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl,
+                                        offset, offset + PTRACE_HBP_CTRL_SZ);
+               if (ret)
+                       return ret;
+
+               ret = ptrace_hbp_set_ctrl(note_type, target, idx, ctrl);
+               if (ret)
+                       return ret;
+               offset += PTRACE_HBP_CTRL_SZ;
+               idx++;
+       }
+
+       return 0;
+}
+
+#endif
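+
+/*
+ * Illustrative user-space sketch, not kernel code: reading the watchpoint
+ * regset exposed above through the generic regset interface. Field names
+ * are those of the uapi struct user_watch_state assumed here.
+ *
+ *	struct user_watch_state ws;
+ *	struct iovec iov = { .iov_base = &ws, .iov_len = sizeof(ws) };
+ *
+ *	ptrace(PTRACE_GETREGSET, pid, NT_LOONGARCH_HW_WATCH, &iov);
+ *	// the leading info word gives the number of slots (see
+ *	// ptrace_hbp_get_resource_info()); each dbg_regs[i] then holds
+ *	// the (addr, mask, ctrl) triple written by hw_break_get()
+ */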
+
 struct pt_regs_offset {
        const char *name;
        int offset;
@@ -319,6 +701,10 @@ enum loongarch_regset {
        REGSET_GPR,
        REGSET_FPR,
        REGSET_CPUCFG,
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+       REGSET_HW_BREAK,
+       REGSET_HW_WATCH,
+#endif
 };
 
 static const struct user_regset loongarch64_regsets[] = {
@@ -346,6 +732,24 @@ static const struct user_regset loongarch64_regsets[] = {
                .regset_get     = cfg_get,
                .set            = cfg_set,
        },
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+       [REGSET_HW_BREAK] = {
+               .core_note_type = NT_LOONGARCH_HW_BREAK,
+               .n = sizeof(struct user_watch_state) / sizeof(u32),
+               .size = sizeof(u32),
+               .align = sizeof(u32),
+               .regset_get = hw_break_get,
+               .set = hw_break_set,
+       },
+       [REGSET_HW_WATCH] = {
+               .core_note_type = NT_LOONGARCH_HW_WATCH,
+               .n = sizeof(struct user_watch_state) / sizeof(u32),
+               .size = sizeof(u32),
+               .align = sizeof(u32),
+               .regset_get = hw_break_get,
+               .set = hw_break_set,
+       },
+#endif
 };
 
 static const struct user_regset_view user_loongarch64_view = {
@@ -431,3 +835,71 @@ long arch_ptrace(struct task_struct *child, long request,
 
        return ret;
 }
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+static void ptrace_triggered(struct perf_event *bp,
+                     struct perf_sample_data *data, struct pt_regs *regs)
+{
+       struct perf_event_attr attr;
+
+       attr = bp->attr;
+       attr.disabled = true;
+       modify_user_hw_breakpoint(bp, &attr);
+}
+
+static int set_single_step(struct task_struct *tsk, unsigned long addr)
+{
+       struct perf_event *bp;
+       struct perf_event_attr attr;
+       struct arch_hw_breakpoint *info;
+       struct thread_struct *thread = &tsk->thread;
+
+       bp = thread->hbp_break[0];
+       if (!bp) {
+               ptrace_breakpoint_init(&attr);
+
+               attr.bp_addr = addr;
+               attr.bp_len = HW_BREAKPOINT_LEN_8;
+               attr.bp_type = HW_BREAKPOINT_X;
+
+               bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
+                                                NULL, tsk);
+               if (IS_ERR(bp))
+                       return PTR_ERR(bp);
+
+               thread->hbp_break[0] = bp;
+       } else {
+               int err;
+
+               attr = bp->attr;
+               attr.bp_addr = addr;
+
+               /* Reenable breakpoint */
+               attr.disabled = false;
+               err = modify_user_hw_breakpoint(bp, &attr);
+               if (unlikely(err))
+                       return err;
+
+               csr_write64(attr.bp_addr, LOONGARCH_CSR_IB0ADDR);
+       }
+       info = counter_arch_bp(bp);
+       info->mask = TASK_SIZE - 1;
+
+       return 0;
+}
+
+/* ptrace API */
+void user_enable_single_step(struct task_struct *task)
+{
+       struct thread_info *ti = task_thread_info(task);
+
+       set_single_step(task, task_pt_regs(task)->csr_era);
+       task->thread.single_step = task_pt_regs(task)->csr_era;
+       set_ti_thread_flag(ti, TIF_SINGLESTEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+       clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+#endif
diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c
new file mode 100644 (file)
index 0000000..01f94d1
--- /dev/null
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for Kernel relocation at boot time
+ *
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/panic_notifier.h>
+#include <linux/start_kernel.h>
+#include <asm/bootinfo.h>
+#include <asm/early_ioremap.h>
+#include <asm/inst.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+
+#define RELOCATED(x) ((void *)((long)x + reloc_offset))
+#define RELOCATED_KASLR(x) ((void *)((long)x + random_offset))
+
+static unsigned long reloc_offset;
+
+static inline void __init relocate_relative(void)
+{
+       Elf64_Rela *rela, *rela_end;
+       rela = (Elf64_Rela *)&__rela_dyn_begin;
+       rela_end = (Elf64_Rela *)&__rela_dyn_end;
+
+       for ( ; rela < rela_end; rela++) {
+               Elf64_Addr addr = rela->r_offset;
+               Elf64_Addr relocated_addr = rela->r_addend;
+
+               if (rela->r_info != R_LARCH_RELATIVE)
+                       continue;
+
+               if (relocated_addr >= VMLINUX_LOAD_ADDRESS)
+                       relocated_addr = (Elf64_Addr)RELOCATED(relocated_addr);
+
+               *(Elf64_Addr *)RELOCATED(addr) = relocated_addr;
+       }
+}
+
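+/*
+ * Each .la_abs entry records a la.abs expansion: four instructions
+ * (lu12i.w, ori, lu32i.d, lu52i.d) whose immediates together encode a
+ * 64-bit absolute address. Patch each immediate field with the matching
+ * slice of the (possibly relocated) symbol value.
+ */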
+static inline void __init relocate_absolute(long random_offset)
+{
+       void *begin, *end;
+       struct rela_la_abs *p;
+
+       begin = RELOCATED_KASLR(&__la_abs_begin);
+       end   = RELOCATED_KASLR(&__la_abs_end);
+
+       for (p = begin; (void *)p < end; p++) {
+               long v = p->symvalue;
+               uint32_t lu12iw, ori, lu32id, lu52id;
+               union loongarch_instruction *insn = (void *)p - p->offset;
+
+               lu12iw = (v >> 12) & 0xfffff;
+               ori    = v & 0xfff;
+               lu32id = (v >> 32) & 0xfffff;
+               lu52id = v >> 52;
+
+               insn[0].reg1i20_format.immediate = lu12iw;
+               insn[1].reg2i12_format.immediate = ori;
+               insn[2].reg1i20_format.immediate = lu32id;
+               insn[3].reg2i12_format.immediate = lu52id;
+       }
+}
+
+#ifdef CONFIG_RANDOMIZE_BASE
+static inline __init unsigned long rotate_xor(unsigned long hash,
+                                             const void *area, size_t size)
+{
+       size_t i, diff;
+       const typeof(hash) *ptr = PTR_ALIGN(area, sizeof(hash));
+
+       diff = (void *)ptr - area;
+       if (size < diff + sizeof(hash))
+               return hash;
+
+       size = ALIGN_DOWN(size - diff, sizeof(hash));
+
+       for (i = 0; i < size / sizeof(hash); i++) {
+               /* Rotate by odd number of bits and XOR. */
+               hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+               hash ^= ptr[i];
+       }
+
+       return hash;
+}
+
+static inline __init unsigned long get_random_boot(void)
+{
+       unsigned long hash = 0;
+       unsigned long entropy = random_get_entropy();
+
+       /* Attempt to create a simple but unpredictable starting entropy. */
+       hash = rotate_xor(hash, linux_banner, strlen(linux_banner));
+
+       /* Add in any runtime entropy we can get */
+       hash = rotate_xor(hash, &entropy, sizeof(entropy));
+
+       return hash;
+}
+
+static inline __init bool kaslr_disabled(void)
+{
+       char *str;
+       const char *builtin_cmdline = CONFIG_CMDLINE;
+
+       str = strstr(builtin_cmdline, "nokaslr");
+       if (str == builtin_cmdline || (str > builtin_cmdline && *(str - 1) == ' '))
+               return true;
+
+       str = strstr(boot_command_line, "nokaslr");
+       if (str == boot_command_line || (str > boot_command_line && *(str - 1) == ' '))
+               return true;
+
+       return false;
+}
+
+/* Choose a new address for the kernel */
+static inline void __init *determine_relocation_address(void)
+{
+       unsigned long kernel_length;
+       unsigned long random_offset;
+       void *destination = _text;
+
+       if (kaslr_disabled())
+               return destination;
+
+       kernel_length = (long)_end - (long)_text;
+
+       random_offset = get_random_boot() << 16;
+       random_offset &= (CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1);
+       if (random_offset < kernel_length)
+               random_offset += ALIGN(kernel_length, 0xffff);
+
+       return RELOCATED_KASLR(destination);
+}
+
+static inline int __init relocation_addr_valid(void *location_new)
+{
+       if ((unsigned long)location_new & 0x0000ffff)
+               return 0; /* Inappropriately aligned new location */
+
+       if ((unsigned long)location_new < (unsigned long)_end)
+               return 0; /* New location overlaps original kernel */
+
+       return 1;
+}
+#endif
+
+static inline void __init update_reloc_offset(unsigned long *addr, long random_offset)
+{
+       unsigned long *new_addr = (unsigned long *)RELOCATED_KASLR(addr);
+
+       *new_addr = (unsigned long)reloc_offset;
+}
+
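+/*
+ * Called early in boot: pick a (possibly randomized) destination, copy
+ * the kernel image there, fix up R_LARCH_RELATIVE entries and la.abs
+ * sites, and return the entry point to jump to.
+ */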
+void * __init relocate_kernel(void)
+{
+       unsigned long kernel_length;
+       unsigned long random_offset = 0;
+       void *location_new = _text; /* Default to original kernel start */
+       void *kernel_entry = start_kernel; /* Default to original kernel entry point */
+       char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */
+
+       strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE);
+
+#ifdef CONFIG_RANDOMIZE_BASE
+       location_new = determine_relocation_address();
+
+       /* Sanity check relocation address */
+       if (relocation_addr_valid(location_new))
+               random_offset = (unsigned long)location_new - (unsigned long)(_text);
+#endif
+       reloc_offset = (unsigned long)_text - VMLINUX_LOAD_ADDRESS;
+
+       if (random_offset) {
+               kernel_length = (long)(_end) - (long)(_text);
+
+               /* Copy the kernel to its new location */
+               memcpy(location_new, _text, kernel_length);
+
+               /* Sync the caches ready for execution of the new kernel */
+               __asm__ __volatile__ (
+                       "ibar 0 \t\n"
+                       "dbar 0 \t\n"
+                       ::: "memory");
+
+               reloc_offset += random_offset;
+
+               /* Return the new kernel's entry point */
+               kernel_entry = RELOCATED_KASLR(start_kernel);
+
+               /* The current thread is now within the relocated kernel */
+               __current_thread_info = RELOCATED_KASLR(__current_thread_info);
+
+               update_reloc_offset(&reloc_offset, random_offset);
+       }
+
+       if (reloc_offset)
+               relocate_relative();
+
+       relocate_absolute(random_offset);
+
+       return kernel_entry;
+}
+
+/*
+ * Show relocation information on panic.
+ */
+static void show_kernel_relocation(const char *level)
+{
+       if (reloc_offset > 0) {
+               printk(level);
+               pr_cont("Kernel relocated by 0x%lx\n", reloc_offset);
+               pr_cont(" .text @ 0x%px\n", _text);
+               pr_cont(" .data @ 0x%px\n", _sdata);
+               pr_cont(" .bss  @ 0x%px\n", __bss_start);
+       }
+}
+
+static int kernel_location_notifier_fn(struct notifier_block *self,
+                                      unsigned long v, void *p)
+{
+       show_kernel_relocation(KERN_EMERG);
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block kernel_location_notifier = {
+       .notifier_call = kernel_location_notifier_fn
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+       atomic_notifier_chain_register(&panic_notifier_list,
+                                      &kernel_location_notifier);
+       return 0;
+}
+
+arch_initcall(register_kernel_offset_dumper);
index 4344502c0b31780677a84cd5bb117004336df712..bae84ccf6d3671c29e1849ec1542693d1ef5a06f 100644 (file)
@@ -234,11 +234,14 @@ static void __init arch_reserve_vmcore(void)
 #endif
 }
 
+/* 2MB alignment for crash kernel regions */
+#define CRASH_ALIGN    SZ_2M
+#define CRASH_ADDR_MAX SZ_4G
+
 static void __init arch_parse_crashkernel(void)
 {
 #ifdef CONFIG_KEXEC
        int ret;
-       unsigned long long start;
        unsigned long long total_mem;
        unsigned long long crash_base, crash_size;
 
@@ -247,8 +250,13 @@ static void __init arch_parse_crashkernel(void)
        if (ret < 0 || crash_size <= 0)
                return;
 
-       start = memblock_phys_alloc_range(crash_size, 1, crash_base, crash_base + crash_size);
-       if (start != crash_base) {
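+       /* crashkernel=size[@offset]: crash_base stays 0 when no @offset is given */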
+       if (crash_base <= 0) {
+               crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, CRASH_ALIGN, CRASH_ADDR_MAX);
+               if (!crash_base) {
+                       pr_warn("crashkernel reservation failed - No suitable area found.\n");
+                       return;
+               }
+       } else if (!memblock_phys_alloc_range(crash_size, CRASH_ALIGN, crash_base, crash_base + crash_size)) {
                pr_warn("Invalid memory region reserved for crash kernel\n");
                return;
        }
index a6576dea590c0b26644ce3eebd392b17a1477490..4351f69d995015e471c0269e1c6c37c9d4b388e2 100644 (file)
@@ -140,16 +140,17 @@ static int get_timer_irq(void)
 
 int constant_clockevent_init(void)
 {
-       int irq;
        unsigned int cpu = smp_processor_id();
        unsigned long min_delta = 0x600;
        unsigned long max_delta = (1UL << 48) - 1;
        struct clock_event_device *cd;
-       static int timer_irq_installed = 0;
+       static int irq = 0, timer_irq_installed = 0;
 
-       irq = get_timer_irq();
-       if (irq < 0)
-               pr_err("Failed to map irq %d (timer)\n", irq);
+       if (!timer_irq_installed) {
+               irq = get_timer_irq();
+               if (irq < 0)
+                       pr_err("Failed to map irq %d (timer)\n", irq);
+       }
 
        cd = &per_cpu(constant_clockevent_device, cpu);
 
index c38a146a973b45beafc0e984bdff77631fbea0bb..de8ebe20b666cf5934751801e615f6f16ea21f4a 100644 (file)
@@ -371,9 +371,14 @@ int no_unaligned_warning __read_mostly = 1;        /* Only 1 warning by default */
 
 asmlinkage void noinstr do_ale(struct pt_regs *regs)
 {
-       unsigned int *pc;
        irqentry_state_t state = irqentry_enter(regs);
 
+#ifndef CONFIG_ARCH_STRICT_ALIGN
+       die_if_kernel("Kernel ale access", regs);
+       force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
+#else
+       unsigned int *pc;
+
        perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr);
 
        /*
@@ -397,8 +402,8 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs)
 sigbus:
        die_if_kernel("Kernel ale access", regs);
        force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
-
 out:
+#endif
        irqentry_exit(regs, state);
 }
 
@@ -432,7 +437,9 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
        unsigned long era = exception_era(regs);
        irqentry_state_t state = irqentry_enter(regs);
 
-       local_irq_enable();
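+       /* Re-enable IRQs only if they were enabled when the exception was taken */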
+       if (regs->csr_prmd & CSR_PRMD_PIE)
+               local_irq_enable();
+
        current->thread.trap_nr = read_csr_excode();
        if (__get_inst(&opcode, (u32 *)era, user))
                goto out_sigsegv;
@@ -445,14 +452,12 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
         */
        switch (bcode) {
        case BRK_KPROBE_BP:
-               if (notify_die(DIE_BREAK, "Kprobe", regs, bcode,
-                              current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP)
+               if (kprobe_breakpoint_handler(regs))
                        goto out;
                else
                        break;
        case BRK_KPROBE_SSTEPBP:
-               if (notify_die(DIE_SSTEPBP, "Kprobe_SingleStep", regs, bcode,
-                              current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP)
+               if (kprobe_singlestep_handler(regs))
                        goto out;
                else
                        break;
@@ -495,7 +500,9 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
        }
 
 out:
-       local_irq_disable();
+       if (regs->csr_prmd & CSR_PRMD_PIE)
+               local_irq_disable();
+
        irqentry_exit(regs, state);
        return;
 
@@ -506,7 +513,52 @@ out_sigsegv:
 
 asmlinkage void noinstr do_watch(struct pt_regs *regs)
 {
+       irqentry_state_t state = irqentry_enter(regs);
+
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
        pr_warn("Hardware watch point handler not implemented!\n");
+#else
+       if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
+               int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
+               unsigned long pc = instruction_pointer(regs);
+               union loongarch_instruction *ip = (union loongarch_instruction *)pc;
+
+               if (llbit) {
+                       /*
+                        * When an ll-sc combo is encountered, it is regarded as a single
+                        * instruction, so don't clear llbit or reset CSR.FWPS.Skip until
+                        * the ll-sc execution has completed.
+                        */
+                       csr_write32(CSR_FWPC_SKIP, LOONGARCH_CSR_FWPS);
+                       csr_write32(CSR_LLBCTL_KLO, LOONGARCH_CSR_LLBCTL);
+                       goto out;
+               }
+
+               if (pc == current->thread.single_step) {
+                       /*
+                        * Certain insns, such as fld.d/fst.d, are occasionally not skipped
+                        * even when CSR.FWPS.Skip is set. So single-step needs to check
+                        * whether csr_era still equals the address recorded when single
+                        * stepping was last set up.
+                        */
+                       if (!is_self_loop_ins(ip, regs)) {
+                               /*
+                                * Check whether the instruction's target pc equals the
+                                * current pc (a self loop). If it does, we must not set
+                                * the CSR.FWPS.Skip bit, as that would break out of the
+                                * original instruction stream.
+                                */
+                               csr_write32(CSR_FWPC_SKIP, LOONGARCH_CSR_FWPS);
+                               goto out;
+                       }
+               }
+       } else {
+               breakpoint_handler(regs);
+               watchpoint_handler(regs);
+       }
+
+       force_sig(SIGTRAP);
+out:
+#endif
+       irqentry_exit(regs, state);
 }
 
 asmlinkage void noinstr do_ri(struct pt_regs *regs)
index 78506b31ba61babffa2238b5b891652f078ed741..0c7b041be9d8de9ef82e878fa0904c88818044f0 100644 (file)
@@ -65,10 +65,21 @@ SECTIONS
                __alt_instructions_end = .;
        }
 
+#ifdef CONFIG_RELOCATABLE
+       . = ALIGN(8);
+       .la_abs : AT(ADDR(.la_abs) - LOAD_OFFSET) {
+               __la_abs_begin = .;
+               *(.la_abs)
+               __la_abs_end = .;
+       }
+#endif
+
        .got : ALIGN(16) { *(.got) }
        .plt : ALIGN(16) { *(.plt) }
        .got.plt : ALIGN(16) { *(.got.plt) }
 
+       .data.rel : { *(.data.rel*) }
+
        . = ALIGN(PECOFF_SEGMENT_ALIGN);
        __init_begin = .;
        __inittext_begin = .;
@@ -92,8 +103,6 @@ SECTIONS
        PERCPU_SECTION(1 << CONFIG_L1_CACHE_SHIFT)
 #endif
 
-       .rela.dyn : ALIGN(8) { *(.rela.dyn) *(.rela*) }
-
        .init.bss : {
                *(.init.bss)
        }
@@ -106,6 +115,12 @@ SECTIONS
        RO_DATA(4096)
        RW_DATA(1 << CONFIG_L1_CACHE_SHIFT, PAGE_SIZE, THREAD_SIZE)
 
+       .rela.dyn : ALIGN(8) {
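+               /* bounds used by relocate_relative() to walk R_LARCH_RELATIVE entries */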
+               __rela_dyn_begin = .;
+               *(.rela.dyn) *(.rela*)
+               __rela_dyn_end = .;
+       }
+
        .sdata : {
                *(.sdata)
        }
@@ -132,6 +147,7 @@ SECTIONS
 
        DISCARDS
        /DISCARD/ : {
+               *(.dynamic .dynsym .dynstr .hash .gnu.hash)
                *(.gnu.attributes)
                *(.options)
                *(.eh_frame)
index 7c07d595ee89aca1428431b18727f33332576b06..3b7e1dec71094d4eadb829b65a433a7e6e7fcdca 100644 (file)
@@ -17,6 +17,7 @@ SYM_FUNC_START(memcpy)
        ALTERNATIVE     "b __memcpy_generic", \
                        "b __memcpy_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memcpy)
+_ASM_NOKPROBE(memcpy)
 
 EXPORT_SYMBOL(memcpy)
 
@@ -41,6 +42,7 @@ SYM_FUNC_START(__memcpy_generic)
 2:     move    a0, a3
        jr      ra
 SYM_FUNC_END(__memcpy_generic)
+_ASM_NOKPROBE(__memcpy_generic)
 
 /*
  * void *__memcpy_fast(void *dst, const void *src, size_t n)
@@ -93,3 +95,4 @@ SYM_FUNC_START(__memcpy_fast)
 3:     move    a0, a3
        jr      ra
 SYM_FUNC_END(__memcpy_fast)
+_ASM_NOKPROBE(__memcpy_fast)
index 6ffdb46da78fdfee56ce06f73fb16e250d61e663..b796c3d6da05258aad1e40f9051dc81c937f4b17 100644 (file)
@@ -29,6 +29,7 @@ SYM_FUNC_START(memmove)
        b       rmemcpy
 4:     b       __rmemcpy_generic
 SYM_FUNC_END(memmove)
+_ASM_NOKPROBE(memmove)
 
 EXPORT_SYMBOL(memmove)
 
@@ -39,6 +40,7 @@ SYM_FUNC_START(rmemcpy)
        ALTERNATIVE     "b __rmemcpy_generic", \
                        "b __rmemcpy_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(rmemcpy)
+_ASM_NOKPROBE(rmemcpy)
 
 /*
  * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
@@ -64,6 +66,7 @@ SYM_FUNC_START(__rmemcpy_generic)
 2:     move    a0, a3
        jr      ra
 SYM_FUNC_END(__rmemcpy_generic)
+_ASM_NOKPROBE(__rmemcpy_generic)
 
 /*
  * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
@@ -119,3 +122,4 @@ SYM_FUNC_START(__rmemcpy_fast)
 3:     move    a0, a3
        jr      ra
 SYM_FUNC_END(__rmemcpy_fast)
+_ASM_NOKPROBE(__rmemcpy_fast)
index e7cb4ea3747d7ce045ad51406604fb9035da991f..a9eb732ab2adb9e132592f998717c49be542423e 100644 (file)
@@ -23,6 +23,7 @@ SYM_FUNC_START(memset)
        ALTERNATIVE     "b __memset_generic", \
                        "b __memset_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memset)
+_ASM_NOKPROBE(memset)
 
 EXPORT_SYMBOL(memset)
 
@@ -45,6 +46,7 @@ SYM_FUNC_START(__memset_generic)
 2:     move    a0, a3
        jr      ra
 SYM_FUNC_END(__memset_generic)
+_ASM_NOKPROBE(__memset_generic)
 
 /*
  * void *__memset_fast(void *s, int c, size_t n)
@@ -89,3 +91,4 @@ SYM_FUNC_START(__memset_fast)
 3:     move    a0, a3
        jr      ra
 SYM_FUNC_END(__memset_fast)
+_ASM_NOKPROBE(__memset_fast)
index 1ccd53655cab097f02ed09a5c1bd566de38d01ec..449087bd589d339dba1b27f66cc07f170085cff7 100644 (file)
@@ -135,6 +135,9 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
        struct vm_area_struct *vma = NULL;
        vm_fault_t fault;
 
+       if (kprobe_page_fault(regs, current->thread.trap_nr))
+               return;
+
        /*
         * We fault-in kernel-space virtual memory on-demand. The
         * 'reference' page table is init_mm.pgd.
index 58781c6e4191a892d9f17b8bed22dfe10d8e544b..244e2f5aeee564c75c0459569caf4f879876cbe3 100644 (file)
@@ -24,8 +24,7 @@
        move            a0, sp
        REG_S           a2, sp, PT_BVADDR
        li.w            a1, \write
-       la.abs          t0, do_page_fault
-       jirl            ra, t0, 0
+       bl              do_page_fault
        RESTORE_ALL_AND_RET
        SYM_FUNC_END(tlb_do_page_fault_\write)
        .endm
@@ -40,7 +39,7 @@ SYM_FUNC_START(handle_tlb_protect)
        move            a1, zero
        csrrd           a2, LOONGARCH_CSR_BADV
        REG_S           a2, sp, PT_BVADDR
-       la.abs          t0, do_page_fault
+       la_abs          t0, do_page_fault
        jirl            ra, t0, 0
        RESTORE_ALL_AND_RET
 SYM_FUNC_END(handle_tlb_protect)
@@ -116,7 +115,7 @@ smp_pgtable_change_load:
 
 #ifdef CONFIG_64BIT
 vmalloc_load:
-       la.abs          t1, swapper_pg_dir
+       la_abs          t1, swapper_pg_dir
        b               vmalloc_done_load
 #endif
 
@@ -187,7 +186,7 @@ tlb_huge_update_load:
 nopage_tlb_load:
        dbar            0
        csrrd           ra, EXCEPTION_KS2
-       la.abs          t0, tlb_do_page_fault_0
+       la_abs          t0, tlb_do_page_fault_0
        jr              t0
 SYM_FUNC_END(handle_tlb_load)
 
@@ -263,7 +262,7 @@ smp_pgtable_change_store:
 
 #ifdef CONFIG_64BIT
 vmalloc_store:
-       la.abs          t1, swapper_pg_dir
+       la_abs          t1, swapper_pg_dir
        b               vmalloc_done_store
 #endif
 
@@ -336,7 +335,7 @@ tlb_huge_update_store:
 nopage_tlb_store:
        dbar            0
        csrrd           ra, EXCEPTION_KS2
-       la.abs          t0, tlb_do_page_fault_1
+       la_abs          t0, tlb_do_page_fault_1
        jr              t0
 SYM_FUNC_END(handle_tlb_store)
 
@@ -411,7 +410,7 @@ smp_pgtable_change_modify:
 
 #ifdef CONFIG_64BIT
 vmalloc_modify:
-       la.abs          t1, swapper_pg_dir
+       la_abs          t1, swapper_pg_dir
        b               vmalloc_done_modify
 #endif
 
@@ -483,7 +482,7 @@ tlb_huge_update_modify:
 nopage_tlb_modify:
        dbar            0
        csrrd           ra, EXCEPTION_KS2
-       la.abs          t0, tlb_do_page_fault_1
+       la_abs          t0, tlb_do_page_fault_1
        jr              t0
 SYM_FUNC_END(handle_tlb_modify)
 
index eb2675642f9f455ff2b7aa635ccc699980e205a9..90da899c06a194989ee4c0044f7cc908131090e7 100644 (file)
@@ -78,9 +78,8 @@ SYM_INNER_LABEL(loongarch_wakeup_start, SYM_L_GLOBAL)
        li.d            t0, CSR_DMW1_INIT       # CA, PLV0
        csrwr           t0, LOONGARCH_CSR_DMWIN1
 
-       la.abs          t0, 0f
-       jr              t0
-0:
+       JUMP_VIRT_ADDR  t0, t1
+
        la.pcrel        t0, acpi_saved_sp
        ld.d            sp, t0, 0
        SETUP_WAKEUP
index 3a2bb2e8fdad47b44f3ff442325d3d7582eaea4b..fbff1cea62caac23c0463270f320394fabb235b2 100644 (file)
@@ -326,16 +326,16 @@ void __init setup_arch(char **cmdline_p)
                panic("No configuration setup");
        }
 
-#ifdef CONFIG_BLK_DEV_INITRD
-       if (m68k_ramdisk.size) {
+       if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && m68k_ramdisk.size)
                memblock_reserve(m68k_ramdisk.addr, m68k_ramdisk.size);
+
+       paging_init();
+
+       if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && m68k_ramdisk.size) {
                initrd_start = (unsigned long)phys_to_virt(m68k_ramdisk.addr);
                initrd_end = initrd_start + m68k_ramdisk.size;
                pr_info("initrd: %08lx - %08lx\n", initrd_start, initrd_end);
        }
-#endif
-
-       paging_init();
 
 #ifdef CONFIG_NATFEAT
        nf_init();
index 5c8cba0efc63e580d924d6ad667220d927dbb637..a700807c9b6d9999f188c215edb872ba1724c3d8 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/init.h>
 #include <linux/ptrace.h>
 #include <linux/kallsyms.h>
+#include <linux/extable.h>
 
 #include <asm/setup.h>
 #include <asm/fpu.h>
@@ -545,7 +546,8 @@ static inline void bus_error030 (struct frame *fp)
                        errorcode |= 2;
 
                if (mmusr & (MMU_I | MMU_WP)) {
-                       if (ssw & 4) {
+                       /* We might have an exception table for this PC */
+                       if (ssw & 4 && !search_exception_tables(fp->ptregs.pc)) {
                                pr_err("Data %s fault at %#010lx in %s (pc=%#lx)\n",
                                       ssw & RW ? "read" : "write",
                                       fp->un.fmtb.daddr,
index 4d2837eb3e2a3eea5b5523c3f9e79b8e11be580a..228128e45c673b0b6dea3ed8a4deb63afe182985 100644 (file)
@@ -138,8 +138,11 @@ good_area:
        fault = handle_mm_fault(vma, address, flags, regs);
        pr_debug("handle_mm_fault returns %x\n", fault);
 
-       if (fault_signal_pending(fault, regs))
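+       /* In kernel mode, don't just return: take the no_context (fixup) path */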
+       if (fault_signal_pending(fault, regs)) {
+               if (!user_mode(regs))
+                       goto no_context;
                return 0;
+       }
 
        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
index 2a375637e0077852ed86abf7b9310bc872f345b4..9113012240789469e20f7360583e861d4d3c3c65 100644 (file)
@@ -437,7 +437,7 @@ void __init paging_init(void)
        }
 
        min_addr = m68k_memory[0].addr;
-       max_addr = min_addr + m68k_memory[0].size;
+       max_addr = min_addr + m68k_memory[0].size - 1;
        memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0,
                          MEMBLOCK_NONE);
        for (i = 1; i < m68k_num_memory;) {
@@ -452,21 +452,21 @@ void __init paging_init(void)
                }
                memblock_add_node(m68k_memory[i].addr, m68k_memory[i].size, i,
                                  MEMBLOCK_NONE);
-               addr = m68k_memory[i].addr + m68k_memory[i].size;
+               addr = m68k_memory[i].addr + m68k_memory[i].size - 1;
                if (addr > max_addr)
                        max_addr = addr;
                i++;
        }
        m68k_memoffset = min_addr - PAGE_OFFSET;
-       m68k_virt_to_node_shift = fls(max_addr - min_addr - 1) - 6;
+       m68k_virt_to_node_shift = fls(max_addr - min_addr) - 6;
 
        module_fixup(NULL, __start_fixup, __stop_fixup);
        flush_icache();
 
-       high_memory = phys_to_virt(max_addr);
+       high_memory = phys_to_virt(max_addr) + 1;
 
        min_low_pfn = availmem >> PAGE_SHIFT;
-       max_pfn = max_low_pfn = max_addr >> PAGE_SHIFT;
+       max_pfn = max_low_pfn = (max_addr >> PAGE_SHIFT) + 1;
 
        /* Reserve kernel text/data/bss and the memory allocated in head.S */
        memblock_reserve(m68k_memory[0].addr, availmem - m68k_memory[0].addr);
index 5c40c3ebe52f770522b7e98b3c3ca327aec1d94b..687714db6f4d0c2c9909613df5edf973a8a532c1 100644 (file)
@@ -219,8 +219,11 @@ good_area:
         */
        fault = handle_mm_fault(vma, address, flags, regs);
 
-       if (fault_signal_pending(fault, regs))
+       if (fault_signal_pending(fault, regs)) {
+               if (!user_mode(regs))
+                       bad_page_fault(regs, address, SIGBUS);
                return;
+       }
 
        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
index 37072e15b263403d19a940d044cb7e2ade6c8133..e2f3ca73f40d6542fc711cba3d502a06f284ad23 100644 (file)
@@ -610,7 +610,6 @@ config RALINK
        select DMA_NONCOHERENT
        select IRQ_MIPS_CPU
        select USE_OF
-       select SYS_HAS_CPU_MIPS32_R1
        select SYS_HAS_CPU_MIPS32_R2
        select SYS_SUPPORTS_32BIT_KERNEL
        select SYS_SUPPORTS_LITTLE_ENDIAN
@@ -1080,11 +1079,6 @@ config FW_CFE
 config ARCH_SUPPORTS_UPROBES
        bool
 
-config DMA_PERDEV_COHERENT
-       bool
-       select ARCH_HAS_SETUP_DMA_OPS
-       select DMA_NONCOHERENT
-
 config DMA_NONCOHERENT
        bool
        #
@@ -3206,6 +3200,10 @@ config CC_HAS_MNO_BRANCH_LIKELY
        def_bool y
        depends on $(cc-option,-mno-branch-likely)
 
+# https://github.com/llvm/llvm-project/issues/61045
+config CC_HAS_BROKEN_INLINE_COMPAT_BRANCH
+       def_bool y if CC_IS_CLANG
+
 menu "Power management options"
 
 config ARCH_HIBERNATION_POSSIBLE
index 290d47fbcfbb79bdadf44077a3cd37d2283076f2..7caed0d14f11a68de7f309ba83d4990ebc5e5e37 100644 (file)
                                             "250m", "270m";
                };
 
-               wdt: wdt@100 {
+               wdt: watchdog@100 {
                        compatible = "mediatek,mt7621-wdt";
                        reg = <0x100 0x100>;
+                       mediatek,sysctl = <&sysc>;
                };
 
                gpio: gpio@600 {
index 89a1511d2ee47126ca5121b1cfccd6d749ab4afb..edf9634aa8ee1d1b904252c66895ef5dbce7ec2c 100644 (file)
@@ -284,6 +284,7 @@ CONFIG_IXGB=m
 CONFIG_SKGE=m
 CONFIG_SKY2=m
 CONFIG_MYRI10GE=m
+CONFIG_FEALNX=m
 CONFIG_NATSEMI=m
 CONFIG_NS83820=m
 CONFIG_S2IO=m
index 336ac9b6523502e197e05ccfb3772d9c21733194..2e99450f42284b60168e525ee41b0a7b6c7b52ed 100644 (file)
@@ -336,7 +336,7 @@ symbol              =       value
  */
 #ifdef CONFIG_WAR_R10000_LLSC
 # define SC_BEQZ       beqzl
-#elif MIPS_ISA_REV >= 6
+#elif !defined(CONFIG_CC_HAS_BROKEN_INLINE_COMPAT_BRANCH) && MIPS_ISA_REV >= 6
 # define SC_BEQZ       beqzc
 #else
 # define SC_BEQZ       beqz
index 7e5b9411faee0794f687340aedd8e0eeb212c610..22a572b70fe31c1d9f266235b5c94a504ec210e7 100644 (file)
@@ -7,6 +7,8 @@
 #ifndef __MIPS_ASM_SMP_CPS_H__
 #define __MIPS_ASM_SMP_CPS_H__
 
+#define CPS_ENTRY_PATCH_INSNS  6
+
 #ifndef __ASSEMBLY__
 
 struct vpe_boot_config {
@@ -30,6 +32,8 @@ extern void mips_cps_boot_vpes(struct core_boot_config *cfg, unsigned vpe);
 extern void mips_cps_pm_save(void);
 extern void mips_cps_pm_restore(void);
 
+extern void *mips_cps_core_entry_patch_end;
+
 #ifdef CONFIG_MIPS_CPS
 
 extern bool mips_cps_smp_in_use(void);
index 32ec67c9ab67bbdcc14f3ed3c36792b6a17e060d..368e8475870f0810be1a707266c357c06b51ffd1 100644 (file)
@@ -200,7 +200,7 @@ int c0_compare_int_usable(void)
         */
        if (c0_compare_int_pending()) {
                cnt = read_c0_count();
-               write_c0_compare(cnt);
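+               /* Writing Compare acks the interrupt; cnt - 1 keeps the new match far off */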
+               write_c0_compare(cnt - 1);
                back_to_back_c0_hazard();
                while (read_c0_count() < (cnt  + COMPARE_INT_SEEN_TICKS))
                        if (!c0_compare_int_pending())
@@ -228,7 +228,7 @@ int c0_compare_int_usable(void)
        if (!c0_compare_int_pending())
                return 0;
        cnt = read_c0_count();
-       write_c0_compare(cnt);
+       write_c0_compare(cnt - 1);
        back_to_back_c0_hazard();
        while (read_c0_count() < (cnt + COMPARE_INT_SEEN_TICKS))
                if (!c0_compare_int_pending())
index 97534324014876c65e3314776924f3fb37b7051a..8ef492da827f847ae109dd22820f62af3a357529 100644 (file)
@@ -13,6 +13,7 @@
 #include <asm/mipsregs.h>
 #include <asm/mipsmtregs.h>
 #include <asm/pm.h>
+#include <asm/smp-cps.h>
 
 #define GCR_CPC_BASE_OFS       0x0088
 #define GCR_CL_COHERENCE_OFS   0x2008
         nop
        .endm
 
-       /* Calculate an uncached address for the CM GCRs */
-       .macro  cmgcrb  dest
-       .set    push
-       .set    noat
-       MFC0    $1, CP0_CMGCRBASE
-       PTR_SLL $1, $1, 4
-       PTR_LI  \dest, UNCAC_BASE
-       PTR_ADDU \dest, \dest, $1
-       .set    pop
-       .endm
 
 .balign 0x1000
 
 LEAF(mips_cps_core_entry)
        /*
-        * These first 4 bytes will be patched by cps_smp_setup to load the
-        * CCA to use into register s0.
+        * These first several instructions will be patched by cps_smp_setup to
+        * load the CCA to use into register s0 and the GCR base address into
+        * register s1.
         */
-       .word   0
+       .rept   CPS_ENTRY_PATCH_INSNS
+       nop
+       .endr
+
+       .global mips_cps_core_entry_patch_end
+mips_cps_core_entry_patch_end:
 
        /* Check whether we're here due to an NMI */
        mfc0    k0, CP0_STATUS
@@ -121,8 +117,7 @@ not_nmi:
        mtc0    t0, CP0_STATUS
 
        /* Skip cache & coherence setup if we're already coherent */
-       cmgcrb  v1
-       lw      s7, GCR_CL_COHERENCE_OFS(v1)
+       lw      s7, GCR_CL_COHERENCE_OFS(s1)
        bnez    s7, 1f
         nop
 
@@ -132,7 +127,7 @@ not_nmi:
 
        /* Enter the coherent domain */
        li      t0, 0xff
-       sw      t0, GCR_CL_COHERENCE_OFS(v1)
+       sw      t0, GCR_CL_COHERENCE_OFS(s1)
        ehb
 
        /* Set Kseg0 CCA to that in s0 */
@@ -305,8 +300,7 @@ LEAF(mips_cps_core_init)
  */
 LEAF(mips_cps_get_bootcfg)
        /* Calculate a pointer to this cores struct core_boot_config */
-       cmgcrb  t0
-       lw      t0, GCR_CL_ID_OFS(t0)
+       lw      t0, GCR_CL_ID_OFS(s1)
        li      t1, COREBOOTCFG_SIZE
        mul     t0, t0, t1
        PTR_LA  t1, mips_cps_core_bootcfg
@@ -366,8 +360,9 @@ LEAF(mips_cps_boot_vpes)
        has_vp  t0, 5f
 
        /* Find base address of CPC */
-       cmgcrb  t3
-       PTR_L   t1, GCR_CPC_BASE_OFS(t3)
+       PTR_LA  t1, mips_gcr_base
+       PTR_L   t1, 0(t1)
+       PTR_L   t1, GCR_CPC_BASE_OFS(t1)
        PTR_LI  t2, ~0x7fff
        and     t1, t1, t2
        PTR_LI  t2, UNCAC_BASE
index f2df0cae1b4d9f6f4d66a95d00bf316592a6c52c..4fc288bb85b96a054e33c6e0e835c60911f75936 100644 (file)
@@ -162,6 +162,8 @@ static void __init cps_prepare_cpus(unsigned int max_cpus)
         */
        entry_code = (u32 *)&mips_cps_core_entry;
        uasm_i_addiu(&entry_code, 16, 0, cca);
+       UASM_i_LA(&entry_code, 17, (long)mips_gcr_base);
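+       /* uasm register numbers 16/17 are s0/s1, as the patched stub expects */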
+       BUG_ON((void *)entry_code > (void *)&mips_cps_core_entry_patch_end);
        blast_dcache_range((unsigned long)&mips_cps_core_entry,
                           (unsigned long)entry_code);
        bc_wback_inv((unsigned long)&mips_cps_core_entry,
index 06031796c87bddee3d13ddc1e1eb9dce943c09fb..83e61e147b902a99ee55f074d2c163efc091c60f 100644 (file)
@@ -54,7 +54,7 @@ choice
                select HAVE_PCI
                select PCI_DRIVERS_GENERIC
                select SOC_BUS
-               select PINCTRL_MT7621
+               select PINCTRL
 
                help
                  The MT7621 system-on-a-chip includes an 880 MHz MIPS1004Kc
index edaca0a6c1c1ca5bc467237e4235bdcd9dba1748..ca64eccea5511d223add396c5f92d486703858b0 100644 (file)
@@ -136,8 +136,11 @@ good_area:
         */
        fault = handle_mm_fault(vma, address, flags, regs);
 
-       if (fault_signal_pending(fault, regs))
+       if (fault_signal_pending(fault, regs)) {
+               if (!user_mode(regs))
+                       goto no_context;
                return;
+       }
 
        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
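
This hunk and the similar page-fault hunks that follow (some set an error
message or call an arch-specific helper instead) apply one pattern, sketched
here with the generic helper names used above:

        /* Sketch of the shared fix: a fatal signal that interrupts a page
         * fault taken in kernel mode must go through the kernel's
         * exception-fixup path (no_context) rather than returning to the
         * faulting instruction with the fault unresolved. */
        fault = handle_mm_fault(vma, address, flags, regs);
        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
                        goto no_context;        /* kernel fault: run fixups */
                return;                         /* user fault: signal pending */
        }
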
index b4762d66e9efe83de2487214053f45d2f0ebf664..6734fee3134f4fc285b575d151f2f1bf35133c7c 100644 (file)
@@ -162,8 +162,11 @@ good_area:
 
        fault = handle_mm_fault(vma, address, flags, regs);
 
-       if (fault_signal_pending(fault, regs))
+       if (fault_signal_pending(fault, regs)) {
+               if (!user_mode(regs))
+                       goto no_context;
                return;
+       }
 
        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
index 869204e97ec9d7926e948e3274f7b4d96eb56181..6941fdbf251738c8158e4da736d21033379a4766 100644 (file)
@@ -308,8 +308,13 @@ good_area:
 
        fault = handle_mm_fault(vma, address, flags, regs);
 
-       if (fault_signal_pending(fault, regs))
+       if (fault_signal_pending(fault, regs)) {
+               if (!user_mode(regs)) {
+                       msg = "Page fault: fault signal on kernel memory";
+                       goto no_context;
+               }
                return;
+       }
 
        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
index 2c9cdf1d8761204f18787566fb778ce77ac9931b..a6c4407d3ec835a972b78f15fb3033f7492b2d60 100644 (file)
@@ -236,7 +236,7 @@ config PPC
        select HAVE_KPROBES
        select HAVE_KPROBES_ON_FTRACE
        select HAVE_KRETPROBES
-       select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
+       select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if HAVE_OBJTOOL_MCOUNT
        select HAVE_LIVEPATCH                   if HAVE_DYNAMIC_FTRACE_WITH_REGS
        select HAVE_MOD_ARCH_SPECIFIC
        select HAVE_NMI                         if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
index 1102582779599bf11e224a3d95f58f8a4ffc978a..f73c98be56c8fa4b2c2cbe202b45f60692779d1e 100644 (file)
@@ -461,6 +461,7 @@ CONFIG_MV643XX_ETH=m
 CONFIG_SKGE=m
 CONFIG_SKY2=m
 CONFIG_MYRI10GE=m
+CONFIG_FEALNX=m
 CONFIG_NATSEMI=m
 CONFIG_NS83820=m
 CONFIG_PCMCIA_AXNET=m
index f128c7cf9c1dad241316ab91cfa82fcb275a1f94..ee86753e444ea41edd24a32581e538a91d3f442f 100644 (file)
@@ -124,6 +124,7 @@ SECTIONS
                 * included with the main text sections, so put it by itself.
                 */
                *(.sfpr);
+               *(.text.asan.* .text.tsan.*)
                MEM_KEEP(init.text)
                MEM_KEEP(exit.text)
        } :text
index 73c620c2a3a166b2b5c6b4d460847b01edf27541..e753a6bd48881026339abd56fe92c3944a0eb124 100644 (file)
@@ -1275,7 +1275,7 @@ static int xmon_batch_next_cpu(void)
        while (!cpumask_empty(&xmon_batch_cpus)) {
                cpu = cpumask_next_wrap(smp_processor_id(), &xmon_batch_cpus,
                                        xmon_batch_start_cpu, true);
-               if (cpu == nr_cpumask_bits)
+               if (cpu >= nr_cpu_ids)
                        break;
                if (xmon_batch_start_cpu == -1)
                        xmon_batch_start_cpu = cpu;
diff --git a/arch/riscv/include/uapi/asm/setup.h b/arch/riscv/include/uapi/asm/setup.h
new file mode 100644 (file)
index 0000000..66b13a5
--- /dev/null
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+
+#ifndef _UAPI_ASM_RISCV_SETUP_H
+#define _UAPI_ASM_RISCV_SETUP_H
+
+#define COMMAND_LINE_SIZE      1024
+
+#endif /* _UAPI_ASM_RISCV_SETUP_H */
index 986ab23fe7877d267f3527b29169341c431feca2..c42a8412547f6147d2ebb1b2fe0093b9b2e276bf 100644 (file)
@@ -40,7 +40,9 @@ SYM_FUNC_START(strcmp)
        ret
 
 /*
- * Variant of strcmp using the ZBB extension if available
+ * Variant of strcmp using the ZBB extension if available.
+ * The code was published in Appendix A of the
+ * bitmanip manual.
  */
 #ifdef CONFIG_RISCV_ISA_ZBB
 strcmp_zbb:
@@ -57,7 +59,7 @@ strcmp_zbb:
         *   a1 - string2
         *
         * Clobbers
-        *   t0, t1, t2, t3, t4, t5
+        *   t0, t1, t2, t3, t4
         */
 
        or      t2, a0, a1
index 8345ceeee3f659d06b46f0563a0689ee4a66de84..15bb8f3aa959e017952fe6febc92e8eff3e12886 100644 (file)
@@ -96,7 +96,7 @@ strlen_zbb:
         * of valid bytes in this chunk.
         */
        srli    a0, t1, 3
-       bgtu    t3, a0, 3f
+       bgtu    t3, a0, 2f
 
        /* Prepare for the word comparison loop. */
        addi    t2, t0, SZREG
@@ -112,20 +112,20 @@ strlen_zbb:
        addi    t0, t0, SZREG
        orc.b   t1, t1
        beq     t1, t3, 1b
-2:
+
        not     t1, t1
        CZ      t1, t1
+       srli    t1, t1, 3
 
-       /* Get number of processed words.  */
+       /* Get number of processed bytes. */
        sub     t2, t0, t2
 
        /* Add number of characters in the first word.  */
        add     a0, a0, t2
-       srli    t1, t1, 3
 
        /* Add number of characters in the last word.  */
        add     a0, a0, t1
-3:
+2:
        ret
 
 .option pop
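
A hedged C model of the tail computation touched above (orc_b() here is a
software stand-in for the Zbb orc.b instruction; little-endian layout is
assumed, matching the CZ = ctz case, and the word is assumed to contain a
NUL byte):

#include <stdint.h>

/* Software model of Zbb orc.b: each byte becomes 0xff if nonzero, else 0x00. */
static unsigned long orc_b(unsigned long x)
{
        unsigned long r = 0;
        unsigned int i;

        for (i = 0; i < sizeof(x); i++)
                if ((x >> (8 * i)) & 0xff)
                        r |= 0xffUL << (8 * i);
        return r;
}

/* Valid bytes before the first NUL in a word known to hold one: invert the
 * orc.b mask, count trailing zero bits, then shift right by 3 to convert
 * bits to bytes (the srli the patch groups with the CZ instruction). */
static unsigned int bytes_before_nul(unsigned long word)
{
        return (unsigned int)(__builtin_ctzl(~orc_b(word)) >> 3);
}
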
index ee49595075bee1681b642a8ae3d2cba0b0267058..7ac2f667285abfffa141277bb698207879b2a230 100644 (file)
@@ -70,7 +70,7 @@ strncmp_zbb:
        li      t5, -1
        and     t2, t2, SZREG-1
        add     t4, a0, a2
-       bnez    t2, 4f
+       bnez    t2, 3f
 
        /* Adjust limit for fast-path.  */
        andi    t6, t4, -SZREG
@@ -78,11 +78,13 @@ strncmp_zbb:
        /* Main loop for aligned string.  */
        .p2align 3
 1:
-       bgt     a0, t6, 3f
+       bge     a0, t6, 3f
        REG_L   t0, 0(a0)
        REG_L   t1, 0(a1)
        orc.b   t3, t0
        bne     t3, t5, 2f
+       orc.b   t3, t1
+       bne     t3, t5, 2f
        addi    a0, a0, SZREG
        addi    a1, a1, SZREG
        beq     t0, t1, 1b
@@ -114,23 +116,21 @@ strncmp_zbb:
        ret
 
        /* Simple loop for misaligned strings.  */
-3:
-       /* Restore limit for slow-path.  */
        .p2align 3
-4:
-       bge     a0, t4, 6f
+3:
+       bge     a0, t4, 5f
        lbu     t0, 0(a0)
        lbu     t1, 0(a1)
        addi    a0, a0, 1
        addi    a1, a1, 1
-       bne     t0, t1, 5f
-       bnez    t0, 4b
+       bne     t0, t1, 4f
+       bnez    t0, 3b
 
-5:
+4:
        sub     a0, t0, t1
        ret
 
-6:
+5:
        li      a0, 0
        ret
 
index eb0774d9c03b1539254adbe3e692c63c2e850ab6..460f785f6e09cd8fb1441f2fff49288f5256181e 100644 (file)
@@ -326,8 +326,11 @@ good_area:
         * signal first. We do not need to release the mmap_lock because it
         * would already be released in __lock_page_or_retry in mm/filemap.c.
         */
-       if (fault_signal_pending(fault, regs))
+       if (fault_signal_pending(fault, regs)) {
+               if (!user_mode(regs))
+                       no_context(regs, addr);
                return;
+       }
 
        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
index 078cd1a773a335af1c1049054895289539fabb07..9809c74e1240600de4ce0465c1f70e5666029746 100644 (file)
@@ -125,8 +125,8 @@ config S390
        select ARCH_WANTS_DYNAMIC_TASK_STRUCT
        select ARCH_WANTS_NO_INSTR
        select ARCH_WANT_DEFAULT_BPF_JIT
-       select ARCH_WANT_IPC_PARSE_VERSION
        select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+       select ARCH_WANT_IPC_PARSE_VERSION
        select BUILDTIME_TABLE_SORT
        select CLONE_BACKWARDS2
        select DMA_OPS if PCI
@@ -187,7 +187,6 @@ config S390
        select HAVE_KPROBES
        select HAVE_KPROBES_ON_FTRACE
        select HAVE_KRETPROBES
-       select HAVE_RETHOOK
        select HAVE_KVM
        select HAVE_LIVEPATCH
        select HAVE_MEMBLOCK_PHYS_MAP
@@ -200,6 +199,7 @@ config S390
        select HAVE_PERF_USER_STACK_DUMP
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_RELIABLE_STACKTRACE
+       select HAVE_RETHOOK
        select HAVE_RSEQ
        select HAVE_SAMPLE_FTRACE_DIRECT
        select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
@@ -210,9 +210,9 @@ config S390
        select HAVE_VIRT_CPU_ACCOUNTING_IDLE
        select IOMMU_HELPER             if PCI
        select IOMMU_SUPPORT            if PCI
+       select MMU_GATHER_MERGE_VMAS
        select MMU_GATHER_NO_GATHER
        select MMU_GATHER_RCU_TABLE_FREE
-       select MMU_GATHER_MERGE_VMAS
        select MODULES_USE_ELF_RELA
        select NEED_DMA_MAP_STATE       if PCI
        select NEED_PER_CPU_EMBED_FIRST_CHUNK
index 47a397da0498e3b0912419c3636c2c7be46d94ca..cebd4ca16916496e513ba73d47ddac697fca97b8 100644 (file)
@@ -52,6 +52,8 @@ targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all)
 OBJECTS := $(addprefix $(obj)/,$(obj-y))
 OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all))
 
+clean-files += vmlinux.map
+
 quiet_cmd_section_cmp = SECTCMP $*
 define cmd_section_cmp
        s1=`$(OBJDUMP) -t -j "$*" "$<" | sort | \
@@ -71,7 +73,7 @@ $(obj)/bzImage: $(obj)/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.b
 $(obj)/section_cmp%: vmlinux $(obj)/vmlinux FORCE
        $(call if_changed,section_cmp)
 
-LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup --build-id=sha1 -T
+LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup $(if $(CONFIG_VMLINUX_MAP),-Map=$(obj)/vmlinux.map) --build-id=sha1 -T
 $(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS_ALL) FORCE
        $(call if_changed,ld)
 
index 57a2d6518d2725add49f1c3889842955c4307380..c699f251a4648ba6beb01ed7be789d57de75bd1f 100644 (file)
@@ -49,6 +49,19 @@ struct ap_queue_status {
        unsigned int _pad2              : 16;
 };
 
+/*
+ * AP queue status register union: provides access to the
+ * full reg1 register value, with the AP queue status in
+ * the lower 32 bits.
+ */
+union ap_queue_status_reg {
+       unsigned long value;
+       struct {
+               u32 _pad;
+               struct ap_queue_status status;
+       };
+};
+
 /**
 * ap_instructions_available() - Test if AP instructions are available.
  *
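
The intended use, as a minimal sketch (s390 is big-endian, so the second
32-bit member of the anonymous struct overlays the low half of the 64-bit
register; the wrappers below read gr1 into .value and return the typed
.status):

static inline struct ap_queue_status status_from_gr1(unsigned long gr1)
{
        union ap_queue_status_reg reg1;

        reg1.value = gr1;       /* full 64-bit register contents */
        return reg1.status;     /* AP queue status from the low 32 bits */
}
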
@@ -82,7 +95,7 @@ static inline bool ap_instructions_available(void)
  */
 static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info)
 {
-       struct ap_queue_status reg1;
+       union ap_queue_status_reg reg1;
        unsigned long reg2;
 
        asm volatile(
@@ -91,12 +104,12 @@ static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info)
                "       .insn   rre,0xb2af0000,0,0\n"   /* PQAP(TAPQ) */
                "       lgr     %[reg1],1\n"            /* gr1 (status) into reg1 */
                "       lgr     %[reg2],2\n"            /* gr2 into reg2 */
-               : [reg1] "=&d" (reg1), [reg2] "=&d" (reg2)
+               : [reg1] "=&d" (reg1.value), [reg2] "=&d" (reg2)
                : [qid] "d" (qid)
                : "cc", "0", "1", "2");
        if (info)
                *info = reg2;
-       return reg1;
+       return reg1.status;
 }
 
 /**
@@ -125,16 +138,16 @@ static inline struct ap_queue_status ap_test_queue(ap_qid_t qid,
 static inline struct ap_queue_status ap_rapq(ap_qid_t qid)
 {
        unsigned long reg0 = qid | (1UL << 24);  /* fc 1UL is RAPQ */
-       struct ap_queue_status reg1;
+       union ap_queue_status_reg reg1;
 
        asm volatile(
                "       lgr     0,%[reg0]\n"            /* qid arg into gr0 */
                "       .insn   rre,0xb2af0000,0,0\n"   /* PQAP(RAPQ) */
                "       lgr     %[reg1],1\n"            /* gr1 (status) into reg1 */
-               : [reg1] "=&d" (reg1)
+               : [reg1] "=&d" (reg1.value)
                : [reg0] "d" (reg0)
                : "cc", "0", "1");
-       return reg1;
+       return reg1.status;
 }
 
 /**
@@ -146,16 +159,16 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid)
 static inline struct ap_queue_status ap_zapq(ap_qid_t qid)
 {
        unsigned long reg0 = qid | (2UL << 24);  /* fc 2UL is ZAPQ */
-       struct ap_queue_status reg1;
+       union ap_queue_status_reg reg1;
 
        asm volatile(
                "       lgr     0,%[reg0]\n"            /* qid arg into gr0 */
                "       .insn   rre,0xb2af0000,0,0\n"   /* PQAP(ZAPQ) */
                "       lgr     %[reg1],1\n"            /* gr1 (status) into reg1 */
-               : [reg1] "=&d" (reg1)
+               : [reg1] "=&d" (reg1.value)
                : [reg0] "d" (reg0)
                : "cc", "0", "1");
-       return reg1;
+       return reg1.status;
 }
 
 /**
@@ -209,18 +222,21 @@ static inline int ap_qci(struct ap_config_info *config)
  * parameter to the PQAP(AQIC) instruction. For details please
  * see the AR documentation.
  */
-struct ap_qirq_ctrl {
-       unsigned int _res1 : 8;
-       unsigned int zone  : 8; /* zone info */
-       unsigned int ir    : 1; /* ir flag: enable (1) or disable (0) irq */
-       unsigned int _res2 : 4;
-       unsigned int gisc  : 3; /* guest isc field */
-       unsigned int _res3 : 6;
-       unsigned int gf    : 2; /* gisa format */
-       unsigned int _res4 : 1;
-       unsigned int gisa  : 27;        /* gisa origin */
-       unsigned int _res5 : 1;
-       unsigned int isc   : 3; /* irq sub class */
+union ap_qirq_ctrl {
+       unsigned long value;
+       struct {
+               unsigned int       : 8;
+               unsigned int zone  : 8; /* zone info */
+               unsigned int ir    : 1; /* ir flag: enable (1) or disable (0) irq */
+               unsigned int       : 4;
+               unsigned int gisc  : 3; /* guest isc field */
+               unsigned int       : 6;
+               unsigned int gf    : 2; /* gisa format */
+               unsigned int       : 1;
+               unsigned int gisa  : 27;        /* gisa origin */
+               unsigned int       : 1;
+               unsigned int isc   : 3; /* irq sub class */
+       };
 };
 
 /**
@@ -232,21 +248,14 @@ struct ap_qirq_ctrl {
  * Returns AP queue status.
  */
 static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
-                                            struct ap_qirq_ctrl qirqctrl,
+                                            union ap_qirq_ctrl qirqctrl,
                                             phys_addr_t pa_ind)
 {
        unsigned long reg0 = qid | (3UL << 24);  /* fc 3UL is AQIC */
-       union {
-               unsigned long value;
-               struct ap_qirq_ctrl qirqctrl;
-               struct {
-                       u32 _pad;
-                       struct ap_queue_status status;
-               };
-       } reg1;
+       union ap_queue_status_reg reg1;
        unsigned long reg2 = pa_ind;
 
-       reg1.qirqctrl = qirqctrl;
+       reg1.value = qirqctrl.value;
 
        asm volatile(
                "       lgr     0,%[reg0]\n"            /* qid param into gr0 */
@@ -254,7 +263,7 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
                "       lgr     2,%[reg2]\n"            /* ni addr into gr2 */
                "       .insn   rre,0xb2af0000,0,0\n"   /* PQAP(AQIC) */
                "       lgr     %[reg1],1\n"            /* gr1 (status) into reg1 */
-               : [reg1] "+&d" (reg1)
+               : [reg1] "+&d" (reg1.value)
                : [reg0] "d" (reg0), [reg2] "d" (reg2)
                : "cc", "memory", "0", "1", "2");
 
@@ -291,13 +300,7 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
                                             union ap_qact_ap_info *apinfo)
 {
        unsigned long reg0 = qid | (5UL << 24) | ((ifbit & 0x01) << 22);
-       union {
-               unsigned long value;
-               struct {
-                       u32 _pad;
-                       struct ap_queue_status status;
-               };
-       } reg1;
+       union ap_queue_status_reg reg1;
        unsigned long reg2;
 
        reg1.value = apinfo->val;
@@ -308,7 +311,7 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
                "       .insn   rre,0xb2af0000,0,0\n"   /* PQAP(QACT) */
                "       lgr     %[reg1],1\n"            /* gr1 (status) into reg1 */
                "       lgr     %[reg2],2\n"            /* qact out info into reg2 */
-               : [reg1] "+&d" (reg1), [reg2] "=&d" (reg2)
+               : [reg1] "+&d" (reg1.value), [reg2] "=&d" (reg2)
                : [reg0] "d" (reg0)
                : "cc", "0", "1", "2");
        apinfo->val = reg2;
@@ -333,7 +336,7 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid,
 {
        unsigned long reg0 = qid | 0x40000000UL;  /* 0x4... is last msg part */
        union register_pair nqap_r1, nqap_r2;
-       struct ap_queue_status reg1;
+       union ap_queue_status_reg reg1;
 
        nqap_r1.even = (unsigned int)(psmid >> 32);
        nqap_r1.odd  = psmid & 0xffffffff;
@@ -345,11 +348,11 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid,
                "0:     .insn   rre,0xb2ad0000,%[nqap_r1],%[nqap_r2]\n"
                "       brc     2,0b\n"       /* handle partial completion */
                "       lgr     %[reg1],1\n"  /* gr1 (status) into reg1 */
-               : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1),
+               : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value),
                  [nqap_r2] "+&d" (nqap_r2.pair)
                : [nqap_r1] "d" (nqap_r1.pair)
                : "cc", "memory", "0", "1");
-       return reg1;
+       return reg1.status;
 }
 
 /**
@@ -389,7 +392,7 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
                                             unsigned long *resgr0)
 {
        unsigned long reg0 = resgr0 && *resgr0 ? *resgr0 : qid | 0x80000000UL;
-       struct ap_queue_status reg1;
+       union ap_queue_status_reg reg1;
        unsigned long reg2;
        union register_pair rp1, rp2;
 
@@ -408,8 +411,9 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
                "2:     lgr     %[reg0],0\n"   /* gr0 (qid + info) into reg0 */
                "       lgr     %[reg1],1\n"   /* gr1 (status) into reg1 */
                "       lgr     %[reg2],2\n"   /* gr2 (res length) into reg2 */
-               : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1), [reg2] "=&d" (reg2),
-                 [rp1] "+&d" (rp1.pair), [rp2] "+&d" (rp2.pair)
+               : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value),
+                 [reg2] "=&d" (reg2), [rp1] "+&d" (rp1.pair),
+                 [rp2] "+&d" (rp2.pair)
                :
                : "cc", "memory", "0", "1", "2");
 
@@ -421,7 +425,7 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
                 * Signal the caller that this dqap is only partially received
                 * with a special status response code 0xFF and *resgr0 updated
                 */
-               reg1.response_code = 0xFF;
+               reg1.status.response_code = 0xFF;
                if (resgr0)
                        *resgr0 = reg0;
        } else {
@@ -430,7 +434,7 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
                        *resgr0 = 0;
        }
 
-       return reg1;
+       return reg1.status;
 }
 
 /*
index af1cd3a6f4060666aee6373891b40ff4e3ece46b..227466ce9e4163cf733fac65df2677c6c6dcb53e 100644 (file)
@@ -101,9 +101,8 @@ void nmi_alloc_mcesa_early(u64 *mcesad);
 int nmi_alloc_mcesa(u64 *mcesad);
 void nmi_free_mcesa(u64 *mcesad);
 
-void s390_handle_mcck(struct pt_regs *regs);
-void __s390_handle_mcck(void);
-int s390_do_machine_check(struct pt_regs *regs);
+void s390_handle_mcck(void);
+void s390_do_machine_check(struct pt_regs *regs);
 
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/rwonce.h b/arch/s390/include/asm/rwonce.h
new file mode 100644 (file)
index 0000000..91fc245
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_S390_RWONCE_H
+#define __ASM_S390_RWONCE_H
+
+#include <linux/compiler_types.h>
+
+/*
+ * Use READ_ONCE_ALIGNED_128() for concurrent (atomic) reads of 128-bit
+ * blocks. Note that x must be 128-bit aligned, otherwise a specification
+ * exception is generated.
+ */
+#define READ_ONCE_ALIGNED_128(x)                       \
+({                                                     \
+       union {                                         \
+               typeof(x) __x;                          \
+               __uint128_t val;                        \
+       } __u;                                          \
+                                                       \
+       BUILD_BUG_ON(sizeof(x) != 16);                  \
+       asm volatile(                                   \
+               "       lpq     %[val],%[_x]\n"         \
+               : [val] "=d" (__u.val)                  \
+               : [_x] "QS" (x)                         \
+               : "memory");                            \
+       __u.__x;                                        \
+})
+
+#include <asm-generic/rwonce.h>
+
+#endif /* __ASM_S390_RWONCE_H */
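
A minimal usage sketch, with type and field names taken from the perf hunks
further below (the caller is responsible for the 16-byte alignment of the
trailer header):

/* Snapshot a 16-byte SDB trailer header atomically before entering the
 * usual compare-double-and-swap retry loop on the copy. */
static inline unsigned __int128 snapshot_trailer_header(struct hws_trailer_entry *te)
{
        return READ_ONCE_ALIGNED_128(te->header.val);
}
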
index 59eba19ae0f2af5bd404c5209c956da5e31d45cf..d26f02495636e66d50cc2ab086aafec9f92286cc 100644 (file)
 
 int __bootdata(is_full_image);
 
+#define decompressor_handled_param(param)                      \
+static int __init ignore_decompressor_param_##param(char *s)   \
+{                                                              \
+       return 0;                                               \
+}                                                              \
+early_param(#param, ignore_decompressor_param_##param)
+
+decompressor_handled_param(mem);
+decompressor_handled_param(vmalloc);
+decompressor_handled_param(dfltcc);
+decompressor_handled_param(noexec);
+decompressor_handled_param(facilities);
+decompressor_handled_param(nokaslr);
+#if IS_ENABLED(CONFIG_KVM)
+decompressor_handled_param(prot_virt);
+#endif
+
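
For illustration, decompressor_handled_param(mem) expands to a stub handler
that silently consumes a parameter the decompressor has already acted on:

static int __init ignore_decompressor_param_mem(char *s)
{
        return 0;       /* "mem=" was handled by the decompressor already */
}
early_param("mem", ignore_decompressor_param_mem);
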
 static void __init reset_tod_clock(void)
 {
        union tod_clock clk;
index c8d8c996093674f9ffd7a2f6cf11ffa8717d6705..76a06f3d367116ad196675455aef001b7140d060 100644 (file)
@@ -562,16 +562,6 @@ ENTRY(mcck_int_handler)
        xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
        lgr     %r2,%r11                # pass pointer to pt_regs
        brasl   %r14,s390_do_machine_check
-       cghi    %r2,0
-       je      .Lmcck_return
-       lg      %r1,__LC_KERNEL_STACK   # switch to kernel stack
-       mvc     STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
-       xc      __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
-       la      %r11,STACK_FRAME_OVERHEAD(%r1)
-       lgr     %r2,%r11
-       lgr     %r15,%r1
-       brasl   %r14,s390_handle_mcck
-.Lmcck_return:
        lctlg   %c1,%c1,__PT_CR1(%r11)
        lmg     %r0,%r10,__PT_R0(%r11)
        mvc     __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
index 5e713f318de3e2735a83a626c0803484d57b29a9..7b41ceecbb253d730115e4e0de25625f46cafa92 100644 (file)
@@ -278,6 +278,7 @@ static void pop_kprobe(struct kprobe_ctlblk *kcb)
 {
        __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
        kcb->kprobe_status = kcb->prev_kprobe.status;
+       kcb->prev_kprobe.kp = NULL;
 }
 NOKPROBE_SYMBOL(pop_kprobe);
 
@@ -402,12 +403,11 @@ static int post_kprobe_handler(struct pt_regs *regs)
        if (!p)
                return 0;
 
+       resume_execution(p, regs);
        if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) {
                kcb->kprobe_status = KPROBE_HIT_SSDONE;
                p->post_handler(p, regs, 0);
        }
-
-       resume_execution(p, regs);
        pop_kprobe(kcb);
        preempt_enable_no_resched();
 
index 5dbf274719a9596937f07ca026cb546977e8a5c9..38ec0487521c42e8aed1895b3e9dc22d85fe6379 100644 (file)
@@ -156,7 +156,7 @@ NOKPROBE_SYMBOL(s390_handle_damage);
  * Main machine check handler function. Will be called with interrupts disabled
  * and machine checks enabled.
  */
-void __s390_handle_mcck(void)
+void s390_handle_mcck(void)
 {
        struct mcck_struct mcck;
 
@@ -192,23 +192,16 @@ void __s390_handle_mcck(void)
        if (mcck.stp_queue)
                stp_queue_work();
        if (mcck.kill_task) {
-               local_irq_enable();
                printk(KERN_EMERG "mcck: Terminating task because of machine "
                       "malfunction (code 0x%016lx).\n", mcck.mcck_code);
                printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
                       current->comm, current->pid);
-               make_task_dead(SIGSEGV);
+               if (is_global_init(current))
+                       panic("mcck: Attempting to kill init!\n");
+               do_send_sig_info(SIGKILL, SEND_SIG_PRIV, current, PIDTYPE_PID);
        }
 }
 
-void noinstr s390_handle_mcck(struct pt_regs *regs)
-{
-       trace_hardirqs_off();
-       pai_kernel_enter(regs);
-       __s390_handle_mcck();
-       pai_kernel_exit(regs);
-       trace_hardirqs_on();
-}
 /*
  * returns 0 if register contents could be validated
  * returns 1 otherwise
@@ -346,8 +339,7 @@ static void notrace s390_backup_mcck_info(struct pt_regs *regs)
        struct sie_page *sie_page;
 
        /* r14 contains the sie block, which was set in sie64a */
-       struct kvm_s390_sie_block *sie_block =
-                       (struct kvm_s390_sie_block *) regs->gprs[14];
+       struct kvm_s390_sie_block *sie_block = phys_to_virt(regs->gprs[14]);
 
        if (sie_block == NULL)
                /* Something's seriously wrong, stop system. */
@@ -374,7 +366,7 @@ NOKPROBE_SYMBOL(s390_backup_mcck_info);
 /*
  * machine check handler.
  */
-int notrace s390_do_machine_check(struct pt_regs *regs)
+void notrace s390_do_machine_check(struct pt_regs *regs)
 {
        static int ipd_count;
        static DEFINE_SPINLOCK(ipd_lock);
@@ -504,16 +496,10 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
        }
        clear_cpu_flag(CIF_MCCK_GUEST);
 
-       if (user_mode(regs) && mcck_pending) {
-               irqentry_nmi_exit(regs, irq_state);
-               return 1;
-       }
-
        if (mcck_pending)
                schedule_mcck_handler();
 
        irqentry_nmi_exit(regs, irq_state);
-       return 0;
 }
 NOKPROBE_SYMBOL(s390_do_machine_check);
 
index 79904a839fb9fa5e918a01fe0804c0bbf43edb4b..e7b867e2f73f8da76687f34aad01604c9386df47 100644 (file)
@@ -1355,8 +1355,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
                num_sdb++;
 
                /* Reset trailer (using compare-double-and-swap) */
-               /* READ_ONCE() 16 byte header */
-               prev.val = __cdsg(&te->header.val, 0, 0);
+               prev.val = READ_ONCE_ALIGNED_128(te->header.val);
                do {
                        old.val = prev.val;
                        new.val = prev.val;
@@ -1558,8 +1557,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
        struct hws_trailer_entry *te;
 
        te = aux_sdb_trailer(aux, alert_index);
-       /* READ_ONCE() 16 byte header */
-       prev.val = __cdsg(&te->header.val, 0, 0);
+       prev.val = READ_ONCE_ALIGNED_128(te->header.val);
        do {
                old.val = prev.val;
                new.val = prev.val;
@@ -1637,8 +1635,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
        idx_old = idx = aux->empty_mark + 1;
        for (i = 0; i < range_scan; i++, idx++) {
                te = aux_sdb_trailer(aux, idx);
-               /* READ_ONCE() 16 byte header */
-               prev.val = __cdsg(&te->header.val, 0, 0);
+               prev.val = READ_ONCE_ALIGNED_128(te->header.val);
                do {
                        old.val = prev.val;
                        new.val = prev.val;
index 23c427284773c1323928eb6cc89f735152d1eb69..d4888453bbf8b180714e24ad9c685f26db3f3636 100644 (file)
@@ -333,6 +333,7 @@ static void pcpu_delegate(struct pcpu *pcpu,
        }
        /* Stop target cpu (if func returns this stops the current cpu). */
        pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+       pcpu_sigp_retry(pcpu, SIGP_CPU_RESET, 0);
        /* Restart func on the target cpu and stop the current cpu. */
        if (lc) {
                lc->restart_stack = stack;
@@ -522,7 +523,7 @@ static void smp_handle_ext_call(void)
        if (test_bit(ec_call_function_single, &bits))
                generic_smp_call_function_single_interrupt();
        if (test_bit(ec_mcck_pending, &bits))
-               __s390_handle_mcck();
+               s390_handle_mcck();
        if (test_bit(ec_irq_work, &bits))
                irq_work_run();
 }
index 5060956b8e7d61181452d47d998d181ef1877f0f..1bc42ce2659908778078528a67c169c4c719f6a0 100644 (file)
@@ -289,15 +289,17 @@ segment_overlaps_others (struct dcss_segment *seg)
 
 /*
  * real segment loading function, called from segment_load
+ * Must return either an error code (< 0) or the segment type code (>= 0).
  */
 static int
 __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long *end)
 {
        unsigned long start_addr, end_addr, dummy;
        struct dcss_segment *seg;
-       int rc, diag_cc;
+       int rc, diag_cc, segtype;
 
        start_addr = end_addr = 0;
+       segtype = -1;
        seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA);
        if (seg == NULL) {
                rc = -ENOMEM;
@@ -326,9 +328,9 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
        seg->res_name[8] = '\0';
        strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name));
        seg->res->name = seg->res_name;
-       rc = seg->vm_segtype;
-       if (rc == SEG_TYPE_SC ||
-           ((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared))
+       segtype = seg->vm_segtype;
+       if (segtype == SEG_TYPE_SC ||
+           ((segtype == SEG_TYPE_SR || segtype == SEG_TYPE_ER) && !do_nonshared))
                seg->res->flags |= IORESOURCE_READONLY;
 
        /* Check for overlapping resources before adding the mapping. */
@@ -386,7 +388,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
  out_free:
        kfree(seg);
  out:
-       return rc;
+       return rc < 0 ? rc : segtype;
 }
 
 /*
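
The calling convention stated in the comment above, from the caller's side
(a sketch; variable names are illustrative):

        int segtype;
        unsigned long addr, end;

        segtype = __segment_load(name, do_nonshared, &addr, &end);
        if (segtype < 0)
                return segtype;         /* error code, no longer masked by the type */
        /* success: segtype is one of the SEG_TYPE_* constants (>= 0) */
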
index 27aebf1e75a2003003cffaf82c43d46b3e206357..3ef7adf739c836d8f742e131daae0c511d3060db 100644 (file)
@@ -50,6 +50,7 @@
 #define SR_FD          0x00008000
 #define SR_MD          0x40000000
 
+#define SR_USER_MASK   0x00000303      /* M, Q, S, T bits */
 /*
  * DSP structure and data
  */
index 90f495d35db29a7e7b139d4541eb20fe053f02ed..a6bfc6f374911d5ac639030c196bd72ebd19d614 100644 (file)
@@ -115,6 +115,7 @@ static int
 restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *r0_p)
 {
        unsigned int err = 0;
+       unsigned int sr = regs->sr & ~SR_USER_MASK;
 
 #define COPY(x)                err |= __get_user(regs->x, &sc->sc_##x)
                        COPY(regs[1]);
@@ -130,6 +131,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *r0_p
        COPY(sr);       COPY(pc);
 #undef COPY
 
+       regs->sr = (regs->sr & SR_USER_MASK) | sr;
+
 #ifdef CONFIG_SH_FPU
        if (boot_cpu_data.flags & CPU_HAS_FPU) {
                int owned_fp;
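
Distilled, the fix above works in three steps (an annotated restatement,
not new code): save the privileged SR bits, let the sigcontext copy
overwrite SR wholesale, then merge back so only the user-visible M/Q/S/T
bits come from userspace:

        unsigned int sr = regs->sr & ~SR_USER_MASK;     /* 1: keep privileged bits */

        /* 2: COPY(sr) replaces regs->sr from the user-supplied sigcontext */

        regs->sr = (regs->sr & SR_USER_MASK) | sr;      /* 3: user bits only */
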
index 91259f291c54078541d59e39e89de7c7933c99ad..179295b14664a55d889844790f0048c8b29ef553 100644 (file)
@@ -187,8 +187,11 @@ good_area:
         */
        fault = handle_mm_fault(vma, address, flags, regs);
 
-       if (fault_signal_pending(fault, regs))
+       if (fault_signal_pending(fault, regs)) {
+               if (!from_user)
+                       goto no_context;
                return;
+       }
 
        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
index 4acc12eafbf54da90987b7e9d7445fa69576386a..d91305de694c54dce9bfcaa8ababf0620465818f 100644 (file)
@@ -424,8 +424,13 @@ good_area:
 
        fault = handle_mm_fault(vma, address, flags, regs);
 
-       if (fault_signal_pending(fault, regs))
+       if (fault_signal_pending(fault, regs)) {
+               if (regs->tstate & TSTATE_PRIV) {
+                       insn = get_fault_insn(regs, insn);
+                       goto handle_kernel_fault;
+               }
                goto exit_exception;
+       }
 
        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
index ad4ff3b0e91e5775b0642160a0d2a258ea2f8dff..541a9b18e3435ca49e932f5767a5906b2ab9ea58 100644 (file)
@@ -25,9 +25,12 @@ config UML
        select GENERIC_IRQ_SHOW
        select GENERIC_CPU_DEVICES
        select HAVE_GCC_PLUGINS
+       select ARCH_SUPPORTS_LTO_CLANG
+       select ARCH_SUPPORTS_LTO_CLANG_THIN
        select TRACE_IRQFLAGS_SUPPORT
        select TTY # Needed for line.c
        select HAVE_ARCH_VMAP_STACK
+       select HAVE_RUST                        if X86_64
 
 config MMU
        bool
@@ -242,4 +245,8 @@ source "arch/um/drivers/Kconfig"
 config ARCH_SUSPEND_POSSIBLE
        def_bool y
 
+menu "Power management options"
+
 source "kernel/power/Kconfig"
+
+endmenu
index f1d4d67157be0bf568ca476aed3508b9c9fd2f8f..8186d4761bda6b7b0d8ec32e58b1093cfe4a51bb 100644 (file)
@@ -68,6 +68,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
        -Din6addr_loopback=kernel_in6addr_loopback \
        -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
 
+KBUILD_RUSTFLAGS += -Crelocation-model=pie
+
 KBUILD_AFLAGS += $(ARCH_INCLUDE)
 
 USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \
@@ -139,11 +141,10 @@ ifeq ($(CONFIG_LD_IS_BFD),y)
 LDFLAGS_EXECSTACK += $(call ld-option,--no-warn-rwx-segments)
 endif
 
-LD_FLAGS_CMDLINE = $(foreach opt,$(KBUILD_LDFLAGS),-Wl,$(opt))
+LD_FLAGS_CMDLINE = $(foreach opt,$(KBUILD_LDFLAGS) $(LDFLAGS_EXECSTACK),-Wl,$(opt))
 
 # Used by link-vmlinux.sh which has special support for um link
-export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE)
-export LDFLAGS_vmlinux := $(LDFLAGS_EXECSTACK)
+export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) $(CC_FLAGS_LTO)
 
 # When cleaning we don't include .config, so we don't include
 # TT or skas makefiles and don't clean skas_ptregs.h.
index a4f0a19fbe14ba3668cb8bc19139b01b9e079801..36911b1fddcf00653eae02ff2c645ef4938b9a52 100644 (file)
@@ -261,6 +261,7 @@ config UML_NET_VECTOR
 config UML_NET_VDE
        bool "VDE transport (obsolete)"
        depends on UML_NET
+       depends on !MODVERSIONS
        select MAY_HAVE_RUNTIME_DEPS
        help
          This User-Mode Linux network transport allows one or more running
@@ -309,6 +310,7 @@ config UML_NET_MCAST
 config UML_NET_PCAP
        bool "pcap transport (obsolete)"
        depends on UML_NET
+       depends on !MODVERSIONS
        select MAY_HAVE_RUNTIME_DEPS
        help
          The pcap transport makes a pcap packet stream on the host look
index cfe4cb17694cc611fdb738b255421951945ad53f..25ee2c97ca21e49e500fc07da009611f256d5c5c 100644 (file)
@@ -15,7 +15,7 @@ struct pcap_init {
        char *filter;
 };
 
-void pcap_init(struct net_device *dev, void *data)
+void pcap_init_kern(struct net_device *dev, void *data)
 {
        struct uml_net_private *pri;
        struct pcap_data *ppri;
@@ -44,7 +44,7 @@ static int pcap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 }
 
 static const struct net_kern_info pcap_kern_info = {
-       .init                   = pcap_init,
+       .init                   = pcap_init_kern,
        .protocol               = eth_protocol,
        .read                   = pcap_read,
        .write                  = pcap_write,
index ded7c47d2fbe5a87da2ff550224694baa95d6053..131b7cb29576727cf116402ac526a1c3079489b4 100644 (file)
@@ -767,6 +767,7 @@ static int vector_config(char *str, char **error_out)
 
        if (parsed == NULL) {
                *error_out = "vector_config failed to parse parameters";
+               kfree(params);
                return -EINVAL;
        }
 
index 3a73d17a0161dbe342241000f7ccc4d685e38e1b..59ed5f9e6e41d0663f715baaba1de169c762c9d7 100644 (file)
@@ -68,8 +68,6 @@ struct vector_fds {
 };
 
 #define VECTOR_READ    1
-#define VECTOR_WRITE   (1 < 1)
-#define VECTOR_HEADERS (1 < 2)
 
 extern struct arglist *uml_parse_vector_ifspec(char *arg);
 
index 3ac220dafec4a506ac7c2ed993f2baa58043ba6a..7699ca5f35d48ab8fd3fc4df3dbcc162ba2ae19e 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
 #include <linux/logic_iomem.h>
+#include <linux/of_platform.h>
 #include <linux/irqdomain.h>
 #include <linux/virtio_pcidev.h>
 #include <linux/virtio-uml.h>
@@ -39,6 +40,8 @@ struct um_pci_device {
        unsigned long status;
 
        int irq;
+
+       bool platform;
 };
 
 struct um_pci_device_reg {
@@ -48,13 +51,15 @@ struct um_pci_device_reg {
 
 static struct pci_host_bridge *bridge;
 static DEFINE_MUTEX(um_pci_mtx);
+static struct um_pci_device *um_pci_platform_device;
 static struct um_pci_device_reg um_pci_devices[MAX_DEVICES];
 static struct fwnode_handle *um_pci_fwnode;
 static struct irq_domain *um_pci_inner_domain;
 static struct irq_domain *um_pci_msi_domain;
 static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)];
 
-#define UM_VIRT_PCI_MAXDELAY 40000
+static unsigned int um_pci_max_delay_us = 40000;
+module_param_named(max_delay_us, um_pci_max_delay_us, uint, 0644);
 
 struct um_pci_message_buffer {
        struct virtio_pcidev_msg hdr;
@@ -132,8 +137,11 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
                                out ? 1 : 0,
                                posted ? cmd : HANDLE_NO_FREE(cmd),
                                GFP_ATOMIC);
-       if (ret)
+       if (ret) {
+               if (posted)
+                       kfree(cmd);
                goto out;
+       }
 
        if (posted) {
                virtqueue_kick(dev->cmd_vq);
@@ -155,7 +163,7 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
                        kfree(completed);
 
                if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) ||
-                             ++delay_count > UM_VIRT_PCI_MAXDELAY,
+                             ++delay_count > um_pci_max_delay_us,
                              "um virt-pci delay: %d", delay_count)) {
                        ret = -EIO;
                        break;
@@ -480,6 +488,9 @@ static void um_pci_handle_irq_message(struct virtqueue *vq,
        struct virtio_device *vdev = vq->vdev;
        struct um_pci_device *dev = vdev->priv;
 
+       if (!dev->irq)
+               return;
+
        /* we should properly chain interrupts, but on ARCH=um we don't care */
 
        switch (msg->op) {
@@ -533,6 +544,25 @@ static void um_pci_irq_vq_cb(struct virtqueue *vq)
        }
 }
 
+/* Copied from arch/x86/kernel/devicetree.c */
+struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
+{
+       struct device_node *np;
+
+       for_each_node_by_type(np, "pci") {
+               const void *prop;
+               unsigned int bus_min;
+
+               prop = of_get_property(np, "bus-range", NULL);
+               if (!prop)
+                       continue;
+               bus_min = be32_to_cpup(prop);
+               if (bus->number == bus_min)
+                       return np;
+       }
+       return NULL;
+}
+
 static int um_pci_init_vqs(struct um_pci_device *dev)
 {
        struct virtqueue *vqs[2];
@@ -561,6 +591,55 @@ static int um_pci_init_vqs(struct um_pci_device *dev)
        return 0;
 }
 
+static void __um_pci_virtio_platform_remove(struct virtio_device *vdev,
+                                           struct um_pci_device *dev)
+{
+       virtio_reset_device(vdev);
+       vdev->config->del_vqs(vdev);
+
+       mutex_lock(&um_pci_mtx);
+       um_pci_platform_device = NULL;
+       mutex_unlock(&um_pci_mtx);
+
+       kfree(dev);
+}
+
+static int um_pci_virtio_platform_probe(struct virtio_device *vdev,
+                                       struct um_pci_device *dev)
+{
+       int ret;
+
+       dev->platform = true;
+
+       mutex_lock(&um_pci_mtx);
+
+       if (um_pci_platform_device) {
+               mutex_unlock(&um_pci_mtx);
+               ret = -EBUSY;
+               goto out_free;
+       }
+
+       ret = um_pci_init_vqs(dev);
+       if (ret) {
+               mutex_unlock(&um_pci_mtx);
+               goto out_free;
+       }
+
+       um_pci_platform_device = dev;
+
+       mutex_unlock(&um_pci_mtx);
+
+       ret = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev);
+       if (ret)
+               __um_pci_virtio_platform_remove(vdev, dev);
+
+       return ret;
+
+out_free:
+       kfree(dev);
+       return ret;
+}
+
 static int um_pci_virtio_probe(struct virtio_device *vdev)
 {
        struct um_pci_device *dev;
@@ -574,6 +653,9 @@ static int um_pci_virtio_probe(struct virtio_device *vdev)
        dev->vdev = vdev;
        vdev->priv = dev;
 
+       if (of_device_is_compatible(vdev->dev.of_node, "simple-bus"))
+               return um_pci_virtio_platform_probe(vdev, dev);
+
        mutex_lock(&um_pci_mtx);
        for (i = 0; i < MAX_DEVICES; i++) {
                if (um_pci_devices[i].dev)
@@ -623,9 +705,11 @@ static void um_pci_virtio_remove(struct virtio_device *vdev)
        struct um_pci_device *dev = vdev->priv;
        int i;
 
-        /* Stop all virtqueues */
-        virtio_reset_device(vdev);
-        vdev->config->del_vqs(vdev);
+       if (dev->platform) {
+               of_platform_depopulate(&vdev->dev);
+               __um_pci_virtio_platform_remove(vdev, dev);
+               return;
+       }
 
        device_set_wakeup_enable(&vdev->dev, false);
 
@@ -633,12 +717,27 @@ static void um_pci_virtio_remove(struct virtio_device *vdev)
        for (i = 0; i < MAX_DEVICES; i++) {
                if (um_pci_devices[i].dev != dev)
                        continue;
+
                um_pci_devices[i].dev = NULL;
                irq_free_desc(dev->irq);
+
+               break;
        }
        mutex_unlock(&um_pci_mtx);
 
-       um_pci_rescan();
+       if (i < MAX_DEVICES) {
+               struct pci_dev *pci_dev;
+
+               pci_dev = pci_get_slot(bridge->bus, i);
+               if (pci_dev)
+                       pci_stop_and_remove_bus_device_locked(pci_dev);
+       }
+
+       /* Stop all virtqueues */
+       virtio_reset_device(vdev);
+       dev->cmd_vq = NULL;
+       dev->irq_vq = NULL;
+       vdev->config->del_vqs(vdev);
 
        kfree(dev);
 }
@@ -860,6 +959,30 @@ void *pci_root_bus_fwnode(struct pci_bus *bus)
        return um_pci_fwnode;
 }
 
+static long um_pci_map_platform(unsigned long offset, size_t size,
+                               const struct logic_iomem_ops **ops,
+                               void **priv)
+{
+       if (!um_pci_platform_device)
+               return -ENOENT;
+
+       *ops = &um_pci_device_bar_ops;
+       *priv = &um_pci_platform_device->resptr[0];
+
+       return 0;
+}
+
+static const struct logic_iomem_region_ops um_pci_platform_ops = {
+       .map = um_pci_map_platform,
+};
+
+static struct resource virt_platform_resource = {
+       .name = "platform",
+       .start = 0x10000000,
+       .end = 0x1fffffff,
+       .flags = IORESOURCE_MEM,
+};
+
 static int __init um_pci_init(void)
 {
        int err, i;
@@ -868,6 +991,8 @@ static int __init um_pci_init(void)
                                       &um_pci_cfgspace_ops));
        WARN_ON(logic_iomem_add_region(&virt_iomem_resource,
                                       &um_pci_iomem_ops));
+       WARN_ON(logic_iomem_add_region(&virt_platform_resource,
+                                      &um_pci_platform_ops));
 
        if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0,
                 "No virtio device ID configured for PCI - no PCI support\n"))
index 588930a0ced17d8ae1026e62474e15cc41b7f881..8adca2000e519febbb2fab7a509e1792711cf405 100644 (file)
@@ -168,7 +168,8 @@ static void vhost_user_check_reset(struct virtio_uml_device *vu_dev,
        if (!vu_dev->registered)
                return;
 
-       virtio_break_device(&vu_dev->vdev);
+       vu_dev->registered = 0;
+
        schedule_work(&pdata->conn_broken_wk);
 }
 
@@ -412,7 +413,7 @@ static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev,
                if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
                        vhost_user_reply(vu_dev, &msg.msg, response);
                irq_rc = IRQ_HANDLED;
-       };
+       }
        /* mask EAGAIN as we try non-blocking read until socket is empty */
        vu_dev->recv_rc = (rc == -EAGAIN) ? 0 : rc;
        return irq_rc;
@@ -1136,6 +1137,15 @@ void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
 
 static void vu_of_conn_broken(struct work_struct *wk)
 {
+       struct virtio_uml_platform_data *pdata;
+       struct virtio_uml_device *vu_dev;
+
+       pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
+
+       vu_dev = platform_get_drvdata(pdata->pdev);
+
+       virtio_break_device(&vu_dev->vdev);
+
        /*
         * We can't remove the device from the devicetree so the only thing we
         * can do is warn.
@@ -1266,8 +1276,14 @@ static int vu_unregister_cmdline_device(struct device *dev, void *data)
 static void vu_conn_broken(struct work_struct *wk)
 {
        struct virtio_uml_platform_data *pdata;
+       struct virtio_uml_device *vu_dev;
 
        pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
+
+       vu_dev = platform_get_drvdata(pdata->pdev);
+
+       virtio_break_device(&vu_dev->vdev);
+
        vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
 }
 
index bb5f06480da95237ff37a5b58394522f40d031cf..7414154b8e9aeaeb74a4ad765aaa6db1549a3510 100644 (file)
@@ -91,7 +91,7 @@ struct cpuinfo_um {
 
 extern struct cpuinfo_um boot_cpu_data;
 
-#define cpu_data (&boot_cpu_data)
+#define cpu_data(cpu)    boot_cpu_data
 #define current_cpu_data boot_cpu_data
 #define cache_line_size()      (boot_cpu_data.cache_alignment)
 
index 58938d75871af77e7c93f408653e1924a599304b..827a0d3fa589086aba0fc729e9dad2888c8f5ceb 100644 (file)
@@ -29,8 +29,8 @@ void flush_thread(void)
 
        ret = unmap(&current->mm->context.id, 0, TASK_SIZE, 1, &data);
        if (ret) {
-               printk(KERN_ERR "flush_thread - clearing address space failed, "
-                      "err = %d\n", ret);
+               printk(KERN_ERR "%s - clearing address space failed, err = %d\n",
+                      __func__, ret);
                force_sig(SIGKILL);
        }
        get_safe_registers(current_pt_regs()->regs.gp,
index ad449173a1a1cdf20f3973fdfea96539392bc343..7d050ab0f78afa0d6a5324fcd5111b28accb6269 100644 (file)
@@ -314,8 +314,8 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
        return ret;
 }
 
-void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
-                     unsigned long end_addr, int force)
+static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
+                            unsigned long end_addr, int force)
 {
        pgd_t *pgd;
        struct host_vm_change hvc;
@@ -597,6 +597,8 @@ void force_flush_all(void)
        struct vm_area_struct *vma;
        VMA_ITERATOR(vmi, mm, 0);
 
+       mmap_read_lock(mm);
        for_each_vma(vmi, vma)
                fix_range(mm, vma->vm_start, vma->vm_end, 1);
+       mmap_read_unlock(mm);
 }
index 786b44dc20c98c14bdb704f7b34ae20137212eaa..8dcda617b8bf6d4734f3bbf0a6a1009aaaf896ee 100644 (file)
@@ -96,7 +96,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-       return *pos < nr_cpu_ids ? cpu_data + *pos : NULL;
+       return *pos < nr_cpu_ids ? &boot_cpu_data + *pos : NULL;
 }
 
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
index 16e49bfa2b426c6a324b7102a877a0fb14e94a4e..53d719c04ba9424d5e782a3f560a0426456be59a 100644 (file)
@@ -1,4 +1,4 @@
-
+#define RUNTIME_DISCARD_EXIT
 KERNEL_STACK_SIZE = 4096 * (1 << CONFIG_KERNEL_STACK_ORDER);
 
 #ifdef CONFIG_LD_SCRIPT_STATIC
index 98ea910ef87cd34b62338d2d90bb00d440f00273..cf7e49c08b210d7c0902b9e416955317f3f04584 100644 (file)
@@ -127,12 +127,10 @@ int os_mod_epoll_fd(int events, int fd, void *data)
 int os_del_epoll_fd(int fd)
 {
        struct epoll_event event;
-       int result;
        /* This is quiet as we use this as IO ON/OFF - so it is often
         * invoked on a non-existent fd
         */
-       result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event);
-       return result;
+       return epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event);
 }
 
 void os_set_ioignore(void)
index 3b4975ee67e2d6872f3d7e88a37f1a215f95e47d..953fb10f3f93183229ab9b818335b0ff9eb14848 100644 (file)
@@ -60,8 +60,8 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
                printk(UM_KERN_ERR "Registers - \n");
                for (i = 0; i < MAX_REG_NR; i++)
                        printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
-               panic("do_syscall_stub : PTRACE_SETREGS failed, errno = %d\n",
-                     -n);
+               panic("%s : PTRACE_SETREGS failed, errno = %d\n",
+                     __func__, -n);
        }
 
        err = ptrace(PTRACE_CONT, pid, 0, 0);
@@ -81,20 +81,17 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
        offset = *((unsigned long *) mm_idp->stack + 1);
        if (offset) {
                data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA);
-               printk(UM_KERN_ERR "do_syscall_stub : ret = %ld, offset = %ld, "
-                      "data = %p\n", ret, offset, data);
+               printk(UM_KERN_ERR "%s : ret = %ld, offset = %ld, data = %p\n",
+                      __func__, ret, offset, data);
                syscall = (unsigned long *)((unsigned long)data + data[0]);
-               printk(UM_KERN_ERR "do_syscall_stub: syscall %ld failed, "
-                      "return value = 0x%lx, expected return value = 0x%lx\n",
-                      syscall[0], ret, syscall[7]);
-               printk(UM_KERN_ERR "    syscall parameters: "
-                      "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
+               printk(UM_KERN_ERR "%s: syscall %ld failed, return value = 0x%lx, expected return value = 0x%lx\n",
+                      __func__, syscall[0], ret, syscall[7]);
+               printk(UM_KERN_ERR "    syscall parameters: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
                       syscall[1], syscall[2], syscall[3],
                       syscall[4], syscall[5], syscall[6]);
                for (n = 1; n < data[0]/sizeof(long); n++) {
                        if (n == 1)
-                               printk(UM_KERN_ERR "    additional syscall "
-                                      "data:");
+                               printk(UM_KERN_ERR "    additional syscall data:");
                        if (n % 4 == 1)
                                printk("\n" UM_KERN_ERR "      ");
                        printk("  0x%lx", data[n]);
index b24db6017ded6878217cb698911be2c8982c15b4..b1ea53285af1ba9f80d5dd95d568c9b35d3a7f50 100644 (file)
@@ -118,8 +118,8 @@ void wait_stub_done(int pid)
 
                err = ptrace(PTRACE_CONT, pid, 0, 0);
                if (err) {
-                       printk(UM_KERN_ERR "wait_stub_done : continue failed, "
-                              "errno = %d\n", errno);
+                       printk(UM_KERN_ERR "%s : continue failed, errno = %d\n",
+                              __func__, errno);
                        fatal_sigsegv();
                }
        }
@@ -130,11 +130,10 @@ void wait_stub_done(int pid)
 bad_wait:
        err = ptrace_dump_regs(pid);
        if (err)
-               printk(UM_KERN_ERR "Failed to get registers from stub, "
-                      "errno = %d\n", -err);
-       printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, "
-              "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno,
-              status);
+               printk(UM_KERN_ERR "Failed to get registers from stub, errno = %d\n",
+                      -err);
+       printk(UM_KERN_ERR "%s : failed to wait for SIGTRAP, pid = %d, n = %d, errno = %d, status = 0x%x\n",
+              __func__, pid, n, errno, status);
        fatal_sigsegv();
 }
 
@@ -195,15 +194,15 @@ static void handle_trap(int pid, struct uml_pt_regs *regs,
                err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
                             __NR_getpid);
                if (err < 0) {
-                       printk(UM_KERN_ERR "handle_trap - nullifying syscall "
-                              "failed, errno = %d\n", errno);
+                       printk(UM_KERN_ERR "%s - nullifying syscall failed, errno = %d\n",
+                              __func__, errno);
                        fatal_sigsegv();
                }
 
                err = ptrace(PTRACE_SYSCALL, pid, 0, 0);
                if (err < 0) {
-                       printk(UM_KERN_ERR "handle_trap - continuing to end of "
-                              "syscall failed, errno = %d\n", errno);
+                       printk(UM_KERN_ERR "%s - continuing to end of syscall failed, errno = %d\n",
+                              __func__, errno);
                        fatal_sigsegv();
                }
 
@@ -212,11 +211,10 @@ static void handle_trap(int pid, struct uml_pt_regs *regs,
                    (WSTOPSIG(status) != SIGTRAP + 0x80)) {
                        err = ptrace_dump_regs(pid);
                        if (err)
-                               printk(UM_KERN_ERR "Failed to get registers "
-                                      "from process, errno = %d\n", -err);
-                       printk(UM_KERN_ERR "handle_trap - failed to wait at "
-                              "end of syscall, errno = %d, status = %d\n",
-                              errno, status);
+                               printk(UM_KERN_ERR "Failed to get registers from process, errno = %d\n",
+                                      -err);
+                       printk(UM_KERN_ERR "%s - failed to wait at end of syscall, errno = %d, status = %d\n",
+                              __func__, errno, status);
                        fatal_sigsegv();
                }
        }
@@ -256,8 +254,8 @@ static int userspace_tramp(void *stack)
        addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
                      PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
        if (addr == MAP_FAILED) {
-               printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, "
-                      "errno = %d\n", STUB_CODE, errno);
+               printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, errno = %d\n",
+                      STUB_CODE, errno);
                exit(1);
        }
 
@@ -267,8 +265,7 @@ static int userspace_tramp(void *stack)
                            UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
                            MAP_FIXED | MAP_SHARED, fd, offset);
                if (addr == MAP_FAILED) {
-                       printk(UM_KERN_ERR "mapping segfault stack "
-                              "at 0x%lx failed, errno = %d\n",
+                       printk(UM_KERN_ERR "mapping segfault stack at 0x%lx failed, errno = %d\n",
                               STUB_DATA, errno);
                        exit(1);
                }
@@ -286,8 +283,8 @@ static int userspace_tramp(void *stack)
                sa.sa_sigaction = (void *) v;
                sa.sa_restorer = NULL;
                if (sigaction(SIGSEGV, &sa, NULL) < 0) {
-                       printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV "
-                              "handler failed - errno = %d\n", errno);
+                       printk(UM_KERN_ERR "%s - setting SIGSEGV handler failed - errno = %d\n",
+                              __func__, errno);
                        exit(1);
                }
        }
@@ -322,8 +319,8 @@ int start_userspace(unsigned long stub_stack)
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (stack == MAP_FAILED) {
                err = -errno;
-               printk(UM_KERN_ERR "start_userspace : mmap failed, "
-                      "errno = %d\n", errno);
+               printk(UM_KERN_ERR "%s : mmap failed, errno = %d\n",
+                      __func__, errno);
                return err;
        }
 
@@ -336,8 +333,8 @@ int start_userspace(unsigned long stub_stack)
        pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
        if (pid < 0) {
                err = -errno;
-               printk(UM_KERN_ERR "start_userspace : clone failed, "
-                      "errno = %d\n", errno);
+               printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
+                      __func__, errno);
                return err;
        }
 
@@ -345,31 +342,31 @@ int start_userspace(unsigned long stub_stack)
                CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
                if (n < 0) {
                        err = -errno;
-                       printk(UM_KERN_ERR "start_userspace : wait failed, "
-                              "errno = %d\n", errno);
+                       printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+                              __func__, errno);
                        goto out_kill;
                }
        } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
 
        if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
                err = -EINVAL;
-               printk(UM_KERN_ERR "start_userspace : expected SIGSTOP, got "
-                      "status = %d\n", status);
+               printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
+                      __func__, status);
                goto out_kill;
        }
 
        if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
                   (void *) PTRACE_O_TRACESYSGOOD) < 0) {
                err = -errno;
-               printk(UM_KERN_ERR "start_userspace : PTRACE_OLDSETOPTIONS "
-                      "failed, errno = %d\n", errno);
+               printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
+                      __func__, errno);
                goto out_kill;
        }
 
        if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
                err = -errno;
-               printk(UM_KERN_ERR "start_userspace : munmap failed, "
-                      "errno = %d\n", errno);
+               printk(UM_KERN_ERR "%s : munmap failed, errno = %d\n",
+                      __func__, errno);
                goto out_kill;
        }
 
@@ -403,14 +400,14 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
                 * just kill the process.
                 */
                if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
-                       printk(UM_KERN_ERR "userspace - ptrace set regs "
-                              "failed, errno = %d\n", errno);
+                       printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
+                              __func__, errno);
                        fatal_sigsegv();
                }
 
                if (put_fp_registers(pid, regs->fp)) {
-                       printk(UM_KERN_ERR "userspace - ptrace set fp regs "
-                              "failed, errno = %d\n", errno);
+                       printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
+                              __func__, errno);
                        fatal_sigsegv();
                }
 
@@ -421,28 +418,28 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
                                             singlestepping(NULL));
 
                if (ptrace(op, pid, 0, 0)) {
-                       printk(UM_KERN_ERR "userspace - ptrace continue "
-                              "failed, op = %d, errno = %d\n", op, errno);
+                       printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
+                              __func__, op, errno);
                        fatal_sigsegv();
                }
 
                CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
                if (err < 0) {
-                       printk(UM_KERN_ERR "userspace - wait failed, "
-                              "errno = %d\n", errno);
+                       printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
+                              __func__, errno);
                        fatal_sigsegv();
                }
 
                regs->is_user = 1;
                if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
-                       printk(UM_KERN_ERR "userspace - PTRACE_GETREGS failed, "
-                              "errno = %d\n", errno);
+                       printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
+                              __func__, errno);
                        fatal_sigsegv();
                }
 
                if (get_fp_registers(pid, regs->fp)) {
-                       printk(UM_KERN_ERR "userspace -  get_fp_registers failed, "
-                              "errno = %d\n", errno);
+                       printk(UM_KERN_ERR "%s -  get_fp_registers failed, errno = %d\n",
+                              __func__, errno);
                        fatal_sigsegv();
                }
 
@@ -494,8 +491,8 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
                                unblock_signals_trace();
                                break;
                        default:
-                               printk(UM_KERN_ERR "userspace - child stopped "
-                                      "with signal %d\n", sig);
+                               printk(UM_KERN_ERR "%s - child stopped with signal %d\n",
+                                      __func__, sig);
                                fatal_sigsegv();
                        }
                        pid = userspace_pid[0];
@@ -555,15 +552,15 @@ int copy_context_skas0(unsigned long new_stack, int pid)
        err = ptrace_setregs(pid, thread_regs);
        if (err < 0) {
                err = -errno;
-               printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_SETREGS "
-                      "failed, pid = %d, errno = %d\n", pid, -err);
+               printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n",
+                      __func__, pid, -err);
                return err;
        }
 
        err = put_fp_registers(pid, thread_fp_regs);
        if (err < 0) {
-               printk(UM_KERN_ERR "copy_context_skas0 : put_fp_registers "
-                      "failed, pid = %d, err = %d\n", pid, err);
+               printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n",
+                      __func__, pid, err);
                return err;
        }
 
@@ -574,8 +571,8 @@ int copy_context_skas0(unsigned long new_stack, int pid)
        err = ptrace(PTRACE_CONT, pid, 0, 0);
        if (err) {
                err = -errno;
-               printk(UM_KERN_ERR "Failed to continue new process, pid = %d, "
-                      "errno = %d\n", pid, errno);
+               printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n",
+                      pid, errno);
                return err;
        }
 
@@ -583,8 +580,8 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 
        pid = data->parent_err;
        if (pid < 0) {
-               printk(UM_KERN_ERR "copy_context_skas0 - stub-parent reports "
-                      "error %d\n", -pid);
+               printk(UM_KERN_ERR "%s - stub-parent reports error %d\n",
+                      __func__, -pid);
                return pid;
        }
 
@@ -594,8 +591,8 @@ int copy_context_skas0(unsigned long new_stack, int pid)
         */
        wait_stub_done(pid);
        if (child_data->child_err != STUB_DATA) {
-               printk(UM_KERN_ERR "copy_context_skas0 - stub-child %d reports "
-                      "error %ld\n", pid, data->child_err);
+               printk(UM_KERN_ERR "%s - stub-child %d reports error %ld\n",
+                      __func__, pid, data->child_err);
                err = data->child_err;
                goto out_kill;
        }
@@ -603,8 +600,8 @@ int copy_context_skas0(unsigned long new_stack, int pid)
        if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
                   (void *)PTRACE_O_TRACESYSGOOD) < 0) {
                err = -errno;
-               printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_OLDSETOPTIONS "
-                      "failed, errno = %d\n", errno);
+               printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
+                      __func__, errno);
                goto out_kill;
        }
 
@@ -672,8 +669,8 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf)
                kmalloc_ok = 0;
                return 1;
        default:
-               printk(UM_KERN_ERR "Bad sigsetjmp return in "
-                      "start_idle_thread - %d\n", n);
+               printk(UM_KERN_ERR "Bad sigsetjmp return in %s - %d\n",
+                      __func__, n);
                fatal_sigsegv();
        }
        longjmp(*switch_buf, 1);
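
All of the conversions above share one pattern: hard-coded function names in the format strings are replaced with the __func__ predefined identifier, so the messages stay correct if a function is ever renamed. A minimal userspace sketch of the pattern, with fprintf standing in for printk (an illustrative assumption, not the kernel API):

#include <errno.h>
#include <stdio.h>

static void report_failure(void)
{
        errno = EINVAL;
        /* __func__ expands to "report_failure" here, with no string
         * literal to keep in sync by hand. */
        fprintf(stderr, "%s - operation failed, errno = %d\n",
                __func__, errno);
}

int main(void)
{
        report_failure();
        return 0;
}
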
index b89e2e0024c574fbec4b17c5a226a7d5d4e4879e..b70559b821df80a1737d913c04a434d1b6511f79 100644 (file)
@@ -1,6 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
 core-y += arch/x86/crypto/
 
+#
+# Disable SSE and other FP/SIMD instructions to match normal x86
+#
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
+KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
+
 ifeq ($(CONFIG_X86_32),y)
 START := 0x8048000
 
index 7ecd2aeeeffc6a43718dd211a36c345d4c0b041e..eccc3431e515a674b584f2984679aa8140bcc631 100644 (file)
@@ -385,7 +385,14 @@ SYM_CODE_END(xen_error_entry)
  */
 .macro idtentry vector asmsym cfunc has_error_code:req
 SYM_CODE_START(\asmsym)
-       UNWIND_HINT_IRET_REGS offset=\has_error_code*8
+
+       .if \vector == X86_TRAP_BP
+               /* #BP advances %rip to the next instruction */
+               UNWIND_HINT_IRET_REGS offset=\has_error_code*8 signal=0
+       .else
+               UNWIND_HINT_IRET_REGS offset=\has_error_code*8
+       .endif
+
        ENDBR
        ASM_CLAC
        cld
index e04313e89f4f5925ef3962f5433c05a83a37a425..3ef70e54a858a5baf88b1711abefa833a53f882b 100644 (file)
 .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
        ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
                __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
-               __stringify(__FILL_ONE_RETURN), \ftr2
+               __stringify(nop;nop;__FILL_ONE_RETURN), \ftr2
 
 .Lskip_rsb_\@:
 .endm
index 5a2baf28a1dcdaf546cd213e4d0eef389a43ad50..1343a62106de9ec3cca2f86f62b1bfe3d4cbb99b 100644 (file)
@@ -57,12 +57,14 @@ struct orc_entry {
        unsigned        sp_reg:4;
        unsigned        bp_reg:4;
        unsigned        type:2;
+       unsigned        signal:1;
        unsigned        end:1;
 #elif defined(__BIG_ENDIAN_BITFIELD)
        unsigned        bp_reg:4;
        unsigned        sp_reg:4;
-       unsigned        unused:5;
+       unsigned        unused:4;
        unsigned        end:1;
+       unsigned        signal:1;
        unsigned        type:2;
 #endif
 } __packed;
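
The new signal bit is carved out of the existing padding, so the packed entry stays the same size; on big-endian the unused field shrinks from 5 bits to 4 to compensate. A standalone sketch of that layout with stand-in types (short replaces the kernel's s16; field names mirror orc_entry, but this is not the kernel header):

#include <stdio.h>

struct orc_entry_demo {
        short           sp_offset;      /* stand-ins for the s16 members */
        short           bp_offset;
        unsigned        sp_reg:4;
        unsigned        bp_reg:4;
        unsigned        type:2;
        unsigned        signal:1;       /* new bit */
        unsigned        end:1;
        unsigned        unused:4;       /* shrunk to make room */
} __attribute__((packed));

int main(void)
{
        /* 2 + 2 bytes of offsets plus 16 bits of flags: still 6 bytes. */
        printf("sizeof = %zu\n", sizeof(struct orc_entry_demo));
        return 0;
}
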
index 52788f79786fad74f2c373e5ce7895e79af26942..255a78d9d90672afb053875184d89b05bab52a0b 100644 (file)
@@ -49,7 +49,7 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
  *   simple as possible.
  * Must be called with preemption disabled.
  */
-static void __resctrl_sched_in(void)
+static inline void __resctrl_sched_in(struct task_struct *tsk)
 {
        struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
        u32 closid = state->default_closid;
@@ -61,13 +61,13 @@ static void __resctrl_sched_in(void)
         * Else use the closid/rmid assigned to this cpu.
         */
        if (static_branch_likely(&rdt_alloc_enable_key)) {
-               tmp = READ_ONCE(current->closid);
+               tmp = READ_ONCE(tsk->closid);
                if (tmp)
                        closid = tmp;
        }
 
        if (static_branch_likely(&rdt_mon_enable_key)) {
-               tmp = READ_ONCE(current->rmid);
+               tmp = READ_ONCE(tsk->rmid);
                if (tmp)
                        rmid = tmp;
        }
@@ -88,17 +88,17 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
        return val * scale;
 }
 
-static inline void resctrl_sched_in(void)
+static inline void resctrl_sched_in(struct task_struct *tsk)
 {
        if (static_branch_likely(&rdt_enable_key))
-               __resctrl_sched_in();
+               __resctrl_sched_in(tsk);
 }
 
 void resctrl_cpu_detect(struct cpuinfo_x86 *c);
 
 #else
 
-static inline void resctrl_sched_in(void) {}
+static inline void resctrl_sched_in(struct task_struct *tsk) {}
 static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}
 
 #endif /* CONFIG_X86_CPU_RESCTRL */
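
Passing the task explicitly matters because resctrl_sched_in() is called from __switch_to(), where current may still refer to the outgoing task rather than the one being scheduled in. A userspace sketch of the idea, with every kernel type replaced by a stand-in:

#include <stdio.h>

struct task_demo {
        int closid;
};

/* Stand-in for the kernel's "current"; during a context switch it may
 * still reference the task being switched away from. */
static struct task_demo *current_demo;

static void sched_in(struct task_demo *tsk)
{
        /* Program state for the task we are switching TO. */
        printf("programming closid %d\n", tsk->closid);
}

int main(void)
{
        struct task_demo prev = { .closid = 1 };
        struct task_demo next = { .closid = 7 };

        current_demo = &prev;   /* not yet updated at switch time */
        sched_in(&next);        /* correct: uses next's closid */
        return 0;
}
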
index 888731ccf1f67c0dafea4b7f63424b29e299508a..c1e14cee0722d12fa01d1b4b77865125bedfc387 100644 (file)
@@ -85,25 +85,6 @@ char *strcpy(char *dest, const char *src);
 char *strcat(char *dest, const char *src);
 int strcmp(const char *cs, const char *ct);
 
-#if (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__))
-/*
- * For files that not instrumented (e.g. mm/slub.c) we
- * should use not instrumented version of mem* functions.
- */
-
-#undef memcpy
-#define memcpy(dst, src, len) __memcpy(dst, src, len)
-#undef memmove
-#define memmove(dst, src, len) __memmove(dst, src, len)
-#undef memset
-#define memset(s, c, n) __memset(s, c, n)
-
-#ifndef __NO_FORTIFY
-#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
-#endif
-
-#endif
-
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
 void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
index f66fbe6537dd7abac82b6bca5e6258f88a81d4b7..e7c71750b3093d591f79508758db3017ebc872b3 100644 (file)
@@ -15,7 +15,7 @@
        UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1
 .endm
 
-.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
+.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 signal=1
        .if \base == %rsp
                .if \indirect
                        .set sp_reg, ORC_REG_SP_INDIRECT
                .set type, UNWIND_HINT_TYPE_REGS
        .endif
 
-       UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type
+       UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type signal=\signal
 .endm
 
-.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0
-       UNWIND_HINT_REGS base=\base offset=\offset partial=1
+.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 signal=1
+       UNWIND_HINT_REGS base=\base offset=\offset partial=1 signal=\signal
 .endm
 
 .macro UNWIND_HINT_FUNC
@@ -67,7 +67,7 @@
 #else
 
 #define UNWIND_HINT_FUNC \
-       UNWIND_HINT(ORC_REG_SP, 8, UNWIND_HINT_TYPE_FUNC, 0)
+       UNWIND_HINT(ORC_REG_SP, 8, UNWIND_HINT_TYPE_FUNC, 0, 0)
 
 #endif /* __ASSEMBLY__ */
 
index cf81848b72f4544ff963936fc0acc72f6c3460a9..f9d060e71c3eecfc77c7acda99f8e4a4e4b2d27f 100644 (file)
@@ -1133,14 +1133,18 @@ spectre_v2_parse_user_cmdline(void)
        return SPECTRE_V2_USER_CMD_AUTO;
 }
 
-static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
+static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
 {
-       return mode == SPECTRE_V2_IBRS ||
-              mode == SPECTRE_V2_EIBRS ||
+       return mode == SPECTRE_V2_EIBRS ||
               mode == SPECTRE_V2_EIBRS_RETPOLINE ||
               mode == SPECTRE_V2_EIBRS_LFENCE;
 }
 
+static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
+{
+       return spectre_v2_in_eibrs_mode(mode) || mode == SPECTRE_V2_IBRS;
+}
+
 static void __init
 spectre_v2_user_select_mitigation(void)
 {
@@ -1203,12 +1207,19 @@ spectre_v2_user_select_mitigation(void)
        }
 
        /*
-        * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
-        * STIBP is not required.
+        * If STIBP is unsupported, enhanced IBRS is enabled, or SMT is
+        * impossible, STIBP is not required.
+        *
+        * Enhanced IBRS also protects against cross-thread branch target
+        * injection in user mode, as the IBRS bit remains set at all times,
+        * which implicitly enables cross-thread protection.  However, in
+        * legacy IBRS mode the IBRS bit is set only on kernel entry and
+        * cleared on return to userspace. That disables the implicit
+        * cross-thread protection, so allow STIBP to be selected here.
         */
        if (!boot_cpu_has(X86_FEATURE_STIBP) ||
            !smt_possible ||
-           spectre_v2_in_ibrs_mode(spectre_v2_enabled))
+           spectre_v2_in_eibrs_mode(spectre_v2_enabled))
                return;
 
        /*
@@ -2340,7 +2351,7 @@ static ssize_t mmio_stale_data_show_state(char *buf)
 
 static char *stibp_state(void)
 {
-       if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
+       if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
                return "";
 
        switch (spectre_v2_user_stibp) {
index e2c1599d1b373d8ff9fb4d0af5e1e6d4787bfded..884b6e9a7e31c36281afe21b2f507f9b223781b2 100644 (file)
@@ -314,7 +314,7 @@ static void update_cpu_closid_rmid(void *info)
         * executing task might have its own closid selected. Just reuse
         * the context switch code.
         */
-       resctrl_sched_in();
+       resctrl_sched_in(current);
 }
 
 /*
@@ -530,7 +530,7 @@ static void _update_task_closid_rmid(void *task)
         * Otherwise, the MSR is updated when the task is scheduled in.
         */
        if (task == current)
-               resctrl_sched_in();
+               resctrl_sched_in(task);
 }
 
 static void update_task_closid_rmid(struct task_struct *t)
index 470c128759eab3c82b05539f13e204354452cdc1..708c87b88cc150ee64145de90de938e0482afa3f 100644 (file)
@@ -212,7 +212,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        switch_fpu_finish();
 
        /* Load the Intel cache allocation PQR MSR. */
-       resctrl_sched_in();
+       resctrl_sched_in(next_p);
 
        return prev_p;
 }
index 4e34b3b68ebdc96a76c9bf265415ecac3a5314a8..bb65a68b4b49968c6d33ea9324b37c3a242e1458 100644 (file)
@@ -656,7 +656,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        }
 
        /* Load the Intel cache allocation PQR MSR. */
-       resctrl_sched_in();
+       resctrl_sched_in(next_p);
 
        return prev_p;
 }
index cdf6c6060170008d2e2bf18d6ba7d2703776dc29..37307b40f8daffdd78861ba9eb865b31689fd32a 100644 (file)
@@ -484,6 +484,8 @@ bool unwind_next_frame(struct unwind_state *state)
                goto the_end;
        }
 
+       state->signal = orc->signal;
+
        /* Find the previous frame's stack: */
        switch (orc->sp_reg) {
        case ORC_REG_SP:
@@ -563,7 +565,6 @@ bool unwind_next_frame(struct unwind_state *state)
                state->sp = sp;
                state->regs = NULL;
                state->prev_regs = NULL;
-               state->signal = false;
                break;
 
        case UNWIND_HINT_TYPE_REGS:
@@ -587,7 +588,6 @@ bool unwind_next_frame(struct unwind_state *state)
                state->regs = (struct pt_regs *)sp;
                state->prev_regs = NULL;
                state->full_regs = true;
-               state->signal = true;
                break;
 
        case UNWIND_HINT_TYPE_REGS_PARTIAL:
@@ -604,7 +604,6 @@ bool unwind_next_frame(struct unwind_state *state)
                        state->prev_regs = state->regs;
                state->regs = (void *)sp - IRET_FRAME_OFFSET;
                state->full_regs = false;
-               state->signal = true;
                break;
 
        default:
index 6fbe97c52c991e2f71cd917d1cbaa6b4bf9ffcf4..6825e146a62ff76535133fb62d75e81a2f37a9fa 100644 (file)
@@ -61,7 +61,7 @@ CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage
 #
 quiet_cmd_vdso = VDSO    $@
       cmd_vdso = $(CC) -nostdlib -o $@ \
-                      $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
+                      $(CC_FLAGS_LTO) $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
                       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
                 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
 
index 2112b8d146688834897dde9595af02ac89d2c327..ff0f3b4b6c45ed567173766b5d370e2c6fc392a0 100644 (file)
@@ -17,8 +17,10 @@ int __vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts)
 {
        long ret;
 
-       asm("syscall" : "=a" (ret) :
-               "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+       asm("syscall"
+               : "=a" (ret)
+               : "0" (__NR_clock_gettime), "D" (clock), "S" (ts)
+               : "rcx", "r11", "memory");
 
        return ret;
 }
@@ -29,8 +31,10 @@ int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
 {
        long ret;
 
-       asm("syscall" : "=a" (ret) :
-               "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+       asm("syscall"
+               : "=a" (ret)
+               : "0" (__NR_gettimeofday), "D" (tv), "S" (tz)
+               : "rcx", "r11", "memory");
 
        return ret;
 }
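
The added clobbers reflect what the syscall instruction itself does: the CPU stores the return RIP in %rcx and the saved RFLAGS in %r11, so a compiler that is not told about this may keep live values in those registers. A runnable x86-64 Linux sketch of the corrected constraint list:

#include <stdio.h>
#include <sys/syscall.h>
#include <time.h>

static long demo_clock_gettime(long clock, struct timespec *ts)
{
        long ret;

        /* "rcx" and "r11" are clobbered by the syscall instruction. */
        asm("syscall"
                : "=a" (ret)
                : "0" (SYS_clock_gettime), "D" (clock), "S" (ts)
                : "rcx", "r11", "memory");
        return ret;
}

int main(void)
{
        struct timespec ts;

        if (demo_clock_gettime(CLOCK_REALTIME, &ts) == 0)
                printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
        return 0;
}
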
index 2e421c0dad13a0fd972d53156f4ef742fe759f9e..fd11614bba4dc5f2c845c09695f167e800a2ae51 100644 (file)
@@ -772,6 +772,7 @@ static inline void bio_put_percpu_cache(struct bio *bio)
 
        if ((bio->bi_opf & REQ_POLLED) && !WARN_ON_ONCE(in_interrupt())) {
                bio->bi_next = cache->free_list;
+               bio->bi_bdev = NULL;
                cache->free_list = bio;
                cache->nr++;
        } else {
index 82b5b2c53f1ee8e91150a03d3b4fa0453cbfad06..9e5e0277a4d95a5e2f09b6cafaf4d8cd97ee5983 100644 (file)
@@ -858,10 +858,16 @@ EXPORT_SYMBOL(submit_bio);
  */
 int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
 {
-       struct request_queue *q = bdev_get_queue(bio->bi_bdev);
        blk_qc_t cookie = READ_ONCE(bio->bi_cookie);
+       struct block_device *bdev;
+       struct request_queue *q;
        int ret = 0;
 
+       bdev = READ_ONCE(bio->bi_bdev);
+       if (!bdev)
+               return 0;
+
+       q = bdev_get_queue(bdev);
        if (cookie == BLK_QC_T_NONE ||
            !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
                return 0;
@@ -930,7 +936,7 @@ int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob,
         */
        rcu_read_lock();
        bio = READ_ONCE(kiocb->private);
-       if (bio && bio->bi_bdev)
+       if (bio)
                ret = bio_poll(bio, iob, flags);
        rcu_read_unlock();
 
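
bio_poll() now loads bi_bdev exactly once and NULL-checks the snapshot, since the percpu-cache path above clears bi_bdev while the bio sits on the free list. A userspace sketch of the load-once pattern, with READ_ONCE() approximated by a volatile access and all block-layer types replaced by stand-ins:

#include <stdio.h>

#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))

struct bdev_demo { int queue_depth; };
struct bio_demo { struct bdev_demo *bi_bdev; };

static int poll_demo(struct bio_demo *bio)
{
        /* One load; the check and every later use see the same value. */
        struct bdev_demo *bdev = READ_ONCE(bio->bi_bdev);

        if (!bdev)              /* may have been cleared concurrently */
                return 0;
        return bdev->queue_depth;
}

int main(void)
{
        struct bdev_demo b = { .queue_depth = 32 };
        struct bio_demo bio = { .bi_bdev = &b };

        printf("%d\n", poll_demo(&bio));
        bio.bi_bdev = NULL;
        printf("%d\n", poll_demo(&bio));
        return 0;
}
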
index ff534e9d92dca26bd61ea521ed60ede455a132f0..4442c7a8511256988b95d17837ac4cdde131d695 100644 (file)
@@ -800,7 +800,11 @@ static void ioc_refresh_period_us(struct ioc *ioc)
        ioc_refresh_margins(ioc);
 }
 
-static int ioc_autop_idx(struct ioc *ioc)
+/*
+ * ioc->rqos.disk isn't initialized when this function is called from
+ * the init path.
+ */
+static int ioc_autop_idx(struct ioc *ioc, struct gendisk *disk)
 {
        int idx = ioc->autop_idx;
        const struct ioc_params *p = &autop[idx];
@@ -808,11 +812,11 @@ static int ioc_autop_idx(struct ioc *ioc)
        u64 now_ns;
 
        /* rotational? */
-       if (!blk_queue_nonrot(ioc->rqos.disk->queue))
+       if (!blk_queue_nonrot(disk->queue))
                return AUTOP_HDD;
 
        /* handle SATA SSDs w/ broken NCQ */
-       if (blk_queue_depth(ioc->rqos.disk->queue) == 1)
+       if (blk_queue_depth(disk->queue) == 1)
                return AUTOP_SSD_QD1;
 
        /* use one of the normal ssd sets */
@@ -901,14 +905,19 @@ static void ioc_refresh_lcoefs(struct ioc *ioc)
                    &c[LCOEF_WPAGE], &c[LCOEF_WSEQIO], &c[LCOEF_WRANDIO]);
 }
 
-static bool ioc_refresh_params(struct ioc *ioc, bool force)
+/*
+ * struct gendisk is required as an argument because ioc->rqos.disk
+ * is not properly initialized when called from the init path.
+ */
+static bool ioc_refresh_params_disk(struct ioc *ioc, bool force,
+                                   struct gendisk *disk)
 {
        const struct ioc_params *p;
        int idx;
 
        lockdep_assert_held(&ioc->lock);
 
-       idx = ioc_autop_idx(ioc);
+       idx = ioc_autop_idx(ioc, disk);
        p = &autop[idx];
 
        if (idx == ioc->autop_idx && !force)
@@ -939,6 +948,11 @@ static bool ioc_refresh_params(struct ioc *ioc, bool force)
        return true;
 }
 
+static bool ioc_refresh_params(struct ioc *ioc, bool force)
+{
+       return ioc_refresh_params_disk(ioc, force, ioc->rqos.disk);
+}
+
 /*
  * When an iocg accumulates too much vtime or gets deactivated, we throw away
  * some vtime, which lowers the overall device utilization. As the exact amount
@@ -2880,7 +2894,7 @@ static int blk_iocost_init(struct gendisk *disk)
 
        spin_lock_irq(&ioc->lock);
        ioc->autop_idx = AUTOP_INVALID;
-       ioc_refresh_params(ioc, true);
+       ioc_refresh_params_disk(ioc, true, disk);
        spin_unlock_irq(&ioc->lock);
 
        /*
index 1ac782fdc55c58c7356753ba4777f9cf6c076519..6460abdb24267cc054350b44781a4d386f8e1edf 100644 (file)
@@ -587,13 +587,6 @@ int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
 }
 EXPORT_SYMBOL(__blk_rq_map_sg);
 
-static inline unsigned int blk_rq_get_max_segments(struct request *rq)
-{
-       if (req_op(rq) == REQ_OP_DISCARD)
-               return queue_max_discard_segments(rq->q);
-       return queue_max_segments(rq->q);
-}
-
 static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
                                                  sector_t offset)
 {
index d3494a796ba8049da719215cc4351e24da6052d0..d0cb2ef18fe21dfa07fe4eec5c76bde6d6286f05 100644 (file)
@@ -3000,6 +3000,7 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
 {
        struct request_queue *q = rq->q;
        unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
+       unsigned int max_segments = blk_rq_get_max_segments(rq);
        blk_status_t ret;
 
        if (blk_rq_sectors(rq) > max_sectors) {
@@ -3026,9 +3027,9 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
         * original queue.
         */
        rq->nr_phys_segments = blk_recalc_rq_segments(rq);
-       if (rq->nr_phys_segments > queue_max_segments(q)) {
-               printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n",
-                       __func__, rq->nr_phys_segments, queue_max_segments(q));
+       if (rq->nr_phys_segments > max_segments) {
+               printk(KERN_ERR "%s: over max segments limit. (%u > %u)\n",
+                       __func__, rq->nr_phys_segments, max_segments);
                return BLK_STS_IOERR;
        }
 
index 614b575be899ba09b03e0d36c54d79b7d5aca1ed..fce9082384d65dfcdcdf49d2d4fe40ec49dc217e 100644 (file)
@@ -334,17 +334,12 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 {
        void __user *argp = (void __user *)arg;
        struct zone_report_args args;
-       struct request_queue *q;
        struct blk_zone_report rep;
        int ret;
 
        if (!argp)
                return -EINVAL;
 
-       q = bdev_get_queue(bdev);
-       if (!q)
-               return -ENXIO;
-
        if (!bdev_is_zoned(bdev))
                return -ENOTTY;
 
@@ -391,7 +386,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
                           unsigned int cmd, unsigned long arg)
 {
        void __user *argp = (void __user *)arg;
-       struct request_queue *q;
        struct blk_zone_range zrange;
        enum req_op op;
        int ret;
@@ -399,10 +393,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
        if (!argp)
                return -EINVAL;
 
-       q = bdev_get_queue(bdev);
-       if (!q)
-               return -ENXIO;
-
        if (!bdev_is_zoned(bdev))
                return -ENOTTY;
 
index 4c3b3325219a5b3424201d5e8897bc77bb5c3279..cc4e8873dfdea13b2fb9722b6718138768f75d03 100644 (file)
@@ -156,6 +156,13 @@ static inline bool blk_discard_mergable(struct request *req)
        return false;
 }
 
+static inline unsigned int blk_rq_get_max_segments(struct request *rq)
+{
+       if (req_op(rq) == REQ_OP_DISCARD)
+               return queue_max_discard_segments(rq->q);
+       return queue_max_segments(rq->q);
+}
+
 static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
                                                     enum req_op op)
 {
@@ -427,7 +434,7 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
 
 struct request_queue *blk_alloc_queue(int node_id);
 
-int disk_scan_partitions(struct gendisk *disk, fmode_t mode, void *owner);
+int disk_scan_partitions(struct gendisk *disk, fmode_t mode);
 
 int disk_alloc_events(struct gendisk *disk);
 void disk_add_events(struct gendisk *disk);
index d09d775c222a94c36c89fcba4426c0989b2183e7..3ee5577e15860ceb1221c609160cc6b5f56158f1 100644 (file)
@@ -356,9 +356,10 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action)
 }
 EXPORT_SYMBOL_GPL(disk_uevent);
 
-int disk_scan_partitions(struct gendisk *disk, fmode_t mode, void *owner)
+int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
 {
        struct block_device *bdev;
+       int ret = 0;
 
        if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN))
                return -EINVAL;
@@ -366,16 +367,29 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode, void *owner)
                return -EINVAL;
        if (disk->open_partitions)
                return -EBUSY;
-       /* Someone else has bdev exclusively open? */
-       if (disk->part0->bd_holder && disk->part0->bd_holder != owner)
-               return -EBUSY;
 
        set_bit(GD_NEED_PART_SCAN, &disk->state);
-       bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL);
+       /*
+        * If the device is already opened exclusively by the current thread,
+        * it's safe to scan partitions; otherwise, use bd_prepare_to_claim() to
+        * synchronize with other exclusive openers and other partition
+        * scanners.
+        */
+       if (!(mode & FMODE_EXCL)) {
+               ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions);
+               if (ret)
+                       return ret;
+       }
+
+       bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL);
        if (IS_ERR(bdev))
-               return PTR_ERR(bdev);
-       blkdev_put(bdev, mode);
-       return 0;
+               ret = PTR_ERR(bdev);
+       else
+               blkdev_put(bdev, mode);
+
+       if (!(mode & FMODE_EXCL))
+               bd_abort_claiming(disk->part0, disk_scan_partitions);
+       return ret;
 }
 
 /**
@@ -497,9 +511,14 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
                if (ret)
                        goto out_unregister_bdi;
 
+               /* Make sure the first partition scan will proceed */
+               if (get_capacity(disk) && !(disk->flags & GENHD_FL_NO_PART) &&
+                   !test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
+                       set_bit(GD_NEED_PART_SCAN, &disk->state);
+
                bdev_add(disk->part0, ddev->devt);
                if (get_capacity(disk))
-                       disk_scan_partitions(disk, FMODE_READ, NULL);
+                       disk_scan_partitions(disk, FMODE_READ);
 
                /*
                 * Announce the disk and partitions after all partitions are
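
The reworked disk_scan_partitions() closes a race with exclusive openers: a caller that does not already hold the device exclusively first claims part0, opens without FMODE_EXCL, and releases the claim when done. A sketch of that ordering with the block-layer calls stubbed out (every function below is an illustrative stand-in, not the real API):

#include <stdio.h>

/* Illustrative stand-ins for the claiming API. */
static int bd_prepare_to_claim_stub(void)
{
        puts("claimed part0");
        return 0;
}

static void bd_abort_claiming_stub(void)
{
        puts("released claim");
}

static int scan_partitions_demo(int opened_exclusively)
{
        /* Claim first, so other exclusive openers and scanners wait. */
        if (!opened_exclusively && bd_prepare_to_claim_stub())
                return -1;

        puts("open without FMODE_EXCL, scan, close");

        if (!opened_exclusively)
                bd_abort_claiming_stub();
        return 0;
}

int main(void)
{
        return scan_partitions_demo(0);
}
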
index 96617512982e57d98a6372400df94114fe4c2447..9c5f637ff153f8b0010a6812ab8565fdacfbc5fe 100644 (file)
@@ -467,10 +467,10 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
  * user space. Note the separate arg/argp parameters that are needed
  * to deal with the compat_ptr() conversion.
  */
-static int blkdev_common_ioctl(struct file *file, fmode_t mode, unsigned cmd,
-                              unsigned long arg, void __user *argp)
+static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
+                              unsigned int cmd, unsigned long arg,
+                              void __user *argp)
 {
-       struct block_device *bdev = I_BDEV(file->f_mapping->host);
        unsigned int max_sectors;
 
        switch (cmd) {
@@ -528,8 +528,7 @@ static int blkdev_common_ioctl(struct file *file, fmode_t mode, unsigned cmd,
                        return -EACCES;
                if (bdev_is_partition(bdev))
                        return -EINVAL;
-               return disk_scan_partitions(bdev->bd_disk, mode & ~FMODE_EXCL,
-                                           file);
+               return disk_scan_partitions(bdev->bd_disk, mode);
        case BLKTRACESTART:
        case BLKTRACESTOP:
        case BLKTRACETEARDOWN:
@@ -607,7 +606,7 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
                break;
        }
 
-       ret = blkdev_common_ioctl(file, mode, cmd, arg, argp);
+       ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp);
        if (ret != -ENOIOCTLCMD)
                return ret;
 
@@ -676,7 +675,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
                break;
        }
 
-       ret = blkdev_common_ioctl(file, mode, cmd, arg, argp);
+       ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp);
        if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl)
                ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg);
 
index 463873f61e01ea688a56b5f99efd5b30bc953126..c320093c14f1249e81937453adf1323bd3e3674b 100644 (file)
@@ -487,6 +487,8 @@ static int opal_discovery0_end(struct opal_dev *dev)
                        break;
                case FC_SINGLEUSER:
                        single_user = check_sum(body->features);
+                       if (single_user)
+                               dev->flags |= OPAL_FL_SUM_SUPPORTED;
                        break;
                case FC_GEOMETRY:
                        check_geometry(dev, body);
index a222bda7e15b0dcc46a787fd094cd25ffba900f6..7c9125df5a651cfaca6da86adc9d6932cd97fdf4 100644 (file)
@@ -439,6 +439,13 @@ static const struct dmi_system_id asus_laptop[] = {
                        DMI_MATCH(DMI_BOARD_NAME, "B2402CBA"),
                },
        },
+       {
+               .ident = "Asus ExpertBook B2402FBA",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_BOARD_NAME, "B2402FBA"),
+               },
+       },
        {
                .ident = "Asus ExpertBook B2502",
                .matches = {
index c7afce465a0710d0f57bfc37506857b6a8345830..e499c60c45791f8d8e02705502e2c1e8981ebd36 100644 (file)
@@ -384,29 +384,6 @@ static const struct acpi_device_id amd_hid_ids[] = {
        {}
 };
 
-static int lps0_prefer_amd(const struct dmi_system_id *id)
-{
-       pr_debug("Using AMD GUID w/ _REV 2.\n");
-       rev_id = 2;
-       return 0;
-}
-static const struct dmi_system_id s2idle_dmi_table[] __initconst = {
-       {
-               /*
-                * AMD Rembrandt based HP EliteBook 835/845/865 G9
-                * Contains specialized AML in AMD/_REV 2 path to avoid
-                * triggering a bug in Qualcomm WLAN firmware. This may be
-                * removed in the future if that firmware is fixed.
-                */
-               .callback = lps0_prefer_amd,
-               .matches = {
-                       DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
-                       DMI_MATCH(DMI_BOARD_NAME, "8990"),
-               },
-       },
-       {}
-};
-
 static int lps0_device_attach(struct acpi_device *adev,
                              const struct acpi_device_id *not_used)
 {
@@ -586,7 +563,6 @@ static const struct platform_s2idle_ops acpi_s2idle_ops_lps0 = {
 
 void __init acpi_s2idle_setup(void)
 {
-       dmi_check_system(s2idle_dmi_table);
        acpi_scan_add_handler(&lps0_handler);
        s2idle_set_ops(&acpi_s2idle_ops_lps0);
 }
index 4e816bb402f68cce61dd4272a3b57310515e1155..e45285d4e62a423532414f2c052a6af963c39230 100644 (file)
@@ -200,39 +200,28 @@ bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *s
  * a hardcoded allowlist for D3 support, which was used for these platforms.
  *
  * This allows quirking on Linux in a similar fashion.
+ *
+ * Cezanne systems shouldn't *normally* need this as the BIOS includes
+ * StorageD3Enable.  But it has been added for two reasons:
+ * 1) The BIOS on a number of Dell systems uses the same _ADR value for
+ *    the ACPI nodes GPP1.DEV0 and GPP1.NVME, which is ambiguous.
+ *    GPP1.NVME is needed to get the StorageD3Enable node set properly.
+ *    https://bugzilla.kernel.org/show_bug.cgi?id=216440
+ *    https://bugzilla.kernel.org/show_bug.cgi?id=216773
+ *    https://bugzilla.kernel.org/show_bug.cgi?id=217003
+ * 2) On at least one HP system StorageD3Enable is missing on the second
+ *    NVME disk in the system.
  */
 static const struct x86_cpu_id storage_d3_cpu_ids[] = {
        X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 96, NULL),  /* Renoir */
        X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 104, NULL), /* Lucienne */
-       {}
-};
-
-static const struct dmi_system_id force_storage_d3_dmi[] = {
-       {
-               /*
-                * _ADR is ambiguous between GPP1.DEV0 and GPP1.NVME
-                * but .NVME is needed to get StorageD3Enable node
-                * https://bugzilla.kernel.org/show_bug.cgi?id=216440
-                */
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 14 7425 2-in-1"),
-               }
-       },
-       {
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 16 5625"),
-               }
-       },
+       X86_MATCH_VENDOR_FAM_MODEL(AMD, 25, 80, NULL),  /* Cezanne */
        {}
 };
 
 bool force_storage_d3(void)
 {
-       const struct dmi_system_id *dmi_id = dmi_first_match(force_storage_d3_dmi);
-
-       return dmi_id || x86_match_cpu(storage_d3_cpu_ids);
+       return x86_match_cpu(storage_d3_cpu_ids);
 }
 
 /*
index 3bb9bb483fe37b7f4fed867c302661f0f2ab49a5..14a1c0d14916f928bbbd34f235af7f2a3b3febd2 100644 (file)
@@ -421,7 +421,6 @@ static const struct pci_device_id ahci_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, 0x34d3), board_ahci_low_power }, /* Ice Lake LP AHCI */
        { PCI_VDEVICE(INTEL, 0x02d3), board_ahci_low_power }, /* Comet Lake PCH-U AHCI */
        { PCI_VDEVICE(INTEL, 0x02d7), board_ahci_low_power }, /* Comet Lake PCH RAID */
-       { PCI_VDEVICE(INTEL, 0xa0d3), board_ahci_low_power }, /* Tiger Lake UP{3,4} AHCI */
 
        /* JMicron 360/1/3/5/6, match class to avoid IDE function */
        { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
index 8b2a0eb3f32a48a8aef0bbc73fe5d6bf62400c76..d56a5d508ccd7b4b6a4630b296dc0ef48dea4895 100644 (file)
@@ -322,8 +322,10 @@ fail1:
 static int hd44780_remove(struct platform_device *pdev)
 {
        struct charlcd *lcd = platform_get_drvdata(pdev);
+       struct hd44780_common *hdc = lcd->drvdata;
 
        charlcd_unregister(lcd);
+       kfree(hdc->hd44780);
        kfree(lcd->drvdata);
 
        kfree(lcd);
index cfe8615d5106f03014503bf3107b9132a0a30d9a..dd4b82d7510f68fbfaf6ef2d8656fd413a84f1b4 100644 (file)
@@ -62,7 +62,7 @@ static struct subsys_private *bus_to_subsys(const struct bus_type *bus)
        struct subsys_private *sp = NULL;
        struct kobject *kobj;
 
-       if (!bus)
+       if (!bus || !bus_kset)
                return NULL;
 
        spin_lock(&bus_kset->list_lock);
index e54a10b5dbd713d1ea1f29f4634f1af4be284b27..6878dfcbf0d60b1a3c0f9d2113fbfcbcc3b4d02c 100644 (file)
@@ -98,7 +98,7 @@ static int __fwnode_link_add(struct fwnode_handle *con,
 
        list_add(&link->s_hook, &sup->consumers);
        list_add(&link->c_hook, &con->suppliers);
-       pr_debug("%pfwP Linked as a fwnode consumer to %pfwP\n",
+       pr_debug("%pfwf Linked as a fwnode consumer to %pfwf\n",
                 con, sup);
 
        return 0;
@@ -122,7 +122,7 @@ int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup)
  */
 static void __fwnode_link_del(struct fwnode_link *link)
 {
-       pr_debug("%pfwP Dropping the fwnode link to %pfwP\n",
+       pr_debug("%pfwf Dropping the fwnode link to %pfwf\n",
                 link->consumer, link->supplier);
        list_del(&link->s_hook);
        list_del(&link->c_hook);
@@ -1062,7 +1062,7 @@ int device_links_check_suppliers(struct device *dev)
                if (!dev_is_best_effort(dev)) {
                        fwnode_ret = -EPROBE_DEFER;
                        dev_err_probe(dev, -EPROBE_DEFER,
-                                   "wait for supplier %pfwP\n", sup_fw);
+                                   "wait for supplier %pfwf\n", sup_fw);
                } else {
                        fwnode_ret = -EAGAIN;
                }
@@ -2046,9 +2046,9 @@ static int fw_devlink_create_devlink(struct device *con,
                        goto out;
                }
 
-               if (!device_link_add(con, sup_dev, flags)) {
-                       dev_err(con, "Failed to create device link with %s\n",
-                               dev_name(sup_dev));
+               if (con != sup_dev && !device_link_add(con, sup_dev, flags)) {
+                       dev_err(con, "Failed to create device link (0x%x) with %s\n",
+                               flags, dev_name(sup_dev));
                        ret = -EINVAL;
                }
 
index 5883e7634a2b70a63ffa15dff0d2947dbc9fed69..f37ad34c80ec486bda49b57e265299dbb37b7c60 100644 (file)
@@ -324,6 +324,7 @@ void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int vir
        struct platform_msi_priv_data *data = domain->host_data;
 
        msi_lock_descs(data->dev);
+       msi_domain_depopulate_descs(data->dev, virq, nr_irqs);
        irq_domain_free_irqs_common(domain, virq, nr_irqs);
        msi_free_msi_descs_range(data->dev, virq, virq + nr_irqs - 1);
        msi_unlock_descs(data->dev);
index 5f04235e4ff75f7cfe977827589a7029420d72a1..839373451c2b7dc8d2db845decfeca198fce0452 100644 (file)
@@ -977,13 +977,13 @@ loop_set_status_from_info(struct loop_device *lo,
                return -EINVAL;
        }
 
+       /* Avoid assigning overflow values */
+       if (info->lo_offset > LLONG_MAX || info->lo_sizelimit > LLONG_MAX)
+               return -EOVERFLOW;
+
        lo->lo_offset = info->lo_offset;
        lo->lo_sizelimit = info->lo_sizelimit;
 
-       /* loff_t vars have been assigned __u64 */
-       if (lo->lo_offset < 0 || lo->lo_sizelimit < 0)
-               return -EOVERFLOW;
-
        memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
        lo->lo_file_name[LO_NAME_SIZE-1] = 0;
        lo->lo_flags = info->lo_flags;
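
The reordering matters because converting a __u64 value above LLONG_MAX to the signed loff_t is implementation-defined, so the old assign-then-test-negative pattern relied on behavior C does not guarantee. A runnable sketch of the corrected order, with a typedef standing in for loff_t:

#include <limits.h>
#include <stdio.h>

typedef long long loff_demo_t;  /* stand-in for the kernel's loff_t */

static int set_offset(unsigned long long user_offset, loff_demo_t *out)
{
        if (user_offset > LLONG_MAX)    /* reject before converting */
                return -1;              /* the driver returns -EOVERFLOW */
        *out = (loff_demo_t)user_offset;
        return 0;
}

int main(void)
{
        loff_demo_t off;

        printf("%d\n", set_offset(1ULL << 62, &off));   /* 0: accepted */
        printf("%d\n", set_offset(~0ULL, &off));        /* -1: rejected */
        return 0;
}
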
index 1faca7e07a4d52b9330995f23a8f6eaa98c73998..5cb008b9700a01dbe7d067854d3b1b2b3dc7c89b 100644 (file)
@@ -5291,8 +5291,7 @@ static void rbd_dev_release(struct device *dev)
                module_put(THIS_MODULE);
 }
 
-static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
-                                          struct rbd_spec *spec)
+static struct rbd_device *__rbd_dev_create(struct rbd_spec *spec)
 {
        struct rbd_device *rbd_dev;
 
@@ -5337,9 +5336,6 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
        rbd_dev->dev.parent = &rbd_root_dev;
        device_initialize(&rbd_dev->dev);
 
-       rbd_dev->rbd_client = rbdc;
-       rbd_dev->spec = spec;
-
        return rbd_dev;
 }
 
@@ -5352,12 +5348,10 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
 {
        struct rbd_device *rbd_dev;
 
-       rbd_dev = __rbd_dev_create(rbdc, spec);
+       rbd_dev = __rbd_dev_create(spec);
        if (!rbd_dev)
                return NULL;
 
-       rbd_dev->opts = opts;
-
        /* get an id and fill in device name */
        rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0,
                                         minor_to_rbd_dev_id(1 << MINORBITS),
@@ -5374,6 +5368,10 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
        /* we have a ref from do_rbd_add() */
        __module_get(THIS_MODULE);
 
+       rbd_dev->rbd_client = rbdc;
+       rbd_dev->spec = spec;
+       rbd_dev->opts = opts;
+
        dout("%s rbd_dev %p dev_id %d\n", __func__, rbd_dev, rbd_dev->dev_id);
        return rbd_dev;
 
@@ -6735,7 +6733,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
                goto out_err;
        }
 
-       parent = __rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec);
+       parent = __rbd_dev_create(rbd_dev->parent_spec);
        if (!parent) {
                ret = -ENOMEM;
                goto out_err;
@@ -6745,8 +6743,8 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
         * Images related by parent/child relationships always share
         * rbd_client and spec/parent_spec, so bump their refcounts.
         */
-       __rbd_get_client(rbd_dev->rbd_client);
-       rbd_spec_get(rbd_dev->parent_spec);
+       parent->rbd_client = __rbd_get_client(rbd_dev->rbd_client);
+       parent->spec = rbd_spec_get(rbd_dev->parent_spec);
 
        __set_bit(RBD_DEV_FLAG_READONLY, &parent->flags);
 
index b9c759cef00e6ec6d3b444b69ff11a6d5b236755..d1d1c8d606c8d8e9c79f094ffc626e2909df7b14 100644 (file)
@@ -1271,9 +1271,6 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
                        __func__, cmd->cmd_op, ub_cmd->q_id, tag,
                        ub_cmd->result);
 
-       if (!(issue_flags & IO_URING_F_SQE128))
-               goto out;
-
        if (ub_cmd->q_id >= ub->dev_info.nr_hw_queues)
                goto out;
 
index ce3ccd172cc868d9f89ea2c5489a600ea08e2a0d..253f2ddb891308591db05ada2de72aa8703043da 100644 (file)
@@ -1311,7 +1311,7 @@ static void __cold try_to_generate_entropy(void)
                        /* Basic CPU round-robin, which avoids the current CPU. */
                        do {
                                cpu = cpumask_next(cpu, &timer_cpus);
-                               if (cpu == nr_cpumask_bits)
+                               if (cpu >= nr_cpu_ids)
                                        cpu = cpumask_first(&timer_cpus);
                        } while (cpu == smp_processor_id() && num_cpus > 1);
 
index 45c88894fd8e65ab7ebed2e48cd889202ff8a402..73c7643b26972b4f79fa6dc65ee745e48a9aee88 100644 (file)
@@ -1263,7 +1263,7 @@ static int __init amd_pstate_init(void)
         * with amd_pstate=passive or other modes in kernel command line
         */
        if (cppc_state == AMD_PSTATE_DISABLE) {
-               pr_debug("driver load is disabled, boot with specific mode to enable this\n");
+               pr_info("driver load is disabled, boot with specific mode to enable this\n");
                return -ENODEV;
        }
 
@@ -1353,4 +1353,3 @@ early_param("amd_pstate", amd_pstate_param);
 
 MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
 MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
-MODULE_LICENSE("GPL");
index c11d22fd84c3738584144377d2934b83d27c8f6a..021f423705e1b14a59d76e85170001cd483b1f59 100644 (file)
@@ -189,8 +189,8 @@ static int apple_soc_cpufreq_find_cluster(struct cpufreq_policy *policy,
        *info = match->data;
 
        *reg_base = of_iomap(args.np, 0);
-       if (IS_ERR(*reg_base))
-               return PTR_ERR(*reg_base);
+       if (!*reg_base)
+               return -ENOMEM;
 
        return 0;
 }
index cb4beec27555a0bdef8f1ddd49e9361ab8a79295..48a4613cef1e1e14a56cc2f07f0e40f4fcfba7f7 100644 (file)
@@ -3358,6 +3358,7 @@ static const struct x86_cpu_id intel_epp_balance_perf[] = {
         * AlderLake Mobile CPUs.
         */
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 102),
+       X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 32),
        {}
 };
 
@@ -3516,4 +3517,3 @@ early_param("intel_pstate", intel_pstate_setup);
 
 MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
 MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
-MODULE_LICENSE("GPL");
index 4a9b998a8d268e367402d677e8910fb9584f3970..12b1c8346243d35d9d1cf3451edf5fadc0ae5774 100644 (file)
 #include <crypto/xts.h>
 #include <asm/unaligned.h>
 #include <linux/dma-mapping.h>
+#include <linux/device.h>
+#include <linux/err.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 
 /*
  * crypto alg
@@ -1000,6 +1004,12 @@ static void aead_crypt_done(struct device *jrdev, u32 *desc, u32 err,
                crypto_finalize_aead_request(jrp->engine, req, ecode);
 }
 
+static inline u8 *skcipher_edesc_iv(struct skcipher_edesc *edesc)
+{
+       return PTR_ALIGN((u8 *)edesc->sec4_sg + edesc->sec4_sg_bytes,
+                        dma_get_cache_alignment());
+}
+
 static void skcipher_crypt_done(struct device *jrdev, u32 *desc, u32 err,
                                void *context)
 {
@@ -1027,8 +1038,7 @@ static void skcipher_crypt_done(struct device *jrdev, u32 *desc, u32 err,
         * This is used e.g. by the CTS mode.
         */
        if (ivsize && !ecode) {
-               memcpy(req->iv, (u8 *)edesc->sec4_sg + edesc->sec4_sg_bytes,
-                      ivsize);
+               memcpy(req->iv, skcipher_edesc_iv(edesc), ivsize);
 
                print_hex_dump_debug("dstiv  @" __stringify(__LINE__)": ",
                                     DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
@@ -1683,18 +1693,19 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
        /*
         * allocate space for base edesc and hw desc commands, link tables, IV
         */
-       aligned_size = ALIGN(ivsize, __alignof__(*edesc));
-       aligned_size += sizeof(*edesc) + desc_bytes + sec4_sg_bytes;
+       aligned_size = sizeof(*edesc) + desc_bytes + sec4_sg_bytes;
        aligned_size = ALIGN(aligned_size, dma_get_cache_alignment());
-       iv = kzalloc(aligned_size, flags);
-       if (!iv) {
+       aligned_size += ~(ARCH_KMALLOC_MINALIGN - 1) &
+                       (dma_get_cache_alignment() - 1);
+       aligned_size += ALIGN(ivsize, dma_get_cache_alignment());
+       edesc = kzalloc(aligned_size, flags);
+       if (!edesc) {
                dev_err(jrdev, "could not allocate extended descriptor\n");
                caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
                           0, 0, 0);
                return ERR_PTR(-ENOMEM);
        }
 
-       edesc = (void *)(iv + ALIGN(ivsize, __alignof__(*edesc)));
        edesc->src_nents = src_nents;
        edesc->dst_nents = dst_nents;
        edesc->mapped_src_nents = mapped_src_nents;
@@ -1706,6 +1717,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
 
        /* Make sure IV is located in a DMAable area */
        if (ivsize) {
+               iv = skcipher_edesc_iv(edesc);
                memcpy(iv, req->iv, ivsize);
 
                iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_BIDIRECTIONAL);
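
skcipher_edesc_iv() returns the first DMA-cache-aligned address after the S/G table, which is exactly the space the aligned_size arithmetic above reserves. A userspace sketch of the same placement, using a fixed 64-byte alignment in place of dma_get_cache_alignment() and a local alignment macro (both assumptions for illustration):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define DEMO_CACHE_ALIGN 64     /* stand-in for dma_get_cache_alignment() */
#define DEMO_PTR_ALIGN(p, a) \
        ((void *)(((uintptr_t)(p) + ((a) - 1)) & ~((uintptr_t)(a) - 1)))

struct edesc_demo {
        int sg_bytes;
        unsigned char sg_table[24];     /* variable-length tail in the driver */
};

int main(void)
{
        size_t ivsize = 16;
        struct edesc_demo *e = malloc(sizeof(*e) + DEMO_CACHE_ALIGN + ivsize);
        unsigned char *iv;

        if (!e)
                return 1;
        e->sg_bytes = sizeof(e->sg_table);

        /* The first cache-aligned byte after the S/G table holds the IV. */
        iv = DEMO_PTR_ALIGN(e->sg_table + e->sg_bytes, DEMO_CACHE_ALIGN);
        printf("iv is aligned: %d\n", (uintptr_t)iv % DEMO_CACHE_ALIGN == 0);

        free(e);
        return 0;
}
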
index 5e218bf20d5bb3bdb53c2b003788a99e721c8430..743ce50c14f2e076dbe222357b0611c3c2979552 100644 (file)
 #include "caamalg_desc.h"
 #include <crypto/xts.h>
 #include <asm/unaligned.h>
+#include <linux/device.h>
+#include <linux/err.h>
 #include <linux/dma-mapping.h>
 #include <linux/kernel.h>
+#include <linux/string.h>
 
 /*
  * crypto alg
@@ -1204,6 +1207,12 @@ static int ipsec_gcm_decrypt(struct aead_request *req)
                                           false);
 }
 
+static inline u8 *skcipher_edesc_iv(struct skcipher_edesc *edesc)
+{
+       return PTR_ALIGN((u8 *)&edesc->sgt[0] + edesc->qm_sg_bytes,
+                        dma_get_cache_alignment());
+}
+
 static void skcipher_done(struct caam_drv_req *drv_req, u32 status)
 {
        struct skcipher_edesc *edesc;
@@ -1236,8 +1245,7 @@ static void skcipher_done(struct caam_drv_req *drv_req, u32 status)
         * This is used e.g. by the CTS mode.
         */
        if (!ecode)
-               memcpy(req->iv, (u8 *)&edesc->sgt[0] + edesc->qm_sg_bytes,
-                      ivsize);
+               memcpy(req->iv, skcipher_edesc_iv(edesc), ivsize);
 
        qi_cache_free(edesc);
        skcipher_request_complete(req, ecode);
@@ -1259,6 +1267,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
        int dst_sg_idx, qm_sg_ents, qm_sg_bytes;
        struct qm_sg_entry *sg_table, *fd_sgt;
        struct caam_drv_ctx *drv_ctx;
+       unsigned int len;
 
        drv_ctx = get_drv_ctx(ctx, encrypt ? ENCRYPT : DECRYPT);
        if (IS_ERR(drv_ctx))
@@ -1319,9 +1328,12 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
                qm_sg_ents = 1 + pad_sg_nents(qm_sg_ents);
 
        qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
-       if (unlikely(ALIGN(ivsize, __alignof__(*edesc)) +
-                    offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes >
-                    CAAM_QI_MEMCACHE_SIZE)) {
+
+       len = offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes;
+       len = ALIGN(len, dma_get_cache_alignment());
+       len += ivsize;
+
+       if (unlikely(len > CAAM_QI_MEMCACHE_SIZE)) {
                dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
                        qm_sg_ents, ivsize);
                caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
@@ -1330,18 +1342,24 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
        }
 
        /* allocate space for base edesc, link tables and IV */
-       iv = qi_cache_alloc(flags);
-       if (unlikely(!iv)) {
+       edesc = qi_cache_alloc(flags);
+       if (unlikely(!edesc)) {
                dev_err(qidev, "could not allocate extended descriptor\n");
                caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
                           0, DMA_NONE, 0, 0);
                return ERR_PTR(-ENOMEM);
        }
 
-       edesc = (void *)(iv + ALIGN(ivsize, __alignof__(*edesc)));
+       edesc->src_nents = src_nents;
+       edesc->dst_nents = dst_nents;
+       edesc->qm_sg_bytes = qm_sg_bytes;
+       edesc->drv_req.app_ctx = req;
+       edesc->drv_req.cbk = skcipher_done;
+       edesc->drv_req.drv_ctx = drv_ctx;
 
        /* Make sure IV is located in a DMAable area */
        sg_table = &edesc->sgt[0];
+       iv = skcipher_edesc_iv(edesc);
        memcpy(iv, req->iv, ivsize);
 
        iv_dma = dma_map_single(qidev, iv, ivsize, DMA_BIDIRECTIONAL);
@@ -1353,13 +1371,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
                return ERR_PTR(-ENOMEM);
        }
 
-       edesc->src_nents = src_nents;
-       edesc->dst_nents = dst_nents;
        edesc->iv_dma = iv_dma;
-       edesc->qm_sg_bytes = qm_sg_bytes;
-       edesc->drv_req.app_ctx = req;
-       edesc->drv_req.cbk = skcipher_done;
-       edesc->drv_req.drv_ctx = drv_ctx;
 
        dma_to_qm_sg_one(sg_table, iv_dma, ivsize, 0);
        sg_to_qm_sg(req->src, req->cryptlen, sg_table + 1, 0);
index 4c52c9365558db85b92e422ddc32241df6804561..2ad2c10358563a865d6cf9cb088399cc590557b8 100644 (file)
@@ -8,7 +8,13 @@
  */
 
 #include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
 #include <linux/kthread.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 #include <soc/fsl/qman.h>
 
 #include "debugfs.h"
@@ -755,8 +761,8 @@ int caam_qi_init(struct platform_device *caam_pdev)
                napi_enable(irqtask);
        }
 
-       qi_cache = kmem_cache_create("caamqicache", CAAM_QI_MEMCACHE_SIZE, 0,
-                                    0, NULL);
+       qi_cache = kmem_cache_create("caamqicache", CAAM_QI_MEMCACHE_SIZE,
+                                    dma_get_cache_alignment(), 0, NULL);
        if (!qi_cache) {
                dev_err(qidev, "Can't allocate CAAM cache\n");
                free_rsp_fqs();
index 5341b6b242c3bdd0d6c1cd8d6ba7608fb1055d17..a82d36ea88e252c55225c06f049e88cf48cdafe7 100644 (file)
@@ -6,6 +6,7 @@ config DRM_AMDGPU
        select FW_LOADER
        select DRM_DISPLAY_DP_HELPER
        select DRM_DISPLAY_HDMI_HELPER
+       select DRM_DISPLAY_HDCP_HELPER
        select DRM_DISPLAY_HELPER
        select DRM_KMS_HELPER
        select DRM_SCHED
index 458362e4ea0112a9d4f1a241c6b9155d0f6989a4..d4196fcb85a08a364a4a7b235c30a62dde7fa8f8 100644 (file)
@@ -1073,6 +1073,9 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
            (pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
                return false;
 
+       if (adev->asic_type < CHIP_RAVEN)
+               return false;
+
        /*
         * If ACPI_FADT_LOW_POWER_S0 is not set in the FADT, it is generally
         * risky to do any special firmware-related preparations for entering
index 86fbb41382854008865ef0517c21e14bca8b4ca3..f5ffca24def4000f42043d4c09e5f05fe86f8876 100644 (file)
  * - 3.50.0 - Update AMDGPU_INFO_DEV_INFO IOCTL for minimum engine and memory clock
  *            Update AMDGPU_INFO_SENSOR IOCTL for PEAK_PSTATE engine and memory clock
  *   3.51.0 - Return the PCIe gen and lanes from the INFO ioctl
+ *   3.52.0 - Add AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD, add device_info fields:
+ *            tcp_cache_size, num_sqc_per_wgp, sqc_data_cache_size, sqc_inst_cache_size,
+ *            gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi
  */
 #define KMS_DRIVER_MAJOR       3
-#define KMS_DRIVER_MINOR       51
+#define KMS_DRIVER_MINOR       52
 #define KMS_DRIVER_PATCHLEVEL  0
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -921,7 +924,7 @@ module_param_named(reset_method, amdgpu_reset_method, int, 0444);
  * result in the GPU entering bad status when the number of total
  * faulty pages by ECC exceeds the threshold value.
  */
-MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default value), 0 = disable bad page retirement, -2 = ignore bad page threshold)");
+MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = driver sets threshold)");
 module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
 
 MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
@@ -2414,8 +2417,10 @@ static int amdgpu_pmops_suspend(struct device *dev)
 
        if (amdgpu_acpi_is_s0ix_active(adev))
                adev->in_s0ix = true;
-       else
+       else if (amdgpu_acpi_is_s3_active(adev))
                adev->in_s3 = true;
+       if (!adev->in_s0ix && !adev->in_s3)
+               return 0;
        return amdgpu_device_suspend(drm_dev, true);
 }
 
@@ -2436,6 +2441,9 @@ static int amdgpu_pmops_resume(struct device *dev)
        struct amdgpu_device *adev = drm_to_adev(drm_dev);
        int r;
 
+       if (!adev->in_s0ix && !adev->in_s3)
+               return 0;
+
        /* Avoids registers access if device is physically gone */
        if (!pci_device_is_present(adev->pdev))
                adev->no_hw_access = true;
index 86ec9d0d12c8eebf881f2680cdbb93c48207905c..de9e7a00bb1504a19f94a626e82fe784bdda73cd 100644 (file)
@@ -178,6 +178,8 @@ struct amdgpu_gfx_config {
        uint32_t num_sc_per_sh;
        uint32_t num_packer_per_sc;
        uint32_t pa_sc_tile_steering_override;
+       /* Whether texture coordinate truncation is conformant. */
+       bool ta_cntl2_truncate_coord_mode;
        uint64_t tcc_disabled_mask;
        uint32_t gc_num_tcp_per_sa;
        uint32_t gc_num_sdp_interface;
index 94f10ac0eef743007f2b02e164d60bbd686b84a4..12a6826caef47a5fc8a27fbf50a10ee2516024e7 100644 (file)
@@ -552,6 +552,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
        case IP_VERSION(10, 3, 2):
        case IP_VERSION(10, 3, 4):
        case IP_VERSION(10, 3, 5):
+       case IP_VERSION(10, 3, 6):
        /* VANGOGH */
        case IP_VERSION(10, 3, 1):
        /* YELLOW_CARP*/
index ca945055e683654a47385c31746873744faea295..0efb38539d70cc6dff5da3e222cf3b4a2ac3a25f 100644 (file)
@@ -808,6 +808,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                        dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
                if (amdgpu_is_tmz(adev))
                        dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ;
+               if (adev->gfx.config.ta_cntl2_truncate_coord_mode)
+                       dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD;
 
                vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
                vm_size -= AMDGPU_VA_RESERVED_SIZE;
@@ -865,6 +867,15 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                        adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 :
                        adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1;
 
+               dev_info->tcp_cache_size = adev->gfx.config.gc_tcp_l1_size;
+               dev_info->num_sqc_per_wgp = adev->gfx.config.gc_num_sqc_per_wgp;
+               dev_info->sqc_data_cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+               dev_info->sqc_inst_cache_size = adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+               dev_info->gl1c_cache_size = adev->gfx.config.gc_gl1c_size_per_instance *
+                                           adev->gfx.config.gc_gl1c_per_sa;
+               dev_info->gl2c_cache_size = adev->gfx.config.gc_gl2c_per_gpu;
+               dev_info->mall_size = adev->gmc.mall_size;
+
                ret = copy_to_user(out, dev_info,
                                   min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
                kfree(dev_info);
index 981010de0a2829a7e05fc343e8eed6e57900066f..e3e1ed4314dd65a84a918e4fc343a023abc713f5 100644 (file)
@@ -139,7 +139,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 
                if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
                        places[c].lpfn = visible_pfn;
-               else
+               else if (adev->gmc.real_vram_size != adev->gmc.visible_vram_size)
                        places[c].flags |= TTM_PL_FLAG_TOPDOWN;
 
                if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
index 15e601f09648606f85ec597a0171422055e8c09e..28fe6d94105409b9783985183ddae9606b1e7c84 100644 (file)
@@ -1683,7 +1683,7 @@ static int psp_hdcp_initialize(struct psp_context *psp)
        psp->hdcp_context.context.mem_context.shared_mem_size = PSP_HDCP_SHARED_MEM_SIZE;
        psp->hdcp_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
 
-       if (!psp->hdcp_context.context.initialized) {
+       if (!psp->hdcp_context.context.mem_context.shared_buf) {
                ret = psp_ta_init_shared_buf(psp, &psp->hdcp_context.context.mem_context);
                if (ret)
                        return ret;
@@ -1750,7 +1750,7 @@ static int psp_dtm_initialize(struct psp_context *psp)
        psp->dtm_context.context.mem_context.shared_mem_size = PSP_DTM_SHARED_MEM_SIZE;
        psp->dtm_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
 
-       if (!psp->dtm_context.context.initialized) {
+       if (!psp->dtm_context.context.mem_context.shared_buf) {
                ret = psp_ta_init_shared_buf(psp, &psp->dtm_context.context.mem_context);
                if (ret)
                        return ret;
@@ -1818,7 +1818,7 @@ static int psp_rap_initialize(struct psp_context *psp)
        psp->rap_context.context.mem_context.shared_mem_size = PSP_RAP_SHARED_MEM_SIZE;
        psp->rap_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
 
-       if (!psp->rap_context.context.initialized) {
+       if (!psp->rap_context.context.mem_context.shared_buf) {
                ret = psp_ta_init_shared_buf(psp, &psp->rap_context.context.mem_context);
                if (ret)
                        return ret;
index 6e543558386da3e09175541ce87f725813455dc7..63dfcc98152d57d3032a03513f7552275042891a 100644 (file)
@@ -176,7 +176,7 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
        if (amdgpu_bad_page_threshold != 0) {
                amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
                                         err_data.err_addr_cnt);
-               amdgpu_ras_save_bad_pages(adev);
+               amdgpu_ras_save_bad_pages(adev, NULL);
        }
 
        dev_warn(adev->dev, "WARNING: THIS IS ONLY FOR TEST PURPOSES AND WILL CORRUPT RAS EEPROM\n");
@@ -2084,22 +2084,32 @@ out:
 /*
  * write error record array to eeprom, the function should be
  * protected by recovery_lock
+ * new_cnt: newly added UE count, excluding reserved bad pages, can be NULL
  */
-int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+               unsigned long *new_cnt)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_err_handler_data *data;
        struct amdgpu_ras_eeprom_control *control;
        int save_count;
 
-       if (!con || !con->eh_data)
+       if (!con || !con->eh_data) {
+               if (new_cnt)
+                       *new_cnt = 0;
+
                return 0;
+       }
 
        mutex_lock(&con->recovery_lock);
        control = &con->eeprom_control;
        data = con->eh_data;
        save_count = data->count - control->ras_num_recs;
        mutex_unlock(&con->recovery_lock);
+
+       if (new_cnt)
+               *new_cnt = save_count / adev->umc.retire_unit;
+
        /* only new entries are saved */
        if (save_count > 0) {
                if (amdgpu_ras_eeprom_append(control,
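
The new out-parameter lets a caller learn how many uncorrectable errors were newly recorded: the save_count freshly appended EEPROM records divided by adev->umc.retire_unit (the number of pages retired per UE, added to struct amdgpu_umc below) gives the UE delta, while NULL remains valid for callers that do not need it. A minimal caller sketch under those assumptions:

	unsigned long new_ue_cnt = 0;

	/* UE delta wanted, e.g. to report newly retired memory pages */
	amdgpu_ras_save_bad_pages(adev, &new_ue_cnt);

	/* legacy callers simply pass NULL */
	amdgpu_ras_save_bad_pages(adev, NULL);
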
@@ -2186,11 +2196,12 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
        /*
         * Justification of value bad_page_cnt_threshold in ras structure
         *
-        * Generally, -1 <= amdgpu_bad_page_threshold <= max record length
-        * in eeprom, and introduce two scenarios accordingly.
+        * Generally, 0 <= amdgpu_bad_page_threshold <= max record length
+        * in eeprom, or amdgpu_bad_page_threshold == -2; two scenarios
+        * are introduced accordingly.
         *
         * Bad page retirement enablement:
-        *    - If amdgpu_bad_page_threshold = -1,
+        *    - If amdgpu_bad_page_threshold = -2,
         *      bad_page_cnt_threshold = typical value by formula.
         *
         *    - When the value from user is 0 < amdgpu_bad_page_threshold <
index f2ad999993f66c72873cd2db9d3612f0dce86359..ef38f4c93df0a664fc7ac014283db4470efa5ad5 100644 (file)
@@ -547,7 +547,8 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
                struct eeprom_table_record *bps, int pages);
 
-int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+               unsigned long *new_cnt);
 
 static inline enum ta_ras_block
 amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
index 2d9f3f4cd79e9275ed1729bb306e512b676154ce..2e08fce875217996f70c4cf0b9a7d581d759b007 100644 (file)
@@ -417,7 +417,8 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
-       if (!__is_ras_eeprom_supported(adev))
+       if (!__is_ras_eeprom_supported(adev) ||
+           !amdgpu_bad_page_threshold)
                return false;
 
        /* skip check eeprom table for VEGA20 Gaming */
@@ -428,10 +429,18 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
                        return false;
 
        if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) {
-               dev_warn(adev->dev, "This GPU is in BAD status.");
-               dev_warn(adev->dev, "Please retire it or set a larger "
-                        "threshold value when reloading driver.\n");
-               return true;
+               if (amdgpu_bad_page_threshold == -1) {
+                       dev_warn(adev->dev, "RAS records:%d exceed threshold:%d",
+                               con->eeprom_control.ras_num_recs, con->bad_page_cnt_threshold);
+                       dev_warn(adev->dev,
+                               "But GPU can be operated due to bad_page_threshold = -1.\n");
+                       return false;
+               } else {
+                       dev_warn(adev->dev, "This GPU is in BAD status.");
+                       dev_warn(adev->dev, "Please retire it or set a larger "
+                                "threshold value when reloading driver.\n");
+                       return true;
+               }
        }
 
        return false;
@@ -1191,8 +1200,8 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
                } else {
                        dev_err(adev->dev, "RAS records:%d exceed threshold:%d",
                                control->ras_num_recs, ras->bad_page_cnt_threshold);
-                       if (amdgpu_bad_page_threshold == -2) {
-                               dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -2.");
+                       if (amdgpu_bad_page_threshold == -1) {
+                               dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -1.");
                                res = 0;
                        } else {
                                *exceed_err_limit = true;
index 1c7fcb4f238089c888516d071f2349977a5cc736..1b8574bc4463d876852e5aa36a9241431d39860b 100644 (file)
@@ -68,7 +68,7 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
        if (amdgpu_bad_page_threshold != 0) {
                amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
                                                err_data.err_addr_cnt);
-               amdgpu_ras_save_bad_pages(adev);
+               amdgpu_ras_save_bad_pages(adev, NULL);
        }
 
 out:
@@ -147,7 +147,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
                        err_data->err_addr_cnt) {
                        amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
                                                err_data->err_addr_cnt);
-                       amdgpu_ras_save_bad_pages(adev);
+                       amdgpu_ras_save_bad_pages(adev, &(err_data->ue_count));
 
                        amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
 
index a6951160f13af64c37c9033cc6513034cb1cf35d..f2bf979af5883596169c12aba6db42ceaaa66f30 100644 (file)
@@ -74,6 +74,8 @@ struct amdgpu_umc {
 
        /* UMC register per channel offset */
        uint32_t channel_offs;
+       /* how many pages are retired in one UE */
+       uint32_t retire_unit;
        /* channel index table of interleaved memory */
        const uint32_t *channel_idx_tbl;
        struct ras_common_if *ras_if;
index 9fa1d814508a6cd42e31f3f0fa8b0150835af548..43d6a9d6a5384f886b4460a539bb97450cf1a0e2 100644 (file)
@@ -453,7 +453,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
                /* Limit maximum size to 2GiB due to SG table limitations */
                size = min(remaining_size, 2ULL << 30);
 
-               if (size >= (u64)pages_per_block << PAGE_SHIFT)
+               if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
+                               !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
                        min_block_size = (u64)pages_per_block << PAGE_SHIFT;
 
                cur_size = size;
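
The added condition is the standard power-of-two divisibility test: for a power-of-two block size B, size is an exact multiple of B iff (size & (B - 1)) == 0, so min_block_size is only raised when no undersized tail block would result. An illustrative helper (not driver code):

	#include <linux/types.h>

	/* True when size is an exact multiple of block_size;
	 * block_size must be a power of two. */
	static bool is_pow2_multiple(u64 size, u64 block_size)
	{
		return (size & (block_size - 1)) == 0;
	}

For example, with 2 MiB blocks an 8 MiB request keeps the large minimum block size, while a 5 MiB request falls back to smaller blocks.
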
index 8ad8a0bffcacbf3f189a3689713ca7a84fcdd129..3bf697a80cf2fc30a4dcc49e154ee87b5ffe4370 100644 (file)
@@ -1503,44 +1503,70 @@ static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
        WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
 }
 
-static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
 {
-       u32 data, mask;
+       u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
+
+       gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
+       gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
+                                          CC_GC_SA_UNIT_DISABLE,
+                                          SA_DISABLE);
+       gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE);
+       gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
+                                                GC_USER_SA_UNIT_DISABLE,
+                                                SA_DISABLE);
+       sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+                                           adev->gfx.config.max_shader_engines);
 
-       data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
-       data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
+       return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
+}
 
-       data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
-       data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+       u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
+       u32 rb_mask;
 
-       mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
-                                        adev->gfx.config.max_sh_per_se);
+       gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
+       gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
+                                           CC_RB_BACKEND_DISABLE,
+                                           BACKEND_DISABLE);
+       gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
+       gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
+                                                GC_USER_RB_BACKEND_DISABLE,
+                                                BACKEND_DISABLE);
+       rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
+                                           adev->gfx.config.max_shader_engines);
 
-       return (~data) & mask;
+       return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
 }
 
 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
 {
-       int i, j;
-       u32 data;
-       u32 active_rbs = 0;
-       u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
-                                       adev->gfx.config.max_sh_per_se;
+       u32 rb_bitmap_width_per_sa;
+       u32 max_sa;
+       u32 active_sa_bitmap;
+       u32 global_active_rb_bitmap;
+       u32 active_rb_bitmap = 0;
+       u32 i;
 
-       mutex_lock(&adev->grbm_idx_mutex);
-       for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
-               for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
-                       data = gfx_v11_0_get_rb_active_bitmap(adev);
-                       active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
-                                              rb_bitmap_width_per_sh);
-               }
+       /* query sa bitmap from SA_UNIT_DISABLE registers */
+       active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
+       /* query rb bitmap from RB_BACKEND_DISABLE registers */
+       global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
+
+       /* generate active rb bitmap according to active sa bitmap */
+       max_sa = adev->gfx.config.max_shader_engines *
+                adev->gfx.config.max_sh_per_se;
+       rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
+                                adev->gfx.config.max_sh_per_se;
+       for (i = 0; i < max_sa; i++) {
+               if (active_sa_bitmap & (1 << i))
+                       active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
        }
-       gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-       mutex_unlock(&adev->grbm_idx_mutex);
 
-       adev->gfx.config.backend_enable_mask = active_rbs;
-       adev->gfx.config.num_rbs = hweight32(active_rbs);
+       active_rb_bitmap |= global_active_rb_bitmap;
+       adev->gfx.config.backend_enable_mask = active_rb_bitmap;
+       adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
 }
 
 #define DEFAULT_SH_MEM_BASES   (0x6000)
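
The rework above replaces the per-SE/SA GRBM register walk with a pure computation: every active shader array contributes one contiguous group of render-backend bits, two per SA here (the hardcoded 0x3 mask). A generalized sketch with hypothetical parameter names:

	#include <linux/types.h>

	/* rb_per_sa is 2 on gfx11, which is where the driver's 0x3 comes from. */
	static u32 derive_rb_bitmap(u32 active_sa_bitmap, u32 max_sa, u32 rb_per_sa)
	{
		u32 rb_bitmap = 0;
		u32 i;

		for (i = 0; i < max_sa; i++)
			if (active_sa_bitmap & (1u << i))
				rb_bitmap |= ((1u << rb_per_sa) - 1) <<
					     (i * rb_per_sa);
		return rb_bitmap;
	}

With max_sa = 4, rb_per_sa = 2 and an SA bitmap of 0b1011, this yields 0b11001111: SAs 0, 1 and 3 each enable their pair of RBs.
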
@@ -1633,6 +1659,11 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
        gfx_v11_0_get_tcc_info(adev);
        adev->gfx.config.pa_sc_tile_steering_override = 0;
 
+       /* Set whether texture coordinate truncation is conformant. */
+       tmp = RREG32_SOC15(GC, 0, regTA_CNTL2);
+       adev->gfx.config.ta_cntl2_truncate_coord_mode =
+               REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE);
+
        /* XXX SH_MEM regs */
        /* where to put LDS, scratch, GPUVM in FSA64 space */
        mutex_lock(&adev->srbm_mutex);
index 7db1f1a7e33c305bf414af1bbb51f5c4e3155d60..ab2556ca984e1551be6f078d9056866e9f7e8a16 100644 (file)
@@ -692,6 +692,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM;
                adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
+               adev->umc.retire_unit = 1;
                adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0];
                adev->umc.ras = &umc_v8_7_ras;
                break;
index 0a31a341aa43b586c97e4ea9ea39522b6164e696..85e0afc3d4f7f336715629779c44170eb4ab7c8f 100644 (file)
@@ -570,6 +570,7 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.node_inst_num = adev->gmc.num_umc;
                adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
                adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET;
+               adev->umc.retire_unit = UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM;
                if (adev->umc.node_inst_num == 4)
                        adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl_ext0[0][0][0];
                else
index d65c6cea344515453a95c4fe35a21d71482d55af..b06170c00dfca9e174306f363a44e4b30b584e07 100644 (file)
@@ -1288,6 +1288,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
                adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
+               adev->umc.retire_unit = 1;
                adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
                adev->umc.ras = &umc_v6_1_ras;
                break;
@@ -1296,6 +1297,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
                adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
+               adev->umc.retire_unit = 1;
                adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
                adev->umc.ras = &umc_v6_1_ras;
                break;
@@ -1305,6 +1307,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
                adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
+               adev->umc.retire_unit = (UMC_V6_7_NA_MAP_PA_NUM * 2);
                if (!adev->gmc.xgmi.connected_to_cpu)
                        adev->umc.ras = &umc_v6_7_ras;
                if (1 & adev->smuio.funcs->get_die_id(adev))
index 31776b12e4c45e70fd570a19aa72d93685a16ec1..4b0d563c6522c7b050591c49f0cd89595b96d7d0 100644 (file)
@@ -382,6 +382,11 @@ static void nbio_v7_2_init_registers(struct amdgpu_device *adev)
                if (def != data)
                        WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3), data);
                break;
+       case IP_VERSION(7, 5, 1):
+               data = RREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2);
+               data &= ~RCC_DEV2_EPF0_STRAP2__STRAP_NO_SOFT_RESET_DEV2_F0_MASK;
+               WREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2, data);
+               fallthrough;
        default:
                def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL));
                data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL,
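
The new IP_VERSION(7, 5, 1) case clears the no-soft-reset strap and then deliberately drops into the default branch so the common PCIE_CONFIG_CNTL programming still runs; the fallthrough keyword documents that intent and silences -Wimplicit-fallthrough. The shape of the construct, reduced to hypothetical helpers:

	#include <linux/compiler_attributes.h>

	void do_extra_strap_setup(void);	/* hypothetical */
	void do_common_setup(void);		/* hypothetical */

	static void demo_init_registers(int version)
	{
		switch (version) {
		case 2:
			do_extra_strap_setup();
			fallthrough;	/* common path below must still run */
		default:
			do_common_setup();
			break;
		}
	}
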
index da394bc06bbaaf95837b048cc2146501940fa478..fb55e8cb9967ad9b2d2d6b6fb166fba9c211c26f 100644 (file)
@@ -209,6 +209,45 @@ static int umc_v8_10_swizzle_mode_na_to_pa(struct amdgpu_device *adev,
        return 0;
 }
 
+static void umc_v8_10_convert_error_address(struct amdgpu_device *adev,
+                                           struct ras_err_data *err_data, uint64_t err_addr,
+                                           uint32_t ch_inst, uint32_t umc_inst,
+                                           uint32_t node_inst, uint64_t mc_umc_status)
+{
+       uint64_t na_err_addr_base;
+       uint64_t na_err_addr, retired_page_addr;
+       uint32_t channel_index, addr_lsb, col = 0;
+       int ret = 0;
+
+       channel_index =
+               adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num *
+                                       adev->umc.channel_inst_num +
+                                       umc_inst * adev->umc.channel_inst_num +
+                                       ch_inst];
+
+       /* the lowest lsb bits should be ignored */
+       addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb);
+       err_addr &= ~((0x1ULL << addr_lsb) - 1);
+       na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT);
+
+       /* loop for all possibilities of [C6 C5] in normal address. */
+       for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) {
+               na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT);
+
+               /* Mapping normal error address to retired soc physical address. */
+               ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index,
+                                               na_err_addr, &retired_page_addr);
+               if (ret) {
+                       dev_err(adev->dev, "Failed to map pa from umc na.\n");
+                       break;
+               }
+               dev_info(adev->dev, "Error Address(PA): 0x%llx\n",
+                       retired_page_addr);
+               amdgpu_umc_fill_error_record(err_data, na_err_addr,
+                               retired_page_addr, channel_index, umc_inst);
+       }
+}
+
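
The helper factored out here exists because a UMC "normal address" leaves the two column bits [C6 C5] ambiguous: both bits are cleared to form a base address, and all four combinations are retired. A standalone illustration of the expansion, where C5_BIT stands in for UMC_V8_10_NA_C5_BIT (the real value lives in umc_v8_10.h):

	#include <linux/types.h>

	#define C5_BIT	14	/* hypothetical position of column bit C5 */

	/* Generate the four candidate addresses differing only in [C6 C5]. */
	static void expand_column_bits(u64 err_addr, u64 candidates[4])
	{
		u64 base = err_addr & ~(0x3ULL << C5_BIT);
		int col;

		for (col = 0; col < 4; col++)
			candidates[col] = base | ((u64)col << C5_BIT);
	}
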
 static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
                                         struct ras_err_data *err_data,
                                         uint32_t umc_reg_offset,
@@ -218,10 +257,7 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
 {
        uint64_t mc_umc_status_addr;
        uint64_t mc_umc_status, err_addr;
-       uint64_t mc_umc_addrt0, na_err_addr_base;
-       uint64_t na_err_addr, retired_page_addr;
-       uint32_t channel_index, addr_lsb, col = 0;
-       int ret = 0;
+       uint64_t mc_umc_addrt0;
 
        mc_umc_status_addr =
                SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
@@ -236,12 +272,6 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
                return;
        }
 
-       channel_index =
-               adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num *
-                                       adev->umc.channel_inst_num +
-                                       umc_inst * adev->umc.channel_inst_num +
-                                       ch_inst];
-
        /* calculate error address if ue error is detected */
        if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
            REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 &&
@@ -251,27 +281,8 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
                err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
                err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
 
-               /* the lowest lsb bits should be ignored */
-               addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb);
-               err_addr &= ~((0x1ULL << addr_lsb) - 1);
-               na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT);
-
-               /* loop for all possibilities of [C6 C5] in normal address. */
-               for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) {
-                       na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT);
-
-                       /* Mapping normal error address to retired soc physical address. */
-                       ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index,
-                                                       na_err_addr, &retired_page_addr);
-                       if (ret) {
-                               dev_err(adev->dev, "Failed to map pa from umc na.\n");
-                               break;
-                       }
-                       dev_info(adev->dev, "Error Address(PA): 0x%llx\n",
-                               retired_page_addr);
-                       amdgpu_umc_fill_error_record(err_data, na_err_addr,
-                                       retired_page_addr, channel_index, umc_inst);
-               }
+               umc_v8_10_convert_error_address(adev, err_data, err_addr,
+                                       ch_inst, umc_inst, node_inst, mc_umc_status);
        }
 
        /* clear umc status */
@@ -349,6 +360,133 @@ static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device *adev)
        return true;
 }
 
+static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
+                                     uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst,
+                                     unsigned long *error_count)
+{
+       uint64_t mc_umc_status;
+       uint32_t eccinfo_table_idx;
+       struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+       eccinfo_table_idx = node_inst * adev->umc.umc_inst_num *
+                                 adev->umc.channel_inst_num +
+                                 umc_inst * adev->umc.channel_inst_num +
+                                 ch_inst;
+
+       /* check the MCUMC_STATUS */
+       mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+       if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+           REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
+               *error_count += 1;
+       }
+}
+
+static void umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_device *adev,
+                                     uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst,
+                                     unsigned long *error_count)
+{
+       uint64_t mc_umc_status;
+       uint32_t eccinfo_table_idx;
+       struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+       eccinfo_table_idx = node_inst * adev->umc.umc_inst_num *
+                                 adev->umc.channel_inst_num +
+                                 umc_inst * adev->umc.channel_inst_num +
+                                 ch_inst;
+
+       /* check the MCUMC_STATUS */
+       mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+       if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+           (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+           REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+           REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+           REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+           REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
+               *error_count += 1;
+       }
+}
+
+static void umc_v8_10_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
+                                       void *ras_error_status)
+{
+       struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+       uint32_t node_inst       = 0;
+       uint32_t umc_inst        = 0;
+       uint32_t ch_inst         = 0;
+
+       /* TODO: driver needs to toggle DF Cstate to ensure
+        * safe access of UMC registers. Will add the protection
+        * when firmware interface is ready
+        */
+       LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
+               umc_v8_10_ecc_info_query_correctable_error_count(adev,
+                                                       node_inst, umc_inst, ch_inst,
+                                                       &(err_data->ce_count));
+               umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
+                                                       node_inst, umc_inst, ch_inst,
+                                                       &(err_data->ue_count));
+       }
+}
+
+static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev,
+                                       struct ras_err_data *err_data,
+                                       uint32_t ch_inst,
+                                       uint32_t umc_inst,
+                                       uint32_t node_inst)
+{
+       uint32_t eccinfo_table_idx;
+       uint64_t mc_umc_status, err_addr;
+
+       struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+       eccinfo_table_idx = node_inst * adev->umc.umc_inst_num *
+                                 adev->umc.channel_inst_num +
+                                 umc_inst * adev->umc.channel_inst_num +
+                                 ch_inst;
+
+       mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+
+       if (mc_umc_status == 0)
+               return;
+
+       if (!err_data->err_addr)
+               return;
+
+       /* calculate error address if ue error is detected */
+       if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+           REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 &&
+           (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1)) {
+
+               err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+               err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+               umc_v8_10_convert_error_address(adev, err_data, err_addr,
+                                       ch_inst, umc_inst, node_inst, mc_umc_status);
+       }
+}
+
+static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
+                                       void *ras_error_status)
+{
+       struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+       uint32_t node_inst       = 0;
+       uint32_t umc_inst        = 0;
+       uint32_t ch_inst         = 0;
+
+       /* TODO: driver needs to toggle DF Cstate to ensure
+        * safe access of UMC registers. Will add the protection
+        * when firmware interface is ready
+        */
+       LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
+               umc_v8_10_ecc_info_query_error_address(adev,
+                                               err_data,
+                                               ch_inst,
+                                               umc_inst,
+                                               node_inst);
+       }
+}
+
 const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
        .query_ras_error_count = umc_v8_10_query_ras_error_count,
        .query_ras_error_address = umc_v8_10_query_ras_error_address,
@@ -360,4 +498,6 @@ struct amdgpu_umc_ras umc_v8_10_ras = {
        },
        .err_cnt_init = umc_v8_10_err_cnt_init,
        .query_ras_poison_mode = umc_v8_10_query_ras_poison_mode,
+       .ecc_info_query_ras_error_count = umc_v8_10_ecc_info_query_ras_error_count,
+       .ecc_info_query_ras_error_address = umc_v8_10_ecc_info_query_ras_error_address,
 };
index 22a41766a8c71cc7cbad317318bca108d82c1ba0..43d587404c3e1f33d3472bdec0bf7ae9221cede1 100644 (file)
@@ -78,9 +78,17 @@ static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev);
 static int vcn_v4_0_early_init(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       int i;
 
-       if (amdgpu_sriov_vf(adev))
+       if (amdgpu_sriov_vf(adev)) {
                adev->vcn.harvest_config = VCN_HARVEST_MMSCH;
+               for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+                       if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
+                               adev->vcn.harvest_config |= 1 << i;
+                               dev_info(adev->dev, "VCN%d is disabled by hypervisor\n", i);
+                       }
+               }
+       }
 
        /* re-use enc ring as unified ring */
        adev->vcn.num_enc_rings = 1;
@@ -238,16 +246,11 @@ static int vcn_v4_0_hw_init(void *handle)
                                continue;
 
                        ring = &adev->vcn.inst[i].ring_enc[0];
-                       if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
-                               ring->sched.ready = false;
-                               ring->no_scheduler = true;
-                               dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
-                       } else {
-                               ring->wptr = 0;
-                               ring->wptr_old = 0;
-                               vcn_v4_0_unified_ring_set_wptr(ring);
-                               ring->sched.ready = true;
-                       }
+                       ring->wptr = 0;
+                       ring->wptr_old = 0;
+                       vcn_v4_0_unified_ring_set_wptr(ring);
+                       ring->sched.ready = true;
+
                }
        } else {
                for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
index c06ada0844ba154bfe61782c46abef90b4e8af7d..7a95698d83f73e82519d3a92e65885997162bbc4 100644 (file)
@@ -2373,7 +2373,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
        if (init_mqd_managers(dqm))
                goto out_free;
 
-       if (allocate_hiq_sdma_mqd(dqm)) {
+       if (!dev->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
                pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
                goto out_free;
        }
@@ -2397,7 +2397,8 @@ static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
 void device_queue_manager_uninit(struct device_queue_manager *dqm)
 {
        dqm->ops.uninitialize(dqm);
-       deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
+       if (!dqm->dev->shared_resources.enable_mes)
+               deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
        kfree(dqm);
 }
 
index dd0436bf349a464f9529e5082742db9cc69d467b..c894cf8f7c50d468c6ef57a6a93b1d07dbb15975 100644 (file)
@@ -778,16 +778,13 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
        struct kfd_event_waiter *event_waiters;
        uint32_t i;
 
-       event_waiters = kmalloc_array(num_events,
-                                       sizeof(struct kfd_event_waiter),
-                                       GFP_KERNEL);
+       event_waiters = kcalloc(num_events, sizeof(struct kfd_event_waiter),
+                               GFP_KERNEL);
        if (!event_waiters)
                return NULL;
 
-       for (i = 0; (event_waiters) && (i < num_events) ; i++) {
+       for (i = 0; i < num_events; i++)
                init_wait(&event_waiters[i].wait);
-               event_waiters[i].activated = false;
-       }
 
        return event_waiters;
 }
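
kcalloc() zero-fills the allocation, so the explicit activated = false store and the redundant NULL re-check inside the old loop both become unnecessary; only state with a non-zero initial value (the waitqueue entry) still needs per-element setup. The same pattern in isolation, with hypothetical names:

	#include <linux/slab.h>
	#include <linux/types.h>
	#include <linux/wait.h>

	struct demo_waiter {
		struct wait_queue_entry wait;
		bool activated;	/* starts false courtesy of kcalloc() */
	};

	static struct demo_waiter *demo_alloc_waiters(u32 n)
	{
		struct demo_waiter *w = kcalloc(n, sizeof(*w), GFP_KERNEL);
		u32 i;

		if (!w)
			return NULL;
		for (i = 0; i < n; i++)
			init_wait(&w[i].wait);	/* non-zero init still required */
		return w;
	}
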
index 4f6390f3236ef17100a856ea8163c4f4a02bb3f2..4a9af800b1f1cea83a1fc2d707f06b1dede61cfc 100644 (file)
@@ -308,11 +308,16 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
                struct queue_properties *q)
 {
        struct v11_sdma_mqd *m;
+       int size;
 
        m = (struct v11_sdma_mqd *) mqd_mem_obj->cpu_ptr;
 
-       memset(m, 0, sizeof(struct v11_sdma_mqd));
+       if (mm->dev->shared_resources.enable_mes)
+               size = PAGE_SIZE;
+       else
+               size = sizeof(struct v11_sdma_mqd);
 
+       memset(m, 0, size);
        *mqd = m;
        if (gart_addr)
                *gart_addr = mqd_mem_obj->gpu_addr;
@@ -443,6 +448,14 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
 #if defined(CONFIG_DEBUG_FS)
                mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
 #endif
+               /*
+                * Use the generic MQD allocation functions for SDMA
+                * when MES is enabled.
+                */
+               if (dev->shared_resources.enable_mes) {
+                       mqd->allocate_mqd = allocate_mqd;
+                       mqd->free_mqd = kfd_free_mqd_cp;
+               }
                pr_debug("%s@%i\n", __func__, __LINE__);
                break;
        default:
index 2efe93f74f8401c60eb9da4d7e004411045a6921..0c9bd0a53e603f9674b1dd5877a3f8b45e4b49ef 100644 (file)
@@ -28,7 +28,6 @@ config DRM_AMD_DC_DCN
 config DRM_AMD_DC_HDCP
        bool "Enable HDCP support in DC"
        depends on DRM_AMD_DC
-       select DRM_DISPLAY_HDCP_HELPER
        help
          Choose this option if you want to support HDCP authentication.
 
index c420bce47acb591f859a88cb64f47879c479d280..009ef917dad47b3eb1df6c10d82663f7ccac35a6 100644 (file)
@@ -41,6 +41,8 @@
 #include "dpcd_defs.h"
 #include "link/protocols/link_dpcd.h"
 #include "link_service_types.h"
+#include "link/protocols/link_dp_capability.h"
+#include "link/protocols/link_ddc.h"
 
 #include "vid.h"
 #include "amdgpu.h"
@@ -2302,6 +2304,14 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
                if (suspend) {
                        drm_dp_mst_topology_mgr_suspend(mgr);
                } else {
+                       /* if extended timeout is supported in hardware,
+                        * default to LTTPR timeout (3.2ms) first as a W/A for DP link layer
+                        * CTS 4.2.1.1 regression introduced by CTS specs requirement update.
+                        */
+                       try_to_configure_aux_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD);
+                       if (!dp_is_lttpr_present(aconnector->dc_link))
+                               try_to_configure_aux_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD);
+
                        ret = drm_dp_mst_topology_mgr_resume(mgr, true);
                        if (ret < 0) {
                                dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
@@ -4265,6 +4275,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
        /* Update the actual used number of crtc */
        adev->mode_info.num_crtc = adev->dm.display_indexes_num;
 
+       amdgpu_dm_set_irq_funcs(adev);
+
        link_cnt = dm->dc->caps.max_links;
        if (amdgpu_dm_mode_config_init(dm->adev)) {
                DRM_ERROR("DM: Failed to initialize mode config\n");
@@ -4757,8 +4769,6 @@ static int dm_early_init(void *handle)
                break;
        }
 
-       amdgpu_dm_set_irq_funcs(adev);
-
        if (adev->mode_info.funcs == NULL)
                adev->mode_info.funcs = &dm_display_funcs;
 
@@ -7235,7 +7245,7 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
                drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16);
 
        /* This defaults to the max in the range, but we want 8bpc for non-edp. */
-       aconnector->base.state->max_bpc = (connector_type == DRM_MODE_CONNECTOR_eDP) ? 16 : 8;
+       aconnector->base.state->max_bpc = 16;
        aconnector->base.state->max_requested_bpc = aconnector->base.state->max_bpc;
 
        if (connector_type == DRM_MODE_CONNECTOR_eDP &&
index 6fdc2027c2b47646753fbbdbab99710c021e44b4..1583157da355b2c7c500ee4b9fb51e7298cd4e71 100644 (file)
@@ -1149,6 +1149,8 @@ static bool dm_is_freesync_pcon_whitelist(const uint32_t branch_dev_id)
 
        switch (branch_dev_id) {
        case DP_BRANCH_DEVICE_ID_0060AD:
+       case DP_BRANCH_DEVICE_ID_00E04C:
+       case DP_BRANCH_DEVICE_ID_90CC24:
                ret_val = true;
                break;
        default:
index fb3fd5b7c78b995ee7f0eaf411aa30e58063be41..0d4d3d586166d54c4f6559d999deeeeb3ba9559e 100644 (file)
@@ -779,10 +779,8 @@ void dce110_edp_wait_for_hpd_ready(
 
        dal_gpio_destroy_irq(&hpd);
 
-       if (false == edp_hpd_high) {
-               DC_LOG_WARNING(
-                               "%s: wait timed out!\n", __func__);
-       }
+       /* ensure that the panel is detected */
+       ASSERT(edp_hpd_high);
 }
 
 void dce110_edp_power_control(
index df787fcf8e86e06e17646d09f26cdb938d51a318..3b4d4d68359bb8f8165ea6b126a3fa6969a6246e 100644 (file)
@@ -998,5 +998,8 @@ void dcn30_prepare_bandwidth(struct dc *dc,
                        dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz);
 
        dcn20_prepare_bandwidth(dc, context);
+
+       dc_dmub_srv_p_state_delegate(dc,
+               context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching, context);
 }
 
index 379729b02847480afb93a5e7cfe1c8a9b4ae46da..c3d75e56410cc75113961941e23ae14f0c822343 100644 (file)
@@ -1802,7 +1802,10 @@ static unsigned int CalculateVMAndRowBytes(
        }
 
        if (SurfaceTiling == dm_sw_linear) {
-               *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+               if (PTEBufferSizeInRequests == 0)
+                       *dpte_row_height = 1;
+               else
+                       *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
                *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
                *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
        } else if (ScanDirection != dm_vert) {
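
The guard avoids handing dml_log2() a zero ratio when PTEBufferSizeInRequests is 0, which would otherwise produce a nonsensical shift count; a one-row height is the natural degenerate result. The computation in isolation, using plain C doubles for illustration (DML itself uses its dml_* math helpers):

	#include <math.h>

	/* Row height = min(128, largest power of two <= buf_reqs * req_width
	 * / pitch), with the zero-size buffer pinned to a single row. */
	static unsigned int demo_dpte_row_height(double buf_reqs,
						 double req_width, double pitch)
	{
		if (buf_reqs == 0)
			return 1;
		return (unsigned int)fmin(128,
			pow(2, floor(log2(buf_reqs * req_width / pitch))));
	}
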
index 86e9d2e886d6f3cc4ef6a4dd30fdc610cd287ce5..aaa5064408ba4500a1052ec35513b5aa80bc6ecd 100644 (file)
@@ -33,6 +33,7 @@
 #define DPVGA_DONGLE_AUX_DEFER_WA_DELAY 40
 #define I2C_OVER_AUX_DEFER_WA_DELAY_1MS 1
 #define LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD 3200 /*us*/
+#define LINK_AUX_DEFAULT_TIMEOUT_PERIOD 552 /*us*/
 
 #define EDID_SEGMENT_SIZE 256
 
index 4874d1bf1dcb01c2a926041edb2f952936d9c730..d4370856f164abd7a2773a24a30f3a8e28a65941 100644 (file)
@@ -60,8 +60,6 @@
 #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
 #endif
 
-#define LINK_AUX_DEFAULT_TIMEOUT_PERIOD 552 /*us*/
-
 struct dp_lt_fallback_entry {
        enum dc_lane_count lane_count;
        enum dc_link_rate link_rate;
index 834d146c4991fae99156113ef223858c95c57540..0652b001ad549606c48dc4d446ae4901f036b020 100644 (file)
@@ -1202,10 +1202,17 @@ static int smu_smc_hw_setup(struct smu_context *smu)
                return ret;
        }
 
-       ret = smu_setup_pptable(smu);
-       if (ret) {
-               dev_err(adev->dev, "Failed to setup pptable!\n");
-               return ret;
+       /*
+        * It is assumed that the pptable used before runpm is the same
+        * as the one used afterwards. Thus, we can reuse the stored copy
+        * and do not need to set up the pptable again.
+        */
+       if (!adev->in_runpm) {
+               ret = smu_setup_pptable(smu);
+               if (ret) {
+                       dev_err(adev->dev, "Failed to setup pptable!\n");
+                       return ret;
+               }
        }
 
        /* smu_dump_pptable(smu); */
index 6492d69e2e60fa4fd504dff8620f75b8f32c4dbf..e1ef88ee1ed393fb768934dab53c16ae3cff981a 100644 (file)
@@ -256,7 +256,7 @@ int smu_v11_0_check_fw_version(struct smu_context *smu)
         * to be backward compatible.
         * 2. New fw usually brings some optimizations. But that's visible
         * only on the paired driver.
-        * Considering above, we just leave user a warning message instead
+        * Considering above, we just leave user a verbal message instead
         * of halt driver loading.
         */
        if (if_version != smu->smc_driver_if_version) {
@@ -264,7 +264,7 @@ int smu_v11_0_check_fw_version(struct smu_context *smu)
                        "smu fw program = %d, version = 0x%08x (%d.%d.%d)\n",
                        smu->smc_driver_if_version, if_version,
                        smu_program, smu_version, smu_major, smu_minor, smu_debug);
-               dev_warn(smu->adev->dev, "SMU driver if version not matched\n");
+               dev_info(smu->adev->dev, "SMU driver if version not matched\n");
        }
 
        return ret;
index 56a02bc60ceeedd10daa4fdf5973439f3f1d1341..c788aa7a99a9e745d43edeabe2a5740d2e0d10ac 100644 (file)
@@ -93,7 +93,7 @@ int smu_v12_0_check_fw_version(struct smu_context *smu)
         * to be backward compatible.
         * 2. New fw usually brings some optimizations. But that's visible
         * only on the paired driver.
-        * Considering above, we just leave user a warning message instead
+        * Considering above, we just leave user a verbal message instead
         * of halt driver loading.
         */
        if (if_version != smu->smc_driver_if_version) {
@@ -101,7 +101,7 @@ int smu_v12_0_check_fw_version(struct smu_context *smu)
                        "smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n",
                        smu->smc_driver_if_version, if_version,
                        smu_program, smu_version, smu_major, smu_minor, smu_debug);
-               dev_warn(smu->adev->dev, "SMU driver if version not matched\n");
+               dev_info(smu->adev->dev, "SMU driver if version not matched\n");
        }
 
        return ret;
index 78945e79dbee15fb04401b5c64285cd0b45bbad4..a52ed0580fd7ef74373b9532be2420c64b6cc650 100644 (file)
@@ -311,7 +311,7 @@ int smu_v13_0_check_fw_version(struct smu_context *smu)
         * to be backward compatible.
         * 2. New fw usually brings some optimizations. But that's visible
         * only on the paired driver.
-        * Considering above, we just leave user a warning message instead
+        * Considering above, we just leave user a verbal message instead
         * of halt driver loading.
         */
        if (if_version != smu->smc_driver_if_version) {
@@ -319,7 +319,7 @@ int smu_v13_0_check_fw_version(struct smu_context *smu)
                         "smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n",
                         smu->smc_driver_if_version, if_version,
                         smu_program, smu_version, smu_major, smu_minor, smu_debug);
-               dev_warn(adev->dev, "SMU driver if version not matched\n");
+               dev_info(adev->dev, "SMU driver if version not matched\n");
        }
 
        return ret;
@@ -2229,10 +2229,23 @@ int smu_v13_0_gfx_ulv_control(struct smu_context *smu,
 int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
                                      enum smu_baco_seq baco_seq)
 {
-       return smu_cmn_send_smc_msg_with_param(smu,
-                                              SMU_MSG_ArmD3,
-                                              baco_seq,
-                                              NULL);
+       struct smu_baco_context *smu_baco = &smu->smu_baco;
+       int ret;
+
+       ret = smu_cmn_send_smc_msg_with_param(smu,
+                                             SMU_MSG_ArmD3,
+                                             baco_seq,
+                                             NULL);
+       if (ret)
+               return ret;
+
+       if (baco_seq == BACO_SEQ_BAMACO ||
+           baco_seq == BACO_SEQ_BACO)
+               smu_baco->state = SMU_BACO_STATE_ENTER;
+       else
+               smu_baco->state = SMU_BACO_STATE_EXIT;
+
+       return 0;
 }
 
 bool smu_v13_0_baco_is_support(struct smu_context *smu)
index 7c906ab3ddd2f68ff0dceddce63796c21bf47141..923a9fb3c8873cd4eb175710ec11887d08b27848 100644 (file)
@@ -147,6 +147,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
                            PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel,   0),
        MSG_MAP(AllowGpo,                       PPSMC_MSG_SetGpoAllow,           0),
        MSG_MAP(AllowIHHostInterrupt,           PPSMC_MSG_AllowIHHostInterrupt,       0),
+       MSG_MAP(ReenableAcDcInterrupt,          PPSMC_MSG_ReenableAcDcInterrupt,       0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
index 7e5c6a8d02123d80f9ff7349f4c1ce9496dd3af3..75185a960fc408f1042999e4c9b6c04baef6831b 100644 (file)
@@ -744,7 +744,7 @@ struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_shmem_object *shmem)
 
        return sgt;
 }
-EXPORT_SYMBOL(drm_gem_shmem_get_pages_sgt);
+EXPORT_SYMBOL_GPL(drm_gem_shmem_get_pages_sgt);
 
 /**
  * drm_gem_shmem_prime_import_sg_table - Produce a shmem GEM object from
index 3d1cd04ac5fab16070530b55ae58d11eef5b1514..98f4e44976e090562a9e0036eb0306fff80a719f 100644 (file)
@@ -118,9 +118,6 @@ config DRM_I915_USERPTR
 
          If in doubt, say "Y".
 
-config DRM_I915_GVT
-       bool
-
 config DRM_I915_GVT_KVMGT
        tristate "Enable KVM host support Intel GVT-g graphics virtualization"
        depends on DRM_I915
@@ -172,3 +169,6 @@ menu "drm/i915 Unstable Evolution"
        depends on DRM_I915
        source "drivers/gpu/drm/i915/Kconfig.unstable"
 endmenu
+
+config DRM_I915_GVT
+       bool
index 6e48d3bcdfec5acf4c1d582bf6f7b6cf95d46fdd..a280448df771a923dc4551eb758185abced27bc6 100644 (file)
@@ -199,6 +199,8 @@ static struct intel_quirk intel_quirks[] = {
        /* ECS Liva Q2 */
        { 0x3185, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
        { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
+       /* HP Notebook - 14-r206nv */
+       { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness },
 };
 
 void intel_init_quirks(struct drm_i915_private *i915)
index 169393a7ad88b7f0a48c98109d72d19b9c322689..3bb1c701d5ff9f77551ef81941ffa83d7646c42a 100644 (file)
@@ -559,12 +559,15 @@ static bool reg_needs_read_steering(struct intel_gt *gt,
                                    i915_mcr_reg_t reg,
                                    enum intel_steering_type type)
 {
-       const u32 offset = i915_mmio_reg_offset(reg);
+       u32 offset = i915_mmio_reg_offset(reg);
        const struct intel_mmio_range *entry;
 
        if (likely(!gt->steering_table[type]))
                return false;
 
+       if (IS_GSI_REG(offset))
+               offset += gt->uncore->gsi_offset;
+
        for (entry = gt->steering_table[type]; entry->end; entry++) {
                if (offset >= entry->start && offset <= entry->end)
                        return true;
index 15ec64d881c4464dc062d73cd3b654d6b169ab7f..fb99143be98e7327af823de10635563bccaca566 100644 (file)
@@ -53,7 +53,7 @@ int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww)
        if (unlikely(ret))
                goto err_unpin;
 
-       if (i915_vma_is_map_and_fenceable(vma)) {
+       if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) {
                addr = (void __force *)i915_vma_pin_iomap(vma);
        } else {
                int type = i915_coherent_map_type(vma->vm->i915, vma->obj, false);
@@ -98,7 +98,7 @@ void intel_ring_unpin(struct intel_ring *ring)
                return;
 
        i915_vma_unset_ggtt_write(vma);
-       if (i915_vma_is_map_and_fenceable(vma))
+       if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915))
                i915_vma_unpin_iomap(vma);
        else
                i915_gem_object_unpin_map(vma->obj);
@@ -116,7 +116,7 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
 
        obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE |
                                          I915_BO_ALLOC_PM_VOLATILE);
-       if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt))
+       if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt) && !HAS_LLC(i915))
                obj = i915_gem_object_create_stolen(i915, size);
        if (IS_ERR(obj))
                obj = i915_gem_object_create_internal(i915, size);
index 0616b73175f3e929e36682745c72e73c5b561da0..baccbf1761b77108ea2fdd327103b667f9e86289 100644 (file)
@@ -147,9 +147,9 @@ vgpu_scan_nonprivbb_set(void *data, u64 val)
        return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops,
-                       vgpu_scan_nonprivbb_get, vgpu_scan_nonprivbb_set,
-                       "0x%llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(vgpu_scan_nonprivbb_fops,
+                        vgpu_scan_nonprivbb_get, vgpu_scan_nonprivbb_set,
+                        "0x%llx\n");
 
 static int vgpu_status_get(void *data, u64 *val)
 {
@@ -165,7 +165,7 @@ static int vgpu_status_get(void *data, u64 *val)
        return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(vgpu_status_fops, vgpu_status_get, NULL, "0x%llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(vgpu_status_fops, vgpu_status_get, NULL, "0x%llx\n");
 
 /**
  * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU
@@ -180,10 +180,10 @@ void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu)
 
        debugfs_create_file("mmio_diff", 0444, vgpu->debugfs, vgpu,
                            &vgpu_mmio_diff_fops);
-       debugfs_create_file("scan_nonprivbb", 0644, vgpu->debugfs, vgpu,
-                           &vgpu_scan_nonprivbb_fops);
-       debugfs_create_file("status", 0644, vgpu->debugfs, vgpu,
-                           &vgpu_status_fops);
+       debugfs_create_file_unsafe("scan_nonprivbb", 0644, vgpu->debugfs, vgpu,
+                                  &vgpu_scan_nonprivbb_fops);
+       debugfs_create_file_unsafe("status", 0644, vgpu->debugfs, vgpu,
+                                  &vgpu_status_fops);
 }
 
 /**
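DEFINE_DEBUGFS_ATTRIBUTE() is meant to pair with debugfs_create_file_unsafe(): lifetime is handled through debugfs_file_get()/debugfs_file_put() rather than the heavier proxy fops that debugfs_create_file() wraps around a DEFINE_SIMPLE_ATTRIBUTE() attribute, which is what this hunk converts to. A minimal kernel-style sketch of the pattern (the my_* names are hypothetical):

#include <linux/debugfs.h>

static u64 my_value;

static int my_get(void *data, u64 *val) { *val = my_value; return 0; }
static int my_set(void *data, u64 val)  { my_value = val;  return 0; }

/* Generates fops intended for debugfs_create_file_unsafe(). */
DEFINE_DEBUGFS_ATTRIBUTE(my_fops, my_get, my_set, "0x%llx\n");

static void my_debugfs_init(struct dentry *parent)
{
	debugfs_create_file_unsafe("my_value", 0644, parent, NULL, &my_fops);
}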
index dce93738e98a470eb976bb4ceecfe485416883b1..4dd52ac2043e7ac4b3ee23ff6c2fa6e3dfbc693d 100644 (file)
@@ -171,7 +171,7 @@ static int verify_firmware(struct intel_gvt *gvt,
        mem = (fw->data + h->cfg_space_offset);
 
        id = *(u16 *)(mem + PCI_VENDOR_ID);
-       VERIFY("vender id", id, pdev->vendor);
+       VERIFY("vendor id", id, pdev->vendor);
 
        id = *(u16 *)(mem + PCI_DEVICE_ID);
        VERIFY("device id", id, pdev->device);
index 8ae7039b3683257d73a38cc1ee72299aea6b7c4d..de675d799c7d80ae767c97716b8afb953d220179 100644 (file)
@@ -699,7 +699,7 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
 
        clear_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
 
-       debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
+       debugfs_lookup_and_remove(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs);
 
        kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
                                           &vgpu->track_node);
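debugfs_lookup() returns the dentry with an extra reference that the old debugfs_remove(debugfs_lookup(...)) idiom never dropped; debugfs_lookup_and_remove() does the lookup, the removal, and the reference drop in one call. A short sketch of the before/after, assuming a kernel context:

#include <linux/debugfs.h>

static void remove_entry(struct dentry *parent)
{
	/* Old idiom: leaks the reference taken by debugfs_lookup():
	 *   debugfs_remove(debugfs_lookup("entry", parent));
	 */

	/* New idiom: looks up, removes, and releases the reference. */
	debugfs_lookup_and_remove("entry", parent);
}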
index a5497440484f129c1525c8e4ceb137eabcf3bb9c..08ad1bd651f103f4217bfd830321e8d64b7c211e 100644 (file)
@@ -323,7 +323,7 @@ int intel_gvt_create_vgpu(struct intel_vgpu *vgpu,
        ret = idr_alloc(&gvt->vgpu_idr, vgpu, IDLE_VGPU_IDR + 1, GVT_MAX_VGPU,
                GFP_KERNEL);
        if (ret < 0)
-               goto out_unlock;;
+               goto out_unlock;
 
        vgpu->id = ret;
        vgpu->sched_ctl.weight = conf->weight;
index c1356aff87da3dc64c205681993dfb67a07eeb15..d26aa52217ce198f1b99f4e86b6269c1053c9977 100644 (file)
@@ -136,7 +136,7 @@ static const struct drm_fb_helper_funcs msm_fb_helper_funcs = {
 struct drm_fb_helper *msm_fbdev_init(struct drm_device *dev)
 {
        struct msm_drm_private *priv = dev->dev_private;
-       struct msm_fbdev *fbdev = NULL;
+       struct msm_fbdev *fbdev;
        struct drm_fb_helper *helper;
        int ret;
 
index 84429728347f72beebebb996bb0b417264e53a3e..a6c8542087ecf5f76448f3f345c4dbc4b8bf8bd0 100644 (file)
@@ -233,7 +233,7 @@ void omap_fbdev_init(struct drm_device *dev)
 
        fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL);
        if (!fbdev)
-               goto fail;
+               return;
 
        INIT_WORK(&fbdev->work, pan_worker);
 
index 1471c3a966020af18e87c17808883aee3ec8ad19..4aca09cab4b8cd2ccf63070d9b2296b19fa44e80 100644 (file)
@@ -2123,11 +2123,12 @@ int radeon_atom_pick_dig_encoder(struct drm_encoder *encoder, int fe_idx)
 
        /*
         * On DCE32 any encoder can drive any block so usually just use crtc id,
-        * but Apple thinks different at least on iMac10,1, so there use linkb,
+        * but Apple thinks different at least on iMac10,1 and iMac11,2, so there use linkb,
         * otherwise the internal eDP panel will stay dark.
         */
        if (ASIC_IS_DCE32(rdev)) {
-               if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1"))
+               if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1") ||
+                   dmi_match(DMI_PRODUCT_NAME, "iMac11,2"))
                        enc_idx = (dig->linkb) ? 1 : 0;
                else
                        enc_idx = radeon_crtc->crtc_id;
index 842afc88a9496c382cb936fd24b78baef054e2bd..22623eb4f72f2979b8f282b85c7fb006faa47241 100644 (file)
@@ -256,6 +256,7 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
 {
        struct hid_report *report;
        struct hid_field *field;
+       unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE;
        unsigned int usages;
        unsigned int offset;
        unsigned int i;
@@ -286,8 +287,11 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
        offset = report->size;
        report->size += parser->global.report_size * parser->global.report_count;
 
+       if (parser->device->ll_driver->max_buffer_size)
+               max_buffer_size = parser->device->ll_driver->max_buffer_size;
+
        /* Total size check: Allow for possible report index byte */
-       if (report->size > (HID_MAX_BUFFER_SIZE - 1) << 3) {
+       if (report->size > (max_buffer_size - 1) << 3) {
                hid_err(parser->device, "report is too long\n");
                return -1;
        }
@@ -1963,6 +1967,7 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *
        struct hid_report_enum *report_enum = hid->report_enum + type;
        struct hid_report *report;
        struct hid_driver *hdrv;
+       int max_buffer_size = HID_MAX_BUFFER_SIZE;
        u32 rsize, csize = size;
        u8 *cdata = data;
        int ret = 0;
@@ -1978,10 +1983,13 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *
 
        rsize = hid_compute_report_size(report);
 
-       if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE)
-               rsize = HID_MAX_BUFFER_SIZE - 1;
-       else if (rsize > HID_MAX_BUFFER_SIZE)
-               rsize = HID_MAX_BUFFER_SIZE;
+       if (hid->ll_driver->max_buffer_size)
+               max_buffer_size = hid->ll_driver->max_buffer_size;
+
+       if (report_enum->numbered && rsize >= max_buffer_size)
+               rsize = max_buffer_size - 1;
+       else if (rsize > max_buffer_size)
+               rsize = max_buffer_size;
 
        if (csize < rsize) {
                dbg_hid("report %d is too short, (%d < %d)\n", report->id,
@@ -2396,7 +2404,12 @@ int hid_hw_raw_request(struct hid_device *hdev,
                       unsigned char reportnum, __u8 *buf,
                       size_t len, enum hid_report_type rtype, enum hid_class_request reqtype)
 {
-       if (len < 1 || len > HID_MAX_BUFFER_SIZE || !buf)
+       unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE;
+
+       if (hdev->ll_driver->max_buffer_size)
+               max_buffer_size = hdev->ll_driver->max_buffer_size;
+
+       if (len < 1 || len > max_buffer_size || !buf)
                return -EINVAL;
 
        return hdev->ll_driver->raw_request(hdev, reportnum, buf, len,
@@ -2415,7 +2428,12 @@ EXPORT_SYMBOL_GPL(hid_hw_raw_request);
  */
 int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len)
 {
-       if (len < 1 || len > HID_MAX_BUFFER_SIZE || !buf)
+       unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE;
+
+       if (hdev->ll_driver->max_buffer_size)
+               max_buffer_size = hdev->ll_driver->max_buffer_size;
+
+       if (len < 1 || len > max_buffer_size || !buf)
                return -EINVAL;
 
        if (hdev->ll_driver->output_report)
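These hunks let a HID low-level driver cap transfer sizes below the global HID_MAX_BUFFER_SIZE through ll_driver->max_buffer_size (uhid sets it to UHID_DATA_MAX further down). A standalone sketch of the clamping rule; 16384 is the hid.h default and 4096 mirrors uhid, but both are illustrative here:

#include <stdio.h>

#define HID_MAX_BUFFER_SIZE 16384        /* global default */

struct ll_driver {
	unsigned int max_buffer_size;    /* 0 means no override */
};

static unsigned int effective_max(const struct ll_driver *ll)
{
	return ll->max_buffer_size ? ll->max_buffer_size : HID_MAX_BUFFER_SIZE;
}

/* Numbered reports spend one byte on the report index. */
static unsigned int clamp_rsize(unsigned int rsize, int numbered,
				unsigned int max)
{
	if (numbered && rsize >= max)
		return max - 1;
	if (rsize > max)
		return max;
	return rsize;
}

int main(void)
{
	struct ll_driver uhid_like = { .max_buffer_size = 4096 };

	printf("%u\n", clamp_rsize(9000, 1, effective_max(&uhid_like))); /* 4095 */
	return 0;
}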
index 1e16b0fa310d16994b61845df5428fe007ac6693..27cadadda7c9d8c781f6d7d8575bc1fbf54a1e7b 100644 (file)
@@ -1354,6 +1354,7 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id)
        girq->parents = NULL;
        girq->default_type = IRQ_TYPE_NONE;
        girq->handler = handle_simple_irq;
+       girq->threaded = true;
 
        ret = gpiochip_add_data(&dev->gc, dev);
        if (ret < 0) {
index 25dcda76d6c7bb481c366bdb971b87827860f049..5fc88a063297856931ce625aba974af3c07bb7e5 100644 (file)
@@ -4399,6 +4399,8 @@ static const struct hid_device_id hidpp_devices[] = {
          HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb02a) },
        { /* MX Master 3 mouse over Bluetooth */
          HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb023) },
+       { /* MX Master 3S mouse over Bluetooth */
+         HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb034) },
        {}
 };
 
index 15e14239af829bd7574b6440e4a5b097eaca893f..a49c6affd7c4c48cdd09e3bdcca95139d0c066b8 100644 (file)
@@ -5,6 +5,7 @@
  * Copyright (c) 2014-2016, Intel Corporation.
  */
 
+#include <linux/devm-helpers.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/delay.h>
@@ -621,7 +622,6 @@ static void recv_ipc(struct ishtp_device *dev, uint32_t doorbell_val)
        case MNG_RESET_NOTIFY:
                if (!ishtp_dev) {
                        ishtp_dev = dev;
-                       INIT_WORK(&fw_reset_work, fw_reset_work_fn);
                }
                schedule_work(&fw_reset_work);
                break;
@@ -940,6 +940,7 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev)
 {
        struct ishtp_device *dev;
        int     i;
+       int     ret;
 
        dev = devm_kzalloc(&pdev->dev,
                           sizeof(struct ishtp_device) + sizeof(struct ish_hw),
@@ -975,6 +976,12 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev)
                list_add_tail(&tx_buf->link, &dev->wr_free_list);
        }
 
+       ret = devm_work_autocancel(&pdev->dev, &fw_reset_work, fw_reset_work_fn);
+       if (ret) {
+               dev_err(dev->devc, "Failed to initialise FW reset work\n");
+               return NULL;
+       }
+
        dev->ops = &ish_hw_ops;
        dev->devc = &pdev->dev;
        dev->mtu = IPC_PAYLOAD_SIZE - sizeof(struct ishtp_msg_hdr);
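devm_work_autocancel() from <linux/devm-helpers.h> combines INIT_WORK() with a device-managed action that cancels the work when the driver detaches, which is why the open-coded INIT_WORK() in the receive path can go away. A minimal sketch, assuming a probe-like context (names hypothetical):

#include <linux/device.h>
#include <linux/devm-helpers.h>
#include <linux/workqueue.h>

static struct work_struct fw_reset_work;

static void fw_reset_work_fn(struct work_struct *work)
{
	/* firmware recovery steps would run here */
}

static int example_probe(struct device *dev)
{
	/* INIT_WORK() plus automatic cancel_work_sync() on detach. */
	int ret = devm_work_autocancel(dev, &fw_reset_work, fw_reset_work_fn);

	if (ret)
		return ret;

	schedule_work(&fw_reset_work);
	return 0;
}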
index f161c95a1ad2e9bc65316a06898eafc27ec18a5e..4588d2cd4ea4444fdbb5bb5388d9eeffc6cc55eb 100644 (file)
@@ -395,6 +395,7 @@ static const struct hid_ll_driver uhid_hid_driver = {
        .parse = uhid_hid_parse,
        .raw_request = uhid_hid_raw_request,
        .output_report = uhid_hid_output_report,
+       .max_buffer_size = UHID_DATA_MAX,
 };
 
 #ifdef CONFIG_COMPAT
index 9b8e84f20604f436b77494b7ed45bbed7bc53bf5..25eb4e8fd22fdde6b3fbf0603757ec2fa38beaa1 100644 (file)
@@ -914,6 +914,7 @@ config I2C_PASEMI
 
 config I2C_APPLE
        tristate "Apple SMBus platform driver"
+       depends on !I2C_PASEMI
        depends on ARCH_APPLE || COMPILE_TEST
        default ARCH_APPLE
        help
index da4c8e5a803904ddab6e703d9491ae971c8a47f9..d4b55d989a268b261742c8be6b1ed0e5fe8ff004 100644 (file)
@@ -126,22 +126,13 @@ static int gxp_i2c_master_xfer(struct i2c_adapter *adapter,
        time_left = wait_for_completion_timeout(&drvdata->completion,
                                                adapter->timeout);
        ret = num - drvdata->msgs_remaining;
-       if (time_left == 0) {
-               switch (drvdata->state) {
-               case GXP_I2C_WDATA_PHASE:
-                       break;
-               case GXP_I2C_RDATA_PHASE:
-                       break;
-               case GXP_I2C_ADDR_PHASE:
-                       break;
-               default:
-                       break;
-               }
+       if (time_left == 0)
                return -ETIMEDOUT;
-       }
 
-       if (drvdata->state == GXP_I2C_ADDR_NACK ||
-           drvdata->state == GXP_I2C_DATA_NACK)
+       if (drvdata->state == GXP_I2C_ADDR_NACK)
+               return -ENXIO;
+
+       if (drvdata->state == GXP_I2C_DATA_NACK)
                return -EIO;
 
        return ret;
@@ -525,7 +516,7 @@ static int gxp_i2c_probe(struct platform_device *pdev)
                i2cg_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
                                                           "hpe,sysreg");
                if (IS_ERR(i2cg_map)) {
-                       return dev_err_probe(&pdev->dev, IS_ERR(i2cg_map),
+                       return dev_err_probe(&pdev->dev, PTR_ERR(i2cg_map),
                                             "failed to map i2cg_handle\n");
                }
 
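dev_err_probe() expects a negative errno as its second argument (and stays quiet for -EPROBE_DEFER); IS_ERR() only yields a boolean, so the old call returned 1 instead of the real error. PTR_ERR() extracts the encoded errno. A condensed sketch of the corrected pattern:

#include <linux/dev_printk.h>
#include <linux/err.h>
#include <linux/mfd/syscon.h>
#include <linux/platform_device.h>

static int map_sysreg(struct platform_device *pdev, struct regmap **out)
{
	struct regmap *map =
		syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
						"hpe,sysreg");

	if (IS_ERR(map))
		/* Logs (except for -EPROBE_DEFER) and returns the errno. */
		return dev_err_probe(&pdev->dev, PTR_ERR(map),
				     "failed to map i2cg_handle\n");

	*out = map;
	return 0;
}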
index 7a60e1c5e5874692dd79067f4b9fed36a3510e93..54e4c34b4a22074f4d9b75977f825a8fb3536b12 100644 (file)
@@ -1438,6 +1438,7 @@ static int i3c_master_early_i3c_dev_add(struct i3c_master_controller *master,
 {
        struct i3c_device_info info = {
                .static_addr = boardinfo->static_addr,
+               .pid = boardinfo->pid,
        };
        struct i3c_dev_desc *i3cdev;
        int ret;
index 51a8608203de78eac9af7899f33f994d69a8189e..48954d3e65714f4e9810aa12bfd5c501104d79f9 100644 (file)
@@ -531,7 +531,7 @@ static int dw_i3c_clk_cfg(struct dw_i3c_master *master)
        if (hcnt < SCL_I3C_TIMING_CNT_MIN)
                hcnt = SCL_I3C_TIMING_CNT_MIN;
 
-       lcnt = DIV_ROUND_UP(core_rate, I3C_BUS_TYP_I3C_SCL_RATE) - hcnt;
+       lcnt = DIV_ROUND_UP(core_rate, master->base.bus.scl_rate.i3c) - hcnt;
        if (lcnt < SCL_I3C_TIMING_CNT_MIN)
                lcnt = SCL_I3C_TIMING_CNT_MIN;
 
@@ -541,7 +541,8 @@ static int dw_i3c_clk_cfg(struct dw_i3c_master *master)
        if (!(readl(master->regs + DEVICE_CTRL) & DEV_CTRL_I2C_SLAVE_PRESENT))
                writel(BUS_I3C_MST_FREE(lcnt), master->regs + BUS_FREE_TIMING);
 
-       lcnt = DIV_ROUND_UP(I3C_BUS_TLOW_OD_MIN_NS, core_period);
+       lcnt = max_t(u8,
+                    DIV_ROUND_UP(I3C_BUS_TLOW_OD_MIN_NS, core_period), lcnt);
        scl_timing = SCL_I3C_TIMING_HCNT(hcnt) | SCL_I3C_TIMING_LCNT(lcnt);
        writel(scl_timing, master->regs + SCL_I3C_OD_TIMING);
 
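The first hunk derives the push-pull low count from the rate actually configured on the bus rather than the fixed typical rate; the second stops the open-drain low count from dropping below it, so a slower requested rate is still honoured. A worked example with assumed numbers (250 MHz core clock, 1 MHz requested SCL, 41 ns high period, 200 ns TLOW_OD minimum), runnable standalone:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long core_rate = 250000000;    /* Hz, assumed */
	unsigned long scl_rate = 1000000;       /* bus.scl_rate.i3c, assumed */
	unsigned long core_period = DIV_ROUND_UP(1000000000, core_rate); /* 4 ns */
	unsigned long hcnt, lcnt, lcnt_od;

	hcnt = DIV_ROUND_UP(41, core_period) - 1;            /* 10 cycles high */
	lcnt = DIV_ROUND_UP(core_rate, scl_rate) - hcnt;     /* 240 cycles low */

	/* Open-drain low: at least TLOW_OD, but never shorter than lcnt. */
	lcnt_od = DIV_ROUND_UP(200, core_period);            /* 50 cycles */
	if (lcnt > lcnt_od)
		lcnt_od = lcnt;                              /* the max_t() fix */

	printf("hcnt=%lu lcnt=%lu lcnt_od=%lu\n", hcnt, lcnt, lcnt_od);
	return 0;
}

Without the clamp the open-drain SCL would run at roughly core_rate / (hcnt + 50), about 4 MHz here, instead of the requested 1 MHz.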
index 75eaecc8639f00b32ad925167a41410792ad3a89..1de87062c67b9b54ca6664abeadefaf4971dbafb 100644 (file)
@@ -35,7 +35,6 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/mtd/ubi.h>
-#include <linux/workqueue.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
 #include <linux/hdreg.h>
@@ -62,7 +61,6 @@ struct ubiblock_param {
 };
 
 struct ubiblock_pdu {
-       struct work_struct work;
        struct ubi_sgl usgl;
 };
 
@@ -82,8 +80,6 @@ struct ubiblock {
        struct gendisk *gd;
        struct request_queue *rq;
 
-       struct workqueue_struct *wq;
-
        struct mutex dev_mutex;
        struct list_head list;
        struct blk_mq_tag_set tag_set;
@@ -181,20 +177,29 @@ static struct ubiblock *find_dev_nolock(int ubi_num, int vol_id)
        return NULL;
 }
 
-static int ubiblock_read(struct ubiblock_pdu *pdu)
+static blk_status_t ubiblock_read(struct request *req)
 {
-       int ret, leb, offset, bytes_left, to_read;
-       u64 pos;
-       struct request *req = blk_mq_rq_from_pdu(pdu);
+       struct ubiblock_pdu *pdu = blk_mq_rq_to_pdu(req);
        struct ubiblock *dev = req->q->queuedata;
+       u64 pos = blk_rq_pos(req) << 9;
+       int to_read = blk_rq_bytes(req);
+       int bytes_left = to_read;
+       /* Get LEB:offset address to read from */
+       int offset = do_div(pos, dev->leb_size);
+       int leb = pos;
+       struct req_iterator iter;
+       struct bio_vec bvec;
+       int ret;
 
-       to_read = blk_rq_bytes(req);
-       pos = blk_rq_pos(req) << 9;
+       blk_mq_start_request(req);
 
-       /* Get LEB:offset address to read from */
-       offset = do_div(pos, dev->leb_size);
-       leb = pos;
-       bytes_left = to_read;
+       /*
+        * It is safe to ignore the return value of blk_rq_map_sg() because
+        * the number of sg entries is limited to UBI_MAX_SG_COUNT
+        * and ubi_read_sg() will check that limit.
+        */
+       ubi_sgl_init(&pdu->usgl);
+       blk_rq_map_sg(req->q, req, pdu->usgl.sg);
 
        while (bytes_left) {
                /*
@@ -206,14 +211,17 @@ static int ubiblock_read(struct ubiblock_pdu *pdu)
 
                ret = ubi_read_sg(dev->desc, leb, &pdu->usgl, offset, to_read);
                if (ret < 0)
-                       return ret;
+                       break;
 
                bytes_left -= to_read;
                to_read = bytes_left;
                leb += 1;
                offset = 0;
        }
-       return 0;
+
+       rq_for_each_segment(bvec, req, iter)
+               flush_dcache_page(bvec.bv_page);
+       return errno_to_blk_status(ret);
 }
 
 static int ubiblock_open(struct block_device *bdev, fmode_t mode)
@@ -289,47 +297,15 @@ static const struct block_device_operations ubiblock_ops = {
        .getgeo = ubiblock_getgeo,
 };
 
-static void ubiblock_do_work(struct work_struct *work)
-{
-       int ret;
-       struct ubiblock_pdu *pdu = container_of(work, struct ubiblock_pdu, work);
-       struct request *req = blk_mq_rq_from_pdu(pdu);
-       struct req_iterator iter;
-       struct bio_vec bvec;
-
-       blk_mq_start_request(req);
-
-       /*
-        * It is safe to ignore the return value of blk_rq_map_sg() because
-        * the number of sg entries is limited to UBI_MAX_SG_COUNT
-        * and ubi_read_sg() will check that limit.
-        */
-       blk_rq_map_sg(req->q, req, pdu->usgl.sg);
-
-       ret = ubiblock_read(pdu);
-
-       rq_for_each_segment(bvec, req, iter)
-               flush_dcache_page(bvec.bv_page);
-
-       blk_mq_end_request(req, errno_to_blk_status(ret));
-}
-
 static blk_status_t ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
                             const struct blk_mq_queue_data *bd)
 {
-       struct request *req = bd->rq;
-       struct ubiblock *dev = hctx->queue->queuedata;
-       struct ubiblock_pdu *pdu = blk_mq_rq_to_pdu(req);
-
-       switch (req_op(req)) {
+       switch (req_op(bd->rq)) {
        case REQ_OP_READ:
-               ubi_sgl_init(&pdu->usgl);
-               queue_work(dev->wq, &pdu->work);
-               return BLK_STS_OK;
+               return ubiblock_read(bd->rq);
        default:
                return BLK_STS_IOERR;
        }
-
 }
 
 static int ubiblock_init_request(struct blk_mq_tag_set *set,
@@ -339,8 +315,6 @@ static int ubiblock_init_request(struct blk_mq_tag_set *set,
        struct ubiblock_pdu *pdu = blk_mq_rq_to_pdu(req);
 
        sg_init_table(pdu->usgl.sg, UBI_MAX_SG_COUNT);
-       INIT_WORK(&pdu->work, ubiblock_do_work);
-
        return 0;
 }
 
@@ -354,9 +328,12 @@ static int calc_disk_capacity(struct ubi_volume_info *vi, u64 *disk_capacity)
        u64 size = vi->used_bytes >> 9;
 
        if (vi->used_bytes % 512) {
-               pr_warn("UBI: block: volume size is not a multiple of 512, "
-                       "last %llu bytes are ignored!\n",
-                       vi->used_bytes - (size << 9));
+               if (vi->vol_type == UBI_DYNAMIC_VOLUME)
+                       pr_warn("UBI: block: volume size is not a multiple of 512, last %llu bytes are ignored!\n",
+                               vi->used_bytes - (size << 9));
+               else
+                       pr_info("UBI: block: volume size is not a multiple of 512, last %llu bytes are ignored!\n",
+                               vi->used_bytes - (size << 9));
        }
 
        if ((sector_t)size != size)
@@ -401,7 +378,7 @@ int ubiblock_create(struct ubi_volume_info *vi)
        dev->tag_set.ops = &ubiblock_mq_ops;
        dev->tag_set.queue_depth = 64;
        dev->tag_set.numa_node = NUMA_NO_NODE;
-       dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+       dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
        dev->tag_set.cmd_size = sizeof(struct ubiblock_pdu);
        dev->tag_set.driver_data = dev;
        dev->tag_set.nr_hw_queues = 1;
@@ -439,32 +416,20 @@ int ubiblock_create(struct ubi_volume_info *vi)
        dev->rq = gd->queue;
        blk_queue_max_segments(dev->rq, UBI_MAX_SG_COUNT);
 
-       /*
-        * Create one workqueue per volume (per registered block device).
-        * Remember workqueues are cheap, they're not threads.
-        */
-       dev->wq = alloc_workqueue("%s", 0, 0, gd->disk_name);
-       if (!dev->wq) {
-               ret = -ENOMEM;
-               goto out_remove_minor;
-       }
-
        list_add_tail(&dev->list, &ubiblock_devices);
 
        /* Must be the last step: anyone can call file ops from now on */
-       ret = add_disk(dev->gd);
+       ret = device_add_disk(vi->dev, dev->gd, NULL);
        if (ret)
-               goto out_destroy_wq;
+               goto out_remove_minor;
 
        dev_info(disk_to_dev(dev->gd), "created from ubi%d:%d(%s)",
                 dev->ubi_num, dev->vol_id, vi->name);
        mutex_unlock(&devices_mutex);
        return 0;
 
-out_destroy_wq:
-       list_del(&dev->list);
-       destroy_workqueue(dev->wq);
 out_remove_minor:
+       list_del(&dev->list);
        idr_remove(&ubiblock_minor_idr, gd->first_minor);
 out_cleanup_disk:
        put_disk(dev->gd);
@@ -482,8 +447,6 @@ static void ubiblock_cleanup(struct ubiblock *dev)
 {
        /* Stop new requests to arrive */
        del_gendisk(dev->gd);
-       /* Flush pending work */
-       destroy_workqueue(dev->wq);
        /* Finally destroy the blk queue */
        dev_info(disk_to_dev(dev->gd), "released");
        put_disk(dev->gd);
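Setting BLK_MQ_F_BLOCKING tells blk-mq that ->queue_rq() may sleep, so the read can run synchronously in queue context and the per-volume workqueue becomes unnecessary. A minimal sketch of that shape (not the ubiblock code; the request is completed inline and error handling is elided):

#include <linux/blk-mq.h>

static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
				const struct blk_mq_queue_data *bd)
{
	blk_status_t status;

	blk_mq_start_request(bd->rq);
	status = BLK_STS_OK;    /* a real driver does blocking I/O here */
	blk_mq_end_request(bd->rq, status);
	return BLK_STS_OK;
}

static const struct blk_mq_ops my_mq_ops = {
	.queue_rq = my_queue_rq,
};

static void my_tag_set_init(struct blk_mq_tag_set *set)
{
	set->ops = &my_mq_ops;
	/* BLK_MQ_F_BLOCKING: queue_rq may sleep; no private workqueue. */
	set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
}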
index a901f8edfa41d4c1228df067eee00af1fe3f3fbc..0904eb40c95fa133c1cdc4454beb6d4d3d90fc6a 100644 (file)
@@ -35,7 +35,7 @@
 #define MTD_PARAM_LEN_MAX 64
 
 /* Maximum number of comma-separated items in the 'mtd=' parameter */
-#define MTD_PARAM_MAX_COUNT 4
+#define MTD_PARAM_MAX_COUNT 5
 
 /* Maximum value for the number of bad PEBs per 1024 PEBs */
 #define MAX_MTD_UBI_BEB_LIMIT 768
  * @ubi_num: UBI number
  * @vid_hdr_offs: VID header offset
  * @max_beb_per1024: maximum expected number of bad PEBs per 1024 PEBs
+ * @enable_fm: enable fastmap when value is non-zero
  */
 struct mtd_dev_param {
        char name[MTD_PARAM_LEN_MAX];
        int ubi_num;
        int vid_hdr_offs;
        int max_beb_per1024;
+       int enable_fm;
 };
 
 /* Numbers of elements set in the @mtd_dev_param array */
@@ -468,6 +470,7 @@ static int uif_init(struct ubi_device *ubi)
                        err = ubi_add_volume(ubi, ubi->volumes[i]);
                        if (err) {
                                ubi_err(ubi, "cannot add volume %d", i);
+                               ubi->volumes[i] = NULL;
                                goto out_volumes;
                        }
                }
@@ -663,6 +666,12 @@ static int io_init(struct ubi_device *ubi, int max_beb_per1024)
        ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size);
        ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size);
 
+       if (ubi->vid_hdr_offset && ((ubi->vid_hdr_offset + UBI_VID_HDR_SIZE) >
+           ubi->vid_hdr_alsize)) {
+               ubi_err(ubi, "VID header offset %d too large.", ubi->vid_hdr_offset);
+               return -EINVAL;
+       }
+
        dbg_gen("min_io_size      %d", ubi->min_io_size);
        dbg_gen("max_write_size   %d", ubi->max_write_size);
        dbg_gen("hdrs_min_io_size %d", ubi->hdrs_min_io_size);
@@ -906,6 +915,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num,
        ubi->dev.release = dev_release;
        ubi->dev.class = &ubi_class;
        ubi->dev.groups = ubi_dev_groups;
+       ubi->dev.parent = &mtd->dev;
 
        ubi->mtd = mtd;
        ubi->ubi_num = ubi_num;
@@ -1248,7 +1258,7 @@ static int __init ubi_init(void)
                mutex_lock(&ubi_devices_mutex);
                err = ubi_attach_mtd_dev(mtd, p->ubi_num,
                                         p->vid_hdr_offs, p->max_beb_per1024,
-                                        false);
+                                        p->enable_fm == 0 ? true : false);
                mutex_unlock(&ubi_devices_mutex);
                if (err < 0) {
                        pr_err("UBI error: cannot attach mtd%d\n",
@@ -1427,7 +1437,7 @@ static int ubi_mtd_param_parse(const char *val, const struct kernel_param *kp)
                int err = kstrtoint(token, 10, &p->max_beb_per1024);
 
                if (err) {
-                       pr_err("UBI error: bad value for max_beb_per1024 parameter: %s",
+                       pr_err("UBI error: bad value for max_beb_per1024 parameter: %s\n",
                               token);
                        return -EINVAL;
                }
@@ -1438,13 +1448,25 @@ static int ubi_mtd_param_parse(const char *val, const struct kernel_param *kp)
                int err = kstrtoint(token, 10, &p->ubi_num);
 
                if (err) {
-                       pr_err("UBI error: bad value for ubi_num parameter: %s",
+                       pr_err("UBI error: bad value for ubi_num parameter: %s\n",
                               token);
                        return -EINVAL;
                }
        } else
                p->ubi_num = UBI_DEV_NUM_AUTO;
 
+       token = tokens[4];
+       if (token) {
+               int err = kstrtoint(token, 10, &p->enable_fm);
+
+               if (err) {
+                       pr_err("UBI error: bad value for enable_fm parameter: %s\n",
+                               token);
+                       return -EINVAL;
+               }
+       } else
+               p->enable_fm = 0;
+
        mtd_devs += 1;
        return 0;
 }
@@ -1457,11 +1479,13 @@ MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: mtd=<name|num|pa
                      "Optional \"max_beb_per1024\" parameter specifies the maximum expected bad eraseblock per 1024 eraseblocks. (default value ("
                      __stringify(CONFIG_MTD_UBI_BEB_LIMIT) ") if 0)\n"
                      "Optional \"ubi_num\" parameter specifies UBI device number which have to be assigned to the newly created UBI device (assigned automatically by default)\n"
+                     "Optional \"enable_fm\" parameter determines whether to enable fastmap during attach. If the value is non-zero, fastmap is enabled. Default value is 0.\n"
                      "\n"
                      "Example 1: mtd=/dev/mtd0 - attach MTD device /dev/mtd0.\n"
                      "Example 2: mtd=content,1984 mtd=4 - attach MTD device with name \"content\" using VID header offset 1984, and MTD device number 4 with default VID header offset.\n"
                      "Example 3: mtd=/dev/mtd1,0,25 - attach MTD device /dev/mtd1 using default VID header offset and reserve 25*nand_size_in_blocks/1024 erase blocks for bad block handling.\n"
                      "Example 4: mtd=/dev/mtd1,0,0,5 - attach MTD device /dev/mtd1 to UBI 5 and using default values for the other fields.\n"
+                     "Example 5: mtd=1,0,0,5 mtd=2,0,0,6,1 - attach MTD device /dev/mtd1 to UBI 5 and disable fastmap; attach MTD device /dev/mtd2 to UBI 6 and enable fastmap. (Only works when fastmap is enabled and fm_autoconvert=Y.)\n"
                      "\t(e.g. if the NAND *chipset* has 4096 PEB, 100 will be reserved for this UBI device).");
 #ifdef CONFIG_MTD_UBI_FASTMAP
 module_param(fm_autoconvert, bool, 0644);
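Among these build.c changes, io_init() now rejects a user-supplied vid_hdr_offset that would push the VID header past its alignment-padded reservation. The arithmetic, runnable standalone with illustrative sizes (the real UBI_VID_HDR_SIZE is 64 bytes; 512 assumes a flash whose minimal header I/O unit is 512 bytes):

#include <stdio.h>

#define ALIGN(x, a)      (((x) + (a) - 1) & ~((a) - 1))
#define UBI_VID_HDR_SIZE 64

int main(void)
{
	int hdrs_min_io_size = 512;   /* assumed */
	int vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, hdrs_min_io_size); /* 512 */
	int vid_hdr_offset = 480;     /* user-supplied via the mtd= parameter */

	/* The new sanity check from io_init(): 480 + 64 > 512 fails. */
	if (vid_hdr_offset && vid_hdr_offset + UBI_VID_HDR_SIZE > vid_hdr_alsize)
		printf("VID header offset %d too large.\n", vid_hdr_offset);
	return 0;
}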
index fcca6942dbdd0d7324c579c1d11e666ba1a06e26..27168f511d6d4602d857cad9f25b3b97417d2ba3 100644 (file)
@@ -504,6 +504,7 @@ int ubi_debugfs_init_dev(struct ubi_device *ubi)
 {
        unsigned long ubi_num = ubi->ubi_num;
        struct ubi_debug_info *d = &ubi->dbg;
+       umode_t mode = S_IRUSR | S_IWUSR;
        int n;
 
        if (!IS_ENABLED(CONFIG_DEBUG_FS))
@@ -518,41 +519,41 @@ int ubi_debugfs_init_dev(struct ubi_device *ubi)
 
        d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir);
 
-       d->dfs_chk_gen = debugfs_create_file("chk_gen", S_IWUSR, d->dfs_dir,
+       d->dfs_chk_gen = debugfs_create_file("chk_gen", mode, d->dfs_dir,
                                             (void *)ubi_num, &dfs_fops);
 
-       d->dfs_chk_io = debugfs_create_file("chk_io", S_IWUSR, d->dfs_dir,
+       d->dfs_chk_io = debugfs_create_file("chk_io", mode, d->dfs_dir,
                                            (void *)ubi_num, &dfs_fops);
 
-       d->dfs_chk_fastmap = debugfs_create_file("chk_fastmap", S_IWUSR,
+       d->dfs_chk_fastmap = debugfs_create_file("chk_fastmap", mode,
                                                 d->dfs_dir, (void *)ubi_num,
                                                 &dfs_fops);
 
-       d->dfs_disable_bgt = debugfs_create_file("tst_disable_bgt", S_IWUSR,
+       d->dfs_disable_bgt = debugfs_create_file("tst_disable_bgt", mode,
                                                 d->dfs_dir, (void *)ubi_num,
                                                 &dfs_fops);
 
        d->dfs_emulate_bitflips = debugfs_create_file("tst_emulate_bitflips",
-                                                     S_IWUSR, d->dfs_dir,
+                                                     mode, d->dfs_dir,
                                                      (void *)ubi_num,
                                                      &dfs_fops);
 
        d->dfs_emulate_io_failures = debugfs_create_file("tst_emulate_io_failures",
-                                                        S_IWUSR, d->dfs_dir,
+                                                        mode, d->dfs_dir,
                                                         (void *)ubi_num,
                                                         &dfs_fops);
 
        d->dfs_emulate_power_cut = debugfs_create_file("tst_emulate_power_cut",
-                                                      S_IWUSR, d->dfs_dir,
+                                                      mode, d->dfs_dir,
                                                       (void *)ubi_num,
                                                       &dfs_fops);
 
        d->dfs_power_cut_min = debugfs_create_file("tst_emulate_power_cut_min",
-                                                  S_IWUSR, d->dfs_dir,
+                                                  mode, d->dfs_dir,
                                                   (void *)ubi_num, &dfs_fops);
 
        d->dfs_power_cut_max = debugfs_create_file("tst_emulate_power_cut_max",
-                                                  S_IWUSR, d->dfs_dir,
+                                                  mode, d->dfs_dir,
                                                   (void *)ubi_num, &dfs_fops);
 
        debugfs_create_file("detailed_erase_block_info", S_IRUSR, d->dfs_dir,
index 09c408c45a62186a67cc1efe4bfcd66b258dc49d..403b79d6efd5ad56868e35f7e10e5903ec7fb7b5 100644 (file)
@@ -61,7 +61,7 @@ struct ubi_eba_table {
 };
 
 /**
- * next_sqnum - get next sequence number.
+ * ubi_next_sqnum - get next sequence number.
  * @ubi: UBI device description object
  *
  * This function returns next sequence number to use, which is just the current
index 0ee452275578d9e5b968a41b41e9cbbc66be794d..863f571f1adb545a4991c0d22efc9889968d22f3 100644 (file)
@@ -146,13 +146,15 @@ void ubi_refill_pools(struct ubi_device *ubi)
        if (ubi->fm_anchor) {
                wl_tree_add(ubi->fm_anchor, &ubi->free);
                ubi->free_count++;
+               ubi->fm_anchor = NULL;
        }
 
-       /*
-        * All available PEBs are in ubi->free, now is the time to get
-        * the best anchor PEBs.
-        */
-       ubi->fm_anchor = ubi_wl_get_fm_peb(ubi, 1);
+       if (!ubi->fm_disabled)
+               /*
+                * All available PEBs are in ubi->free, now is the time to get
+                * the best anchor PEBs.
+                */
+               ubi->fm_anchor = ubi_wl_get_fm_peb(ubi, 1);
 
        for (;;) {
                enough = 0;
index ca2d9efe62c3c75063b41ecacb6cbbd6f107146d..28c8151a0725d5b5c9abbc31bdab3e48fad3253b 100644 (file)
@@ -93,7 +93,7 @@ size_t ubi_calc_fm_size(struct ubi_device *ubi)
 
 
 /**
- * new_fm_vhdr - allocate a new volume header for fastmap usage.
+ * new_fm_vbuf() - allocate a new volume header for fastmap usage.
  * @ubi: UBI device description object
  * @vol_id: the VID of the new header
  *
index 0fce99ff29b583639723676b24341231f731952a..5db653eacbd451ba6efc08e4ce96c192159fef44 100644 (file)
@@ -79,6 +79,7 @@ void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
        vi->name_len = vol->name_len;
        vi->name = vol->name;
        vi->cdev = vol->cdev.dev;
+       vi->dev = &vol->dev;
 }
 
 /**
index 7b30c8ee3e82d7bfb5005857175736f485b373b4..1794d66b6eb7232969cd955cbb7927764554563f 100644 (file)
@@ -10,7 +10,7 @@
 #include "ubi.h"
 
 /**
- * calc_data_len - calculate how much real data is stored in a buffer.
+ * ubi_calc_data_len - calculate how much real data is stored in a buffer.
  * @ubi: UBI device description object
  * @buf: a buffer with the contents of the physical eraseblock
  * @length: the buffer length
index 8fcc0bdf06358d8f3c210e56ee43e97c9ee39821..2c867d16f89f7de2942f0db6db9b194bd92ee945 100644 (file)
@@ -464,7 +464,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
                for (i = 0; i < -pebs; i++) {
                        err = ubi_eba_unmap_leb(ubi, vol, reserved_pebs + i);
                        if (err)
-                               goto out_acc;
+                               goto out_free;
                }
                spin_lock(&ubi->volumes_lock);
                ubi->rsvd_pebs += pebs;
@@ -512,8 +512,10 @@ out_acc:
                ubi->avail_pebs += pebs;
                spin_unlock(&ubi->volumes_lock);
        }
+       return err;
+
 out_free:
-       kfree(new_eba_tbl);
+       ubi_eba_destroy_table(new_eba_tbl);
        return err;
 }
 
@@ -580,6 +582,7 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol)
        if (err) {
                ubi_err(ubi, "cannot add character device for volume %d, error %d",
                        vol_id, err);
+               vol_release(&vol->dev);
                return err;
        }
 
@@ -590,15 +593,14 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol)
        vol->dev.groups = volume_dev_groups;
        dev_set_name(&vol->dev, "%s_%d", ubi->ubi_name, vol->vol_id);
        err = device_register(&vol->dev);
-       if (err)
-               goto out_cdev;
+       if (err) {
+               cdev_del(&vol->cdev);
+               put_device(&vol->dev);
+               return err;
+       }
 
        self_check_volumes(ubi);
        return err;
-
-out_cdev:
-       cdev_del(&vol->cdev);
-       return err;
 }
 
 /**
index 68eb0f21b3fe2150d5a89542daf31679eed7c331..40f39e5d6dfcc068519598452bd8787c7b143c39 100644 (file)
@@ -165,7 +165,7 @@ static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
 }
 
 /**
- * wl_tree_destroy - destroy a wear-leveling entry.
+ * wl_entry_destroy - destroy a wear-leveling entry.
  * @ubi: UBI device description object
  * @e: the wear-leveling entry to add
  *
@@ -890,8 +890,11 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 
        err = do_sync_erase(ubi, e1, vol_id, lnum, 0);
        if (err) {
-               if (e2)
+               if (e2) {
+                       spin_lock(&ubi->wl_lock);
                        wl_entry_destroy(ubi, e2);
+                       spin_unlock(&ubi->wl_lock);
+               }
                goto out_ro;
        }
 
@@ -973,11 +976,11 @@ out_error:
        spin_lock(&ubi->wl_lock);
        ubi->move_from = ubi->move_to = NULL;
        ubi->move_to_put = ubi->wl_scheduled = 0;
+       wl_entry_destroy(ubi, e1);
+       wl_entry_destroy(ubi, e2);
        spin_unlock(&ubi->wl_lock);
 
        ubi_free_vid_buf(vidb);
-       wl_entry_destroy(ubi, e1);
-       wl_entry_destroy(ubi, e2);
 
 out_ro:
        ubi_ro_mode(ubi);
@@ -1130,14 +1133,18 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
                /* Re-schedule the LEB for erasure */
                err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false);
                if (err1) {
+                       spin_lock(&ubi->wl_lock);
                        wl_entry_destroy(ubi, e);
+                       spin_unlock(&ubi->wl_lock);
                        err = err1;
                        goto out_ro;
                }
                return err;
        }
 
+       spin_lock(&ubi->wl_lock);
        wl_entry_destroy(ubi, e);
+       spin_unlock(&ubi->wl_lock);
        if (err != -EIO)
                /*
                 * If this is not %-EIO, we have no idea what to do. Scheduling
@@ -1253,6 +1260,18 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum,
 retry:
        spin_lock(&ubi->wl_lock);
        e = ubi->lookuptbl[pnum];
+       if (!e) {
+               /*
+                * This wl entry has been removed for some errors by other
+                * process (eg. wear leveling worker), corresponding process
+                * (except __erase_worker, which cannot concurrent with
+                * ubi_wl_put_peb) will set ubi ro_mode at the same time,
+                * just ignore this wl entry.
+                */
+               spin_unlock(&ubi->wl_lock);
+               up_read(&ubi->fm_protect);
+               return 0;
+       }
        if (e == ubi->move_from) {
                /*
                 * User is putting the physical eraseblock which was selected to
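wl_entry_destroy() removes the entry from ubi->lookuptbl, so the failure paths now take ubi->wl_lock around it, and ubi_wl_put_peb() tolerates a NULL lookup instead of dereferencing it. A condensed sketch of the locking rule (types and names simplified, kernel context assumed):

#include <linux/spinlock.h>

struct wl_entry { int pnum; };

static struct wl_entry *lookuptbl[64];
static DEFINE_SPINLOCK(wl_lock);

static void entry_destroy(struct wl_entry *e)
{
	lookuptbl[e->pnum] = NULL;      /* must not race with readers */
}

static void destroy_on_error(struct wl_entry *e)
{
	spin_lock(&wl_lock);
	entry_destroy(e);               /* table update atomic w.r.t. put_peb */
	spin_unlock(&wl_lock);
}

static int put_peb(int pnum)
{
	struct wl_entry *e;

	spin_lock(&wl_lock);
	e = lookuptbl[pnum];
	if (!e) {                       /* already torn down by a failed worker */
		spin_unlock(&wl_lock);
		return 0;
	}
	/* ... normal put path under the lock ... */
	spin_unlock(&wl_lock);
	return 0;
}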
index 3a15015bc409eb72f76aeb81668ae4ffbd717d6d..a508402c4ecbf60379097106a47a7dc53bb96ac8 100644 (file)
@@ -393,6 +393,24 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid,
                mt7530_write(priv, MT7530_ATA1 + (i * 4), reg[i]);
 }
 
+/* Set up switch core clock for MT7530 */
+static void mt7530_pll_setup(struct mt7530_priv *priv)
+{
+       /* Disable PLL */
+       core_write(priv, CORE_GSWPLL_GRP1, 0);
+
+       /* Set core clock into 500Mhz */
+       core_write(priv, CORE_GSWPLL_GRP2,
+                  RG_GSWPLL_POSDIV_500M(1) |
+                  RG_GSWPLL_FBKDIV_500M(25));
+
+       /* Enable PLL */
+       core_write(priv, CORE_GSWPLL_GRP1,
+                  RG_GSWPLL_EN_PRE |
+                  RG_GSWPLL_POSDIV_200M(2) |
+                  RG_GSWPLL_FBKDIV_200M(32));
+}
+
 /* Setup TX circuit including relevant PAD and driving */
 static int
 mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
@@ -453,21 +471,6 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
        core_clear(priv, CORE_TRGMII_GSW_CLK_CG,
                   REG_GSWCK_EN | REG_TRGMIICK_EN);
 
-       /* Setup core clock for MT7530 */
-       /* Disable PLL */
-       core_write(priv, CORE_GSWPLL_GRP1, 0);
-
-       /* Set core clock into 500Mhz */
-       core_write(priv, CORE_GSWPLL_GRP2,
-                  RG_GSWPLL_POSDIV_500M(1) |
-                  RG_GSWPLL_FBKDIV_500M(25));
-
-       /* Enable PLL */
-       core_write(priv, CORE_GSWPLL_GRP1,
-                  RG_GSWPLL_EN_PRE |
-                  RG_GSWPLL_POSDIV_200M(2) |
-                  RG_GSWPLL_FBKDIV_200M(32));
-
        /* Setup the MT7530 TRGMII Tx Clock */
        core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1));
        core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0));
@@ -2196,6 +2199,8 @@ mt7530_setup(struct dsa_switch *ds)
                     SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST |
                     SYS_CTRL_REG_RST);
 
+       mt7530_pll_setup(priv);
+
        /* Enable Port 6 only; P5 as GMAC5 which currently is not supported */
        val = mt7530_read(priv, MT7530_MHWTRAP);
        val &= ~MHWTRAP_P6_DIS & ~MHWTRAP_PHY_ACCESS;
index 323ec56e8a74c37f1ce859a564d08b14b32545ca..1917da784191988ea5c0cbc995ec946f647a4b29 100644 (file)
@@ -132,6 +132,16 @@ source "drivers/net/ethernet/mscc/Kconfig"
 source "drivers/net/ethernet/microsoft/Kconfig"
 source "drivers/net/ethernet/moxa/Kconfig"
 source "drivers/net/ethernet/myricom/Kconfig"
+
+config FEALNX
+       tristate "Myson MTD-8xx PCI Ethernet support"
+       depends on PCI
+       select CRC32
+       select MII
+       help
+         Say Y here to support the Myson MTD-800 family of PCI-based Ethernet
+         cards. <http://www.myson.com.tw/>
+
 source "drivers/net/ethernet/ni/Kconfig"
 source "drivers/net/ethernet/natsemi/Kconfig"
 source "drivers/net/ethernet/neterion/Kconfig"
index 2fedbaa545eb1cb4aa7de091335733f8d0f12148..0d872d4efcd10b1d29752e7fcd6aede2ec657902 100644 (file)
@@ -64,6 +64,7 @@ obj-$(CONFIG_NET_VENDOR_MICROCHIP) += microchip/
 obj-$(CONFIG_NET_VENDOR_MICROSEMI) += mscc/
 obj-$(CONFIG_NET_VENDOR_MOXART) += moxa/
 obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/
+obj-$(CONFIG_FEALNX) += fealnx.o
 obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/
 obj-$(CONFIG_NET_VENDOR_NETERION) += neterion/
 obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/
index 8da79eedc057c2f68dbbe5cd3ff6656934f915b7..1d4f2f4d10f2967fda9d4a1a17f5b7e7b17111f8 100644 (file)
@@ -850,11 +850,20 @@ static int ena_set_channels(struct net_device *netdev,
        struct ena_adapter *adapter = netdev_priv(netdev);
        u32 count = channels->combined_count;
        /* The check for max value is already done in ethtool */
-       if (count < ENA_MIN_NUM_IO_QUEUES ||
-           (ena_xdp_present(adapter) &&
-           !ena_xdp_legal_queue_count(adapter, count)))
+       if (count < ENA_MIN_NUM_IO_QUEUES)
                return -EINVAL;
 
+       if (!ena_xdp_legal_queue_count(adapter, count)) {
+               if (ena_xdp_present(adapter))
+                       return -EINVAL;
+
+               xdp_clear_features_flag(netdev);
+       } else {
+               xdp_set_features_flag(netdev,
+                                     NETDEV_XDP_ACT_BASIC |
+                                     NETDEV_XDP_ACT_REDIRECT);
+       }
+
        return ena_update_queue_count(adapter, count);
 }
 
index d3999db7c6a29d1f6677ca9de4ff197ee28fda4b..cbfe7f977270f7f5134d766b552449a35bd65927 100644 (file)
@@ -4105,8 +4105,6 @@ static void ena_set_conf_feat_params(struct ena_adapter *adapter,
        /* Set offload features */
        ena_set_dev_offloads(feat, netdev);
 
-       netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;
-
        adapter->max_mtu = feat->dev_attr.max_mtu;
        netdev->max_mtu = adapter->max_mtu;
        netdev->min_mtu = ENA_MIN_MTU;
@@ -4393,6 +4391,10 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        ena_config_debug_area(adapter);
 
+       if (ena_xdp_legal_queue_count(adapter, adapter->num_io_queues))
+               netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
+                                      NETDEV_XDP_ACT_REDIRECT;
+
        memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
 
        netif_carrier_off(netdev);
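xdp_set_features_flag() and xdp_clear_features_flag() update netdev->xdp_features and notify userspace over netlink, so the advertised XDP capability can follow the runtime queue configuration instead of being fixed once at probe. A short sketch; the queues_ok predicate stands in for ena_xdp_legal_queue_count():

#include <linux/netdevice.h>
#include <net/xdp.h>

static void update_xdp_features(struct net_device *dev, bool queues_ok)
{
	if (queues_ok)
		xdp_set_features_flag(dev, NETDEV_XDP_ACT_BASIC |
					   NETDEV_XDP_ACT_REDIRECT);
	else
		xdp_clear_features_flag(dev);
}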
index e5c71f90785237348ab57924037dea3e85f04f09..d8d71bf97983b859c68c95899f65fc90dc66bb69 100644 (file)
@@ -735,12 +735,17 @@ static int nicvf_set_channels(struct net_device *dev,
        if (channel->tx_count > nic->max_queues)
                return -EINVAL;
 
-       if (nic->xdp_prog &&
-           ((channel->tx_count + channel->rx_count) > nic->max_queues)) {
-               netdev_err(nic->netdev,
-                          "XDP mode, RXQs + TXQs > Max %d\n",
-                          nic->max_queues);
-               return -EINVAL;
+       if (channel->tx_count + channel->rx_count > nic->max_queues) {
+               if (nic->xdp_prog) {
+                       netdev_err(nic->netdev,
+                                  "XDP mode, RXQs + TXQs > Max %d\n",
+                                  nic->max_queues);
+                       return -EINVAL;
+               }
+
+               xdp_clear_features_flag(nic->netdev);
+       } else if (!pass1_silicon(nic->pdev)) {
+               xdp_set_features_flag(dev, NETDEV_XDP_ACT_BASIC);
        }
 
        if (if_up)
index 8b25313c7f6b8fa28d58cbfb50ff46515880c1f0..eff350e0bc2a8ec7d1f3584028d21762afecfe3d 100644 (file)
@@ -2218,7 +2218,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        netdev->netdev_ops = &nicvf_netdev_ops;
        netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
 
-       netdev->xdp_features = NETDEV_XDP_ACT_BASIC;
+       if (!pass1_silicon(nic->pdev) &&
+           nic->rx_queues + nic->tx_queues <= nic->max_queues)
+               netdev->xdp_features = NETDEV_XDP_ACT_BASIC;
 
        /* MTU range: 64 - 9200 */
        netdev->min_mtu = NIC_HW_MIN_FRS;
diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
new file mode 100644 (file)
index 0000000..ed18450
--- /dev/null
@@ -0,0 +1,1953 @@
+/*
+       Written 1998-2000 by Donald Becker.
+
+       This software may be used and distributed according to the terms of
+       the GNU General Public License (GPL), incorporated herein by reference.
+       Drivers based on or derived from this code fall under the GPL and must
+       retain the authorship, copyright and license notice.  This file is not
+       a complete program and may only be used when the entire operating
+       system is licensed under the GPL.
+
+       The author may be reached as becker@scyld.com, or C/O
+       Scyld Computing Corporation
+       410 Severn Ave., Suite 210
+       Annapolis MD 21403
+
+       Support information and updates available at
+       http://www.scyld.com/network/pci-skeleton.html
+
+       Linux kernel updates:
+
+       Version 2.51, Nov 17, 2001 (jgarzik):
+       - Add ethtool support
+       - Replace some MII-related magic numbers with constants
+
+*/
+
+#define DRV_NAME       "fealnx"
+
+static int debug;              /* 1-> print debug message */
+static int max_interrupt_work = 20;
+
+/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast). */
+static int multicast_filter_limit = 32;
+
+/* Set the copy breakpoint for the copy-only-tiny-frames scheme. */
+/* Setting to > 1518 effectively disables this feature.          */
+static int rx_copybreak;
+
+/* Used to pass the media type, etc.                            */
+/* Both 'options[]' and 'full_duplex[]' should exist for driver */
+/* interoperability.                                            */
+/* The media type is usually passed in 'options[]'.             */
+#define MAX_UNITS 8            /* More are supported, limit only on options */
+static int options[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1 };
+static int full_duplex[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1 };
+
+/* Operational parameters that are set at compile time.                 */
+/* Keep the ring sizes a power of two for compile efficiency.           */
+/* The compiler will convert <unsigned>'%'<2^N> into a bit mask.        */
+/* Making the Tx ring too large decreases the effectiveness of channel  */
+/* bonding and packet priority.                                         */
+/* There are no ill effects from too-large receive rings.               */
+// 88-12-9 modify,
+// #define TX_RING_SIZE    16
+// #define RX_RING_SIZE    32
+#define TX_RING_SIZE    6
+#define RX_RING_SIZE    12
+#define TX_TOTAL_SIZE  TX_RING_SIZE*sizeof(struct fealnx_desc)
+#define RX_TOTAL_SIZE  RX_RING_SIZE*sizeof(struct fealnx_desc)
+
+/* Operational parameters that usually are not changed. */
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT      (2*HZ)
+
+#define PKT_BUF_SZ      1536   /* Size of each temporary Rx buffer. */
+
+
+/* Include files, designed to support most kernel versions 2.0.0 and later. */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/crc32.h>
+#include <linux/delay.h>
+#include <linux/bitops.h>
+
+#include <asm/processor.h>     /* Processor type for cache alignment. */
+#include <asm/io.h>
+#include <linux/uaccess.h>
+#include <asm/byteorder.h>
+
+/* This driver was written to use PCI memory space, however some x86 systems
+   work only with I/O space accesses. */
+#ifndef __alpha__
+#define USE_IO_OPS
+#endif
+
+/* Kernel compatibility defines, some common to David Hinds' PCMCIA package. */
+/* This is only in the support-all-kernels source code. */
+
+#define RUN_AT(x) (jiffies + (x))
+
+MODULE_AUTHOR("Myson or whoever");
+MODULE_DESCRIPTION("Myson MTD-8xx 100/10M Ethernet PCI Adapter Driver");
+MODULE_LICENSE("GPL");
+module_param(max_interrupt_work, int, 0);
+module_param(debug, int, 0);
+module_param(rx_copybreak, int, 0);
+module_param(multicast_filter_limit, int, 0);
+module_param_array(options, int, NULL, 0);
+module_param_array(full_duplex, int, NULL, 0);
+MODULE_PARM_DESC(max_interrupt_work, "fealnx maximum events handled per interrupt");
+MODULE_PARM_DESC(debug, "fealnx enable debugging (0-1)");
+MODULE_PARM_DESC(rx_copybreak, "fealnx copy breakpoint for copy-only-tiny-frames");
+MODULE_PARM_DESC(multicast_filter_limit, "fealnx maximum number of filtered multicast addresses");
+MODULE_PARM_DESC(options, "fealnx: Bits 0-3: media type, bit 17: full duplex");
+MODULE_PARM_DESC(full_duplex, "fealnx full duplex setting(s) (1)");
+
+enum {
+       MIN_REGION_SIZE         = 136,
+};
+
+/* A chip capabilities table, matching the entries in pci_tbl[] above. */
+enum chip_capability_flags {
+       HAS_MII_XCVR,
+       HAS_CHIP_XCVR,
+};
+
+/* 89/6/13 add, */
+/* for different PHY */
+enum phy_type_flags {
+       MysonPHY = 1,
+       AhdocPHY = 2,
+       SeeqPHY = 3,
+       MarvellPHY = 4,
+       Myson981 = 5,
+       LevelOnePHY = 6,
+       OtherPHY = 10,
+};
+
+struct chip_info {
+       char *chip_name;
+       int flags;
+};
+
+static const struct chip_info skel_netdrv_tbl[] = {
+       { "100/10M Ethernet PCI Adapter",       HAS_MII_XCVR },
+       { "100/10M Ethernet PCI Adapter",       HAS_CHIP_XCVR },
+       { "1000/100/10M Ethernet PCI Adapter",  HAS_MII_XCVR },
+};
+
+/* Offsets to the Command and Status Registers. */
+enum fealnx_offsets {
+       PAR0 = 0x0,             /* physical address 0-3 */
+       PAR1 = 0x04,            /* physical address 4-5 */
+       MAR0 = 0x08,            /* multicast address 0-3 */
+       MAR1 = 0x0C,            /* multicast address 4-7 */
+       FAR0 = 0x10,            /* flow-control address 0-3 */
+       FAR1 = 0x14,            /* flow-control address 4-5 */
+       TCRRCR = 0x18,          /* receive & transmit configuration */
+       BCR = 0x1C,             /* bus command */
+       TXPDR = 0x20,           /* transmit polling demand */
+       RXPDR = 0x24,           /* receive polling demand */
+       RXCWP = 0x28,           /* receive current word pointer */
+       TXLBA = 0x2C,           /* transmit list base address */
+       RXLBA = 0x30,           /* receive list base address */
+       ISR = 0x34,             /* interrupt status */
+       IMR = 0x38,             /* interrupt mask */
+       FTH = 0x3C,             /* flow control high/low threshold */
+       MANAGEMENT = 0x40,      /* bootrom/eeprom and mii management */
+       TALLY = 0x44,           /* tally counters for crc and mpa */
+       TSR = 0x48,             /* tally counter for transmit status */
+       BMCRSR = 0x4c,          /* basic mode control and status */
+       PHYIDENTIFIER = 0x50,   /* phy identifier */
+       ANARANLPAR = 0x54,      /* auto-negotiation advertisement and link
+                                  partner ability */
+       ANEROCR = 0x58,         /* auto-negotiation expansion and pci conf. */
+       BPREMRPSR = 0x5c,       /* bypass & receive error mask and phy status */
+};
+
+/* Bits in the interrupt status/enable registers. */
+/* The bits in the Intr Status/Enable registers, mostly interrupt sources. */
+enum intr_status_bits {
+       RFCON = 0x00020000,     /* receive flow control xon packet */
+       RFCOFF = 0x00010000,    /* receive flow control xoff packet */
+       LSCStatus = 0x00008000, /* link status change */
+       ANCStatus = 0x00004000, /* autonegotiation completed */
+       FBE = 0x00002000,       /* fatal bus error */
+       FBEMask = 0x00001800,   /* mask bit12-11 */
+       ParityErr = 0x00000000, /* parity error */
+       TargetErr = 0x00001000, /* target abort */
+       MasterErr = 0x00000800, /* master error */
+       TUNF = 0x00000400,      /* transmit underflow */
+       ROVF = 0x00000200,      /* receive overflow */
+       ETI = 0x00000100,       /* transmit early int */
+       ERI = 0x00000080,       /* receive early int */
+       CNTOVF = 0x00000040,    /* counter overflow */
+       RBU = 0x00000020,       /* receive buffer unavailable */
+       TBU = 0x00000010,       /* transmit buffer unavailable */
+       TI = 0x00000008,        /* transmit interrupt */
+       RI = 0x00000004,        /* receive interrupt */
+       RxErr = 0x00000002,     /* receive error */
+};
+
+/* Bits in the NetworkConfig register, W for writing, R for reading */
+/* FIXME: some names are invented by me. Marked with (name?) */
+/* If you have docs and know bit names, please fix 'em */
+enum rx_mode_bits {
+       CR_W_ENH        = 0x02000000,   /* enhanced mode (name?) */
+       CR_W_FD         = 0x00100000,   /* full duplex */
+       CR_W_PS10       = 0x00080000,   /* 10 mbit */
+       CR_W_TXEN       = 0x00040000,   /* tx enable (name?) */
+       CR_W_PS1000     = 0x00010000,   /* 1000 mbit */
+     /* CR_W_RXBURSTMASK= 0x00000e00, I'm unsure about this */
+       CR_W_RXMODEMASK = 0x000000e0,
+       CR_W_PROM       = 0x00000080,   /* promiscuous mode */
+       CR_W_AB         = 0x00000040,   /* accept broadcast */
+       CR_W_AM         = 0x00000020,   /* accept multicast */
+       CR_W_ARP        = 0x00000008,   /* receive runt pkt */
+       CR_W_ALP        = 0x00000004,   /* receive long pkt */
+       CR_W_SEP        = 0x00000002,   /* receive error pkt */
+       CR_W_RXEN       = 0x00000001,   /* rx enable (unicast?) (name?) */
+
+       CR_R_TXSTOP     = 0x04000000,   /* tx stopped (name?) */
+       CR_R_FD         = 0x00100000,   /* full duplex detected */
+       CR_R_PS10       = 0x00080000,   /* 10 mbit detected */
+       CR_R_RXSTOP     = 0x00008000,   /* rx stopped (name?) */
+};
+
+/* The Tulip Rx and Tx buffer descriptors. */
+struct fealnx_desc {
+       s32 status;
+       s32 control;
+       u32 buffer;
+       u32 next_desc;
+       struct fealnx_desc *next_desc_logical;
+       struct sk_buff *skbuff;
+       u32 reserved1;
+       u32 reserved2;
+};
+
+/* Bits in network_desc.status */
+enum rx_desc_status_bits {
+       RXOWN = 0x80000000,     /* own bit */
+       FLNGMASK = 0x0fff0000,  /* frame length */
+       FLNGShift = 16,
+       MARSTATUS = 0x00004000, /* multicast address received */
+       BARSTATUS = 0x00002000, /* broadcast address received */
+       PHYSTATUS = 0x00001000, /* physical address received */
+       RXFSD = 0x00000800,     /* first descriptor */
+       RXLSD = 0x00000400,     /* last descriptor */
+       ErrorSummary = 0x80,    /* error summary */
+       RUNTPKT = 0x40,         /* runt packet received */
+       LONGPKT = 0x20,         /* long packet received */
+       FAE = 0x10,             /* frame align error */
+       CRC = 0x08,             /* crc error */
+       RXER = 0x04,            /* receive error */
+};
+
+enum rx_desc_control_bits {
+       RXIC = 0x00800000,      /* interrupt control */
+       RBSShift = 0,
+};
+
+enum tx_desc_status_bits {
+       TXOWN = 0x80000000,     /* own bit */
+       JABTO = 0x00004000,     /* jabber timeout */
+       CSL = 0x00002000,       /* carrier sense lost */
+       LC = 0x00001000,        /* late collision */
+       EC = 0x00000800,        /* excessive collision */
+       UDF = 0x00000400,       /* fifo underflow */
+       DFR = 0x00000200,       /* deferred */
+       HF = 0x00000100,        /* heartbeat fail */
+       NCRMask = 0x000000ff,   /* collision retry count */
+       NCRShift = 0,
+};
+
+enum tx_desc_control_bits {
+       TXIC = 0x80000000,      /* interrupt control */
+       ETIControl = 0x40000000,        /* early transmit interrupt */
+       TXLD = 0x20000000,      /* last descriptor */
+       TXFD = 0x10000000,      /* first descriptor */
+       CRCEnable = 0x08000000, /* crc control */
+       PADEnable = 0x04000000, /* padding control */
+       RetryTxLC = 0x02000000, /* retry late collision */
+       PKTSMask = 0x3ff800,    /* packet size bit21-11 */
+       PKTSShift = 11,
+       TBSMask = 0x000007ff,   /* transmit buffer bit 10-0 */
+       TBSShift = 0,
+};
+
+/* BootROM/EEPROM/MII Management Register */
+#define MASK_MIIR_MII_READ       0x00000000
+#define MASK_MIIR_MII_WRITE      0x00000008
+#define MASK_MIIR_MII_MDO        0x00000004
+#define MASK_MIIR_MII_MDI        0x00000002
+#define MASK_MIIR_MII_MDC        0x00000001
+
+/* ST+OP+PHYAD+REGAD+TA */
+#define OP_READ             0x6000     /* ST:01+OP:10+PHYAD+REGAD+TA:Z0 */
+#define OP_WRITE            0x5002     /* ST:01+OP:01+PHYAD+REGAD+TA:10 */
+
+/* ------------------------------------------------------------------------- */
+/*      Constants for Myson PHY                                              */
+/* ------------------------------------------------------------------------- */
+#define MysonPHYID      0xd0000302
+/* 89-7-27 add, (begin) */
+#define MysonPHYID0     0x0302
+#define StatusRegister  18
+#define SPEED100        0x0400 // bit10
+#define FULLMODE        0x0800 // bit11
+/* 89-7-27 add, (end) */
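+
+/*
+ * Illustrative: a StatusRegister (MII register 18) read of 0x0c00 on the
+ * Myson 981 has both SPEED100 (bit 10) and FULLMODE (bit 11) set, which
+ * getlinktype() reports as a 100 Mbit full-duplex link.
+ */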
+
+/* ------------------------------------------------------------------------- */
+/*      Constants for Seeq 80225 PHY                                         */
+/* ------------------------------------------------------------------------- */
+#define SeeqPHYID0      0x0016
+
+#define MIIRegister18   18
+#define SPD_DET_100     0x80
+#define DPLX_DET_FULL   0x40
+
+/* ------------------------------------------------------------------------- */
+/*      Constants for Ahdoc 101 PHY                                          */
+/* ------------------------------------------------------------------------- */
+#define AhdocPHYID0     0x0022
+
+#define DiagnosticReg   18
+#define DPLX_FULL       0x0800
+#define Speed_100       0x0400
+
+/* 89/6/13 add, */
+/* -------------------------------------------------------------------------- */
+/*      Constants for Marvell and LevelOne PHYs                              */
+/* -------------------------------------------------------------------------- */
+#define MarvellPHYID0           0x0141
+#define LevelOnePHYID0         0x0013
+
+#define MII1000BaseTControlReg  9
+#define MII1000BaseTStatusReg   10
+#define SpecificReg            17
+
+/* for 1000BaseT Control Register */
+#define PHYAbletoPerform1000FullDuplex  0x0200
+#define PHYAbletoPerform1000HalfDuplex  0x0100
+#define PHY1000AbilityMask              0x300
+
+// for phy specific status register, marvell phy.
+#define SpeedMask       0x0c000
+#define Speed_1000M     0x08000
+#define Speed_100M      0x4000
+#define Speed_10M       0
+#define Full_Duplex     0x2000
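+
+// Illustrative: a Marvell specific-status read of 0xa000 gives
+// (data & SpeedMask) == Speed_1000M and (data & Full_Duplex) != 0,
+// i.e. a 1000 Mbit full-duplex link; see the MarvellPHY branch of
+// getlinktype().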
+
+// 89/12/29 add, for phy specific status register, levelone phy, (begin)
+#define LXT1000_100M    0x08000
+#define LXT1000_1000M   0x0c000
+#define LXT1000_Full    0x200
+// 89/12/29 add, for phy specific status register, levelone phy, (end)
+
+/* for 3-in-1 case, BMCRSR register */
+#define LinkIsUp2      0x00040000
+
+/* for PHY */
+#define LinkIsUp        0x0004
+
+
+struct netdev_private {
+       /* Descriptor rings first for alignment. */
+       struct fealnx_desc *rx_ring;
+       struct fealnx_desc *tx_ring;
+
+       dma_addr_t rx_ring_dma;
+       dma_addr_t tx_ring_dma;
+
+       spinlock_t lock;
+
+       /* Media monitoring timer. */
+       struct timer_list timer;
+
+       /* Reset timer */
+       struct timer_list reset_timer;
+       int reset_timer_armed;
+       unsigned long crvalue_sv;
+       unsigned long imrvalue_sv;
+
+       /* Frequently used values: keep some adjacent for cache effect. */
+       int flags;
+       struct pci_dev *pci_dev;
+       unsigned long crvalue;
+       unsigned long bcrvalue;
+       unsigned long imrvalue;
+       struct fealnx_desc *cur_rx;
+       struct fealnx_desc *lack_rxbuf;
+       int really_rx_count;
+       struct fealnx_desc *cur_tx;
+       struct fealnx_desc *cur_tx_copy;
+       int really_tx_count;
+       int free_tx_count;
+       unsigned int rx_buf_sz; /* Based on MTU+slack. */
+
+       /* These values keep track of the transceiver/media in use. */
+       unsigned int linkok;
+       unsigned int line_speed;
+       unsigned int duplexmode;
+       unsigned int default_port:4;    /* Last dev->if_port value. */
+       unsigned int PHYType;
+
+       /* MII transceiver section. */
+       int mii_cnt;            /* MII device addresses. */
+       unsigned char phys[2];  /* MII device addresses. */
+       struct mii_if_info mii;
+       void __iomem *mem;
+};
+
+
+static int mdio_read(struct net_device *dev, int phy_id, int location);
+static void mdio_write(struct net_device *dev, int phy_id, int location, int value);
+static int netdev_open(struct net_device *dev);
+static void getlinktype(struct net_device *dev);
+static void getlinkstatus(struct net_device *dev);
+static void netdev_timer(struct timer_list *t);
+static void reset_timer(struct timer_list *t);
+static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue);
+static void init_ring(struct net_device *dev);
+static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev);
+static irqreturn_t intr_handler(int irq, void *dev_instance);
+static int netdev_rx(struct net_device *dev);
+static void set_rx_mode(struct net_device *dev);
+static void __set_rx_mode(struct net_device *dev);
+static struct net_device_stats *get_stats(struct net_device *dev);
+static int mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static const struct ethtool_ops netdev_ethtool_ops;
+static int netdev_close(struct net_device *dev);
+static void reset_rx_descriptors(struct net_device *dev);
+static void reset_tx_descriptors(struct net_device *dev);
+
+static void stop_nic_rx(void __iomem *ioaddr, long crvalue)
+{
+       int delay = 0x1000;
+       iowrite32(crvalue & ~(CR_W_RXEN), ioaddr + TCRRCR);
+       while (--delay) {
+               if ((ioread32(ioaddr + TCRRCR) & CR_R_RXSTOP) == CR_R_RXSTOP)
+                       break;
+       }
+}
+
+
+static void stop_nic_rxtx(void __iomem *ioaddr, long crvalue)
+{
+       int delay = 0x1000;
+       iowrite32(crvalue & ~(CR_W_RXEN | CR_W_TXEN), ioaddr + TCRRCR);
+       while (--delay) {
+               if ((ioread32(ioaddr + TCRRCR) & (CR_R_RXSTOP | CR_R_TXSTOP)) ==
+                   (CR_R_RXSTOP | CR_R_TXSTOP))
+                       break;
+                       break;
+       }
+}
+
+static const struct net_device_ops netdev_ops = {
+       .ndo_open               = netdev_open,
+       .ndo_stop               = netdev_close,
+       .ndo_start_xmit         = start_tx,
+       .ndo_get_stats          = get_stats,
+       .ndo_set_rx_mode        = set_rx_mode,
+       .ndo_eth_ioctl          = mii_ioctl,
+       .ndo_tx_timeout         = fealnx_tx_timeout,
+       .ndo_set_mac_address    = eth_mac_addr,
+       .ndo_validate_addr      = eth_validate_addr,
+};
+
+static int fealnx_init_one(struct pci_dev *pdev,
+                          const struct pci_device_id *ent)
+{
+       struct netdev_private *np;
+       int i, option, err, irq;
+       static int card_idx = -1;
+       char boardname[12];
+       void __iomem *ioaddr;
+       unsigned long len;
+       unsigned int chip_id = ent->driver_data;
+       struct net_device *dev;
+       void *ring_space;
+       dma_addr_t ring_dma;
+       u8 addr[ETH_ALEN];
+#ifdef USE_IO_OPS
+       int bar = 0;
+#else
+       int bar = 1;
+#endif
+
+       card_idx++;
+       snprintf(boardname, sizeof(boardname), "fealnx%d", card_idx);
+
+       option = card_idx < MAX_UNITS ? options[card_idx] : 0;
+
+       i = pci_enable_device(pdev);
+       if (i)
+               return i;
+       pci_set_master(pdev);
+
+       len = pci_resource_len(pdev, bar);
+       if (len < MIN_REGION_SIZE) {
+               dev_err(&pdev->dev,
+                          "region size %lu too small, aborting\n", len);
+               return -ENODEV;
+       }
+
+       i = pci_request_regions(pdev, boardname);
+       if (i)
+               return i;
+
+       irq = pdev->irq;
+
+       ioaddr = pci_iomap(pdev, bar, len);
+       if (!ioaddr) {
+               err = -ENOMEM;
+               goto err_out_res;
+       }
+
+       dev = alloc_etherdev(sizeof(struct netdev_private));
+       if (!dev) {
+               err = -ENOMEM;
+               goto err_out_unmap;
+       }
+       SET_NETDEV_DEV(dev, &pdev->dev);
+
+       /* read ethernet id */
+       for (i = 0; i < ETH_ALEN; ++i)
+               addr[i] = ioread8(ioaddr + PAR0 + i);
+       eth_hw_addr_set(dev, addr);
+
+       /* Reset the chip to erase previous misconfiguration. */
+       iowrite32(0x00000001, ioaddr + BCR);
+
+       /* Make certain the descriptor lists are aligned. */
+       np = netdev_priv(dev);
+       np->mem = ioaddr;
+       spin_lock_init(&np->lock);
+       np->pci_dev = pdev;
+       np->flags = skel_netdrv_tbl[chip_id].flags;
+       pci_set_drvdata(pdev, dev);
+       np->mii.dev = dev;
+       np->mii.mdio_read = mdio_read;
+       np->mii.mdio_write = mdio_write;
+       np->mii.phy_id_mask = 0x1f;
+       np->mii.reg_num_mask = 0x1f;
+
+       ring_space = dma_alloc_coherent(&pdev->dev, RX_TOTAL_SIZE, &ring_dma,
+                                       GFP_KERNEL);
+       if (!ring_space) {
+               err = -ENOMEM;
+               goto err_out_free_dev;
+       }
+       np->rx_ring = ring_space;
+       np->rx_ring_dma = ring_dma;
+
+       ring_space = dma_alloc_coherent(&pdev->dev, TX_TOTAL_SIZE, &ring_dma,
+                                       GFP_KERNEL);
+       if (!ring_space) {
+               err = -ENOMEM;
+               goto err_out_free_rx;
+       }
+       np->tx_ring = ring_space;
+       np->tx_ring_dma = ring_dma;
+
+       /* find the connected MII xcvrs */
+       if (np->flags == HAS_MII_XCVR) {
+               int phy, phy_idx = 0;
+
+               for (phy = 1; phy < 32 && phy_idx < ARRAY_SIZE(np->phys);
+                              phy++) {
+                       int mii_status = mdio_read(dev, phy, 1);
+
+                       if (mii_status != 0xffff && mii_status != 0x0000) {
+                               np->phys[phy_idx++] = phy;
+                               dev_info(&pdev->dev,
+                                      "MII PHY found at address %d, status "
+                                      "0x%4.4x.\n", phy, mii_status);
+                               /* get phy type */
+                               {
+                                       unsigned int data;
+
+                                       data = mdio_read(dev, np->phys[0], 2);
+                                       if (data == SeeqPHYID0)
+                                               np->PHYType = SeeqPHY;
+                                       else if (data == AhdocPHYID0)
+                                               np->PHYType = AhdocPHY;
+                                       else if (data == MarvellPHYID0)
+                                               np->PHYType = MarvellPHY;
+                                       else if (data == MysonPHYID0)
+                                               np->PHYType = Myson981;
+                                       else if (data == LevelOnePHYID0)
+                                               np->PHYType = LevelOnePHY;
+                                       else
+                                               np->PHYType = OtherPHY;
+                               }
+                       }
+               }
+
+               np->mii_cnt = phy_idx;
+               if (phy_idx == 0)
+                       dev_warn(&pdev->dev,
+                                "MII PHY not found -- this device may "
+                                "not operate correctly.\n");
+       } else {
+               np->phys[0] = 32;
+/* 89/6/23 add, (begin) */
+               /* get phy type */
+               if (ioread32(ioaddr + PHYIDENTIFIER) == MysonPHYID)
+                       np->PHYType = MysonPHY;
+               else
+                       np->PHYType = OtherPHY;
+       }
+       np->mii.phy_id = np->phys[0];
+
+       if (dev->mem_start)
+               option = dev->mem_start;
+
+       /* The lower four bits are the media type. */
+       if (option > 0) {
+               if (option & 0x200)
+                       np->mii.full_duplex = 1;
+               np->default_port = option & 15;
+       }
+
+       if (card_idx < MAX_UNITS && full_duplex[card_idx] > 0)
+               np->mii.full_duplex = full_duplex[card_idx];
+
+       if (np->mii.full_duplex) {
+               dev_info(&pdev->dev, "Media type forced to Full Duplex.\n");
+/* 89/6/13 add, (begin) */
+//      if (np->PHYType==MarvellPHY)
+               if ((np->PHYType == MarvellPHY) || (np->PHYType == LevelOnePHY)) {
+                       unsigned int data;
+
+                       data = mdio_read(dev, np->phys[0], 9);
+                       data = (data & 0xfcff) | 0x0200;
+                       mdio_write(dev, np->phys[0], 9, data);
+               }
+/* 89/6/13 add, (end) */
+               if (np->flags == HAS_MII_XCVR)
+                       mdio_write(dev, np->phys[0], MII_ADVERTISE, ADVERTISE_FULL);
+               else
+                       iowrite32(ADVERTISE_FULL, ioaddr + ANARANLPAR);
+               np->mii.force_media = 1;
+       }
+
+       dev->netdev_ops = &netdev_ops;
+       dev->ethtool_ops = &netdev_ethtool_ops;
+       dev->watchdog_timeo = TX_TIMEOUT;
+
+       err = register_netdev(dev);
+       if (err)
+               goto err_out_free_tx;
+
+       printk(KERN_INFO "%s: %s at %p, %pM, IRQ %d.\n",
+              dev->name, skel_netdrv_tbl[chip_id].chip_name, ioaddr,
+              dev->dev_addr, irq);
+
+       return 0;
+
+err_out_free_tx:
+       dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, np->tx_ring,
+                         np->tx_ring_dma);
+err_out_free_rx:
+       dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, np->rx_ring,
+                         np->rx_ring_dma);
+err_out_free_dev:
+       free_netdev(dev);
+err_out_unmap:
+       pci_iounmap(pdev, ioaddr);
+err_out_res:
+       pci_release_regions(pdev);
+       return err;
+}
+
+
+static void fealnx_remove_one(struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+
+       if (dev) {
+               struct netdev_private *np = netdev_priv(dev);
+
+               dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, np->tx_ring,
+                                 np->tx_ring_dma);
+               dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, np->rx_ring,
+                                 np->rx_ring_dma);
+               unregister_netdev(dev);
+               pci_iounmap(pdev, np->mem);
+               free_netdev(dev);
+               pci_release_regions(pdev);
+       } else {
+               printk(KERN_ERR "fealnx: remove for unknown device\n");
+       }
+}
+
+
+static ulong m80x_send_cmd_to_phy(void __iomem *miiport, int opcode, int phyad, int regad)
+{
+       ulong miir;
+       int i;
+       unsigned int mask, data;
+
+       /* enable MII output */
+       miir = (ulong) ioread32(miiport);
+       miir &= 0xfffffff0;
+
+       miir |= MASK_MIIR_MII_WRITE | MASK_MIIR_MII_MDO;
+
+       /* send 32 1's preamble */
+       for (i = 0; i < 32; i++) {
+               /* low MDC; MDO is already high (miir) */
+               miir &= ~MASK_MIIR_MII_MDC;
+               iowrite32(miir, miiport);
+
+               /* high MDC */
+               miir |= MASK_MIIR_MII_MDC;
+               iowrite32(miir, miiport);
+       }
+
+       /* calculate ST+OP+PHYAD+REGAD+TA */
+       data = opcode | (phyad << 7) | (regad << 2);
+
+       /* shift the frame out, MSB first */
+       mask = 0x8000;
+       while (mask) {
+               /* low MDC, prepare MDO */
+               miir &= ~(MASK_MIIR_MII_MDC | MASK_MIIR_MII_MDO);
+               if (mask & data)
+                       miir |= MASK_MIIR_MII_MDO;
+
+               iowrite32(miir, miiport);
+               /* high MDC */
+               miir |= MASK_MIIR_MII_MDC;
+               iowrite32(miir, miiport);
+               udelay(30);
+
+               /* next */
+               mask >>= 1;
+               if (mask == 0x2 && opcode == OP_READ)
+                       miir &= ~MASK_MIIR_MII_WRITE;
+       }
+       return miir;
+}
+
+
+static int mdio_read(struct net_device *dev, int phyad, int regad)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       void __iomem *miiport = np->mem + MANAGEMENT;
+       ulong miir;
+       unsigned int mask, data;
+
+       miir = m80x_send_cmd_to_phy(miiport, OP_READ, phyad, regad);
+
+       /* read data */
+       mask = 0x8000;
+       data = 0;
+       while (mask) {
+               /* low MDC */
+               miir &= ~MASK_MIIR_MII_MDC;
+               iowrite32(miir, miiport);
+
+               /* read MDI */
+               miir = ioread32(miiport);
+               if (miir & MASK_MIIR_MII_MDI)
+                       data |= mask;
+
+               /* high MDC, and wait */
+               miir |= MASK_MIIR_MII_MDC;
+               iowrite32(miir, miiport);
+               udelay(30);
+
+               /* next */
+               mask >>= 1;
+       }
+
+       /* low MDC */
+       miir &= ~MASK_MIIR_MII_MDC;
+       iowrite32(miir, miiport);
+
+       return data & 0xffff;
+}
+
+
+static void mdio_write(struct net_device *dev, int phyad, int regad, int data)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       void __iomem *miiport = np->mem + MANAGEMENT;
+       ulong miir;
+       unsigned int mask;
+
+       miir = m80x_send_cmd_to_phy(miiport, OP_WRITE, phyad, regad);
+
+       /* write data */
+       mask = 0x8000;
+       while (mask) {
+               /* low MDC, prepare MDO */
+               miir &= ~(MASK_MIIR_MII_MDC | MASK_MIIR_MII_MDO);
+               if (mask & data)
+                       miir |= MASK_MIIR_MII_MDO;
+               iowrite32(miir, miiport);
+
+               /* high MDC */
+               miir |= MASK_MIIR_MII_MDC;
+               iowrite32(miir, miiport);
+
+               /* next */
+               mask >>= 1;
+       }
+
+       /* low MDC */
+       miir &= ~MASK_MIIR_MII_MDC;
+       iowrite32(miir, miiport);
+}
+
+
+static int netdev_open(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       void __iomem *ioaddr = np->mem;
+       const int irq = np->pci_dev->irq;
+       int rc, i;
+
+       iowrite32(0x00000001, ioaddr + BCR);    /* Reset */
+
+       rc = request_irq(irq, intr_handler, IRQF_SHARED, dev->name, dev);
+       if (rc)
+               return rc;
+
+       for (i = 0; i < 3; i++)
+               iowrite16(((const unsigned short *)dev->dev_addr)[i],
+                               ioaddr + PAR0 + i*2);
+
+       init_ring(dev);
+
+       iowrite32(np->rx_ring_dma, ioaddr + RXLBA);
+       iowrite32(np->tx_ring_dma, ioaddr + TXLBA);
+
+       /* Initialize other registers. */
+       /* Configure the PCI bus bursts and FIFO thresholds.
+          486: Set 8 longword burst.
+          586: no burst limit.
+          Burst length 5:3
+          0 0 0   1
+          0 0 1   4
+          0 1 0   8
+          0 1 1   16
+          1 0 0   32
+          1 0 1   64
+          1 1 0   128
+          1 1 1   256
+          Wait the specified 50 PCI cycles after a reset before initializing
+          the Tx and Rx queues and the address filter list.
+          FIXME (Ueimor): optimistic for alpha + posted writes ? */
+
+       np->bcrvalue = 0x10;    /* little-endian, 8 burst length */
+#ifdef __BIG_ENDIAN
+       np->bcrvalue |= 0x04;   /* big-endian */
+#endif
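+
+       /*
+        * Illustrative decode of the value just built: 0x10 has BCR bits
+        * 5:3 == 010, an 8-longword burst per the table above; 0x04 selects
+        * big-endian descriptors, and 0x200 (set for device 0x891 below) is
+        * the PROG bit.
+        */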
+
+#if defined(__i386__) && !defined(MODULE) && !defined(CONFIG_UML)
+       if (boot_cpu_data.x86 <= 4)
+               np->crvalue = 0xa00;
+       else
+#endif
+               np->crvalue = 0xe00;    /* rx 128 burst length */
+
+
+// 89/12/29 add,
+// 90/1/16 modify,
+//   np->imrvalue=FBE|TUNF|CNTOVF|RBU|TI|RI;
+       np->imrvalue = TUNF | CNTOVF | RBU | TI | RI;
+       if (np->pci_dev->device == 0x891) {
+               np->bcrvalue |= 0x200;  /* set PROG bit */
+               np->crvalue |= CR_W_ENH;        /* set enhanced bit */
+               np->imrvalue |= ETI;
+       }
+       iowrite32(np->bcrvalue, ioaddr + BCR);
+
+       if (dev->if_port == 0)
+               dev->if_port = np->default_port;
+
+       iowrite32(0, ioaddr + RXPDR);
+// 89/9/1 modify,
+//   np->crvalue = 0x00e40001;    /* tx store and forward, tx/rx enable */
+       np->crvalue |= 0x00e40001;      /* tx store and forward, tx/rx enable */
+       np->mii.full_duplex = np->mii.force_media;
+       getlinkstatus(dev);
+       if (np->linkok)
+               getlinktype(dev);
+       __set_rx_mode(dev);
+
+       netif_start_queue(dev);
+
+       /* Clear and Enable interrupts by setting the interrupt mask. */
+       iowrite32(FBE | TUNF | CNTOVF | RBU | TI | RI, ioaddr + ISR);
+       iowrite32(np->imrvalue, ioaddr + IMR);
+
+       if (debug)
+               printk(KERN_DEBUG "%s: Done netdev_open().\n", dev->name);
+
+       /* Set the timer to check for link beat. */
+       timer_setup(&np->timer, netdev_timer, 0);
+       np->timer.expires = RUN_AT(3 * HZ);
+
+       /* timer handler */
+       add_timer(&np->timer);
+
+       timer_setup(&np->reset_timer, reset_timer, 0);
+       np->reset_timer_armed = 0;
+       return rc;
+}
+
+
+static void getlinkstatus(struct net_device *dev)
+/* function: Routine will read MII Status Register to get link status.       */
+/* input   : dev... pointer to the adapter block.                            */
+/* output  : none.                                                           */
+{
+       struct netdev_private *np = netdev_priv(dev);
+       unsigned int i, DelayTime = 0x1000;
+
+       np->linkok = 0;
+
+       if (np->PHYType == MysonPHY) {
+               for (i = 0; i < DelayTime; ++i) {
+                       if (ioread32(np->mem + BMCRSR) & LinkIsUp2) {
+                               np->linkok = 1;
+                               return;
+                       }
+                       udelay(100);
+               }
+       } else {
+               for (i = 0; i < DelayTime; ++i) {
+                       if (mdio_read(dev, np->phys[0], MII_BMSR) & BMSR_LSTATUS) {
+                               np->linkok = 1;
+                               return;
+                       }
+                       udelay(100);
+               }
+       }
+}
+
+
+static void getlinktype(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+
+       if (np->PHYType == MysonPHY) {  /* 3-in-1 case */
+               if (ioread32(np->mem + TCRRCR) & CR_R_FD)
+                       np->duplexmode = 2;     /* full duplex */
+               else
+                       np->duplexmode = 1;     /* half duplex */
+               if (ioread32(np->mem + TCRRCR) & CR_R_PS10)
+                       np->line_speed = 1;     /* 10M */
+               else
+                       np->line_speed = 2;     /* 100M */
+       } else {
+               if (np->PHYType == SeeqPHY) {   /* this PHY is SEEQ 80225 */
+                       unsigned int data;
+
+                       data = mdio_read(dev, np->phys[0], MIIRegister18);
+                       if (data & SPD_DET_100)
+                               np->line_speed = 2;     /* 100M */
+                       else
+                               np->line_speed = 1;     /* 10M */
+                       if (data & DPLX_DET_FULL)
+                               np->duplexmode = 2;     /* full duplex mode */
+                       else
+                               np->duplexmode = 1;     /* half duplex mode */
+               } else if (np->PHYType == AhdocPHY) {
+                       unsigned int data;
+
+                       data = mdio_read(dev, np->phys[0], DiagnosticReg);
+                       if (data & Speed_100)
+                               np->line_speed = 2;     /* 100M */
+                       else
+                               np->line_speed = 1;     /* 10M */
+                       if (data & DPLX_FULL)
+                               np->duplexmode = 2;     /* full duplex mode */
+                       else
+                               np->duplexmode = 1;     /* half duplex mode */
+               }
+/* 89/6/13 add, (begin) */
+               else if (np->PHYType == MarvellPHY) {
+                       unsigned int data;
+
+                       data = mdio_read(dev, np->phys[0], SpecificReg);
+                       if (data & Full_Duplex)
+                               np->duplexmode = 2;     /* full duplex mode */
+                       else
+                               np->duplexmode = 1;     /* half duplex mode */
+                       data &= SpeedMask;
+                       if (data == Speed_1000M)
+                               np->line_speed = 3;     /* 1000M */
+                       else if (data == Speed_100M)
+                               np->line_speed = 2;     /* 100M */
+                       else
+                               np->line_speed = 1;     /* 10M */
+               }
+/* 89/6/13 add, (end) */
+/* 89/7/27 add, (begin) */
+               else if (np->PHYType == Myson981) {
+                       unsigned int data;
+
+                       data = mdio_read(dev, np->phys[0], StatusRegister);
+
+                       if (data & SPEED100)
+                               np->line_speed = 2;
+                       else
+                               np->line_speed = 1;
+
+                       if (data & FULLMODE)
+                               np->duplexmode = 2;
+                       else
+                               np->duplexmode = 1;
+               }
+/* 89/7/27 add, (end) */
+/* 89/12/29 add */
+               else if (np->PHYType == LevelOnePHY) {
+                       unsigned int data;
+
+                       data = mdio_read(dev, np->phys[0], SpecificReg);
+                       if (data & LXT1000_Full)
+                               np->duplexmode = 2;     /* full duplex mode */
+                       else
+                               np->duplexmode = 1;     /* half duplex mode */
+                       data &= SpeedMask;
+                       if (data == LXT1000_1000M)
+                               np->line_speed = 3;     /* 1000M */
+                       else if (data == LXT1000_100M)
+                               np->line_speed = 2;     /* 100M */
+                       else
+                               np->line_speed = 1;     /* 10M */
+               }
+               np->crvalue &= ~(CR_W_PS10 | CR_W_FD | CR_W_PS1000);
+               if (np->line_speed == 1)
+                       np->crvalue |= CR_W_PS10;
+               else if (np->line_speed == 3)
+                       np->crvalue |= CR_W_PS1000;
+               if (np->duplexmode == 2)
+                       np->crvalue |= CR_W_FD;
+       }
+}
+
+
+/* Take lock before calling this */
+static void allocate_rx_buffers(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+
+       /*  allocate skb for rx buffers */
+       while (np->really_rx_count != RX_RING_SIZE) {
+               struct sk_buff *skb;
+
+               skb = netdev_alloc_skb(dev, np->rx_buf_sz);
+               if (skb == NULL)
+                       break;  /* Better luck next round. */
+
+               while (np->lack_rxbuf->skbuff)
+                       np->lack_rxbuf = np->lack_rxbuf->next_desc_logical;
+
+               np->lack_rxbuf->skbuff = skb;
+               np->lack_rxbuf->buffer = dma_map_single(&np->pci_dev->dev,
+                                                       skb->data,
+                                                       np->rx_buf_sz,
+                                                       DMA_FROM_DEVICE);
+               np->lack_rxbuf->status = RXOWN;
+               ++np->really_rx_count;
+       }
+}
+
+
+static void netdev_timer(struct timer_list *t)
+{
+       struct netdev_private *np = from_timer(np, t, timer);
+       struct net_device *dev = np->mii.dev;
+       void __iomem *ioaddr = np->mem;
+       int old_crvalue = np->crvalue;
+       unsigned int old_linkok = np->linkok;
+       unsigned long flags;
+
+       if (debug)
+               printk(KERN_DEBUG "%s: Media selection timer tick, status %8.8x "
+                      "config %8.8x.\n", dev->name, ioread32(ioaddr + ISR),
+                      ioread32(ioaddr + TCRRCR));
+
+       spin_lock_irqsave(&np->lock, flags);
+
+       if (np->flags == HAS_MII_XCVR) {
+               getlinkstatus(dev);
+               if ((old_linkok == 0) && (np->linkok == 1)) {   /* we need to detect the media type again */
+                       getlinktype(dev);
+                       if (np->crvalue != old_crvalue) {
+                               stop_nic_rxtx(ioaddr, np->crvalue);
+                               iowrite32(np->crvalue, ioaddr + TCRRCR);
+                       }
+               }
+       }
+
+       allocate_rx_buffers(dev);
+
+       spin_unlock_irqrestore(&np->lock, flags);
+
+       np->timer.expires = RUN_AT(10 * HZ);
+       add_timer(&np->timer);
+}
+
+
+/* Take lock before calling */
+/* Reset chip and disable rx, tx and interrupts */
+static void reset_and_disable_rxtx(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       void __iomem *ioaddr = np->mem;
+       int delay = 51;
+
+       /* Reset the chip's Tx and Rx processes. */
+       stop_nic_rxtx(ioaddr, 0);
+
+       /* Disable interrupts by clearing the interrupt mask. */
+       iowrite32(0, ioaddr + IMR);
+
+       /* Reset the chip to erase previous misconfiguration. */
+       iowrite32(0x00000001, ioaddr + BCR);
+
+       /* Ueimor: wait for 50 PCI cycles (and flush posted writes btw).
+          We surely wait too long (address+data phase). Who cares? */
+       while (--delay) {
+               ioread32(ioaddr + BCR);
+               rmb();
+       }
+}
+
+
+/* Take lock before calling */
+/* Restore chip after reset */
+static void enable_rxtx(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       void __iomem *ioaddr = np->mem;
+
+       reset_rx_descriptors(dev);
+
+       iowrite32(np->tx_ring_dma + ((char *)np->cur_tx - (char *)np->tx_ring),
+               ioaddr + TXLBA);
+       iowrite32(np->rx_ring_dma + ((char *)np->cur_rx - (char *)np->rx_ring),
+               ioaddr + RXLBA);
+
+       iowrite32(np->bcrvalue, ioaddr + BCR);
+
+       iowrite32(0, ioaddr + RXPDR);
+       __set_rx_mode(dev); /* changes np->crvalue, writes it into TCRRCR */
+
+       /* Clear and Enable interrupts by setting the interrupt mask. */
+       iowrite32(FBE | TUNF | CNTOVF | RBU | TI | RI, ioaddr + ISR);
+       iowrite32(np->imrvalue, ioaddr + IMR);
+
+       iowrite32(0, ioaddr + TXPDR);
+}
+
+
+static void reset_timer(struct timer_list *t)
+{
+       struct netdev_private *np = from_timer(np, t, reset_timer);
+       struct net_device *dev = np->mii.dev;
+       unsigned long flags;
+
+       printk(KERN_WARNING "%s: resetting tx and rx machinery\n", dev->name);
+
+       spin_lock_irqsave(&np->lock, flags);
+       np->crvalue = np->crvalue_sv;
+       np->imrvalue = np->imrvalue_sv;
+
+       reset_and_disable_rxtx(dev);
+       /* works for me without this:
+       reset_tx_descriptors(dev); */
+       enable_rxtx(dev);
+       netif_start_queue(dev); /* FIXME: or netif_wake_queue(dev); ? */
+
+       np->reset_timer_armed = 0;
+
+       spin_unlock_irqrestore(&np->lock, flags);
+}
+
+
+static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       void __iomem *ioaddr = np->mem;
+       unsigned long flags;
+       int i;
+
+       printk(KERN_WARNING
+              "%s: Transmit timed out, status %8.8x, resetting...\n",
+              dev->name, ioread32(ioaddr + ISR));
+
+       {
+               printk(KERN_DEBUG "  Rx ring %p: ", np->rx_ring);
+               for (i = 0; i < RX_RING_SIZE; i++)
+                       printk(KERN_CONT " %8.8x",
+                              (unsigned int) np->rx_ring[i].status);
+               printk(KERN_CONT "\n");
+               printk(KERN_DEBUG "  Tx ring %p: ", np->tx_ring);
+               for (i = 0; i < TX_RING_SIZE; i++)
+                       printk(KERN_CONT " %4.4x", np->tx_ring[i].status);
+               printk(KERN_CONT "\n");
+       }
+
+       spin_lock_irqsave(&np->lock, flags);
+
+       reset_and_disable_rxtx(dev);
+       reset_tx_descriptors(dev);
+       enable_rxtx(dev);
+
+       spin_unlock_irqrestore(&np->lock, flags);
+
+       netif_trans_update(dev); /* prevent tx timeout */
+       dev->stats.tx_errors++;
+       netif_wake_queue(dev); /* or .._start_.. ?? */
+}
+
+
+/* Initialize the Rx and Tx rings, along with various 'dev' bits. */
+static void init_ring(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       int i;
+
+       /* initialize rx variables */
+       np->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
+       np->cur_rx = &np->rx_ring[0];
+       np->lack_rxbuf = np->rx_ring;
+       np->really_rx_count = 0;
+
+       /* initial rx descriptors. */
+       for (i = 0; i < RX_RING_SIZE; i++) {
+               np->rx_ring[i].status = 0;
+               np->rx_ring[i].control = np->rx_buf_sz << RBSShift;
+               np->rx_ring[i].next_desc = np->rx_ring_dma +
+                       (i + 1)*sizeof(struct fealnx_desc);
+               np->rx_ring[i].next_desc_logical = &np->rx_ring[i + 1];
+               np->rx_ring[i].skbuff = NULL;
+       }
+
+       /* for the last rx descriptor */
+       np->rx_ring[i - 1].next_desc = np->rx_ring_dma;
+       np->rx_ring[i - 1].next_desc_logical = np->rx_ring;
+
+       /* allocate skb for rx buffers */
+       for (i = 0; i < RX_RING_SIZE; i++) {
+               struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz);
+
+               if (skb == NULL) {
+                       np->lack_rxbuf = &np->rx_ring[i];
+                       break;
+               }
+
+               ++np->really_rx_count;
+               np->rx_ring[i].skbuff = skb;
+               np->rx_ring[i].buffer = dma_map_single(&np->pci_dev->dev,
+                                                      skb->data,
+                                                      np->rx_buf_sz,
+                                                      DMA_FROM_DEVICE);
+               np->rx_ring[i].status = RXOWN;
+               np->rx_ring[i].control |= RXIC;
+       }
+
+       /* initialize tx variables */
+       np->cur_tx = &np->tx_ring[0];
+       np->cur_tx_copy = &np->tx_ring[0];
+       np->really_tx_count = 0;
+       np->free_tx_count = TX_RING_SIZE;
+
+       for (i = 0; i < TX_RING_SIZE; i++) {
+               np->tx_ring[i].status = 0;
+               /* do we need np->tx_ring[i].control = XXX; ?? */
+               np->tx_ring[i].next_desc = np->tx_ring_dma +
+                       (i + 1)*sizeof(struct fealnx_desc);
+               np->tx_ring[i].next_desc_logical = &np->tx_ring[i + 1];
+               np->tx_ring[i].skbuff = NULL;
+       }
+
+       /* for the last tx descriptor */
+       np->tx_ring[i - 1].next_desc = np->tx_ring_dma;
+       np->tx_ring[i - 1].next_desc_logical = &np->tx_ring[0];
+}
+
+
+static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       unsigned long flags;
+
+       spin_lock_irqsave(&np->lock, flags);
+
+       np->cur_tx_copy->skbuff = skb;
+
+#define one_buffer
+#define BPT 1022
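+/*
+ * Only the one_buffer path below is compiled in. The (dead) two_buffer
+ * variant would split any frame longer than BPT bytes across two
+ * descriptors: the first carries BPT bytes (TXFD), the second the
+ * remaining skb->len - BPT bytes (TXLD).
+ */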
+#if defined(one_buffer)
+       np->cur_tx_copy->buffer = dma_map_single(&np->pci_dev->dev, skb->data,
+                                                skb->len, DMA_TO_DEVICE);
+       np->cur_tx_copy->control = TXIC | TXLD | TXFD | CRCEnable | PADEnable;
+       np->cur_tx_copy->control |= (skb->len << PKTSShift);    /* pkt size */
+       np->cur_tx_copy->control |= (skb->len << TBSShift);     /* buffer size */
+// 89/12/29 add,
+       if (np->pci_dev->device == 0x891)
+               np->cur_tx_copy->control |= ETIControl | RetryTxLC;
+       np->cur_tx_copy->status = TXOWN;
+       np->cur_tx_copy = np->cur_tx_copy->next_desc_logical;
+       --np->free_tx_count;
+#elif defined(two_buffer)
+       if (skb->len > BPT) {
+               struct fealnx_desc *next;
+
+               /* for the first descriptor */
+               np->cur_tx_copy->buffer = dma_map_single(&np->pci_dev->dev,
+                                                        skb->data, BPT,
+                                                        DMA_TO_DEVICE);
+               np->cur_tx_copy->control = TXIC | TXFD | CRCEnable | PADEnable;
+               np->cur_tx_copy->control |= (skb->len << PKTSShift);    /* pkt size */
+               np->cur_tx_copy->control |= (BPT << TBSShift);  /* buffer size */
+
+               /* for the last descriptor */
+               next = np->cur_tx_copy->next_desc_logical;
+               next->skbuff = skb;
+               next->control = TXIC | TXLD | CRCEnable | PADEnable;
+               next->control |= (skb->len << PKTSShift);       /* pkt size */
+               next->control |= ((skb->len - BPT) << TBSShift);        /* buf size */
+// 89/12/29 add,
+               if (np->pci_dev->device == 0x891)
+                       np->cur_tx_copy->control |= ETIControl | RetryTxLC;
+               next->buffer = dma_map_single(&np->pci_dev->dev,
+                                             skb->data + BPT, skb->len - BPT,
+                                             DMA_TO_DEVICE);
+
+               next->status = TXOWN;
+               np->cur_tx_copy->status = TXOWN;
+
+               np->cur_tx_copy = next->next_desc_logical;
+               np->free_tx_count -= 2;
+       } else {
+               np->cur_tx_copy->buffer = dma_map_single(&np->pci_dev->dev,
+                                                        skb->data, skb->len,
+                                                        DMA_TO_DEVICE);
+               np->cur_tx_copy->control = TXIC | TXLD | TXFD | CRCEnable | PADEnable;
+               np->cur_tx_copy->control |= (skb->len << PKTSShift);    /* pkt size */
+               np->cur_tx_copy->control |= (skb->len << TBSShift);     /* buffer size */
+// 89/12/29 add,
+               if (np->pci_dev->device == 0x891)
+                       np->cur_tx_copy->control |= ETIControl | RetryTxLC;
+               np->cur_tx_copy->status = TXOWN;
+               np->cur_tx_copy = np->cur_tx_copy->next_desc_logical;
+               --np->free_tx_count;
+       }
+#endif
+
+       if (np->free_tx_count < 2)
+               netif_stop_queue(dev);
+       ++np->really_tx_count;
+       iowrite32(0, np->mem + TXPDR);
+
+       spin_unlock_irqrestore(&np->lock, flags);
+       return NETDEV_TX_OK;
+}
+
+
+/* Take lock before calling */
+/* Chip probably hosed tx ring. Clean up. */
+static void reset_tx_descriptors(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       struct fealnx_desc *cur;
+       int i;
+
+       /* initialize tx variables */
+       np->cur_tx = &np->tx_ring[0];
+       np->cur_tx_copy = &np->tx_ring[0];
+       np->really_tx_count = 0;
+       np->free_tx_count = TX_RING_SIZE;
+
+       for (i = 0; i < TX_RING_SIZE; i++) {
+               cur = &np->tx_ring[i];
+               if (cur->skbuff) {
+                       dma_unmap_single(&np->pci_dev->dev, cur->buffer,
+                                        cur->skbuff->len, DMA_TO_DEVICE);
+                       dev_kfree_skb_any(cur->skbuff);
+                       cur->skbuff = NULL;
+               }
+               cur->status = 0;
+               cur->control = 0;       /* needed? */
+               /* probably not needed. We do it for purely paranoid reasons */
+               cur->next_desc = np->tx_ring_dma +
+                       (i + 1)*sizeof(struct fealnx_desc);
+               cur->next_desc_logical = &np->tx_ring[i + 1];
+       }
+       /* for the last tx descriptor */
+       np->tx_ring[TX_RING_SIZE - 1].next_desc = np->tx_ring_dma;
+       np->tx_ring[TX_RING_SIZE - 1].next_desc_logical = &np->tx_ring[0];
+}
+
+
+/* Take lock and stop rx before calling this */
+static void reset_rx_descriptors(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       struct fealnx_desc *cur = np->cur_rx;
+       int i;
+
+       allocate_rx_buffers(dev);
+
+       for (i = 0; i < RX_RING_SIZE; i++) {
+               if (cur->skbuff)
+                       cur->status = RXOWN;
+               cur = cur->next_desc_logical;
+       }
+
+       iowrite32(np->rx_ring_dma + ((char *)np->cur_rx - (char *)np->rx_ring),
+               np->mem + RXLBA);
+}
+
+
+/* The interrupt handler does all of the Rx thread work and cleans up
+   after the Tx thread. */
+static irqreturn_t intr_handler(int irq, void *dev_instance)
+{
+       struct net_device *dev = (struct net_device *) dev_instance;
+       struct netdev_private *np = netdev_priv(dev);
+       void __iomem *ioaddr = np->mem;
+       long boguscnt = max_interrupt_work;
+       unsigned int num_tx = 0;
+       int handled = 0;
+
+       spin_lock(&np->lock);
+
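+       /* Mask all interrupts while servicing; imrvalue is restored on exit. */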
+       iowrite32(0, ioaddr + IMR);
+
+       do {
+               u32 intr_status = ioread32(ioaddr + ISR);
+
+               /* Acknowledge all of the current interrupt sources ASAP. */
+               iowrite32(intr_status, ioaddr + ISR);
+
+               if (debug)
+                       printk(KERN_DEBUG "%s: Interrupt, status %4.4x.\n", dev->name,
+                              intr_status);
+
+               if (!(intr_status & np->imrvalue))
+                       break;
+
+               handled = 1;
+
+// 90/1/16 delete,
+//
+//      if (intr_status & FBE)
+//      {   /* fatal error */
+//          stop_nic_tx(ioaddr, 0);
+//          stop_nic_rx(ioaddr, 0);
+//          break;
+//      };
+
+               if (intr_status & TUNF)
+                       iowrite32(0, ioaddr + TXPDR);
+
+               if (intr_status & CNTOVF) {
+                       /* missed pkts */
+                       dev->stats.rx_missed_errors +=
+                               ioread32(ioaddr + TALLY) & 0x7fff;
+
+                       /* crc error */
+                       dev->stats.rx_crc_errors +=
+                           (ioread32(ioaddr + TALLY) & 0x7fff0000) >> 16;
+               }
+
+               if (intr_status & (RI | RBU)) {
+                       if (intr_status & RI)
+                               netdev_rx(dev);
+                       else {
+                               stop_nic_rx(ioaddr, np->crvalue);
+                               reset_rx_descriptors(dev);
+                               iowrite32(np->crvalue, ioaddr + TCRRCR);
+                       }
+               }
+
+               while (np->really_tx_count) {
+                       long tx_status = np->cur_tx->status;
+                       long tx_control = np->cur_tx->control;
+
+                       if (!(tx_control & TXLD)) {     /* this pkt is combined by two tx descriptors */
+                               struct fealnx_desc *next;
+
+                               next = np->cur_tx->next_desc_logical;
+                               tx_status = next->status;
+                               tx_control = next->control;
+                       }
+
+                       if (tx_status & TXOWN)
+                               break;
+
+                       if (!(np->crvalue & CR_W_ENH)) {
+                               if (tx_status & (CSL | LC | EC | UDF | HF)) {
+                                       dev->stats.tx_errors++;
+                                       if (tx_status & EC)
+                                               dev->stats.tx_aborted_errors++;
+                                       if (tx_status & CSL)
+                                               dev->stats.tx_carrier_errors++;
+                                       if (tx_status & LC)
+                                               dev->stats.tx_window_errors++;
+                                       if (tx_status & UDF)
+                                               dev->stats.tx_fifo_errors++;
+                                       if ((tx_status & HF) && np->mii.full_duplex == 0)
+                                               dev->stats.tx_heartbeat_errors++;
+
+                               } else {
+                                       dev->stats.tx_bytes +=
+                                           ((tx_control & PKTSMask) >> PKTSShift);
+
+                                       dev->stats.collisions +=
+                                           ((tx_status & NCRMask) >> NCRShift);
+                                       dev->stats.tx_packets++;
+                               }
+                       } else {
+                               dev->stats.tx_bytes +=
+                                   ((tx_control & PKTSMask) >> PKTSShift);
+                               dev->stats.tx_packets++;
+                       }
+
+                       /* Free the original skb. */
+                       dma_unmap_single(&np->pci_dev->dev,
+                                        np->cur_tx->buffer,
+                                        np->cur_tx->skbuff->len,
+                                        DMA_TO_DEVICE);
+                       dev_consume_skb_irq(np->cur_tx->skbuff);
+                       np->cur_tx->skbuff = NULL;
+                       --np->really_tx_count;
+                       if (np->cur_tx->control & TXLD) {
+                               np->cur_tx = np->cur_tx->next_desc_logical;
+                               ++np->free_tx_count;
+                       } else {
+                               np->cur_tx = np->cur_tx->next_desc_logical;
+                               np->cur_tx = np->cur_tx->next_desc_logical;
+                               np->free_tx_count += 2;
+                       }
+                       num_tx++;
+               }               /* end of while loop */
+
+               if (num_tx && np->free_tx_count >= 2)
+                       netif_wake_queue(dev);
+
+               /* read transmit status for enhanced mode only */
+               if (np->crvalue & CR_W_ENH) {
+                       long data;
+
+                       data = ioread32(ioaddr + TSR);
+                       dev->stats.tx_errors += (data & 0xff000000) >> 24;
+                       dev->stats.tx_aborted_errors +=
+                               (data & 0xff000000) >> 24;
+                       dev->stats.tx_window_errors +=
+                               (data & 0x00ff0000) >> 16;
+                       dev->stats.collisions += (data & 0x0000ffff);
+               }
+
+               if (--boguscnt < 0) {
+                       printk(KERN_WARNING "%s: Too much work at interrupt, "
+                              "status=0x%4.4x.\n", dev->name, intr_status);
+                       if (!np->reset_timer_armed) {
+                               np->reset_timer_armed = 1;
+                               np->reset_timer.expires = RUN_AT(HZ/2);
+                               add_timer(&np->reset_timer);
+                               stop_nic_rxtx(ioaddr, 0);
+                               netif_stop_queue(dev);
+                               /* or netif_tx_disable(dev); ?? */
+                               /* Prevent other paths from enabling tx,rx,intrs */
+                               np->crvalue_sv = np->crvalue;
+                               np->imrvalue_sv = np->imrvalue;
+                               np->crvalue &= ~(CR_W_TXEN | CR_W_RXEN); /* or simply = 0? */
+                               np->imrvalue = 0;
+                       }
+
+                       break;
+               }
+       } while (1);
+
+       /* read the tally counters */
+       /* missed pkts */
+       dev->stats.rx_missed_errors += ioread32(ioaddr + TALLY) & 0x7fff;
+
+       /* crc error */
+       dev->stats.rx_crc_errors +=
+               (ioread32(ioaddr + TALLY) & 0x7fff0000) >> 16;
+
+       if (debug)
+               printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n",
+                      dev->name, ioread32(ioaddr + ISR));
+
+       iowrite32(np->imrvalue, ioaddr + IMR);
+
+       spin_unlock(&np->lock);
+
+       return IRQ_RETVAL(handled);
+}
+
+
+/* This routine is logically part of the interrupt handler, but separated
+   for clarity and better register allocation. */
+static int netdev_rx(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       void __iomem *ioaddr = np->mem;
+
+       /* While the chip no longer owns the next filled entry, a packet has
+          arrived. Send it up. */
+       while (!(np->cur_rx->status & RXOWN) && np->cur_rx->skbuff) {
+               s32 rx_status = np->cur_rx->status;
+
+               if (np->really_rx_count == 0)
+                       break;
+
+               if (debug)
+                       printk(KERN_DEBUG "  netdev_rx() status was %8.8x.\n", rx_status);
+
+               if ((!((rx_status & RXFSD) && (rx_status & RXLSD))) ||
+                   (rx_status & ErrorSummary)) {
+                       if (rx_status & ErrorSummary) { /* there was a fatal error */
+                               if (debug)
+                                       printk(KERN_DEBUG
+                                              "%s: Receive error, Rx status %8.8x.\n",
+                                              dev->name, rx_status);
+
+                               dev->stats.rx_errors++; /* end of a packet. */
+                               if (rx_status & (LONGPKT | RUNTPKT))
+                                       dev->stats.rx_length_errors++;
+                               if (rx_status & RXER)
+                                       dev->stats.rx_frame_errors++;
+                               if (rx_status & CRC)
+                                       dev->stats.rx_crc_errors++;
+                       } else {
+                               int need_to_reset = 0;
+                               int desno = 0;
+
+                               if (rx_status & RXFSD) {        /* this pkt is too long, over one rx buffer */
+                                       struct fealnx_desc *cur;
+
+                                       /* check whether this packet has been received completely */
+                                       cur = np->cur_rx;
+                                       while (desno <= np->really_rx_count) {
+                                               ++desno;
+                                               if ((!(cur->status & RXOWN)) &&
+                                                   (cur->status & RXLSD))
+                                                       break;
+                                               /* goto next rx descriptor */
+                                               cur = cur->next_desc_logical;
+                                       }
+                                       if (desno > np->really_rx_count)
+                                               need_to_reset = 1;
+                               } else  /* RXLSD not found: something went wrong */
+                                       need_to_reset = 1;
+
+                               if (need_to_reset == 0) {
+                                       int i;
+
+                                       dev->stats.rx_length_errors++;
+
+                                       /* free all rx descriptors related to this long pkt */
+                                       for (i = 0; i < desno; ++i) {
+                                               if (!np->cur_rx->skbuff) {
+                                                       printk(KERN_DEBUG
+                                                               "%s: I'm scared\n", dev->name);
+                                                       break;
+                                               }
+                                               np->cur_rx->status = RXOWN;
+                                               np->cur_rx = np->cur_rx->next_desc_logical;
+                                       }
+                                       continue;
+                               } else {        /* rx error, need to reset this chip */
+                                       stop_nic_rx(ioaddr, np->crvalue);
+                                       reset_rx_descriptors(dev);
+                                       iowrite32(np->crvalue, ioaddr + TCRRCR);
+                               }
+                               break;  /* exit the while loop */
+                       }
+               } else {        /* this received pkt is ok */
+
+                       struct sk_buff *skb;
+                       /* Omit the four octet CRC from the length. */
+                       short pkt_len = ((rx_status & FLNGMASK) >> FLNGShift) - 4;
+
+#ifndef final_version
+                       if (debug)
+                               printk(KERN_DEBUG "  netdev_rx() normal Rx pkt length %d"
+                                      " status %x.\n", pkt_len, rx_status);
+#endif
+
+                       /* Check if the packet is long enough to accept without copying
+                          to a minimally-sized skbuff. */
+                       if (pkt_len < rx_copybreak &&
+                           (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) {
+                               skb_reserve(skb, 2);    /* 16 byte align the IP header */
+                               dma_sync_single_for_cpu(&np->pci_dev->dev,
+                                                       np->cur_rx->buffer,
+                                                       np->rx_buf_sz,
+                                                       DMA_FROM_DEVICE);
+                               /* Call copy + cksum if available. */
+
+#if ! defined(__alpha__)
+                               skb_copy_to_linear_data(skb,
+                                       np->cur_rx->skbuff->data, pkt_len);
+                               skb_put(skb, pkt_len);
+#else
+                               skb_put_data(skb, np->cur_rx->skbuff->data,
+                                            pkt_len);
+#endif
+                               dma_sync_single_for_device(&np->pci_dev->dev,
+                                                          np->cur_rx->buffer,
+                                                          np->rx_buf_sz,
+                                                          DMA_FROM_DEVICE);
+                       } else {
+                               dma_unmap_single(&np->pci_dev->dev,
+                                                np->cur_rx->buffer,
+                                                np->rx_buf_sz,
+                                                DMA_FROM_DEVICE);
+                               skb = np->cur_rx->skbuff;
+                               skb_put(skb, pkt_len);
+                               np->cur_rx->skbuff = NULL;
+                               --np->really_rx_count;
+                       }
+                       skb->protocol = eth_type_trans(skb, dev);
+                       netif_rx(skb);
+                       dev->stats.rx_packets++;
+                       dev->stats.rx_bytes += pkt_len;
+               }
+
+               np->cur_rx = np->cur_rx->next_desc_logical;
+       }                       /* end of while loop */
+
+       /*  allocate skb for rx buffers */
+       allocate_rx_buffers(dev);
+
+       return 0;
+}
+
+
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       void __iomem *ioaddr = np->mem;
+
+       /* The chip only needs to report frames it silently dropped. */
+       if (netif_running(dev)) {
+               dev->stats.rx_missed_errors +=
+                       ioread32(ioaddr + TALLY) & 0x7fff;
+               dev->stats.rx_crc_errors +=
+                       (ioread32(ioaddr + TALLY) & 0x7fff0000) >> 16;
+       }
+
+       return &dev->stats;
+}
+
+
+/* for dev->set_multicast_list */
+static void set_rx_mode(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       unsigned long flags;
+
+       spin_lock_irqsave(&np->lock, flags);
+       __set_rx_mode(dev);
+       spin_unlock_irqrestore(&np->lock, flags);
+}
+
+
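+/*
+ * Hash-filter note (illustrative): __set_rx_mode() below hashes each
+ * multicast address with ether_crc(), takes the top six bits XOR 0x3F as
+ * a bit index 0..63, and sets that bit across the MAR0/MAR1 pair. A hash
+ * of 0x25, for instance, sets bit 5 of MAR1 (0x25 >> 5 == 1, 0x25 & 31 == 5).
+ */
+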
+/* Take lock before calling */
+static void __set_rx_mode(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       void __iomem *ioaddr = np->mem;
+       u32 mc_filter[2];       /* Multicast hash filter */
+       u32 rx_mode;
+
+       if (dev->flags & IFF_PROMISC) { /* Set promiscuous. */
+               memset(mc_filter, 0xff, sizeof(mc_filter));
+               rx_mode = CR_W_PROM | CR_W_AB | CR_W_AM;
+       } else if ((netdev_mc_count(dev) > multicast_filter_limit) ||
+                  (dev->flags & IFF_ALLMULTI)) {
+               /* Too many to match, or accept all multicasts. */
+               memset(mc_filter, 0xff, sizeof(mc_filter));
+               rx_mode = CR_W_AB | CR_W_AM;
+       } else {
+               struct netdev_hw_addr *ha;
+
+               memset(mc_filter, 0, sizeof(mc_filter));
+               netdev_for_each_mc_addr(ha, dev) {
+                       unsigned int bit;
+
+                       bit = (ether_crc(ETH_ALEN, ha->addr) >> 26) ^ 0x3F;
+                       mc_filter[bit >> 5] |= 1U << (bit & 31);
+               }
+               rx_mode = CR_W_AB | CR_W_AM;
+       }
+
+       stop_nic_rxtx(ioaddr, np->crvalue);
+
+       iowrite32(mc_filter[0], ioaddr + MAR0);
+       iowrite32(mc_filter[1], ioaddr + MAR1);
+       np->crvalue &= ~CR_W_RXMODEMASK;
+       np->crvalue |= rx_mode;
+       iowrite32(np->crvalue, ioaddr + TCRRCR);
+}
+
+static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+       struct netdev_private *np = netdev_priv(dev);
+
+       strscpy(info->driver, DRV_NAME, sizeof(info->driver));
+       strscpy(info->bus_info, pci_name(np->pci_dev), sizeof(info->bus_info));
+}
+
+static int netdev_get_link_ksettings(struct net_device *dev,
+                                    struct ethtool_link_ksettings *cmd)
+{
+       struct netdev_private *np = netdev_priv(dev);
+
+       spin_lock_irq(&np->lock);
+       mii_ethtool_get_link_ksettings(&np->mii, cmd);
+       spin_unlock_irq(&np->lock);
+
+       return 0;
+}
+
+static int netdev_set_link_ksettings(struct net_device *dev,
+                                    const struct ethtool_link_ksettings *cmd)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       int rc;
+
+       spin_lock_irq(&np->lock);
+       rc = mii_ethtool_set_link_ksettings(&np->mii, cmd);
+       spin_unlock_irq(&np->lock);
+
+       return rc;
+}
+
+static int netdev_nway_reset(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       return mii_nway_restart(&np->mii);
+}
+
+static u32 netdev_get_link(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       return mii_link_ok(&np->mii);
+}
+
+static u32 netdev_get_msglevel(struct net_device *dev)
+{
+       return debug;
+}
+
+static void netdev_set_msglevel(struct net_device *dev, u32 value)
+{
+       debug = value;
+}
+
+static const struct ethtool_ops netdev_ethtool_ops = {
+       .get_drvinfo            = netdev_get_drvinfo,
+       .nway_reset             = netdev_nway_reset,
+       .get_link               = netdev_get_link,
+       .get_msglevel           = netdev_get_msglevel,
+       .set_msglevel           = netdev_set_msglevel,
+       .get_link_ksettings     = netdev_get_link_ksettings,
+       .set_link_ksettings     = netdev_set_link_ksettings,
+};
+
+static int mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       int rc;
+
+       if (!netif_running(dev))
+               return -EINVAL;
+
+       spin_lock_irq(&np->lock);
+       rc = generic_mii_ioctl(&np->mii, if_mii(rq), cmd, NULL);
+       spin_unlock_irq(&np->lock);
+
+       return rc;
+}
+
+
+static int netdev_close(struct net_device *dev)
+{
+       struct netdev_private *np = netdev_priv(dev);
+       void __iomem *ioaddr = np->mem;
+       int i;
+
+       netif_stop_queue(dev);
+
+       /* Disable interrupts by clearing the interrupt mask. */
+       iowrite32(0x0000, ioaddr + IMR);
+
+       /* Stop the chip's Tx and Rx processes. */
+       stop_nic_rxtx(ioaddr, 0);
+
+       del_timer_sync(&np->timer);
+       del_timer_sync(&np->reset_timer);
+
+       free_irq(np->pci_dev->irq, dev);
+
+       /* Free all the skbuffs in the Rx queue. */
+       for (i = 0; i < RX_RING_SIZE; i++) {
+               struct sk_buff *skb = np->rx_ring[i].skbuff;
+
+               np->rx_ring[i].status = 0;
+               if (skb) {
+                       dma_unmap_single(&np->pci_dev->dev,
+                                        np->rx_ring[i].buffer, np->rx_buf_sz,
+                                        DMA_FROM_DEVICE);
+                       dev_kfree_skb(skb);
+                       np->rx_ring[i].skbuff = NULL;
+               }
+       }
+
+       for (i = 0; i < TX_RING_SIZE; i++) {
+               struct sk_buff *skb = np->tx_ring[i].skbuff;
+
+               if (skb) {
+                       dma_unmap_single(&np->pci_dev->dev,
+                                        np->tx_ring[i].buffer, skb->len,
+                                        DMA_TO_DEVICE);
+                       dev_kfree_skb(skb);
+                       np->tx_ring[i].skbuff = NULL;
+               }
+       }
+
+       return 0;
+}
+
+static const struct pci_device_id fealnx_pci_tbl[] = {
+       {0x1516, 0x0800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+       {0x1516, 0x0803, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1},
+       {0x1516, 0x0891, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2},
+       {} /* terminate list */
+};
+MODULE_DEVICE_TABLE(pci, fealnx_pci_tbl);
+
+
+static struct pci_driver fealnx_driver = {
+       .name           = "fealnx",
+       .id_table       = fealnx_pci_tbl,
+       .probe          = fealnx_init_one,
+       .remove         = fealnx_remove_one,
+};
+
+module_pci_driver(fealnx_driver);
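
For reference, the multicast path in __set_rx_mode() above folds each address into a 64-bit hash filter split across the MAR0/MAR1 registers. A stand-alone sketch of that arithmetic (ether_crc() is re-implemented locally to mirror the kernel helper's LSB-first CRC-32 so the example builds outside the tree; the MAC address is an arbitrary sample, and the shift count is masked explicitly where the driver writes 1 << bit):

#include <stdint.h>
#include <stdio.h>

/* Local stand-in for the kernel's ether_crc(): CRC-32, data bits LSB-first. */
static uint32_t ether_crc(int len, const uint8_t *data)
{
	uint32_t crc = 0xffffffff;

	while (len--) {
		uint8_t octet = *data++;
		int bit;

		for (bit = 0; bit < 8; bit++, octet >>= 1)
			crc = (crc << 1) ^
			      (((crc >> 31) ^ (octet & 1)) ? 0x04c11db7 : 0);
	}
	return crc;
}

int main(void)
{
	const uint8_t mac[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
	uint32_t mc_filter[2] = { 0, 0 };
	unsigned int bit;

	/* Same arithmetic as __set_rx_mode(): the inverted top six CRC
	 * bits pick one of 64 filter bits; bit 5 selects MAR0 vs MAR1. */
	bit = (ether_crc(6, mac) >> 26) ^ 0x3F;
	mc_filter[bit >> 5] |= 1u << (bit & 31);

	printf("bit=%u MAR0=%08x MAR1=%08x\n", bit, mc_filter[0], mc_filter[1]);
	return 0;
}
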
index 467001db5070ed568a9d4be4f5e3c6d957c526cb..228cd502bb48a93238b3581907b864845db8de26 100644 (file)
@@ -15525,6 +15525,7 @@ static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw)
        int err;
        int v_idx;
 
+       pci_set_drvdata(pf->pdev, pf);
        pci_save_state(pf->pdev);
 
        /* set up periodic task facility */
index c557dfc50aadd4511df9fe73ecb08197664e35fe..396e555023aae06b9e62ef090fa60283f9b27d08 100644 (file)
@@ -1411,7 +1411,7 @@ ice_add_dscp_pfc_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
        tlv->ouisubtype = htonl(ouisubtype);
 
        buf[0] = dcbcfg->pfc.pfccap & 0xF;
-       buf[1] = dcbcfg->pfc.pfcena & 0xF;
+       buf[1] = dcbcfg->pfc.pfcena;
 }
 
 /**
index 781475480ff2700762b0ab1ce2da2b9c6dac4553..0f52ea38b6f3a2a49fdf5420a2b395cc1b27a16e 100644 (file)
@@ -2126,7 +2126,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
        ice_for_each_rxq(vsi, i)
                ice_tx_xsk_pool(vsi, i);
 
-       return ret;
+       return 0;
 }
 
 /**
@@ -2693,12 +2693,14 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
                return ret;
 
        /* allocate memory for Tx/Rx ring stat pointers */
-       if (ice_vsi_alloc_stat_arrays(vsi))
+       ret = ice_vsi_alloc_stat_arrays(vsi);
+       if (ret)
                goto unroll_vsi_alloc;
 
        ice_alloc_fd_res(vsi);
 
-       if (ice_vsi_get_qs(vsi)) {
+       ret = ice_vsi_get_qs(vsi);
+       if (ret) {
                dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n",
                        vsi->idx);
                goto unroll_vsi_alloc_stat;
@@ -2811,6 +2813,7 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
                break;
        default:
                /* clean up the resources and exit */
+               ret = -EINVAL;
                goto unroll_vsi_init;
        }
 
@@ -3508,10 +3511,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
                if (vsi_flags & ICE_VSI_FLAG_INIT) {
                        ret = -EIO;
                        goto err_vsi_cfg_tc_lan;
-               } else {
-                       kfree(coalesce);
-                       return ice_schedule_reset(pf, ICE_RESET_PFR);
                }
+
+               kfree(coalesce);
+               return ice_schedule_reset(pf, ICE_RESET_PFR);
        }
 
        ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq);
@@ -3759,7 +3762,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
        dev = ice_pf_to_dev(pf);
        if (vsi->tc_cfg.ena_tc == ena_tc &&
            vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
-               return ret;
+               return 0;
 
        ice_for_each_traffic_class(i) {
                /* build bitmap of enabled TCs */
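
The error-handling hunks above share one goal: capture each callee's return value so the unwind path propagates the real error instead of a stale or zero ret. A generic sketch of the shape being enforced (setup_a(), setup_b() and undo_setup_a() are hypothetical stand-ins, not ice APIs):

static int setup_a(void) { return 0; }	/* hypothetical step */
static int setup_b(void) { return 0; }	/* hypothetical step */
static void undo_setup_a(void) { }

static int demo_init(void)
{
	int ret;

	ret = setup_a();
	if (ret)
		return ret;		/* the callee's errno, not a guess */

	ret = setup_b();
	if (ret)
		goto undo_a;		/* ret still holds setup_b()'s error */

	return 0;

undo_a:
	undo_setup_a();
	return ret;
}
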
index 6b48cbc049c67178bbecfbfb12683770dada4b07..76f29a5bf8d73d6bc25f376370e17087c59f7f1f 100644 (file)
@@ -1455,8 +1455,8 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
                if (match.mask->vlan_priority) {
                        fltr->flags |= ICE_TC_FLWR_FIELD_VLAN_PRIO;
                        headers->vlan_hdr.vlan_prio =
-                               cpu_to_be16((match.key->vlan_priority <<
-                                            VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK);
+                               be16_encode_bits(match.key->vlan_priority,
+                                                VLAN_PRIO_MASK);
                }
 
                if (match.mask->vlan_tpid)
@@ -1489,8 +1489,8 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
                if (match.mask->vlan_priority) {
                        fltr->flags |= ICE_TC_FLWR_FIELD_CVLAN_PRIO;
                        headers->cvlan_hdr.vlan_prio =
-                               cpu_to_be16((match.key->vlan_priority <<
-                                            VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK);
+                               be16_encode_bits(match.key->vlan_priority,
+                                                VLAN_PRIO_MASK);
                }
        }
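
The two hunks above replace an open-coded shift-and-mask with be16_encode_bits(), which places the value into the field described by the mask and converts to big-endian in one step. A small equivalence sketch (assuming <linux/bitfield.h> and the VLAN_PRIO_* definitions from <linux/if_vlan.h>; the priority value is arbitrary):

#include <linux/bitfield.h>
#include <linux/if_vlan.h>
#include <linux/kernel.h>

static void vlan_prio_demo(void)
{
	u16 prio = 5;	/* arbitrary 3-bit priority */
	__be16 old_form = cpu_to_be16((prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK);
	__be16 new_form = be16_encode_bits(prio, VLAN_PRIO_MASK);

	WARN_ON(old_form != new_form);	/* both encode 0xa000 */
}
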
 
index 9b4ecbe4f36d41b3855b70243de9e4a4994b0ea7..3ea00bc9b91caf1fcf293e11448281f48fe3edee 100644 (file)
@@ -4996,6 +4996,14 @@ static int mvpp2_bm_switch_buffers(struct mvpp2 *priv, bool percpu)
 
        for (i = 0; i < priv->port_count; i++) {
                port = priv->port_list[i];
+               if (percpu && port->ntxqs >= num_possible_cpus() * 2)
+                       xdp_set_features_flag(port->dev,
+                                             NETDEV_XDP_ACT_BASIC |
+                                             NETDEV_XDP_ACT_REDIRECT |
+                                             NETDEV_XDP_ACT_NDO_XMIT);
+               else
+                       xdp_clear_features_flag(port->dev);
+
                mvpp2_swf_bm_pool_init(port);
                if (status[i])
                        mvpp2_open(port->dev);
@@ -6863,13 +6871,14 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
        if (!port->priv->percpu_pools)
                mvpp2_set_hw_csum(port, port->pool_long->id);
+       else if (port->ntxqs >= num_possible_cpus() * 2)
+               dev->xdp_features = NETDEV_XDP_ACT_BASIC |
+                                   NETDEV_XDP_ACT_REDIRECT |
+                                   NETDEV_XDP_ACT_NDO_XMIT;
 
        dev->vlan_features |= features;
        netif_set_tso_max_segs(dev, MVPP2_MAX_TSO_SEGS);
 
-       dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
-                           NETDEV_XDP_ACT_NDO_XMIT;
-
        dev->priv_flags |= IFF_UNICAST_FLT;
 
        /* MTU range: 68 - 9704 */
index 389663a13d1d1536a7c0f40267896e1951ef8959..ef721caeac49b1b9872cdf2aebbdc67515c9ad6d 100644 (file)
@@ -884,6 +884,9 @@ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf,
 int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc);
 int rvu_cpt_init(struct rvu *rvu);
 
+#define NDC_AF_BANK_MASK       GENMASK_ULL(7, 0)
+#define NDC_AF_BANK_LINE_MASK  GENMASK_ULL(31, 16)
+
 /* CN10K RVU */
 int rvu_set_channels_base(struct rvu *rvu);
 void rvu_program_channels(struct rvu *rvu);
@@ -902,6 +905,8 @@ static inline void rvu_dbg_init(struct rvu *rvu) {}
 static inline void rvu_dbg_exit(struct rvu *rvu) {}
 #endif
 
+int rvu_ndc_fix_locked_cacheline(struct rvu *rvu, int blkaddr);
+
 /* RVU Switch */
 void rvu_switch_enable(struct rvu *rvu);
 void rvu_switch_disable(struct rvu *rvu);
index fa280ebd3052b8ddd6bc7fe90a417f48643af6e7..26cfa501f1a112dd166b4260faeccd0cc65d1b17 100644 (file)
@@ -198,9 +198,6 @@ enum cpt_eng_type {
        CPT_IE_TYPE = 3,
 };
 
-#define NDC_MAX_BANK(rvu, blk_addr) (rvu_read64(rvu, \
-                                               blk_addr, NDC_AF_CONST) & 0xFF)
-
 #define rvu_dbg_NULL NULL
 #define rvu_dbg_open_NULL NULL
 
@@ -1448,6 +1445,7 @@ static int ndc_blk_hits_miss_stats(struct seq_file *s, int idx, int blk_addr)
        struct nix_hw *nix_hw;
        struct rvu *rvu;
        int bank, max_bank;
+       u64 ndc_af_const;
 
        if (blk_addr == BLKADDR_NDC_NPA0) {
                rvu = s->private;
@@ -1456,7 +1454,8 @@ static int ndc_blk_hits_miss_stats(struct seq_file *s, int idx, int blk_addr)
                rvu = nix_hw->rvu;
        }
 
-       max_bank = NDC_MAX_BANK(rvu, blk_addr);
+       ndc_af_const = rvu_read64(rvu, blk_addr, NDC_AF_CONST);
+       max_bank = FIELD_GET(NDC_AF_BANK_MASK, ndc_af_const);
        for (bank = 0; bank < max_bank; bank++) {
                seq_printf(s, "BANK:%d\n", bank);
                seq_printf(s, "\tHits:\t%lld\n",
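
The open-coded NDC_MAX_BANK() macro gives way to named GENMASK_ULL() fields and FIELD_GET(), which also documents where each field sits inside NDC_AF_CONST. A minimal sketch of the equivalence (assumes the mask definitions from rvu.h above; the register value would come from rvu_read64()):

#include <linux/bitfield.h>
#include <linux/printk.h>

static void ndc_const_demo(u64 ndc_af_const)
{
	int max_bank = FIELD_GET(NDC_AF_BANK_MASK, ndc_af_const);	/* bits 7:0 */
	int max_line = FIELD_GET(NDC_AF_BANK_LINE_MASK, ndc_af_const);	/* bits 31:16 */
	int old_max_bank = ndc_af_const & 0xFF;	/* the removed macro's form */

	pr_info("banks=%d lines=%d (old macro: %d)\n",
		max_bank, max_line, old_max_bank);
}
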
index 26e639e57dae372d530009e8f46e87ff8977214e..4ad707e758b9f700cd675f0f093b59f5b088779d 100644 (file)
@@ -790,6 +790,7 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
        struct nix_aq_res_s *result;
        int timeout = 1000;
        u64 reg, head;
+       int ret;
 
        result = (struct nix_aq_res_s *)aq->res->base;
 
@@ -813,9 +814,22 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
                        return -EBUSY;
        }
 
-       if (result->compcode != NIX_AQ_COMP_GOOD)
+       if (result->compcode != NIX_AQ_COMP_GOOD) {
                /* TODO: Replace this with some error code */
+               if (result->compcode == NIX_AQ_COMP_CTX_FAULT ||
+                   result->compcode == NIX_AQ_COMP_LOCKERR ||
+                   result->compcode == NIX_AQ_COMP_CTX_POISON) {
+                       ret = rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX0_RX);
+                       ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX0_TX);
+                       ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX1_RX);
+                       ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX1_TX);
+                       if (ret)
+                               dev_err(rvu->dev,
+                                       "%s: Not able to unlock cachelines\n", __func__);
+               }
+
                return -EBUSY;
+       }
 
        return 0;
 }
index 70bd036ed76e42f184775fe7a5556ea0ead0dc66..4f5ca5ab13a40640578aed38f6ef1eea59609f56 100644 (file)
@@ -4,7 +4,7 @@
  * Copyright (C) 2018 Marvell.
  *
  */
-
+#include <linux/bitfield.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 
@@ -42,9 +42,18 @@ static int npa_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
                        return -EBUSY;
        }
 
-       if (result->compcode != NPA_AQ_COMP_GOOD)
+       if (result->compcode != NPA_AQ_COMP_GOOD) {
                /* TODO: Replace this with some error code */
+               if (result->compcode == NPA_AQ_COMP_CTX_FAULT ||
+                   result->compcode == NPA_AQ_COMP_LOCKERR ||
+                   result->compcode == NPA_AQ_COMP_CTX_POISON) {
+                       if (rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NPA0))
+                               dev_err(rvu->dev,
+                                       "%s: Not able to unlock cachelines\n", __func__);
+               }
+
                return -EBUSY;
+       }
 
        return 0;
 }
@@ -545,3 +554,48 @@ void rvu_npa_lf_teardown(struct rvu *rvu, u16 pcifunc, int npalf)
 
        npa_ctx_free(rvu, pfvf);
 }
+
+/* Due to a hardware erratum, in some corner cases, AQ context lock
+ * operations can result in an NDC way getting into an illegal state
+ * of being not valid but locked.
+ *
+ * This API solves the problem by clearing the lock bit of the NDC block.
+ * The operation needs to be done for each line of all the NDC banks.
+ */
+int rvu_ndc_fix_locked_cacheline(struct rvu *rvu, int blkaddr)
+{
+       int bank, max_bank, line, max_line, err;
+       u64 reg, ndc_af_const;
+
+       /* Set the ENABLE bit(63) to '0' */
+       reg = rvu_read64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL);
+       rvu_write64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL, reg & GENMASK_ULL(62, 0));
+
+       /* Poll until the BUSY bits(47:32) are set to '0' */
+       err = rvu_poll_reg(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL, GENMASK_ULL(47, 32), true);
+       if (err) {
+               dev_err(rvu->dev, "Timed out while polling for NDC CAM busy bits.\n");
+               return err;
+       }
+
+       ndc_af_const = rvu_read64(rvu, blkaddr, NDC_AF_CONST);
+       max_bank = FIELD_GET(NDC_AF_BANK_MASK, ndc_af_const);
+       max_line = FIELD_GET(NDC_AF_BANK_LINE_MASK, ndc_af_const);
+       for (bank = 0; bank < max_bank; bank++) {
+               for (line = 0; line < max_line; line++) {
+                       /* If the 'cache line valid bit(63)' is not set
+                        * but the 'cache line lock bit(60)' is set, clear
+                        * the lock bit(60).
+                        */
+                       reg = rvu_read64(rvu, blkaddr,
+                                        NDC_AF_BANKX_LINEX_METADATA(bank, line));
+                       if (!(reg & BIT_ULL(63)) && (reg & BIT_ULL(60))) {
+                               rvu_write64(rvu, blkaddr,
+                                           NDC_AF_BANKX_LINEX_METADATA(bank, line),
+                                           reg & ~BIT_ULL(60));
+                       }
+               }
+       }
+
+       return 0;
+}
index 1729b22580ce1cd595290bc2ca7918a68d3a0332..7007f0b8e6591ab89a8871aff8fe5a49e80c1241 100644 (file)
 #define NDC_AF_INTR_ENA_W1S            (0x00068)
 #define NDC_AF_INTR_ENA_W1C            (0x00070)
 #define NDC_AF_ACTIVE_PC               (0x00078)
+#define NDC_AF_CAMS_RD_INTERVAL                (0x00080)
 #define NDC_AF_BP_TEST_ENABLE          (0x001F8)
 #define NDC_AF_BP_TEST(a)              (0x00200 | (a) << 3)
 #define NDC_AF_BLK_RST                 (0x002F0)
                (0x00F00 | (a) << 5 | (b) << 4)
 #define NDC_AF_BANKX_HIT_PC(a)         (0x01000 | (a) << 3)
 #define NDC_AF_BANKX_MISS_PC(a)                (0x01100 | (a) << 3)
+#define NDC_AF_BANKX_LINEX_METADATA(a, b) \
+               (0x10000 | (a) << 12 | (b) << 3)
 
 /* LBK */
 #define LBK_CONST                      (0x10ull)
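
A quick worked instance of the new metadata offset macro, with hypothetical indices: bank 2, line 5 yields 0x10000 | (2 << 12) | (5 << 3) = 0x12028. As a compile-time check:

#include <linux/build_bug.h>

static_assert(NDC_AF_BANKX_LINEX_METADATA(2, 5) == 0x12028);
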
index 88460b7796e5574176a1e4f229cd0541050ac02d..4276c6eb682012b99124be49f24d08a2ae0cc97d 100644 (file)
@@ -1243,6 +1243,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
 void mlx5e_rx_dim_work(struct work_struct *work);
 void mlx5e_tx_dim_work(struct work_struct *work);
 
+void mlx5e_set_xdp_feature(struct net_device *netdev);
 netdev_features_t mlx5e_features_check(struct sk_buff *skb,
                                       struct net_device *netdev,
                                       netdev_features_t features);
index 7708acc9b2ab3a274f444e63bdfbb7278ded6ed5..79fd21ecb9cbc70c7ff47d54e7433265b608439e 100644 (file)
@@ -1985,6 +1985,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
        struct mlx5e_priv *priv = netdev_priv(netdev);
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_params new_params;
+       int err;
 
        if (enable) {
                /* Checking the regular RQ here; mlx5e_validate_xsk_param called
@@ -2005,7 +2006,14 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
        MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
        mlx5e_set_rq_type(mdev, &new_params);
 
-       return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
+       err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
+       if (err)
+               return err;
+
+       /* update XDP supported features */
+       mlx5e_set_xdp_feature(netdev);
+
+       return 0;
 }
 
 static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
index 76a9c5194a7046a6867414d9240ae9dbc97db68f..51b5f3cca50470577a55e6157fbba24619328fdc 100644 (file)
@@ -4004,6 +4004,25 @@ static int mlx5e_handle_feature(struct net_device *netdev,
        return 0;
 }
 
+void mlx5e_set_xdp_feature(struct net_device *netdev)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5e_params *params = &priv->channels.params;
+       xdp_features_t val;
+
+       if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+               xdp_clear_features_flag(netdev);
+               return;
+       }
+
+       val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+             NETDEV_XDP_ACT_XSK_ZEROCOPY |
+             NETDEV_XDP_ACT_NDO_XMIT;
+       if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
+               val |= NETDEV_XDP_ACT_RX_SG;
+       xdp_set_features_flag(netdev, val);
+}
+
 int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
 {
        netdev_features_t oper_features = features;
@@ -4030,6 +4049,9 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
                return -EINVAL;
        }
 
+       /* update XDP supported features */
+       mlx5e_set_xdp_feature(netdev);
+
        return 0;
 }
 
@@ -4761,13 +4783,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
        if (old_prog)
                bpf_prog_put(old_prog);
 
-       if (reset) {
-               if (prog)
-                       xdp_features_set_redirect_target(netdev, true);
-               else
-                       xdp_features_clear_redirect_target(netdev);
-       }
-
        if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
                goto unlock;
 
@@ -5163,13 +5178,10 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
        netdev->features         |= NETIF_F_HIGHDMA;
        netdev->features         |= NETIF_F_HW_VLAN_STAG_FILTER;
 
-       netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
-                              NETDEV_XDP_ACT_XSK_ZEROCOPY |
-                              NETDEV_XDP_ACT_RX_SG;
-
        netdev->priv_flags       |= IFF_UNICAST_FLT;
 
        netif_set_tso_max_size(netdev, GSO_MAX_SIZE);
+       mlx5e_set_xdp_feature(netdev);
        mlx5e_set_netdev_dev_addr(netdev);
        mlx5e_macsec_build_netdev(priv);
        mlx5e_ipsec_build_netdev(priv);
@@ -5241,6 +5253,9 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
                mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
 
        mlx5e_health_create_reporters(priv);
+       /* update XDP supported features */
+       mlx5e_set_xdp_feature(netdev);
+
        return 0;
 }
 
index 9b92034430854759bc1d50c002c1b3810c99a542..43fd12fb87b879879d52af0b3134344c9f89bf21 100644 (file)
@@ -747,6 +747,9 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
        /* RQ */
        mlx5e_build_rq_params(mdev, params);
 
+       /* update XDP supported features */
+       mlx5e_set_xdp_feature(netdev);
+
        /* CQ moderation params */
        params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
        mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
index 871a3e62f85270c7f801ea8352c6a5c9e04b06e5..2d763664dcda18e1c67b5637a707e88984e3ee7d 100644 (file)
@@ -249,6 +249,21 @@ static int sparx5_dcb_ieee_dscp_setdel(struct net_device *dev,
        return 0;
 }
 
+static int sparx5_dcb_ieee_delapp(struct net_device *dev, struct dcb_app *app)
+{
+       int err;
+
+       if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP)
+               err = sparx5_dcb_ieee_dscp_setdel(dev, app, dcb_ieee_delapp);
+       else
+               err = dcb_ieee_delapp(dev, app);
+
+       if (err < 0)
+               return err;
+
+       return sparx5_dcb_app_update(dev);
+}
+
 static int sparx5_dcb_ieee_setapp(struct net_device *dev, struct dcb_app *app)
 {
        struct dcb_app app_itr;
@@ -264,7 +279,7 @@ static int sparx5_dcb_ieee_setapp(struct net_device *dev, struct dcb_app *app)
        if (prio) {
                app_itr = *app;
                app_itr.priority = prio;
-               dcb_ieee_delapp(dev, &app_itr);
+               sparx5_dcb_ieee_delapp(dev, &app_itr);
        }
 
        if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP)
@@ -281,21 +296,6 @@ out:
        return err;
 }
 
-static int sparx5_dcb_ieee_delapp(struct net_device *dev, struct dcb_app *app)
-{
-       int err;
-
-       if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP)
-               err = sparx5_dcb_ieee_dscp_setdel(dev, app, dcb_ieee_delapp);
-       else
-               err = dcb_ieee_delapp(dev, app);
-
-       if (err < 0)
-               return err;
-
-       return sparx5_dcb_app_update(dev);
-}
-
 static int sparx5_dcb_setapptrust(struct net_device *dev, u8 *selectors,
                                  int nselectors)
 {
index 943d26cbf39f5d8d14ed247f4918d51b5104f31f..71712ea25403dd71322f36297bb54f782d789f2c 100644 (file)
@@ -101,6 +101,7 @@ static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
                goto out;
 
        skb->dev = addr->master->dev;
+       skb->skb_iif = skb->dev->ifindex;
        len = skb->len + ETH_HLEN;
        ipvlan_count_rx(addr->master, len, true, false);
 out:
index 047c581457e34510236133a6ff4ff292a6c06385..5813b07242ce16486aea67f42cd86ffbc49d7cf9 100644 (file)
@@ -79,7 +79,7 @@
 #define SGMII_ABILITY                  BIT(0)
 
 #define VEND1_MII_BASIC_CONFIG         0xAFC6
-#define MII_BASIC_CONFIG_REV           BIT(8)
+#define MII_BASIC_CONFIG_REV           BIT(4)
 #define MII_BASIC_CONFIG_SGMII         0x9
 #define MII_BASIC_CONFIG_RGMII         0x7
 #define MII_BASIC_CONFIG_RMII          0x5
index 1bb54de7124d95e4974c158017ac6611a370c4d8..293dc3b2c84a6c1931e8df42cdcd5f2798004f3c 100644 (file)
@@ -1257,6 +1257,26 @@ static int veth_enable_range_safe(struct net_device *dev, int start, int end)
        return 0;
 }
 
+static void veth_set_xdp_features(struct net_device *dev)
+{
+       struct veth_priv *priv = netdev_priv(dev);
+       struct net_device *peer;
+
+       peer = rcu_dereference(priv->peer);
+       if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
+               xdp_features_t val = NETDEV_XDP_ACT_BASIC |
+                                    NETDEV_XDP_ACT_REDIRECT |
+                                    NETDEV_XDP_ACT_RX_SG;
+
+               if (priv->_xdp_prog || veth_gro_requested(dev))
+                       val |= NETDEV_XDP_ACT_NDO_XMIT |
+                              NETDEV_XDP_ACT_NDO_XMIT_SG;
+               xdp_set_features_flag(dev, val);
+       } else {
+               xdp_clear_features_flag(dev);
+       }
+}
+
 static int veth_set_channels(struct net_device *dev,
                             struct ethtool_channels *ch)
 {
@@ -1323,6 +1343,12 @@ out:
                if (peer)
                        netif_carrier_on(peer);
        }
+
+       /* update XDP supported features */
+       veth_set_xdp_features(dev);
+       if (peer)
+               veth_set_xdp_features(peer);
+
        return err;
 
 revert:
@@ -1489,7 +1515,10 @@ static int veth_set_features(struct net_device *dev,
                err = veth_napi_enable(dev);
                if (err)
                        return err;
+
+               xdp_features_set_redirect_target(dev, true);
        } else {
+               xdp_features_clear_redirect_target(dev);
                veth_napi_del(dev);
        }
        return 0;
@@ -1570,10 +1599,15 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
                        peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
                        peer->max_mtu = max_mtu;
                }
+
+               xdp_features_set_redirect_target(dev, true);
        }
 
        if (old_prog) {
                if (!prog) {
+                       if (!veth_gro_requested(dev))
+                               xdp_features_clear_redirect_target(dev);
+
                        if (dev->flags & IFF_UP)
                                veth_disable_xdp(dev);
 
@@ -1686,10 +1720,6 @@ static void veth_setup(struct net_device *dev)
        dev->hw_enc_features = VETH_FEATURES;
        dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
        netif_set_tso_max_size(dev, GSO_MAX_SIZE);
-
-       dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
-                           NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
-                           NETDEV_XDP_ACT_NDO_XMIT_SG;
 }
 
 /*
@@ -1857,6 +1887,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
                goto err_queues;
 
        veth_disable_gro(dev);
+       /* update XDP supported features */
+       veth_set_xdp_features(dev);
+       veth_set_xdp_features(peer);
+
        return 0;
 
 err_queues:
index fb5e68ed3ec27c1895f96108e35ee7b4832595b6..1a309cfb4976aa4501b743749b528fd35d5e10cf 100644 (file)
@@ -545,6 +545,87 @@ ok:
        return skb;
 }
 
+static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
+{
+       unsigned int len;
+       unsigned int packets = 0;
+       unsigned int bytes = 0;
+       void *ptr;
+
+       while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+               if (likely(!is_xdp_frame(ptr))) {
+                       struct sk_buff *skb = ptr;
+
+                       pr_debug("Sent skb %p\n", skb);
+
+                       bytes += skb->len;
+                       napi_consume_skb(skb, in_napi);
+               } else {
+                       struct xdp_frame *frame = ptr_to_xdp(ptr);
+
+                       bytes += xdp_get_frame_len(frame);
+                       xdp_return_frame(frame);
+               }
+               packets++;
+       }
+
+       /* Avoid overhead when no packets have been processed;
+        * this happens when called speculatively from start_xmit.
+        */
+       if (!packets)
+               return;
+
+       u64_stats_update_begin(&sq->stats.syncp);
+       sq->stats.bytes += bytes;
+       sq->stats.packets += packets;
+       u64_stats_update_end(&sq->stats.syncp);
+}
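
free_old_xmit_skbs() publishes its counters inside a u64_stats_update_begin()/u64_stats_update_end() section; for context, the matching reader side of that pattern looks roughly like this (a generic sketch, not code lifted from this driver):

#include <linux/u64_stats_sync.h>

static void read_sq_stats(struct send_queue *sq, u64 *bytes, u64 *packets)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&sq->stats.syncp);
		*bytes = sq->stats.bytes;
		*packets = sq->stats.packets;
	} while (u64_stats_fetch_retry(&sq->stats.syncp, start));
}
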
+
+static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
+{
+       if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+               return false;
+       else if (q < vi->curr_queue_pairs)
+               return true;
+       else
+               return false;
+}
+
+static void check_sq_full_and_disable(struct virtnet_info *vi,
+                                     struct net_device *dev,
+                                     struct send_queue *sq)
+{
+       bool use_napi = sq->napi.weight;
+       int qnum;
+
+       qnum = sq - vi->sq;
+
+       /* If running out of space, stop queue to avoid getting packets that we
+        * are then unable to transmit.
+        * An alternative would be to force the queuing layer to requeue the skb by
+        * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
+        * returned in a normal path of operation: it means that driver is not
+        * maintaining the TX queue stop/start state properly, and causes
+        * the stack to do a non-trivial amount of useless work.
+        * Since most packets only take 1 or 2 ring slots, stopping the queue
+        * early means 16 slots are typically wasted.
+        */
+       if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
+               netif_stop_subqueue(dev, qnum);
+               if (use_napi) {
+                       if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
+                               virtqueue_napi_schedule(&sq->napi, sq->vq);
+               } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
+                       /* More just got used, free them then recheck. */
+                       free_old_xmit_skbs(sq, false);
+                       if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
+                               netif_start_subqueue(dev, qnum);
+                               virtqueue_disable_cb(sq->vq);
+                       }
+               }
+       }
+}
+
 static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
                                   struct send_queue *sq,
                                   struct xdp_frame *xdpf)
@@ -686,6 +767,9 @@ static int virtnet_xdp_xmit(struct net_device *dev,
        }
        ret = nxmit;
 
+       if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
+               check_sq_full_and_disable(vi, dev, sq);
+
        if (flags & XDP_XMIT_FLUSH) {
                if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
                        kicks = 1;
@@ -1714,52 +1798,6 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
        return stats.packets;
 }
 
-static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
-{
-       unsigned int len;
-       unsigned int packets = 0;
-       unsigned int bytes = 0;
-       void *ptr;
-
-       while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
-               if (likely(!is_xdp_frame(ptr))) {
-                       struct sk_buff *skb = ptr;
-
-                       pr_debug("Sent skb %p\n", skb);
-
-                       bytes += skb->len;
-                       napi_consume_skb(skb, in_napi);
-               } else {
-                       struct xdp_frame *frame = ptr_to_xdp(ptr);
-
-                       bytes += xdp_get_frame_len(frame);
-                       xdp_return_frame(frame);
-               }
-               packets++;
-       }
-
-       /* Avoid overhead when no packets have been processed
-        * happens when called speculatively from start_xmit.
-        */
-       if (!packets)
-               return;
-
-       u64_stats_update_begin(&sq->stats.syncp);
-       sq->stats.bytes += bytes;
-       sq->stats.packets += packets;
-       u64_stats_update_end(&sq->stats.syncp);
-}
-
-static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
-{
-       if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
-               return false;
-       else if (q < vi->curr_queue_pairs)
-               return true;
-       else
-               return false;
-}
-
 static void virtnet_poll_cleantx(struct receive_queue *rq)
 {
        struct virtnet_info *vi = rq->vq->vdev->priv;
@@ -1989,30 +2027,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
                nf_reset_ct(skb);
        }
 
-       /* If running out of space, stop queue to avoid getting packets that we
-        * are then unable to transmit.
-        * An alternative would be to force queuing layer to requeue the skb by
-        * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
-        * returned in a normal path of operation: it means that driver is not
-        * maintaining the TX queue stop/start state properly, and causes
-        * the stack to do a non-trivial amount of useless work.
-        * Since most packets only take 1 or 2 ring slots, stopping the queue
-        * early means 16 slots are typically wasted.
-        */
-       if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
-               netif_stop_subqueue(dev, qnum);
-               if (use_napi) {
-                       if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
-                               virtqueue_napi_schedule(&sq->napi, sq->vq);
-               } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
-                       /* More just got used, free them then recheck. */
-                       free_old_xmit_skbs(sq, false);
-                       if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
-                               netif_start_subqueue(dev, qnum);
-                               virtqueue_disable_cb(sq->vq);
-                       }
-               }
-       }
+       check_sq_full_and_disable(vi, dev, sq);
 
        if (kick || netif_xmit_stopped(txq)) {
                if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
index 583adb37ee1e31e6f03858f6893b631e0d26d147..125284b346a7765f264c9e8ebaaeb0885c4bad91 100644 (file)
@@ -106,7 +106,7 @@ static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
 {
        unsigned int cpu = *stored_cpu, cpu_index, i;
 
-       if (unlikely(cpu == nr_cpumask_bits ||
+       if (unlikely(cpu >= nr_cpu_ids ||
                     !cpumask_test_cpu(cpu, cpu_online_mask))) {
                cpu_index = id % cpumask_weight(cpu_online_mask);
                cpu = cpumask_first(cpu_online_mask);
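
The bound-check fix above matters because a stale *stored_cpu may hold any out-of-range value, not only the nr_cpumask_bits sentinel; comparing against nr_cpu_ids rejects them all. The validity test now in effect, sketched on its own:

#include <linux/cpumask.h>

static bool stored_cpu_is_usable(unsigned int cpu)
{
	/* anything at or beyond nr_cpu_ids is invalid; offline CPUs too */
	return cpu < nr_cpu_ids && cpumask_test_cpu(cpu, cpu_online_mask);
}
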
index ed9c5e2cf3ad43feaa590820202ec844ee96ed6c..a187f0e0b0f7d1799f3f8d8d6ad36a801b7ca756 100644 (file)
@@ -175,6 +175,7 @@ static int pn533_usb_send_frame(struct pn533 *dev,
        print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1,
                             out->data, out->len, false);
 
+       arg.phy = phy;
        init_completion(&arg.done);
        cntx = phy->out_urb->context;
        phy->out_urb->context = &arg;
index 901c591458110f47bb37dbfb40d8a2dbbb4261c2..ea16a0aba6799ab74c94bf2223cf01e54a22f332 100644 (file)
@@ -256,7 +256,7 @@ select_kpp:
                                 chap->qid, ret, gid_name);
                        chap->status = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE;
                        chap->dh_tfm = NULL;
-                       return -ret;
+                       return ret;
                }
                dev_dbg(ctrl->device, "qid %d: selected DH group %s\n",
                        chap->qid, gid_name);
index 8698410aeb843ab0531120901d7588fd7f68308e..c2730b116dc680eac04df43a81c81a3826c5da16 100644 (file)
@@ -38,6 +38,7 @@ struct nvme_ns_info {
        bool is_shared;
        bool is_readonly;
        bool is_ready;
+       bool is_removed;
 };
 
 unsigned int admin_timeout = 60;
@@ -1402,16 +1403,8 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
        error = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
        if (error) {
                dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error);
-               goto out_free_id;
+               kfree(*id);
        }
-
-       error = NVME_SC_INVALID_NS | NVME_SC_DNR;
-       if ((*id)->ncap == 0) /* namespace not allocated or attached */
-               goto out_free_id;
-       return 0;
-
-out_free_id:
-       kfree(*id);
        return error;
 }
 
@@ -1425,6 +1418,13 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
        ret = nvme_identify_ns(ctrl, info->nsid, &id);
        if (ret)
                return ret;
+
+       if (id->ncap == 0) {
+               /* namespace not allocated or attached */
+               info->is_removed = true;
+               return -ENODEV;
+       }
+
        info->anagrpid = id->anagrpid;
        info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
        info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
@@ -3104,7 +3104,7 @@ static void nvme_init_known_nvm_effects(struct nvme_ctrl *ctrl)
         * Rather than blindly freezing the IO queues for this effect that
         * doesn't even apply to IO, mask it off.
         */
-       log->acs[nvme_admin_security_recv] &= ~NVME_CMD_EFFECTS_CSE_MASK;
+       log->acs[nvme_admin_security_recv] &= cpu_to_le32(~NVME_CMD_EFFECTS_CSE_MASK);
 
        log->iocs[nvme_cmd_write] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC);
        log->iocs[nvme_cmd_write_zeroes] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC);
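
The acs[] one-liner above is an endianness fix: the table holds __le32 values, so the CPU-order mask has to be converted before masking or the wrong bits get cleared on big-endian hosts. A minimal sketch of the pitfall (the mask value is illustrative only):

#include <linux/types.h>
#include <asm/byteorder.h>

static void le32_mask_demo(__le32 *field)
{
	*field &= cpu_to_le32(~0x00010000);	/* correct: mask converted */

	/* Wrong on big-endian -- a CPU-order mask on __le32 data:
	 *	*field &= ~0x00010000;
	 */
}
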
@@ -4429,6 +4429,7 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
        struct nvme_ns_info info = { .nsid = nsid };
        struct nvme_ns *ns;
+       int ret;
 
        if (nvme_identify_ns_descs(ctrl, &info))
                return;
@@ -4445,19 +4446,19 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
         * set up a namespace.  If not fall back to the legacy version.
         */
        if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) ||
-           (info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS)) {
-               if (nvme_ns_info_from_id_cs_indep(ctrl, &info))
-                       return;
-       } else {
-               if (nvme_ns_info_from_identify(ctrl, &info))
-                       return;
-       }
+           (info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS))
+               ret = nvme_ns_info_from_id_cs_indep(ctrl, &info);
+       else
+               ret = nvme_ns_info_from_identify(ctrl, &info);
+
+       if (info.is_removed)
+               nvme_ns_remove_by_nsid(ctrl, nsid);
 
        /*
         * Ignore the namespace if it is not ready. We will get an AEN once it
         * becomes ready and restart the scan.
         */
-       if (!info.is_ready)
+       if (ret || !info.is_ready)
                return;
 
        ns = nvme_find_get_ns(ctrl, nsid);
index a6e22116e1396aaf7ada61190ded69747429f532..dcac3df8a5f760427cff2750ac7c0de501fca409 100644 (file)
@@ -189,7 +189,8 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
 
 static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl)
 {
-       if (!ctrl->subsys)
+       if (!ctrl->subsys ||
+           !strcmp(ctrl->opts->subsysnqn, NVME_DISC_SUBSYS_NAME))
                return ctrl->opts->subsysnqn;
        return ctrl->subsys->subnqn;
 }
index 1955c0ec209e4977fd147cfde806b501411f45b3..7723a498952442c14c5346a5e30fd1383aa0940e 100644 (file)
@@ -2492,6 +2492,10 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
 
        len = nvmf_get_address(ctrl, buf, size);
 
+       mutex_lock(&queue->queue_lock);
+
+       if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
+               goto done;
        ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr);
        if (ret > 0) {
                if (len > 0)
@@ -2499,6 +2503,8 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
                len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n",
                                (len) ? "," : "", &src_addr);
        }
+done:
+       mutex_unlock(&queue->queue_lock);
 
        return len;
 }
index b8009aa11f3cede0d712c9ba0bc0c98b4287e119..be679aa5db643b39201d79c9c624c2ae233c82b6 100644 (file)
@@ -163,11 +163,11 @@ EXPORT_SYMBOL_GPL(pci_msix_alloc_irq_at);
 
 /**
  * pci_msix_free_irq - Free an interrupt on a PCI/MSIX interrupt domain
- *                   which was allocated via pci_msix_alloc_irq_at()
  *
  * @dev:       The PCI device to operate on
  * @map:       A struct msi_map describing the interrupt to free
- *             as returned from the allocation function.
+ *
+ * Undo an interrupt vector allocation. Does not disable MSI-X.
  */
 void pci_msix_free_irq(struct pci_dev *dev, struct msi_map map)
 {
index c9116d9e4b5756152b372f9ba6b38279418a97d0..70cb50fd41c29b2e00baf88ce259e2538a4692ac 100644 (file)
@@ -436,11 +436,8 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
                bSoftware = config >> 63;
                raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK;
                if (bSoftware) {
-                       if (raw_config_val < SBI_PMU_FW_MAX)
-                               ret = (raw_config_val & 0xFFFF) |
-                                     (SBI_PMU_EVENT_TYPE_FW << 16);
-                       else
-                               return -EINVAL;
+                       ret = (raw_config_val & 0xFFFF) |
+                               (SBI_PMU_EVENT_TYPE_FW << 16);
                } else {
                        ret = RISCV_PMU_RAW_EVENT_IDX;
                        *econfig = raw_config_val;
index 09c7829e95c4b037947f573ff2045cf6cebbdd81..382793e73a60a8b3f4ade16b00768fc368eb50e2 100644 (file)
@@ -16,17 +16,17 @@ if MELLANOX_PLATFORM
 
 config MLXREG_HOTPLUG
        tristate "Mellanox platform hotplug driver support"
-       depends on REGMAP
        depends on HWMON
        depends on I2C
+       select REGMAP
        help
          This driver handles hot-plug events for the power suppliers, power
          cables and fans on the wide range Mellanox IB and Ethernet systems.
 
 config MLXREG_IO
        tristate "Mellanox platform register access driver support"
-       depends on REGMAP
        depends on HWMON
+       select REGMAP
        help
          This driver allows access to Mellanox programmable device register
          space through sysfs interface. The sets of registers for sysfs access
@@ -36,9 +36,9 @@ config MLXREG_IO
 
 config MLXREG_LC
        tristate "Mellanox line card platform driver support"
-       depends on REGMAP
        depends on HWMON
        depends on I2C
+       select REGMAP
        help
          This driver provides support for the Mellanox MSN4800-XX line cards,
          which are the part of MSN4800 Ethernet modular switch systems
@@ -80,10 +80,9 @@ config MLXBF_PMC
 
 config NVSW_SN2201
        tristate "Nvidia SN2201 platform driver support"
-       depends on REGMAP
        depends on HWMON
        depends on I2C
-       depends on REGMAP_I2C
+       select REGMAP_I2C
        help
          This driver provides support for the Nvidia SN2201 platform.
          The SN2201 is a highly integrated for one rack unit system with
index ec7c2b4e1721cd14cc000bd8bf3cd7d9587220b6..4a01b315e0a91ed5c6517407b4eb6250a42809b3 100644 (file)
@@ -955,7 +955,8 @@ config SERIAL_MULTI_INSTANTIATE
 
 config MLX_PLATFORM
        tristate "Mellanox Technologies platform support"
-       depends on I2C && REGMAP
+       depends on I2C
+       select REGMAP
        help
          This option enables system support for the Mellanox Technologies
          platform. The Mellanox systems provide data center networking
index ab05b9ee6655aa63a3ddf1023bd2f8a70047b635..2edaae04a6912a4bf6a2d377f1698dd5f9509584 100644 (file)
@@ -171,9 +171,7 @@ MODULE_PARM_DESC(disable_workarounds, "Disable workarounds for platform bugs");
 static struct amd_pmc_dev pmc;
 static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, u32 arg, u32 *data, u8 msg, bool ret);
 static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf);
-#ifdef CONFIG_SUSPEND
 static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data);
-#endif
 
 static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset)
 {
@@ -386,7 +384,6 @@ static int get_metrics_table(struct amd_pmc_dev *pdev, struct smu_metrics *table
        return 0;
 }
 
-#ifdef CONFIG_SUSPEND
 static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev)
 {
        struct smu_metrics table;
@@ -400,7 +397,6 @@ static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev)
                dev_dbg(pdev->dev, "Last suspend in deepest state for %lluus\n",
                         table.timein_s0i3_lastcapture);
 }
-#endif
 
 static int amd_pmc_get_smu_version(struct amd_pmc_dev *dev)
 {
@@ -673,7 +669,6 @@ out_unlock:
        return rc;
 }
 
-#ifdef CONFIG_SUSPEND
 static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev)
 {
        switch (dev->cpu_id) {
@@ -861,9 +856,7 @@ static int __maybe_unused amd_pmc_suspend_handler(struct device *dev)
        return 0;
 }
 
-static SIMPLE_DEV_PM_OPS(amd_pmc_pm, amd_pmc_suspend_handler, NULL);
-
-#endif
+static DEFINE_SIMPLE_DEV_PM_OPS(amd_pmc_pm, amd_pmc_suspend_handler, NULL);
 
 static const struct pci_device_id pmc_pci_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PS) },
@@ -905,7 +898,6 @@ static int amd_pmc_s2d_init(struct amd_pmc_dev *dev)
        return 0;
 }
 
-#ifdef CONFIG_SUSPEND
 static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data)
 {
        int err;
@@ -926,7 +918,6 @@ static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data)
 
        return 0;
 }
-#endif
 
 static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf)
 {
@@ -1017,11 +1008,11 @@ static int amd_pmc_probe(struct platform_device *pdev)
        }
 
        platform_set_drvdata(pdev, dev);
-#ifdef CONFIG_SUSPEND
-       err = acpi_register_lps0_dev(&amd_pmc_s2idle_dev_ops);
-       if (err)
-               dev_warn(dev->dev, "failed to register LPS0 sleep handler, expect increased power consumption\n");
-#endif
+       if (IS_ENABLED(CONFIG_SUSPEND)) {
+               err = acpi_register_lps0_dev(&amd_pmc_s2idle_dev_ops);
+               if (err)
+                       dev_warn(dev->dev, "failed to register LPS0 sleep handler, expect increased power consumption\n");
+       }
 
        amd_pmc_dbgfs_register(dev);
        return 0;
@@ -1035,9 +1026,8 @@ static int amd_pmc_remove(struct platform_device *pdev)
 {
        struct amd_pmc_dev *dev = platform_get_drvdata(pdev);
 
-#ifdef CONFIG_SUSPEND
-       acpi_unregister_lps0_dev(&amd_pmc_s2idle_dev_ops);
-#endif
+       if (IS_ENABLED(CONFIG_SUSPEND))
+               acpi_unregister_lps0_dev(&amd_pmc_s2idle_dev_ops);
        amd_pmc_dbgfs_unregister(dev);
        pci_dev_put(dev->rdev);
        mutex_destroy(&dev->lock);
@@ -1061,9 +1051,7 @@ static struct platform_driver amd_pmc_driver = {
                .name = "amd_pmc",
                .acpi_match_table = amd_pmc_acpi_ids,
                .dev_groups = pmc_groups,
-#ifdef CONFIG_SUSPEND
-               .pm = &amd_pmc_pm,
-#endif
+               .pm = pm_sleep_ptr(&amd_pmc_pm),
        },
        .probe = amd_pmc_probe,
        .remove = amd_pmc_remove,
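
The amd_pmc conversion above swaps #ifdef CONFIG_SUSPEND blocks for IS_ENABLED() checks plus pm_sleep_ptr(), so the suspend paths always compile (catching bitrot early) while the optimizer discards them on !CONFIG_SUSPEND builds. A generic sketch of the idiom (driver name and callback are hypothetical):

#include <linux/platform_device.h>
#include <linux/pm.h>

static int demo_suspend(struct device *dev)
{
	return 0;	/* hypothetical suspend work */
}

static DEFINE_SIMPLE_DEV_PM_OPS(demo_pm_ops, demo_suspend, NULL);

static struct platform_driver demo_driver = {
	.driver = {
		.name = "demo",				/* hypothetical */
		.pm = pm_sleep_ptr(&demo_pm_ops),	/* NULL when sleep is off */
	},
};
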
index d547c9d097256f61ceffd52d15718b8d62157876..2750dee99c3e255a8821b591139775c1d4c641cf 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/kernel.h>
 #include <linux/hwmon.h>
 #include <linux/kstrtox.h>
-#include <linux/math.h>
 #include <linux/math64.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -96,6 +95,7 @@ struct combined_chip_info {
 };
 
 struct dell_wmi_ddv_sensors {
+       bool active;
        struct mutex lock;      /* protect caching */
        unsigned long timestamp;
        union acpi_object *obj;
@@ -520,6 +520,9 @@ static struct hwmon_channel_info *dell_wmi_ddv_channel_create(struct device *dev
 
 static void dell_wmi_ddv_hwmon_cache_invalidate(struct dell_wmi_ddv_sensors *sensors)
 {
+       if (!sensors->active)
+               return;
+
        mutex_lock(&sensors->lock);
        kfree(sensors->obj);
        sensors->obj = NULL;
@@ -530,6 +533,7 @@ static void dell_wmi_ddv_hwmon_cache_destroy(void *data)
 {
        struct dell_wmi_ddv_sensors *sensors = data;
 
+       sensors->active = false;
        mutex_destroy(&sensors->lock);
        kfree(sensors->obj);
 }
@@ -549,6 +553,7 @@ static struct hwmon_channel_info *dell_wmi_ddv_channel_init(struct wmi_device *w
                return ERR_PTR(ret);
 
        mutex_init(&sensors->lock);
+       sensors->active = true;
 
        ret = devm_add_action_or_reset(&wdev->dev, dell_wmi_ddv_hwmon_cache_destroy, sensors);
        if (ret < 0)
@@ -659,7 +664,8 @@ static ssize_t temp_show(struct device *dev, struct device_attribute *attr, char
        if (ret < 0)
                return ret;
 
-       return sysfs_emit(buf, "%d\n", DIV_ROUND_CLOSEST(value, 10));
+       /* Use 2731 instead of 2731.5 to avoid unnecessary rounding */
+       return sysfs_emit(buf, "%d\n", value - 2731);
 }
 
 static ssize_t eppid_show(struct device *dev, struct device_attribute *attr, char *buf)
@@ -852,7 +858,7 @@ static int dell_wmi_ddv_resume(struct device *dev)
 {
        struct dell_wmi_ddv_data *data = dev_get_drvdata(dev);
 
-       /* Force re-reading of all sensors */
+       /* Force re-reading of all active sensors */
        dell_wmi_ddv_hwmon_cache_invalidate(&data->fans);
        dell_wmi_ddv_hwmon_cache_invalidate(&data->temps);
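
The temp_show() change above treats the firmware value as tenths of a Kelvin and converts it to tenths of a degree Celsius, subtracting 273.1 K scaled by ten (the in-code comment explains the 2731 constant). A worked instance with a hypothetical reading:

static int ddv_temp_to_decidegrees(int value)
{
	/* e.g. value = 2981 (298.1 K) -> returns 250, shown as 25.0 degC */
	return value - 2731;
}
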
 
index 309eab9c055888a7affde7ed898a131f072c5f9c..322237e056f32e585e342bf8c27934c39c6fce84 100644 (file)
@@ -159,9 +159,10 @@ static const struct int3472_tps68470_board_data surface_go_tps68470_board_data =
 static const struct int3472_tps68470_board_data surface_go3_tps68470_board_data = {
        .dev_name = "i2c-INT3472:01",
        .tps68470_regulator_pdata = &surface_go_tps68470_pdata,
-       .n_gpiod_lookups = 1,
+       .n_gpiod_lookups = 2,
        .tps68470_gpio_lookup_tables = {
-               &surface_go_int347a_gpios
+               &surface_go_int347a_gpios,
+               &surface_go_int347e_gpios,
        },
 };
 
index a7e02b24a87ad7dc02c1533f1555d8362f49501d..0954a04623edfff38d91cde7f22d7d2d3464e7f2 100644 (file)
@@ -47,7 +47,7 @@ struct isst_cmd_set_req_type {
 
 static const struct isst_valid_cmd_ranges isst_valid_cmds[] = {
        {0xD0, 0x00, 0x03},
-       {0x7F, 0x00, 0x0B},
+       {0x7F, 0x00, 0x0C},
        {0x7F, 0x10, 0x12},
        {0x7F, 0x20, 0x23},
        {0x94, 0x03, 0x03},
@@ -112,6 +112,7 @@ static void isst_delete_hash(void)
  * isst_store_cmd() - Store command to a hash table
  * @cmd: Mailbox command.
  * @sub_cmd: Mailbox sub-command or MSR id.
+ * @cpu: Target CPU for the command
  * @mbox_cmd_type: Mailbox or MSR command.
  * @param: Mailbox parameter.
  * @data: Mailbox request data or MSR data.
@@ -363,7 +364,7 @@ static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn
 /**
  * isst_if_get_pci_dev() - Get the PCI device instance for a CPU
  * @cpu: Logical CPU number.
- * @bus_number: The bus number assigned by the hardware.
+ * @bus_no: The bus number assigned by the hardware.
  * @dev: The device number assigned by the hardware.
  * @fn: The function number assigned by the hardware.
  *
index fdecdae248d7787200c3e09077b4df74d3f70a67..35ff506b402e1110b31274a9df09b704ad521d47 100644 (file)
@@ -40,6 +40,7 @@
  * @offset:    Offset to the first valid member in command structure.
  *             This will be the offset of the start of the command
  *             after command count field
+ * @owner:     Registered module owner
  * @cmd_callback: Callback function to handle IOCTL. The callback has the
  *             command pointer with data for command. There is a pointer
  *             called write_only, which when set, will not copy the
index c60733261c89c9cf106088814126aca6cb70f438..c999732b0f1e5d3faf8c74bac78829efb82a14e1 100644 (file)
@@ -209,14 +209,14 @@ static int tpmi_create_device(struct intel_tpmi_info *tpmi_info,
        if (!name)
                return -EOPNOTSUPP;
 
-       feature_vsec_dev = kzalloc(sizeof(*feature_vsec_dev), GFP_KERNEL);
-       if (!feature_vsec_dev)
+       res = kcalloc(pfs->pfs_header.num_entries, sizeof(*res), GFP_KERNEL);
+       if (!res)
                return -ENOMEM;
 
-       res = kcalloc(pfs->pfs_header.num_entries, sizeof(*res), GFP_KERNEL);
-       if (!res) {
+       feature_vsec_dev = kzalloc(sizeof(*feature_vsec_dev), GFP_KERNEL);
+       if (!feature_vsec_dev) {
                ret = -ENOMEM;
-               goto free_vsec;
+               goto free_res;
        }
 
        snprintf(feature_id_name, sizeof(feature_id_name), "tpmi-%s", name);
@@ -239,6 +239,8 @@ static int tpmi_create_device(struct intel_tpmi_info *tpmi_info,
        /*
         * intel_vsec_add_aux() is resource managed, no explicit
         * delete is required on error or on module unload.
+        * feature_vsec_dev memory is also freed as part of device
+        * deletion.
         */
        ret = intel_vsec_add_aux(vsec_dev->pcidev, &vsec_dev->auxdev.dev,
                                 feature_vsec_dev, feature_id_name);
@@ -249,8 +251,6 @@ static int tpmi_create_device(struct intel_tpmi_info *tpmi_info,
 
 free_res:
        kfree(res);
-free_vsec:
-       kfree(feature_vsec_dev);
 
        return ret;
 }
index 7b6779cdb13493887474f7fea868feda8d2f24ab..67367f010139e21fbde7b3a345c80c36ab1bc31a 100644 (file)
@@ -5980,7 +5980,7 @@ MODULE_DEVICE_TABLE(dmi, mlxplat_dmi_table);
 static int mlxplat_mlxcpld_verify_bus_topology(int *nr)
 {
        struct i2c_adapter *search_adap;
-       int shift, i;
+       int i, shift = 0;
 
        /* Scan adapters from expected id to verify it is free. */
        *nr = MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR;
index cc5b2e22b42ac277913490a9e3e54bc0175ee05a..f3d7c1da299fef86b86b58f297da60234df26b50 100644 (file)
@@ -1207,13 +1207,13 @@ __power_supply_register(struct device *parent,
        struct power_supply *psy;
        int rc;
 
+       if (!desc || !desc->name || !desc->properties || !desc->num_properties)
+               return ERR_PTR(-EINVAL);
+
        if (!parent)
                pr_warn("%s: Expected proper parent device for '%s'\n",
                        __func__, desc->name);
 
-       if (!desc || !desc->name || !desc->properties || !desc->num_properties)
-               return ERR_PTR(-EINVAL);
-
        if (psy_has_property(desc, POWER_SUPPLY_PROP_USB_TYPE) &&
            (!desc->usb_types || !desc->num_usb_types))
                return ERR_PTR(-EINVAL);
index ec31f887184fd2d2b783299e5835ecbc19dcab01..de77df97b3a448109bc67382cacbd838d0a28088 100644 (file)
@@ -1126,8 +1126,7 @@ static void qcom_battmgr_sm8350_callback(struct qcom_battmgr *battmgr,
                        battmgr->info.charge_type = le32_to_cpu(resp->intval.value);
                        break;
                case BATT_CAPACITY:
-                       battmgr->status.percent = le32_to_cpu(resp->intval.value);
-                       do_div(battmgr->status.percent, 100);
+                       battmgr->status.percent = le32_to_cpu(resp->intval.value) / 100;
                        break;
                case BATT_VOLT_OCV:
                        battmgr->status.voltage_ocv = le32_to_cpu(resp->intval.value);
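
The hunk above replaces do_div() with plain division: do_div() is meant for 64-bit dividends, modifies its first argument in place to the quotient, and returns the remainder, which is unnecessary machinery for a 32-bit percentage. A contrast sketch (values illustrative):

#include <asm/div64.h>
#include <linux/types.h>

static void div_demo(void)
{
	u64 big = 123456789ULL;
	u32 rem = do_div(big, 100);	/* big is now the quotient */
	u32 pct = 5000 / 100;		/* plain division suits 32-bit values */

	(void)rem;
	(void)pct;
}
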
index bc6adda588835849c6c98cbdb2216fd656b8f2e2..a27673706c3d600d1d653493a34bb301bbcc578c 100644 (file)
@@ -143,6 +143,8 @@ static const struct x86_cpu_id pl4_support_ids[] = {
        { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_N, X86_FEATURE_ANY },
        { X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE, X86_FEATURE_ANY },
        { X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE_P, X86_FEATURE_ANY },
+       { X86_VENDOR_INTEL, 6, INTEL_FAM6_METEORLAKE, X86_FEATURE_ANY },
+       { X86_VENDOR_INTEL, 6, INTEL_FAM6_METEORLAKE_L, X86_FEATURE_ANY },
        {}
 };
 
index e180dee0f83d04c84c9cf54f4a1a6c2f0de49720..52c32dcbf7d846ac994559f6d111e0d2c287794f 100644 (file)
@@ -679,4 +679,3 @@ fs_initcall(powercap_init);
 
 MODULE_DESCRIPTION("PowerCap sysfs Driver");
 MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
-MODULE_LICENSE("GPL v2");
index ad37bc46f2721424481e386da0af1a1ab6f25afe..507ff0d5f7bd88fa80d6168330a8efde4b2e5dfc 100644 (file)
@@ -3,6 +3,7 @@
  * Copyright (C) ST-Ericsson SA 2010
  *
  * Author: Arun R Murthy <arun.murthy@stericsson.com>
+ * Datasheet: https://web.archive.org/web/20130614115108/http://www.stericsson.com/developers/CD00291561_UM1031_AB8500_user_manual-rev5_CTDS_public.pdf
  */
 #include <linux/err.h>
 #include <linux/platform_device.h>
@@ -20,6 +21,8 @@
 #define AB8500_PWM_OUT_CTRL2_REG       0x61
 #define AB8500_PWM_OUT_CTRL7_REG       0x66
 
+#define AB8500_PWM_CLKRATE 9600000
+
 struct ab8500_pwm_chip {
        struct pwm_chip chip;
        unsigned int hwid;
@@ -35,13 +38,60 @@ static int ab8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 {
        int ret;
        u8 reg;
-       unsigned int higher_val, lower_val;
+       u8 higher_val, lower_val;
+       unsigned int duty_steps, div;
        struct ab8500_pwm_chip *ab8500 = ab8500_pwm_from_chip(chip);
 
        if (state->polarity != PWM_POLARITY_NORMAL)
                return -EINVAL;
 
-       if (!state->enabled) {
+       if (state->enabled) {
+               /*
+                * A time quantum is
+                *   q = (32 - FreqPWMOutx[3:0]) / AB8500_PWM_CLKRATE
+                * The period is always 1024 q, duty_cycle is between 1q and 1024q.
+                *
+                * FreqPWMOutx[3:0] | output frequency | output frequency | 1024q = period
+                *                  | (from manual)    |   (1 / 1024q)    | = 1 / freq
+                * -----------------+------------------+------------------+--------------
+                *      b0000       |      293 Hz      |  292.968750 Hz   | 3413333.33 ns
+                *      b0001       |      302 Hz      |  302.419355 Hz   | 3306666.66 ns
+                *      b0010       |      312 Hz      |  312.500000 Hz   | 3200000    ns
+                *      b0011       |      323 Hz      |  323.275862 Hz   | 3093333.33 ns
+                *      b0100       |      334 Hz      |  334.821429 Hz   | 2986666.66 ns
+                *      b0101       |      347 Hz      |  347.222222 Hz   | 2880000    ns
+                *      b0110       |      360 Hz      |  360.576923 Hz   | 2773333.33 ns
+                *      b0111       |      375 Hz      |  375.000000 Hz   | 2666666.66 ns
+                *      b1000       |      390 Hz      |  390.625000 Hz   | 2560000    ns
+                *      b1001       |      407 Hz      |  407.608696 Hz   | 2453333.33 ns
+                *      b1010       |      426 Hz      |  426.136364 Hz   | 2346666.66 ns
+                *      b1011       |      446 Hz      |  446.428571 Hz   | 2240000    ns
+                *      b1100       |      468 Hz      |  468.750000 Hz   | 2133333.33 ns
+                *      b1101       |      493 Hz      |  493.421053 Hz   | 2026666.66 ns
+                *      b1110       |      520 Hz      |  520.833333 Hz   | 1920000    ns
+                *      b1111       |      551 Hz      |  551.470588 Hz   | 1813333.33 ns
+                *
+                *
+                * AB8500_PWM_CLKRATE is a multiple of 1024, so the division by
+                * 1024 can be done in this factor without loss of precision.
+                */
+               div = min_t(u64, mul_u64_u64_div_u64(state->period,
+                                                    AB8500_PWM_CLKRATE >> 10,
+                                                    NSEC_PER_SEC), 32); /* 32 - FreqPWMOutx[3:0] */
+               if (div <= 16)
+                       /* requested period < 1813333.33 ns, the shortest supported (b1111) */
+                       return -EINVAL;
+
+               duty_steps = min_t(u64, mul_u64_u64_div_u64(state->duty_cycle,
+                                                           AB8500_PWM_CLKRATE,
+                                                           (u64)NSEC_PER_SEC * div), 1024);
+       }
+
+       /*
+        * The hardware doesn't support duty_steps = 0 explicitly, but emits low
+        * when disabled.
+        */
+       if (!state->enabled || duty_steps == 0) {
                ret = abx500_mask_and_set_register_interruptible(chip->dev,
                                        AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG,
                                        1 << ab8500->hwid, 0);
@@ -53,28 +103,29 @@ static int ab8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        }
 
        /*
-        * get the first 8 bits that are be written to
+        * The lower 8 bits of duty_steps are written to ...
         * AB8500_PWM_OUT_CTRL1_REG[0:7]
         */
-       lower_val = state->duty_cycle & 0x00FF;
+       lower_val = (duty_steps - 1) & 0x00ff;
        /*
-        * get bits [9:10] that are to be written to
-        * AB8500_PWM_OUT_CTRL2_REG[0:1]
+        * The two remaining high bits go to
+        * AB8500_PWM_OUT_CTRL2_REG[0:1], together with FreqPWMOutx.
         */
-       higher_val = ((state->duty_cycle & 0x0300) >> 8);
+       higher_val = ((duty_steps - 1) & 0x0300) >> 8 | (32 - div) << 4;
 
        reg = AB8500_PWM_OUT_CTRL1_REG + (ab8500->hwid * 2);
 
        ret = abx500_set_register_interruptible(chip->dev, AB8500_MISC,
-                       reg, (u8)lower_val);
+                       reg, lower_val);
        if (ret < 0)
                return ret;
 
        ret = abx500_set_register_interruptible(chip->dev, AB8500_MISC,
-                       (reg + 1), (u8)higher_val);
+                       (reg + 1), higher_val);
        if (ret < 0)
                return ret;
 
+       /* enable */
        ret = abx500_mask_and_set_register_interruptible(chip->dev,
                                AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG,
                                1 << ab8500->hwid, 1 << ab8500->hwid);
@@ -85,8 +136,51 @@ static int ab8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        return ret;
 }
 
+static int ab8500_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+                               struct pwm_state *state)
+{
+       u8 ctrl7, lower_val, higher_val;
+       int ret;
+       struct ab8500_pwm_chip *ab8500 = ab8500_pwm_from_chip(chip);
+       unsigned int div, duty_steps;
+
+       ret = abx500_get_register_interruptible(chip->dev, AB8500_MISC,
+                                               AB8500_PWM_OUT_CTRL7_REG,
+                                               &ctrl7);
+       if (ret)
+               return ret;
+
+       state->polarity = PWM_POLARITY_NORMAL;
+
+       if (!(ctrl7 & 1 << ab8500->hwid)) {
+               state->enabled = false;
+               return 0;
+       }
+
+       ret = abx500_get_register_interruptible(chip->dev, AB8500_MISC,
+                                               AB8500_PWM_OUT_CTRL1_REG + (ab8500->hwid * 2),
+                                               &lower_val);
+       if (ret)
+               return ret;
+
+       ret = abx500_get_register_interruptible(chip->dev, AB8500_MISC,
+                                               AB8500_PWM_OUT_CTRL2_REG + (ab8500->hwid * 2),
+                                               &higher_val);
+       if (ret)
+               return ret;
+
+       div = 32 - ((higher_val & 0xf0) >> 4);
+       duty_steps = ((higher_val & 3) << 8 | lower_val) + 1;
+
+       state->period = DIV64_U64_ROUND_UP(((u64)div << 10) * NSEC_PER_SEC,
+                                          AB8500_PWM_CLKRATE);
+       state->duty_cycle = DIV64_U64_ROUND_UP((u64)div * duty_steps * NSEC_PER_SEC,
+                                              AB8500_PWM_CLKRATE);
+
+       return 0;
+}
+
 static const struct pwm_ops ab8500_pwm_ops = {
        .apply = ab8500_pwm_apply,
+       .get_state = ab8500_pwm_get_state,
        .owner = THIS_MODULE,
 };
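
To make the register math above concrete, a worked example (request values chosen for illustration): period = 2000000 ns and duty_cycle = 1000000 ns give

        div        = 2000000ULL * (9600000 >> 10) / NSEC_PER_SEC;   /* = 18 -> FreqPWMOutx = 14 */
        duty_steps = 1000000ULL * 9600000 / (NSEC_PER_SEC * 18ULL); /* = 533 of 1024 quanta     */
        lower_val  = (533 - 1) & 0x00ff;                            /* = 0x14                   */
        higher_val = ((533 - 1) & 0x0300) >> 8 | (32 - 18) << 4;    /* = 0xe2                   */

i.e. the hardware runs at the b1110 setting (real period 1920000 ns) with a duty cycle of 533/1024, about 52 % of that period, and .get_state() reverses the same arithmetic.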
 
index bd2308812096d60812c927a2eb0b258544830b23..3bbb26c862c35c6f866d6129c2a488ff9979cab6 100644 (file)
@@ -198,20 +198,35 @@ static const struct pwm_ops dwc_pwm_ops = {
        .owner = THIS_MODULE,
 };
 
+static struct dwc_pwm *dwc_pwm_alloc(struct device *dev)
+{
+       struct dwc_pwm *dwc;
+
+       dwc = devm_kzalloc(dev, sizeof(*dwc), GFP_KERNEL);
+       if (!dwc)
+               return NULL;
+
+       dwc->chip.dev = dev;
+       dwc->chip.ops = &dwc_pwm_ops;
+       dwc->chip.npwm = DWC_TIMERS_TOTAL;
+
+       dev_set_drvdata(dev, dwc);
+       return dwc;
+}
+
 static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
 {
        struct device *dev = &pci->dev;
        struct dwc_pwm *dwc;
        int ret;
 
-       dwc = devm_kzalloc(&pci->dev, sizeof(*dwc), GFP_KERNEL);
+       dwc = dwc_pwm_alloc(dev);
        if (!dwc)
                return -ENOMEM;
 
        ret = pcim_enable_device(pci);
        if (ret) {
-               dev_err(&pci->dev,
-                       "Failed to enable device (%pe)\n", ERR_PTR(ret));
+               dev_err(dev, "Failed to enable device (%pe)\n", ERR_PTR(ret));
                return ret;
        }
 
@@ -219,24 +234,17 @@ static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
 
        ret = pcim_iomap_regions(pci, BIT(0), pci_name(pci));
        if (ret) {
-               dev_err(&pci->dev,
-                       "Failed to iomap PCI BAR (%pe)\n", ERR_PTR(ret));
+               dev_err(dev, "Failed to iomap PCI BAR (%pe)\n", ERR_PTR(ret));
                return ret;
        }
 
        dwc->base = pcim_iomap_table(pci)[0];
        if (!dwc->base) {
-               dev_err(&pci->dev, "Base address missing\n");
+               dev_err(dev, "Base address missing\n");
                return -ENOMEM;
        }
 
-       pci_set_drvdata(pci, dwc);
-
-       dwc->chip.dev = dev;
-       dwc->chip.ops = &dwc_pwm_ops;
-       dwc->chip.npwm = DWC_TIMERS_TOTAL;
-
-       ret = pwmchip_add(&dwc->chip);
+       ret = devm_pwmchip_add(dev, &dwc->chip);
        if (ret)
                return ret;
 
@@ -248,12 +256,8 @@ static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
 
 static void dwc_pwm_remove(struct pci_dev *pci)
 {
-       struct dwc_pwm *dwc = pci_get_drvdata(pci);
-
        pm_runtime_forbid(&pci->dev);
        pm_runtime_get_noresume(&pci->dev);
-
-       pwmchip_remove(&dwc->chip);
 }
 
 #ifdef CONFIG_PM_SLEEP
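
Switching to devm_pwmchip_add() ties the chip's lifetime to the device, so the remove path no longer needs drvdata or an explicit pwmchip_remove(). Devres releases resources in reverse order of registration, which keeps the teardown ordering safe; roughly:

        dwc->base = pcim_iomap_table(pci)[0];           /* mapping goes away last */
        ret = devm_pwmchip_add(dev, &dwc->chip);        /* chip is removed first  */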
index 4987ca940b648c7f5bd68c866e742e5e165758dd..8362b4870c66c464b7012af7cb8be349f3760dc8 100644 (file)
@@ -55,8 +55,8 @@ static int iqs620_pwm_init(struct iqs620_pwm_private *iqs620_pwm,
        if (ret)
                return ret;
 
-       return regmap_update_bits(iqs62x->regmap, IQS620_PWR_SETTINGS,
-                                 IQS620_PWR_SETTINGS_PWM_OUT, 0xff);
+       return regmap_set_bits(iqs62x->regmap, IQS620_PWR_SETTINGS,
+                              IQS620_PWR_SETTINGS_PWM_OUT);
 }
 
 static int iqs620_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
index 215ef90691144601be0777490a761aa390fd2242..35675e4058c6bb71601d0a1b34ca06d36326d9f9 100644 (file)
@@ -8,7 +8,6 @@
  */
 
 #include <linux/err.h>
-#include <linux/i2c.h>
 #include <linux/mfd/lp3943.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
index 62b6acc6373dbfdfdf156fa294d49a578bf41213..393a4b97fc19eb02cc4bd61991dba3252f890f34 100644 (file)
@@ -161,7 +161,13 @@ static int pwm_sifive_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 
        mutex_lock(&ddata->lock);
        if (state->period != ddata->approx_period) {
-               if (ddata->user_count != 1) {
+               /*
+                * Don't let a 2nd user change the period underneath the 1st user.
+                * However if ddata->approx_period == 0 this is the first time we set
+                * any period, so let whoever gets here first set the period so other
+                * users who agree on the period won't fail.
+                */
+               if (ddata->user_count != 1 && ddata->approx_period) {
                        mutex_unlock(&ddata->lock);
                        return -EBUSY;
                }
index 514ff58a4471d664701243c203183260c551142d..f315fa106be872fe7cf88bbf1620a008fa65dcd2 100644 (file)
@@ -127,7 +127,7 @@ static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 
        /* ensure CMP & ARR registers are properly written */
        ret = regmap_read_poll_timeout(priv->regmap, STM32_LPTIM_ISR, val,
-                                      (val & STM32_LPTIM_CMPOK_ARROK),
+                                      (val & STM32_LPTIM_CMPOK_ARROK) == STM32_LPTIM_CMPOK_ARROK,
                                       100, 1000);
        if (ret) {
                dev_err(priv->chip.dev, "ARR/CMP registers write issue\n");
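
STM32_LPTIM_CMPOK_ARROK combines two status flags, and val & mask on a multi-bit mask is already true when only one of them is set; the fixed condition waits for both. Schematically (bit positions hypothetical):

        #define CMPOK           BIT(3)
        #define ARROK           BIT(4)
        #define CMPOK_ARROK     (CMPOK | ARROK)

        val & CMPOK_ARROK;                      /* true if either flag is set  */
        (val & CMPOK_ARROK) == CMPOK_ARROK;     /* true only when both are set */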
index ae69e493913da2ec1782c8acf0f88386e407ed46..4fcd36055b0257d8b62e64a7476caba8a323fefa 100644 (file)
@@ -1584,7 +1584,7 @@ static int set_machine_constraints(struct regulator_dev *rdev)
        }
 
        if (rdev->desc->off_on_delay)
-               rdev->last_off = ktime_get();
+               rdev->last_off = ktime_get_boottime();
 
        /* If the constraints say the regulator should be on at this point
         * and we have control then make sure it is enabled.
@@ -2673,7 +2673,7 @@ static int _regulator_do_enable(struct regulator_dev *rdev)
                 * this regulator was disabled.
                 */
                ktime_t end = ktime_add_us(rdev->last_off, rdev->desc->off_on_delay);
-               s64 remaining = ktime_us_delta(end, ktime_get());
+               s64 remaining = ktime_us_delta(end, ktime_get_boottime());
 
                if (remaining > 0)
                        _regulator_delay_helper(remaining);
@@ -2912,7 +2912,7 @@ static int _regulator_do_disable(struct regulator_dev *rdev)
        }
 
        if (rdev->desc->off_on_delay)
-               rdev->last_off = ktime_get();
+               rdev->last_off = ktime_get_boottime();
 
        trace_regulator_disable_complete(rdev_get_name(rdev));
 
index f0fb0f56e4207e2786360ec3b457ae7886155154..648e3641885a884b2da137d1e74efdd32a44b279 100644 (file)
@@ -193,7 +193,7 @@ static int max597x_get_status(struct regulator_dev *rdev)
 
        ret = regmap_read(rdev->regmap, MAX5970_REG_STATUS3, &val);
        if (ret)
-               return REGULATOR_FAILED_RETRY;
+               return ret;
 
        if (val & MAX5970_STATUS3_ALERT)
                return REGULATOR_STATUS_ERROR;
index 2ba72de0fa4719ae5d75d89812a5dd9ca469c50d..5a71579af0a14e032a36139ace3bc1e4946aa891 100644 (file)
@@ -1677,7 +1677,7 @@ config RTC_DRV_MPC5121
 config RTC_DRV_JZ4740
        tristate "Ingenic JZ4740 SoC"
        depends on MIPS || COMPILE_TEST
-       depends on OF
+       depends on OF && COMMON_CLK
        help
          If you say yes here you get support for the Ingenic JZ47xx SoCs RTC
          controllers.
@@ -1773,6 +1773,18 @@ config RTC_DRV_SNVS
           This driver can also be built as a module, if so, the module
           will be called "rtc-snvs".
 
+config RTC_DRV_BBNSM
+       tristate "NXP BBNSM RTC support"
+       select REGMAP_MMIO
+       depends on ARCH_MXC || COMPILE_TEST
+       depends on HAS_IOMEM
+       depends on OF
+       help
+          If you say yes here you get support for the NXP BBNSM RTC module.
+
+          This driver can also be built as a module, if so, the module
+          will be called "rtc-bbnsm".
+
 config RTC_DRV_IMX_SC
        depends on IMX_SCU
        depends on HAVE_ARM_SMCCC
index 59eb30289335573df9ed655ec438d80b53edcc65..ea445d1ebb172898dc71fd7a097e81385e610dc5 100644 (file)
@@ -33,6 +33,7 @@ obj-$(CONFIG_RTC_DRV_ASPEED)  += rtc-aspeed.o
 obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o
 obj-$(CONFIG_RTC_DRV_AT91SAM9) += rtc-at91sam9.o
 obj-$(CONFIG_RTC_DRV_AU1XXX)   += rtc-au1xxx.o
+obj-$(CONFIG_RTC_DRV_BBNSM)    += rtc-nxp-bbnsm.o
 obj-$(CONFIG_RTC_DRV_BD70528)  += rtc-bd70528.o
 obj-$(CONFIG_RTC_DRV_BQ32K)    += rtc-bq32k.o
 obj-$(CONFIG_RTC_DRV_BQ4802)   += rtc-bq4802.o
index 7c30cb3c764d83c9c85c16182874182709a81635..499d89150afc9a54299a4a8dab7d8aabe60c7847 100644 (file)
@@ -392,7 +392,7 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
                return err;
        if (!rtc->ops) {
                err = -ENODEV;
-       } else if (!test_bit(RTC_FEATURE_ALARM, rtc->features) || !rtc->ops->read_alarm) {
+       } else if (!test_bit(RTC_FEATURE_ALARM, rtc->features)) {
                err = -EINVAL;
        } else {
                memset(alarm, 0, sizeof(struct rtc_wkalrm));
index 2f8deb8c4cd3e680001ec7e0289bd4372be8cf72..34611f6dedcba8a7660821f13f3b6b333ed81dff 100644 (file)
@@ -536,9 +536,14 @@ static int abeoz9_probe(struct i2c_client *client)
        clear_bit(RTC_FEATURE_ALARM, data->rtc->features);
 
        if (client->irq > 0) {
+               unsigned long irqflags = IRQF_TRIGGER_LOW;
+
+               if (dev_fwnode(&client->dev))
+                       irqflags = 0;
+
                ret = devm_request_threaded_irq(dev, client->irq, NULL,
                                                abeoz9_rtc_irq,
-                                               IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                               irqflags | IRQF_ONESHOT,
                                                dev_name(dev), dev);
                if (ret) {
                        dev_err(dev, "failed to request alarm irq\n");
index 2e0e6432901b844aeb301375d4252c7d8c5e4f66..f34a2e59cac765380d50a93f7892690cd72afa41 100644 (file)
@@ -11,6 +11,7 @@
  */
 
 #include <linux/bcd.h>
+#include <linux/bitfield.h>
 #include <linux/i2c.h>
 #include <linux/kstrtox.h>
 #include <linux/module.h>
 #define ABX8XX_TRICKLE_STANDARD_DIODE  0x8
 #define ABX8XX_TRICKLE_SCHOTTKY_DIODE  0x4
 
+#define ABX8XX_REG_EXTRAM      0x3f
+#define ABX8XX_EXTRAM_XADS     GENMASK(1, 0)
+
+#define ABX8XX_SRAM_BASE       0x40
+#define ABX8XX_SRAM_WIN_SIZE   0x40
+#define ABX8XX_RAM_SIZE                256
+
+#define NVMEM_ADDR_LOWER       GENMASK(5, 0)
+#define NVMEM_ADDR_UPPER       GENMASK(7, 6)
+
 static u8 trickle_resistors[] = {0, 3, 6, 11};
 
 enum abx80x_chip {AB0801, AB0803, AB0804, AB0805,
@@ -674,6 +685,68 @@ static int abx80x_setup_watchdog(struct abx80x_priv *priv)
 }
 #endif
 
+static int abx80x_nvmem_xfer(struct abx80x_priv *priv, unsigned int offset,
+                            void *val, size_t bytes, bool write)
+{
+       int ret;
+
+       while (bytes) {
+               u8 extram, reg, len, lower, upper;
+
+               lower = FIELD_GET(NVMEM_ADDR_LOWER, offset);
+               upper = FIELD_GET(NVMEM_ADDR_UPPER, offset);
+               extram = FIELD_PREP(ABX8XX_EXTRAM_XADS, upper);
+               reg = ABX8XX_SRAM_BASE + lower;
+               len = min(lower + bytes, (size_t)ABX8XX_SRAM_WIN_SIZE) - lower;
+               len = min_t(u8, len, I2C_SMBUS_BLOCK_MAX);
+
+               ret = i2c_smbus_write_byte_data(priv->client, ABX8XX_REG_EXTRAM,
+                                               extram);
+               if (ret)
+                       return ret;
+
+               if (write)
+                       ret = i2c_smbus_write_i2c_block_data(priv->client, reg,
+                                                            len, val);
+               else
+                       ret = i2c_smbus_read_i2c_block_data(priv->client, reg,
+                                                           len, val);
+               if (ret)
+                       return ret;
+
+               offset += len;
+               val += len;
+               bytes -= len;
+       }
+
+       return 0;
+}
+
+static int abx80x_nvmem_read(void *priv, unsigned int offset, void *val,
+                            size_t bytes)
+{
+       return abx80x_nvmem_xfer(priv, offset, val, bytes, false);
+}
+
+static int abx80x_nvmem_write(void *priv, unsigned int offset, void *val,
+                             size_t bytes)
+{
+       return abx80x_nvmem_xfer(priv, offset, val, bytes, true);
+}
+
+static int abx80x_setup_nvmem(struct abx80x_priv *priv)
+{
+       struct nvmem_config config = {
+               .type = NVMEM_TYPE_BATTERY_BACKED,
+               .reg_read = abx80x_nvmem_read,
+               .reg_write = abx80x_nvmem_write,
+               .size = ABX8XX_RAM_SIZE,
+               .priv = priv,
+       };
+
+       return devm_rtc_nvmem_register(priv->rtc, &config);
+}
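
The 256-byte RAM is only reachable through a 64-byte window at 0x40, selected by the XADS field, so a transfer that straddles a window boundary is split across loop iterations. A worked example (offsets chosen for illustration), reading 8 bytes at offset 0x3c:

        /* pass 1: upper = 0, lower = 0x3c -> EXTRAM = 0, reg = 0x7c, len = 4 */
        /* pass 2: upper = 1, lower = 0x00 -> EXTRAM = 1, reg = 0x40, len = 4 */

with len additionally clamped to I2C_SMBUS_BLOCK_MAX.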
+
 static const struct i2c_device_id abx80x_id[] = {
        { "abx80x", ABX80X },
        { "ab0801", AB0801 },
@@ -840,6 +913,10 @@ static int abx80x_probe(struct i2c_client *client)
                        return err;
        }
 
+       err = abx80x_setup_nvmem(priv);
+       if (err)
+               return err;
+
        if (client->irq > 0) {
                dev_info(&client->dev, "IRQ %d supplied\n", client->irq);
                err = devm_request_threaded_irq(&client->dev, client->irq, NULL,
index c74130e8f496d1d37cecf8eecf3a9aa1f4264277..1efa81cecc273fb3046b7e365c1d130a7a1f679b 100644 (file)
@@ -27,13 +27,17 @@ struct brcmstb_waketmr {
        struct rtc_device *rtc;
        struct device *dev;
        void __iomem *base;
-       int irq;
+       unsigned int wake_irq;
+       unsigned int alarm_irq;
        struct notifier_block reboot_notifier;
        struct clk *clk;
        u32 rate;
+       unsigned long rtc_alarm;
+       bool alarm_en;
 };
 
 #define BRCMSTB_WKTMR_EVENT            0x00
+#define  WKTMR_ALARM_EVENT             BIT(0)
 #define BRCMSTB_WKTMR_COUNTER          0x04
 #define BRCMSTB_WKTMR_ALARM            0x08
 #define BRCMSTB_WKTMR_PRESCALER                0x0C
@@ -41,28 +45,71 @@ struct brcmstb_waketmr {
 
 #define BRCMSTB_WKTMR_DEFAULT_FREQ     27000000
 
+static inline bool brcmstb_waketmr_is_pending(struct brcmstb_waketmr *timer)
+{
+       u32 reg;
+
+       reg = readl_relaxed(timer->base + BRCMSTB_WKTMR_EVENT);
+       return !!(reg & WKTMR_ALARM_EVENT);
+}
+
 static inline void brcmstb_waketmr_clear_alarm(struct brcmstb_waketmr *timer)
 {
-       writel_relaxed(1, timer->base + BRCMSTB_WKTMR_EVENT);
+       u32 reg;
+
+       if (timer->alarm_en && timer->alarm_irq)
+               disable_irq(timer->alarm_irq);
+       timer->alarm_en = false;
+       reg = readl_relaxed(timer->base + BRCMSTB_WKTMR_COUNTER);
+       writel_relaxed(reg - 1, timer->base + BRCMSTB_WKTMR_ALARM);
+       writel_relaxed(WKTMR_ALARM_EVENT, timer->base + BRCMSTB_WKTMR_EVENT);
        (void)readl_relaxed(timer->base + BRCMSTB_WKTMR_EVENT);
 }
 
 static void brcmstb_waketmr_set_alarm(struct brcmstb_waketmr *timer,
                                      unsigned int secs)
 {
+       unsigned int now;
+
        brcmstb_waketmr_clear_alarm(timer);
 
        /* Make sure we are actually counting in seconds */
        writel_relaxed(timer->rate, timer->base + BRCMSTB_WKTMR_PRESCALER);
 
-       writel_relaxed(secs + 1, timer->base + BRCMSTB_WKTMR_ALARM);
+       writel_relaxed(secs, timer->base + BRCMSTB_WKTMR_ALARM);
+       now = readl_relaxed(timer->base + BRCMSTB_WKTMR_COUNTER);
+
+       while ((int)(secs - now) <= 0 &&
+               !brcmstb_waketmr_is_pending(timer)) {
+               secs = now + 1;
+               writel_relaxed(secs, timer->base + BRCMSTB_WKTMR_ALARM);
+               now = readl_relaxed(timer->base + BRCMSTB_WKTMR_COUNTER);
+       }
 }
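
The retry loop above depends on wraparound-safe arithmetic: casting the 32-bit difference to a signed int keeps the "alarm already in the past" test correct even when the free-running counter rolls over. For example (hypothetical values):

        u32 now = 0xfffffffe, secs = 0x00000001;
        (int)(secs - now);      /* = 3 > 0: alarm lies 3 s ahead, loop exits */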
 
 static irqreturn_t brcmstb_waketmr_irq(int irq, void *data)
 {
        struct brcmstb_waketmr *timer = data;
 
-       pm_wakeup_event(timer->dev, 0);
+       if (!timer->alarm_irq)
+               pm_wakeup_event(timer->dev, 0);
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t brcmstb_alarm_irq(int irq, void *data)
+{
+       struct brcmstb_waketmr *timer = data;
+
+       /* Ignore spurious interrupts */
+       if (!brcmstb_waketmr_is_pending(timer))
+               return IRQ_HANDLED;
+
+       if (timer->alarm_en) {
+               if (!device_may_wakeup(timer->dev))
+                       writel_relaxed(WKTMR_ALARM_EVENT,
+                                      timer->base + BRCMSTB_WKTMR_EVENT);
+               rtc_update_irq(timer->rtc, 1, RTC_IRQF | RTC_AF);
+       }
 
        return IRQ_HANDLED;
 }
@@ -88,17 +135,25 @@ static void wktmr_read(struct brcmstb_waketmr *timer,
 static int brcmstb_waketmr_prepare_suspend(struct brcmstb_waketmr *timer)
 {
        struct device *dev = timer->dev;
-       int ret = 0;
+       int ret;
 
        if (device_may_wakeup(dev)) {
-               ret = enable_irq_wake(timer->irq);
+               ret = enable_irq_wake(timer->wake_irq);
                if (ret) {
                        dev_err(dev, "failed to enable wake-up interrupt\n");
                        return ret;
                }
+               if (timer->alarm_en && timer->alarm_irq) {
+                       ret = enable_irq_wake(timer->alarm_irq);
+                       if (ret) {
+                               dev_err(dev, "failed to enable rtc interrupt\n");
+                               disable_irq_wake(timer->wake_irq);
+                               return ret;
+                       }
+               }
        }
 
-       return ret;
+       return 0;
 }
 
 /* If enabled as a wakeup-source, arm the timer when powering off */
@@ -146,46 +201,47 @@ static int brcmstb_waketmr_getalarm(struct device *dev,
                                    struct rtc_wkalrm *alarm)
 {
        struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
-       time64_t sec;
-       u32 reg;
 
-       sec = readl_relaxed(timer->base + BRCMSTB_WKTMR_ALARM);
-       if (sec != 0) {
-               /* Alarm is enabled */
-               alarm->enabled = 1;
-               rtc_time64_to_tm(sec, &alarm->time);
-       }
+       alarm->enabled = timer->alarm_en;
+       rtc_time64_to_tm(timer->rtc_alarm, &alarm->time);
 
-       reg = readl_relaxed(timer->base + BRCMSTB_WKTMR_EVENT);
-       alarm->pending = !!(reg & 1);
+       alarm->pending = brcmstb_waketmr_is_pending(timer);
 
        return 0;
 }
 
-static int brcmstb_waketmr_setalarm(struct device *dev,
-                                    struct rtc_wkalrm *alarm)
+static int brcmstb_waketmr_alarm_enable(struct device *dev,
+                                       unsigned int enabled)
 {
        struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
-       time64_t sec;
-
-       if (alarm->enabled)
-               sec = rtc_tm_to_time64(&alarm->time);
-       else
-               sec = 0;
 
-       brcmstb_waketmr_set_alarm(timer, sec);
+       if (enabled && !timer->alarm_en) {
+               if ((int)(readl_relaxed(timer->base + BRCMSTB_WKTMR_COUNTER) -
+                   readl_relaxed(timer->base + BRCMSTB_WKTMR_ALARM)) >= 0 &&
+                   !brcmstb_waketmr_is_pending(timer))
+                       return -EINVAL;
+               timer->alarm_en = true;
+               if (timer->alarm_irq)
+                       enable_irq(timer->alarm_irq);
+       } else if (!enabled && timer->alarm_en) {
+               if (timer->alarm_irq)
+                       disable_irq(timer->alarm_irq);
+               timer->alarm_en = false;
+       }
 
        return 0;
 }
 
-/*
- * Does not do much but keep the RTC class happy. We always support
- * alarms.
- */
-static int brcmstb_waketmr_alarm_enable(struct device *dev,
-                                       unsigned int enabled)
+static int brcmstb_waketmr_setalarm(struct device *dev,
+                                    struct rtc_wkalrm *alarm)
 {
-       return 0;
+       struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
+
+       timer->rtc_alarm = rtc_tm_to_time64(&alarm->time);
+
+       brcmstb_waketmr_set_alarm(timer, timer->rtc_alarm);
+
+       return brcmstb_waketmr_alarm_enable(dev, alarm->enabled);
 }
 
 static const struct rtc_class_ops brcmstb_waketmr_ops = {
@@ -221,12 +277,12 @@ static int brcmstb_waketmr_probe(struct platform_device *pdev)
         * Set wakeup capability before requesting wakeup interrupt, so we can
         * process boot-time "wakeups" (e.g., from S5 soft-off)
         */
-       device_set_wakeup_capable(dev, true);
-       device_wakeup_enable(dev);
+       device_init_wakeup(dev, true);
 
-       timer->irq = platform_get_irq(pdev, 0);
-       if (timer->irq < 0)
+       ret = platform_get_irq(pdev, 0);
+       if (ret < 0)
                return -ENODEV;
+       timer->wake_irq = (unsigned int)ret;
 
        timer->clk = devm_clk_get(dev, NULL);
        if (!IS_ERR(timer->clk)) {
@@ -241,11 +297,24 @@ static int brcmstb_waketmr_probe(struct platform_device *pdev)
                timer->clk = NULL;
        }
 
-       ret = devm_request_irq(dev, timer->irq, brcmstb_waketmr_irq, 0,
+       ret = devm_request_irq(dev, timer->wake_irq, brcmstb_waketmr_irq, 0,
                               "brcmstb-waketimer", timer);
        if (ret < 0)
                goto err_clk;
 
+       brcmstb_waketmr_clear_alarm(timer);
+
+       /* Attempt to initialize non-wake irq */
+       ret = platform_get_irq(pdev, 1);
+       if (ret > 0) {
+               timer->alarm_irq = (unsigned int)ret;
+               ret = devm_request_irq(dev, timer->alarm_irq, brcmstb_alarm_irq,
+                                      IRQF_NO_AUTOEN, "brcmstb-waketimer-rtc",
+                                      timer);
+               if (ret < 0)
+                       timer->alarm_irq = 0;
+       }
+
        timer->reboot_notifier.notifier_call = brcmstb_waketmr_reboot;
        register_reboot_notifier(&timer->reboot_notifier);
 
@@ -256,8 +325,6 @@ static int brcmstb_waketmr_probe(struct platform_device *pdev)
        if (ret)
                goto err_notifier;
 
-       dev_info(dev, "registered, with irq %d\n", timer->irq);
-
        return 0;
 
 err_notifier:
@@ -295,7 +362,9 @@ static int brcmstb_waketmr_resume(struct device *dev)
        if (!device_may_wakeup(dev))
                return 0;
 
-       ret = disable_irq_wake(timer->irq);
+       ret = disable_irq_wake(timer->wake_irq);
+       if (timer->alarm_en && timer->alarm_irq)
+               disable_irq_wake(timer->alarm_irq);
 
        brcmstb_waketmr_clear_alarm(timer);
 
@@ -325,4 +394,5 @@ module_platform_driver(brcmstb_waketmr_driver);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Brian Norris");
 MODULE_AUTHOR("Markus Mayer");
+MODULE_AUTHOR("Doug Berger");
 MODULE_DESCRIPTION("Wake-up timer driver for STB chips");
index def9b7f9d9577e9dad4052b781f5bfaddbfc5813..e86ba84df6cbedd1f14f02410dedeb15f9aa3c06 100644 (file)
@@ -1712,9 +1712,9 @@ static const struct regmap_config regmap_config = {
        .val_bits = 8,
 };
 
-static int ds1307_probe(struct i2c_client *client,
-                       const struct i2c_device_id *id)
+static int ds1307_probe(struct i2c_client *client)
 {
+       const struct i2c_device_id *id = i2c_client_get_device_id(client);
        struct ds1307           *ds1307;
        const void              *match;
        int                     err = -ENODEV;
@@ -2011,7 +2011,7 @@ static struct i2c_driver ds1307_driver = {
                .name   = "rtc-ds1307",
                .of_match_table = ds1307_of_match,
        },
-       .probe          = ds1307_probe,
+       .probe_new      = ds1307_probe,
        .id_table       = ds1307_id,
 };
 
index 1e8bc6cc1e12d85583e674fe476fb787bab635b0..dc6b0f4a54e2ea92ac865dbdbde89ec26a6c7e0c 100644 (file)
@@ -164,7 +164,7 @@ static int efi_read_time(struct device *dev, struct rtc_time *tm)
 
        if (status != EFI_SUCCESS) {
                /* should never happen */
-               dev_err(dev, "can't read time\n");
+               dev_err_once(dev, "can't read time\n");
                return -EINVAL;
        }
 
index cc710d682121bdeed76f509af7e345675ffec4e9..7d5a298a9a3bc678c671ede56880e8f82c2f8e98 100644 (file)
@@ -518,9 +518,14 @@ static int hym8563_probe(struct i2c_client *client)
        }
 
        if (client->irq > 0) {
+               unsigned long irqflags = IRQF_TRIGGER_LOW;
+
+               if (dev_fwnode(&client->dev))
+                       irqflags = 0;
+
                ret = devm_request_threaded_irq(&client->dev, client->irq,
                                                NULL, hym8563_irq,
-                                               IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                               irqflags | IRQF_ONESHOT,
                                                client->name, hym8563);
                if (ret < 0) {
                        dev_err(&client->dev, "irq %d request failed, %d\n",
index a3b0de3393f5714fa8a68628539316a12c9ac95e..e68a79b5e00e586ac2a72b4ea2b0171d21129ebb 100644 (file)
@@ -8,16 +8,16 @@
  * by Alessandro Zummo <a.zummo@towertech.it>.
  */
 
-#include <linux/i2c.h>
 #include <linux/bcd.h>
-#include <linux/rtc.h>
-#include <linux/slab.h>
-#include <linux/module.h>
 #include <linux/err.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/regmap.h>
 #include <linux/hwmon.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/rtc.h>
+#include <linux/slab.h>
+
+#include <asm/byteorder.h>
 
 /* ISL register offsets */
 #define ISL12022_REG_SC                0x00
 
 #define ISL12022_BETA_TSE      (1 << 7)
 
-static struct i2c_driver isl12022_driver;
-
-struct isl12022 {
-       struct rtc_device *rtc;
-       struct regmap *regmap;
-};
-
 static umode_t isl12022_hwmon_is_visible(const void *data,
                                         enum hwmon_sensor_types type,
                                         u32 attr, int channel)
@@ -67,19 +60,17 @@ static umode_t isl12022_hwmon_is_visible(const void *data,
  */
 static int isl12022_hwmon_read_temp(struct device *dev, long *mC)
 {
-       struct isl12022 *isl12022 = dev_get_drvdata(dev);
-       struct regmap *regmap = isl12022->regmap;
-       u8 temp_buf[2];
+       struct regmap *regmap = dev_get_drvdata(dev);
        int temp, ret;
+       __le16 buf;
 
-       ret = regmap_bulk_read(regmap, ISL12022_REG_TEMP_L,
-                              temp_buf, sizeof(temp_buf));
+       ret = regmap_bulk_read(regmap, ISL12022_REG_TEMP_L, &buf, sizeof(buf));
        if (ret)
                return ret;
        /*
         * Temperature is represented as a 10-bit number, unit half-Kelvins.
         */
-       temp = (temp_buf[1] << 8) | temp_buf[0];
+       temp = le16_to_cpu(buf);
        temp *= 500;
        temp -= 273000;
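
A worked example for the conversion above (raw bytes chosen for illustration): the sensor returning 0x59 0x02 in little-endian order yields

        temp = le16_to_cpu(buf);        /* = 601 half-Kelvin steps   */
        temp = 601 * 500 - 273000;      /* = 27500 m°C, i.e. 27.5 °C */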
 
@@ -115,23 +106,21 @@ static const struct hwmon_chip_info isl12022_hwmon_chip_info = {
 
 static void isl12022_hwmon_register(struct device *dev)
 {
-       struct isl12022 *isl12022;
+       struct regmap *regmap = dev_get_drvdata(dev);
        struct device *hwmon;
        int ret;
 
        if (!IS_REACHABLE(CONFIG_HWMON))
                return;
 
-       isl12022 = dev_get_drvdata(dev);
-
-       ret = regmap_update_bits(isl12022->regmap, ISL12022_REG_BETA,
+       ret = regmap_update_bits(regmap, ISL12022_REG_BETA,
                                 ISL12022_BETA_TSE, ISL12022_BETA_TSE);
        if (ret) {
                dev_warn(dev, "unable to enable temperature sensor\n");
                return;
        }
 
-       hwmon = devm_hwmon_device_register_with_info(dev, "isl12022", isl12022,
+       hwmon = devm_hwmon_device_register_with_info(dev, "isl12022", regmap,
                                                     &isl12022_hwmon_chip_info,
                                                     NULL);
        if (IS_ERR(hwmon))
@@ -144,8 +133,7 @@ static void isl12022_hwmon_register(struct device *dev)
  */
 static int isl12022_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-       struct isl12022 *isl12022 = dev_get_drvdata(dev);
-       struct regmap *regmap = isl12022->regmap;
+       struct regmap *regmap = dev_get_drvdata(dev);
        uint8_t buf[ISL12022_REG_INT + 1];
        int ret;
 
@@ -155,16 +143,12 @@ static int isl12022_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
        if (buf[ISL12022_REG_SR] & (ISL12022_SR_LBAT85 | ISL12022_SR_LBAT75)) {
                dev_warn(dev,
-                        "voltage dropped below %u%%, "
-                        "date and time is not reliable.\n",
+                        "voltage dropped below %u%%, date and time is not reliable.\n",
                         buf[ISL12022_REG_SR] & ISL12022_SR_LBAT85 ? 85 : 75);
        }
 
        dev_dbg(dev,
-               "%s: raw data is sec=%02x, min=%02x, hr=%02x, "
-               "mday=%02x, mon=%02x, year=%02x, wday=%02x, "
-               "sr=%02x, int=%02x",
-               __func__,
+               "raw data is sec=%02x, min=%02x, hr=%02x, mday=%02x, mon=%02x, year=%02x, wday=%02x, sr=%02x, int=%02x",
                buf[ISL12022_REG_SC],
                buf[ISL12022_REG_MN],
                buf[ISL12022_REG_HR],
@@ -190,8 +174,7 @@ static int isl12022_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 static int isl12022_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
-       struct isl12022 *isl12022 = dev_get_drvdata(dev);
-       struct regmap *regmap = isl12022->regmap;
+       struct regmap *regmap = dev_get_drvdata(dev);
        int ret;
        uint8_t buf[ISL12022_REG_DW + 1];
 
@@ -218,8 +201,7 @@ static int isl12022_rtc_set_time(struct device *dev, struct rtc_time *tm)
 
        buf[ISL12022_REG_DW] = tm->tm_wday & 0x07;
 
-       return regmap_bulk_write(isl12022->regmap, ISL12022_REG_SC,
-                                buf, sizeof(buf));
+       return regmap_bulk_write(regmap, ISL12022_REG_SC, buf, sizeof(buf));
 }
 
 static const struct rtc_class_ops isl12022_rtc_ops = {
@@ -235,44 +217,39 @@ static const struct regmap_config regmap_config = {
 
 static int isl12022_probe(struct i2c_client *client)
 {
-       struct isl12022 *isl12022;
+       struct rtc_device *rtc;
+       struct regmap *regmap;
 
        if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
                return -ENODEV;
 
-       isl12022 = devm_kzalloc(&client->dev, sizeof(struct isl12022),
-                               GFP_KERNEL);
-       if (!isl12022)
-               return -ENOMEM;
-       dev_set_drvdata(&client->dev, isl12022);
-
-       isl12022->regmap = devm_regmap_init_i2c(client, &regmap_config);
-       if (IS_ERR(isl12022->regmap)) {
+       regmap = devm_regmap_init_i2c(client, &regmap_config);
+       if (IS_ERR(regmap)) {
                dev_err(&client->dev, "regmap allocation failed\n");
-               return PTR_ERR(isl12022->regmap);
+               return PTR_ERR(regmap);
        }
 
+       dev_set_drvdata(&client->dev, regmap);
+
        isl12022_hwmon_register(&client->dev);
 
-       isl12022->rtc = devm_rtc_allocate_device(&client->dev);
-       if (IS_ERR(isl12022->rtc))
-               return PTR_ERR(isl12022->rtc);
+       rtc = devm_rtc_allocate_device(&client->dev);
+       if (IS_ERR(rtc))
+               return PTR_ERR(rtc);
 
-       isl12022->rtc->ops = &isl12022_rtc_ops;
-       isl12022->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
-       isl12022->rtc->range_max = RTC_TIMESTAMP_END_2099;
+       rtc->ops = &isl12022_rtc_ops;
+       rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
+       rtc->range_max = RTC_TIMESTAMP_END_2099;
 
-       return devm_rtc_register_device(isl12022->rtc);
+       return devm_rtc_register_device(rtc);
 }
 
-#ifdef CONFIG_OF
 static const struct of_device_id isl12022_dt_match[] = {
        { .compatible = "isl,isl12022" }, /* for backward compat., don't use */
        { .compatible = "isil,isl12022" },
        { },
 };
 MODULE_DEVICE_TABLE(of, isl12022_dt_match);
-#endif
 
 static const struct i2c_device_id isl12022_id[] = {
        { "isl12022", 0 },
@@ -283,9 +260,7 @@ MODULE_DEVICE_TABLE(i2c, isl12022_id);
 static struct i2c_driver isl12022_driver = {
        .driver         = {
                .name   = "rtc-isl12022",
-#ifdef CONFIG_OF
-               .of_match_table = of_match_ptr(isl12022_dt_match),
-#endif
+               .of_match_table = isl12022_dt_match,
        },
        .probe_new      = isl12022_probe,
        .id_table       = isl12022_id,
index c383719292c7d17c4d38b03a227d93fdb4f1438c..59d279e3e6f5bb2c709d87c8cb8fed9bb9d55161 100644 (file)
@@ -6,12 +6,15 @@
  */
 
 #include <linux/clk.h>
+#include <linux/clk-provider.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_wakeirq.h>
+#include <linux/property.h>
 #include <linux/reboot.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
@@ -25,6 +28,7 @@
 #define JZ_REG_RTC_WAKEUP_FILTER       0x24
 #define JZ_REG_RTC_RESET_COUNTER       0x28
 #define JZ_REG_RTC_SCRATCHPAD  0x34
+#define JZ_REG_RTC_CKPCR       0x40
 
 /* The following are present on the jz4780 */
 #define JZ_REG_RTC_WENR        0x3C
@@ -44,6 +48,9 @@
 #define JZ_RTC_WAKEUP_FILTER_MASK      0x0000FFE0
 #define JZ_RTC_RESET_COUNTER_MASK      0x00000FE0
 
+#define JZ_RTC_CKPCR_CK32PULL_DIS      BIT(4)
+#define JZ_RTC_CKPCR_CK32CTL_EN                (BIT(2) | BIT(1))
+
 enum jz4740_rtc_type {
        ID_JZ4740,
        ID_JZ4760,
@@ -56,6 +63,8 @@ struct jz4740_rtc {
 
        struct rtc_device *rtc;
 
+       struct clk_hw clk32k;
+
        spinlock_t lock;
 };
 
@@ -69,19 +78,15 @@ static inline uint32_t jz4740_rtc_reg_read(struct jz4740_rtc *rtc, size_t reg)
 static int jz4740_rtc_wait_write_ready(struct jz4740_rtc *rtc)
 {
        uint32_t ctrl;
-       int timeout = 10000;
 
-       do {
-               ctrl = jz4740_rtc_reg_read(rtc, JZ_REG_RTC_CTRL);
-       } while (!(ctrl & JZ_RTC_CTRL_WRDY) && --timeout);
-
-       return timeout ? 0 : -EIO;
+       return readl_poll_timeout(rtc->base + JZ_REG_RTC_CTRL, ctrl,
+                                 ctrl & JZ_RTC_CTRL_WRDY, 0, 1000);
 }
 
 static inline int jz4780_rtc_enable_write(struct jz4740_rtc *rtc)
 {
        uint32_t ctrl;
-       int ret, timeout = 10000;
+       int ret;
 
        ret = jz4740_rtc_wait_write_ready(rtc);
        if (ret != 0)
@@ -89,11 +94,8 @@ static inline int jz4780_rtc_enable_write(struct jz4740_rtc *rtc)
 
        writel(JZ_RTC_WENR_MAGIC, rtc->base + JZ_REG_RTC_WENR);
 
-       do {
-               ctrl = readl(rtc->base + JZ_REG_RTC_WENR);
-       } while (!(ctrl & JZ_RTC_WENR_WEN) && --timeout);
-
-       return timeout ? 0 : -EIO;
+       return readl_poll_timeout(rtc->base + JZ_REG_RTC_WENR, ctrl,
+                                 ctrl & JZ_RTC_WENR_WEN, 0, 1000);
 }
 
 static inline int jz4740_rtc_reg_write(struct jz4740_rtc *rtc, size_t reg,
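
readl_poll_timeout() turns the fixed-iteration busy loops into time-bounded polls:

        readl_poll_timeout(addr, val, cond, sleep_us, timeout_us);
        /* returns 0 once cond is true, -ETIMEDOUT after timeout_us; sleep_us = 0 spins */

so both wait helpers now give up after 1000 µs of wall time rather than after an architecture-dependent number of register reads.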
@@ -260,6 +262,7 @@ static void jz4740_rtc_power_off(void)
 static const struct of_device_id jz4740_rtc_of_match[] = {
        { .compatible = "ingenic,jz4740-rtc", .data = (void *)ID_JZ4740 },
        { .compatible = "ingenic,jz4760-rtc", .data = (void *)ID_JZ4760 },
+       { .compatible = "ingenic,jz4770-rtc", .data = (void *)ID_JZ4780 },
        { .compatible = "ingenic,jz4780-rtc", .data = (void *)ID_JZ4780 },
        {},
 };
@@ -301,6 +304,38 @@ static void jz4740_rtc_set_wakeup_params(struct jz4740_rtc *rtc,
        jz4740_rtc_reg_write(rtc, JZ_REG_RTC_RESET_COUNTER, reset_ticks);
 }
 
+static int jz4740_rtc_clk32k_enable(struct clk_hw *hw)
+{
+       struct jz4740_rtc *rtc = container_of(hw, struct jz4740_rtc, clk32k);
+
+       return jz4740_rtc_reg_write(rtc, JZ_REG_RTC_CKPCR,
+                                   JZ_RTC_CKPCR_CK32PULL_DIS |
+                                   JZ_RTC_CKPCR_CK32CTL_EN);
+}
+
+static void jz4740_rtc_clk32k_disable(struct clk_hw *hw)
+{
+       struct jz4740_rtc *rtc = container_of(hw, struct jz4740_rtc, clk32k);
+
+       jz4740_rtc_reg_write(rtc, JZ_REG_RTC_CKPCR, 0);
+}
+
+static int jz4740_rtc_clk32k_is_enabled(struct clk_hw *hw)
+{
+       struct jz4740_rtc *rtc = container_of(hw, struct jz4740_rtc, clk32k);
+       u32 ckpcr;
+
+       ckpcr = jz4740_rtc_reg_read(rtc, JZ_REG_RTC_CKPCR);
+
+       return !!(ckpcr & JZ_RTC_CKPCR_CK32CTL_EN);
+}
+
+static const struct clk_ops jz4740_rtc_clk32k_ops = {
+       .enable = jz4740_rtc_clk32k_enable,
+       .disable = jz4740_rtc_clk32k_disable,
+       .is_enabled = jz4740_rtc_clk32k_is_enabled,
+};
+
 static int jz4740_rtc_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
@@ -335,17 +370,13 @@ static int jz4740_rtc_probe(struct platform_device *pdev)
        device_init_wakeup(dev, 1);
 
        ret = dev_pm_set_wake_irq(dev, irq);
-       if (ret) {
-               dev_err(dev, "Failed to set wake irq: %d\n", ret);
-               return ret;
-       }
+       if (ret)
+               return dev_err_probe(dev, ret, "Failed to set wake irq\n");
 
        rtc->rtc = devm_rtc_allocate_device(dev);
-       if (IS_ERR(rtc->rtc)) {
-               ret = PTR_ERR(rtc->rtc);
-               dev_err(dev, "Failed to allocate rtc device: %d\n", ret);
-               return ret;
-       }
+       if (IS_ERR(rtc->rtc))
+               return dev_err_probe(dev, PTR_ERR(rtc->rtc),
+                                    "Failed to allocate rtc device\n");
 
        rtc->rtc->ops = &jz4740_rtc_ops;
        rtc->rtc->range_max = U32_MAX;
@@ -362,10 +393,8 @@ static int jz4740_rtc_probe(struct platform_device *pdev)
 
        ret = devm_request_irq(dev, irq, jz4740_rtc_irq, 0,
                               pdev->name, rtc);
-       if (ret) {
-               dev_err(dev, "Failed to request rtc irq: %d\n", ret);
-               return ret;
-       }
+       if (ret)
+               return dev_err_probe(dev, ret, "Failed to request rtc irq\n");
 
        if (of_device_is_system_power_controller(np)) {
                dev_for_power_off = dev;
@@ -376,6 +405,21 @@ static int jz4740_rtc_probe(struct platform_device *pdev)
                        dev_warn(dev, "Poweroff handler already present!\n");
        }
 
+       if (device_property_present(dev, "#clock-cells")) {
+               rtc->clk32k.init = CLK_HW_INIT_HW("clk32k", __clk_get_hw(clk),
+                                                 &jz4740_rtc_clk32k_ops, 0);
+
+               ret = devm_clk_hw_register(dev, &rtc->clk32k);
+               if (ret)
+                       return dev_err_probe(dev, ret,
+                                            "Unable to register clk32k clock\n");
+
+               ret = of_clk_add_hw_provider(np, of_clk_hw_simple_get, &rtc->clk32k);
+               if (ret)
+                       return dev_err_probe(dev, ret,
+                                            "Unable to register clk32k clock provider\n");
+       }
+
        return 0;
 }
 
index 494052dbd39ff3b8c657b500693e35868aa55ec6..c1963f7c424d7032bb13fccc9cf158c7f7e6f2d0 100644 (file)
@@ -914,9 +914,14 @@ static int m41t80_probe(struct i2c_client *client)
                                              "wakeup-source");
 #endif
        if (client->irq > 0) {
+               unsigned long irqflags = IRQF_TRIGGER_LOW;
+
+               if (dev_fwnode(&client->dev))
+                       irqflags = 0;
+
                rc = devm_request_threaded_irq(&client->dev, client->irq,
                                               NULL, m41t80_handle_irq,
-                                              IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                              irqflags | IRQF_ONESHOT,
                                               "m41t80", client);
                if (rc) {
                        dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n");
index db3495d10274767a7092101e698bcc13951478b2..af97140dd00a5c079b14a00943e69d6393483c40 100644 (file)
@@ -9,7 +9,6 @@
  */
 
 #include <linux/bcd.h>
-#include <linux/i2c.h>
 #include <linux/mfd/max8907.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
index 6b24ac9e1cfa846bb7f8375c04654142039dbc35..2247dd39ee4b6c61bfe27ddd8fef83c9f442bbac 100644 (file)
  * Moxa Technology Co., Ltd. <www.moxa.com>
  */
 
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/rtc.h>
 #include <linux/platform_device.h>
 #include <linux/module.h>
-#include <linux/gpio.h>
-#include <linux/of_gpio.h>
+#include <linux/mod_devicetable.h>
+#include <linux/gpio/consumer.h>
 
 #define GPIO_RTC_RESERVED                      0x0C
 #define GPIO_RTC_DATA_SET                      0x10
@@ -55,7 +56,9 @@
 struct moxart_rtc {
        struct rtc_device *rtc;
        spinlock_t rtc_lock;
-       int gpio_data, gpio_sclk, gpio_reset;
+       struct gpio_desc *gpio_data;
+       struct gpio_desc *gpio_sclk;
+       struct gpio_desc *gpio_reset;
 };
 
 static int day_of_year[12] =   { 0, 31, 59, 90, 120, 151, 181,
@@ -67,10 +70,10 @@ static void moxart_rtc_write_byte(struct device *dev, u8 data)
        int i;
 
        for (i = 0; i < 8; i++, data >>= 1) {
-               gpio_set_value(moxart_rtc->gpio_sclk, 0);
-               gpio_set_value(moxart_rtc->gpio_data, ((data & 1) == 1));
+               gpiod_set_value(moxart_rtc->gpio_sclk, 0);
+               gpiod_set_value(moxart_rtc->gpio_data, ((data & 1) == 1));
                udelay(GPIO_RTC_DELAY_TIME);
-               gpio_set_value(moxart_rtc->gpio_sclk, 1);
+               gpiod_set_value(moxart_rtc->gpio_sclk, 1);
                udelay(GPIO_RTC_DELAY_TIME);
        }
 }
@@ -82,11 +85,11 @@ static u8 moxart_rtc_read_byte(struct device *dev)
        u8 data = 0;
 
        for (i = 0; i < 8; i++) {
-               gpio_set_value(moxart_rtc->gpio_sclk, 0);
+               gpiod_set_value(moxart_rtc->gpio_sclk, 0);
                udelay(GPIO_RTC_DELAY_TIME);
-               gpio_set_value(moxart_rtc->gpio_sclk, 1);
+               gpiod_set_value(moxart_rtc->gpio_sclk, 1);
                udelay(GPIO_RTC_DELAY_TIME);
-               if (gpio_get_value(moxart_rtc->gpio_data))
+               if (gpiod_get_value(moxart_rtc->gpio_data))
                        data |= (1 << i);
                udelay(GPIO_RTC_DELAY_TIME);
        }
@@ -101,15 +104,15 @@ static u8 moxart_rtc_read_register(struct device *dev, u8 cmd)
 
        local_irq_save(flags);
 
-       gpio_direction_output(moxart_rtc->gpio_data, 0);
-       gpio_set_value(moxart_rtc->gpio_reset, 1);
+       gpiod_direction_output(moxart_rtc->gpio_data, 0);
+       gpiod_set_value(moxart_rtc->gpio_reset, 1);
        udelay(GPIO_RTC_DELAY_TIME);
        moxart_rtc_write_byte(dev, cmd);
-       gpio_direction_input(moxart_rtc->gpio_data);
+       gpiod_direction_input(moxart_rtc->gpio_data);
        udelay(GPIO_RTC_DELAY_TIME);
        data = moxart_rtc_read_byte(dev);
-       gpio_set_value(moxart_rtc->gpio_sclk, 0);
-       gpio_set_value(moxart_rtc->gpio_reset, 0);
+       gpiod_set_value(moxart_rtc->gpio_sclk, 0);
+       gpiod_set_value(moxart_rtc->gpio_reset, 0);
        udelay(GPIO_RTC_DELAY_TIME);
 
        local_irq_restore(flags);
@@ -124,13 +127,13 @@ static void moxart_rtc_write_register(struct device *dev, u8 cmd, u8 data)
 
        local_irq_save(flags);
 
-       gpio_direction_output(moxart_rtc->gpio_data, 0);
-       gpio_set_value(moxart_rtc->gpio_reset, 1);
+       gpiod_direction_output(moxart_rtc->gpio_data, 0);
+       gpiod_set_value(moxart_rtc->gpio_reset, 1);
        udelay(GPIO_RTC_DELAY_TIME);
        moxart_rtc_write_byte(dev, cmd);
        moxart_rtc_write_byte(dev, data);
-       gpio_set_value(moxart_rtc->gpio_sclk, 0);
-       gpio_set_value(moxart_rtc->gpio_reset, 0);
+       gpiod_set_value(moxart_rtc->gpio_sclk, 0);
+       gpiod_set_value(moxart_rtc->gpio_reset, 0);
        udelay(GPIO_RTC_DELAY_TIME);
 
        local_irq_restore(flags);
@@ -247,53 +250,33 @@ static int moxart_rtc_probe(struct platform_device *pdev)
        if (!moxart_rtc)
                return -ENOMEM;
 
-       moxart_rtc->gpio_data = of_get_named_gpio(pdev->dev.of_node,
-                                                 "gpio-rtc-data", 0);
-       if (!gpio_is_valid(moxart_rtc->gpio_data)) {
-               dev_err(&pdev->dev, "invalid gpio (data): %d\n",
-                       moxart_rtc->gpio_data);
-               return moxart_rtc->gpio_data;
-       }
-
-       moxart_rtc->gpio_sclk = of_get_named_gpio(pdev->dev.of_node,
-                                                 "gpio-rtc-sclk", 0);
-       if (!gpio_is_valid(moxart_rtc->gpio_sclk)) {
-               dev_err(&pdev->dev, "invalid gpio (sclk): %d\n",
-                       moxart_rtc->gpio_sclk);
-               return moxart_rtc->gpio_sclk;
-       }
-
-       moxart_rtc->gpio_reset = of_get_named_gpio(pdev->dev.of_node,
-                                                  "gpio-rtc-reset", 0);
-       if (!gpio_is_valid(moxart_rtc->gpio_reset)) {
-               dev_err(&pdev->dev, "invalid gpio (reset): %d\n",
-                       moxart_rtc->gpio_reset);
-               return moxart_rtc->gpio_reset;
-       }
-
-       spin_lock_init(&moxart_rtc->rtc_lock);
-       platform_set_drvdata(pdev, moxart_rtc);
-
-       ret = devm_gpio_request(&pdev->dev, moxart_rtc->gpio_data, "rtc_data");
+       moxart_rtc->gpio_data = devm_gpiod_get(&pdev->dev, "rtc-data",
+                                              GPIOD_IN);
+       ret = PTR_ERR_OR_ZERO(moxart_rtc->gpio_data);
        if (ret) {
-               dev_err(&pdev->dev, "can't get rtc_data gpio\n");
+               dev_err(&pdev->dev, "can't get rtc data gpio: %d\n", ret);
                return ret;
        }
 
-       ret = devm_gpio_request_one(&pdev->dev, moxart_rtc->gpio_sclk,
-                                   GPIOF_DIR_OUT, "rtc_sclk");
+       moxart_rtc->gpio_sclk = devm_gpiod_get(&pdev->dev, "rtc-sclk",
+                                              GPIOD_ASIS);
+       ret = PTR_ERR_OR_ZERO(moxart_rtc->gpio_sclk);
        if (ret) {
-               dev_err(&pdev->dev, "can't get rtc_sclk gpio\n");
+               dev_err(&pdev->dev, "can't get rtc sclk gpio: %d\n", ret);
                return ret;
        }
 
-       ret = devm_gpio_request_one(&pdev->dev, moxart_rtc->gpio_reset,
-                                   GPIOF_DIR_OUT, "rtc_reset");
+       moxart_rtc->gpio_reset = devm_gpiod_get(&pdev->dev, "rtc-reset",
+                                               GPIOD_ASIS);
+       ret = PTR_ERR_OR_ZERO(moxart_rtc->gpio_reset);
        if (ret) {
-               dev_err(&pdev->dev, "can't get rtc_reset gpio\n");
+               dev_err(&pdev->dev, "can't get rtc reset gpio: %d\n", ret);
                return ret;
        }
 
+       spin_lock_init(&moxart_rtc->rtc_lock);
+       platform_set_drvdata(pdev, moxart_rtc);
+
        moxart_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
                                                   &moxart_rtc_ops,
                                                   THIS_MODULE);
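
With the descriptor API, the con_id passed to devm_gpiod_get() is combined with the standard suffix, so (assuming the usual gpiolib DT bindings) "rtc-data" resolves to an "rtc-data-gpios" property, and all direction and value handling moves to the gpiod_* calls:

        /* matches e.g.: rtc-data-gpios = <&gpio 5 GPIO_ACTIVE_HIGH>;  (illustrative) */
        moxart_rtc->gpio_data = devm_gpiod_get(&pdev->dev, "rtc-data", GPIOD_IN);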
diff --git a/drivers/rtc/rtc-nxp-bbnsm.c b/drivers/rtc/rtc-nxp-bbnsm.c
new file mode 100644 (file)
index 0000000..acbfbeb
--- /dev/null
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2022 NXP.
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_wakeirq.h>
+#include <linux/regmap.h>
+#include <linux/rtc.h>
+
+#define BBNSM_CTRL     0x8
+#define BBNSM_INT_EN   0x10
+#define BBNSM_EVENTS   0x14
+#define BBNSM_RTC_LS   0x40
+#define BBNSM_RTC_MS   0x44
+#define BBNSM_TA       0x50
+
+#define RTC_EN         0x2
+#define RTC_EN_MSK     0x3
+#define TA_EN          (0x2 << 2)
+#define TA_DIS         (0x1 << 2)
+#define TA_EN_MSK      (0x3 << 2)
+#define RTC_INT_EN     0x2
+#define TA_INT_EN      (0x2 << 2)
+
+#define BBNSM_EVENT_TA (0x2 << 2)
+
+#define CNTR_TO_SECS_SH        15
+
+struct bbnsm_rtc {
+       struct rtc_device *rtc;
+       struct regmap *regmap;
+       int irq;
+       struct clk *clk;
+};
+
+static u32 bbnsm_read_counter(struct bbnsm_rtc *bbnsm)
+{
+       u32 rtc_msb, rtc_lsb;
+       unsigned int timeout = 100;
+       u32 time;
+       u32 tmp = 0;
+
+       do {
+               time = tmp;
+               /* read the msb */
+               regmap_read(bbnsm->regmap, BBNSM_RTC_MS, &rtc_msb);
+               /* read the lsb */
+               regmap_read(bbnsm->regmap, BBNSM_RTC_LS, &rtc_lsb);
+               /* convert to seconds */
+               tmp = (rtc_msb << 17) | (rtc_lsb >> 15);
+       } while (tmp != time && --timeout);
+
+       return time;
+}
+
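The 47-bit counter spans two MMIO words and cannot be read atomically, so the loop re-reads until two consecutive conversions agree, bounded at 100 tries. The 15-bit shift implies a 32768 Hz tick, i.e. seconds = counter >> 15, reassembled as:

        sec = (rtc_msb << 17) | (rtc_lsb >> 15);        /* MS holds bits 46:32, LS bits 31:0 */
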
+static int bbnsm_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+       struct bbnsm_rtc *bbnsm = dev_get_drvdata(dev);
+       unsigned long time;
+       u32 val;
+
+       regmap_read(bbnsm->regmap, BBNSM_CTRL, &val);
+       if ((val & RTC_EN_MSK) != RTC_EN)
+               return -EINVAL;
+
+       time = bbnsm_read_counter(bbnsm);
+       rtc_time64_to_tm(time, tm);
+
+       return 0;
+}
+
+static int bbnsm_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+       struct bbnsm_rtc *bbnsm = dev_get_drvdata(dev);
+       unsigned long time = rtc_tm_to_time64(tm);
+
+       /* disable the RTC first */
+       regmap_update_bits(bbnsm->regmap, BBNSM_CTRL, RTC_EN_MSK, 0);
+
+       /* write the 32-bit seconds value to the 47-bit timer counter, leaving the 15 LSBs blank */
+       regmap_write(bbnsm->regmap, BBNSM_RTC_LS, time << CNTR_TO_SECS_SH);
+       regmap_write(bbnsm->regmap, BBNSM_RTC_MS, time >> (32 - CNTR_TO_SECS_SH));
+
+       /* Enable the RTC again */
+       regmap_update_bits(bbnsm->regmap, BBNSM_CTRL, RTC_EN_MSK, RTC_EN);
+
+       return 0;
+}
+
+static int bbnsm_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+       struct bbnsm_rtc *bbnsm = dev_get_drvdata(dev);
+       u32 bbnsm_events, bbnsm_ta;
+
+       regmap_read(bbnsm->regmap, BBNSM_TA, &bbnsm_ta);
+       rtc_time64_to_tm(bbnsm_ta, &alrm->time);
+
+       regmap_read(bbnsm->regmap, BBNSM_EVENTS, &bbnsm_events);
+       alrm->pending = (bbnsm_events & BBNSM_EVENT_TA) ? 1 : 0;
+
+       return 0;
+}
+
+static int bbnsm_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
+{
+       struct bbnsm_rtc *bbnsm = dev_get_drvdata(dev);
+
+       /* enable the alarm event */
+       regmap_update_bits(bbnsm->regmap, BBNSM_CTRL, TA_EN_MSK, enable ? TA_EN : TA_DIS);
+       /* enable the alarm interrupt */
+       regmap_update_bits(bbnsm->regmap, BBNSM_INT_EN, TA_EN_MSK, enable ? TA_EN : TA_DIS);
+
+       return 0;
+}
+
+static int bbnsm_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+       struct bbnsm_rtc *bbnsm = dev_get_drvdata(dev);
+       unsigned long time = rtc_tm_to_time64(&alrm->time);
+
+       /* disable the alarm */
+       regmap_update_bits(bbnsm->regmap, BBNSM_CTRL, TA_EN, TA_EN);
+
+       /* write the seconds to TA */
+       regmap_write(bbnsm->regmap, BBNSM_TA, time);
+
+       return bbnsm_rtc_alarm_irq_enable(dev, alrm->enabled);
+}
+
+static const struct rtc_class_ops bbnsm_rtc_ops = {
+       .read_time = bbnsm_rtc_read_time,
+       .set_time = bbnsm_rtc_set_time,
+       .read_alarm = bbnsm_rtc_read_alarm,
+       .set_alarm = bbnsm_rtc_set_alarm,
+       .alarm_irq_enable = bbnsm_rtc_alarm_irq_enable,
+};
+
+static irqreturn_t bbnsm_rtc_irq_handler(int irq, void *dev_id)
+{
+       struct device *dev = dev_id;
+       struct bbnsm_rtc  *bbnsm = dev_get_drvdata(dev);
+       u32 val;
+
+       regmap_read(bbnsm->regmap, BBNSM_EVENTS, &val);
+       if (val & BBNSM_EVENT_TA) {
+               bbnsm_rtc_alarm_irq_enable(dev, false);
+               /* clear the alarm event */
+               regmap_write_bits(bbnsm->regmap, BBNSM_EVENTS, TA_EN_MSK, BBNSM_EVENT_TA);
+               rtc_update_irq(bbnsm->rtc, 1, RTC_AF | RTC_IRQF);
+
+               return IRQ_HANDLED;
+       }
+
+       return IRQ_NONE;
+}
+
+static int bbnsm_rtc_probe(struct platform_device *pdev)
+{
+       struct device_node *np = pdev->dev.of_node;
+       struct bbnsm_rtc *bbnsm;
+       int ret;
+
+       bbnsm = devm_kzalloc(&pdev->dev, sizeof(*bbnsm), GFP_KERNEL);
+       if (!bbnsm)
+               return -ENOMEM;
+
+       bbnsm->rtc = devm_rtc_allocate_device(&pdev->dev);
+       if (IS_ERR(bbnsm->rtc))
+               return PTR_ERR(bbnsm->rtc);
+
+       bbnsm->regmap = syscon_node_to_regmap(np->parent);
+       if (IS_ERR(bbnsm->regmap)) {
+               dev_dbg(&pdev->dev, "failed to get bbnsm regmap\n");
+               return PTR_ERR(bbnsm->regmap);
+       }
+
+       bbnsm->irq = platform_get_irq(pdev, 0);
+       if (bbnsm->irq < 0)
+               return bbnsm->irq;
+
+       platform_set_drvdata(pdev, bbnsm);
+
+       /* clear all the pending events */
+       regmap_write(bbnsm->regmap, BBNSM_EVENTS, 0x7A);
+
+       device_init_wakeup(&pdev->dev, true);
+       dev_pm_set_wake_irq(&pdev->dev, bbnsm->irq);
+
+       ret = devm_request_irq(&pdev->dev, bbnsm->irq, bbnsm_rtc_irq_handler,
+                              IRQF_SHARED, "rtc alarm", &pdev->dev);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to request irq %d: %d\n",
+                       bbnsm->irq, ret);
+               return ret;
+       }
+
+       bbnsm->rtc->ops = &bbnsm_rtc_ops;
+       bbnsm->rtc->range_max = U32_MAX;
+
+       return devm_rtc_register_device(bbnsm->rtc);
+}
+
+static const struct of_device_id bbnsm_dt_ids[] = {
+       { .compatible = "nxp,imx93-bbnsm-rtc" },
+       { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, bbnsm_dt_ids);
+
+static struct platform_driver bbnsm_rtc_driver = {
+       .driver = {
+               .name = "bbnsm_rtc",
+               .of_match_table = bbnsm_dt_ids,
+       },
+       .probe = bbnsm_rtc_probe,
+};
+module_platform_driver(bbnsm_rtc_driver);
+
+MODULE_AUTHOR("Jacky Bai <ping.bai@nxp.com>");
+MODULE_DESCRIPTION("NXP BBNSM RTC Driver");
+MODULE_LICENSE("GPL");
index e13b5e695d06a725ef6edb18972dc95ac8c6d84e..e714661e61a916707fc4be148540823ba7463fe5 100644 (file)
@@ -413,9 +413,14 @@ static int pcf2123_probe(struct spi_device *spi)
 
        /* Register alarm irq */
        if (spi->irq > 0) {
+               unsigned long irqflags = IRQF_TRIGGER_LOW;
+
+               if (dev_fwnode(&spi->dev))
+                       irqflags = 0;
+
                ret = devm_request_threaded_irq(&spi->dev, spi->irq, NULL,
                                pcf2123_rtc_irq,
-                               IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                               irqflags | IRQF_ONESHOT,
                                pcf2123_driver.driver.name, &spi->dev);
                if (!ret)
                        device_init_wakeup(&spi->dev, true);
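
This pcf2123 hunk is the first of a series of identical conversions below (pcf85063, pcf8523, pcf85363, pcf8563, rv3029, rv3032, rv8803, rx8010): hardcode IRQF_TRIGGER_LOW only when no firmware node describes the interrupt, so a trigger type supplied by DT or ACPI is honored instead of being overridden. A hedged sketch of the shared pattern as a standalone helper; the helper name is hypothetical, while dev_fwnode() and devm_request_threaded_irq() are the real kernel APIs:

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/property.h>

/* Request a threaded RTC alarm IRQ, deferring to the firmware-described
 * trigger type when one exists (illustrative helper, not from the patch). */
static int example_request_alarm_irq(struct device *dev, int irq,
				     irq_handler_t thread_fn, void *data)
{
	unsigned long irqflags = IRQF_TRIGGER_LOW;

	if (dev_fwnode(dev))	/* DT/ACPI already encodes the trigger */
		irqflags = 0;

	return devm_request_threaded_irq(dev, irq, NULL, thread_fn,
					 irqflags | IRQF_ONESHOT,
					 "example-rtc", data);
}
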
index 754e03984f986dc2f5d09cfa59bff4087a15558b..71a4563559819473ebd7a5cf430edf90f2ef60ee 100644 (file)
@@ -621,9 +621,14 @@ static int pcf85063_probe(struct i2c_client *client)
        clear_bit(RTC_FEATURE_ALARM, pcf85063->rtc->features);
 
        if (config->has_alarms && client->irq > 0) {
+               unsigned long irqflags = IRQF_TRIGGER_LOW;
+
+               if (dev_fwnode(&client->dev))
+                       irqflags = 0;
+
                err = devm_request_threaded_irq(&client->dev, client->irq,
                                                NULL, pcf85063_rtc_handle_irq,
-                                               IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                               irqflags | IRQF_ONESHOT,
                                                "pcf85063", pcf85063);
                if (err) {
                        dev_warn(&pcf85063->rtc->dev,
index 92de99f11a7a5a64dcbe8aa97e8c8cde6c7ffc06..2e111cdb94f76cb74d7e96ef4fa4b336f91eaf56 100644 (file)
@@ -445,13 +445,18 @@ static int pcf8523_probe(struct i2c_client *client)
        clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features);
 
        if (client->irq > 0) {
+               unsigned long irqflags = IRQF_TRIGGER_LOW;
+
+               if (dev_fwnode(&client->dev))
+                       irqflags = 0;
+
                err = regmap_write(pcf8523->regmap, PCF8523_TMR_CLKOUT_CTRL, 0x38);
                if (err < 0)
                        return err;
 
                err = devm_request_threaded_irq(&client->dev, client->irq,
                                                NULL, pcf8523_irq,
-                                               IRQF_SHARED | IRQF_ONESHOT | IRQF_TRIGGER_LOW,
+                                               IRQF_SHARED | IRQF_ONESHOT | irqflags,
                                                dev_name(&rtc->dev), pcf8523);
                if (err)
                        return err;
index c05b722f006058f2340a787a6c8d68dcd86e51a2..8958eadf1c3efb0e0281cc910df3bd5ed0c934ed 100644 (file)
 #define PIN_IO_INTA_OUT        2
 #define PIN_IO_INTA_HIZ        3
 
+#define OSC_CAP_SEL    GENMASK(1, 0)
+#define OSC_CAP_6000   0x01
+#define OSC_CAP_12500  0x02
+
 #define STOP_EN_STOP   BIT(0)
 
 #define RESET_CPR      0xa4
@@ -117,6 +121,32 @@ struct pcf85x63_config {
        unsigned int num_nvram;
 };
 
+static int pcf85363_load_capacitance(struct pcf85363 *pcf85363, struct device_node *node)
+{
+       u32 load = 7000;
+       u8 value = 0;
+
+       of_property_read_u32(node, "quartz-load-femtofarads", &load);
+
+       switch (load) {
+       default:
+               dev_warn(&pcf85363->rtc->dev, "Unknown quartz-load-femtofarads value: %u, assuming 7000\n",
+                        load);
+               fallthrough;
+       case 7000:
+               break;
+       case 6000:
+               value = OSC_CAP_6000;
+               break;
+       case 12500:
+               value = OSC_CAP_12500;
+               break;
+       }
+
+       return regmap_update_bits(pcf85363->regmap, CTRL_OSCILLATOR,
+                                 OSC_CAP_SEL, value);
+}
+
 static int pcf85363_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
        struct pcf85363 *pcf85363 = dev_get_drvdata(dev);
@@ -372,7 +402,7 @@ static int pcf85363_probe(struct i2c_client *client)
                        .reg_write = pcf85363_nvram_write,
                },
        };
-       int ret, i;
+       int ret, i, err;
 
        if (data)
                config = data;
@@ -394,18 +424,28 @@ static int pcf85363_probe(struct i2c_client *client)
        if (IS_ERR(pcf85363->rtc))
                return PTR_ERR(pcf85363->rtc);
 
+       err = pcf85363_load_capacitance(pcf85363, client->dev.of_node);
+       if (err < 0)
+               dev_warn(&client->dev, "failed to set xtal load capacitance: %d\n",
+                        err);
+
        pcf85363->rtc->ops = &rtc_ops;
        pcf85363->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        pcf85363->rtc->range_max = RTC_TIMESTAMP_END_2099;
        clear_bit(RTC_FEATURE_ALARM, pcf85363->rtc->features);
 
        if (client->irq > 0) {
+               unsigned long irqflags = IRQF_TRIGGER_LOW;
+
+               if (dev_fwnode(&client->dev))
+                       irqflags = 0;
+
                regmap_write(pcf85363->regmap, CTRL_FLAGS, 0);
                regmap_update_bits(pcf85363->regmap, CTRL_PIN_IO,
                                   PIN_IO_INTA_OUT, PIN_IO_INTAPM);
                ret = devm_request_threaded_irq(&client->dev, client->irq,
                                                NULL, pcf85363_rtc_handle_irq,
-                                               IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                               irqflags | IRQF_ONESHOT,
                                                "pcf85363", client);
                if (ret)
                        dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n");
index 0a7fd94784651b9e604e8e626b79ee1c182fb9a3..7e720472213c7c76e2971880d92d68550875e127 100644 (file)
@@ -558,9 +558,14 @@ static int pcf8563_probe(struct i2c_client *client)
        pcf8563->rtc->set_start_time = true;
 
        if (client->irq > 0) {
+               unsigned long irqflags = IRQF_TRIGGER_LOW;
+
+               if (dev_fwnode(&client->dev))
+                       irqflags = 0;
+
                err = devm_request_threaded_irq(&client->dev, client->irq,
                                NULL, pcf8563_irq,
-                               IRQF_SHARED | IRQF_ONESHOT | IRQF_TRIGGER_LOW,
+                               IRQF_SHARED | IRQF_ONESHOT | irqflags,
                                pcf8563_driver.driver.name, client);
                if (err) {
                        dev_err(&client->dev, "unable to request IRQ %d\n",
index 716e5d9ad74d14a27fbf827619bcec1043375f22..372494e82f405406b9c09ac4c9278c22f11d6caa 100644 (file)
@@ -1,8 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+/*
+ * pm8xxx RTC driver
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ * Copyright (c) 2023, Linaro Limited
  */
 #include <linux/of.h>
 #include <linux/module.h>
+#include <linux/nvmem-consumer.h>
 #include <linux/init.h>
 #include <linux/rtc.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
-/* RTC Register offsets from RTC CTRL REG */
-#define PM8XXX_ALARM_CTRL_OFFSET       0x01
-#define PM8XXX_RTC_WRITE_OFFSET                0x02
-#define PM8XXX_RTC_READ_OFFSET         0x06
-#define PM8XXX_ALARM_RW_OFFSET         0x0A
+#include <asm/unaligned.h>
 
 /* RTC_CTRL register bit fields */
 #define PM8xxx_RTC_ENABLE              BIT(7)
 
 /**
  * struct pm8xxx_rtc_regs - describe RTC registers per PMIC versions
- * @ctrl: base address of control register
- * @write: base address of write register
- * @read: base address of read register
- * @alarm_ctrl: base address of alarm control register
- * @alarm_ctrl2: base address of alarm control2 register
- * @alarm_rw: base address of alarm read-write register
- * @alarm_en: alarm enable mask
+ * @ctrl:              address of control register
+ * @write:             base address of write registers
+ * @read:              base address of read registers
+ * @alarm_ctrl:                address of alarm control register
+ * @alarm_ctrl2:       address of alarm control2 register
+ * @alarm_rw:          base address of alarm read-write registers
+ * @alarm_en:          alarm enable mask
  */
 struct pm8xxx_rtc_regs {
        unsigned int ctrl;
@@ -46,25 +47,135 @@ struct pm8xxx_rtc_regs {
 };
 
 /**
- * struct pm8xxx_rtc -  rtc driver internal structure
- * @rtc:               rtc device for this driver.
- * @regmap:            regmap used to access RTC registers
- * @allow_set_time:    indicates whether writing to the RTC is allowed
- * @rtc_alarm_irq:     rtc alarm irq number.
- * @regs:              rtc registers description.
- * @rtc_dev:           device structure.
- * @ctrl_reg_lock:     spinlock protecting access to ctrl_reg.
+ * struct pm8xxx_rtc -  RTC driver internal structure
+ * @rtc:               RTC device
+ * @regmap:            regmap used to access registers
+ * @allow_set_time:    whether the time can be set
+ * @alarm_irq:         alarm irq number
+ * @regs:              register description
+ * @dev:               device structure
+ * @nvmem_cell:                nvmem cell for offset
+ * @offset:            offset from epoch in seconds
  */
 struct pm8xxx_rtc {
        struct rtc_device *rtc;
        struct regmap *regmap;
        bool allow_set_time;
-       int rtc_alarm_irq;
+       int alarm_irq;
        const struct pm8xxx_rtc_regs *regs;
-       struct device *rtc_dev;
-       spinlock_t ctrl_reg_lock;
+       struct device *dev;
+       struct nvmem_cell *nvmem_cell;
+       u32 offset;
 };
 
+static int pm8xxx_rtc_read_nvmem_offset(struct pm8xxx_rtc *rtc_dd)
+{
+       size_t len;
+       void *buf;
+       int rc;
+
+       buf = nvmem_cell_read(rtc_dd->nvmem_cell, &len);
+       if (IS_ERR(buf)) {
+               rc = PTR_ERR(buf);
+               dev_dbg(rtc_dd->dev, "failed to read nvmem offset: %d\n", rc);
+               return rc;
+       }
+
+       if (len != sizeof(u32)) {
+               dev_dbg(rtc_dd->dev, "unexpected nvmem cell size %zu\n", len);
+               kfree(buf);
+               return -EINVAL;
+       }
+
+       rtc_dd->offset = get_unaligned_le32(buf);
+
+       kfree(buf);
+
+       return 0;
+}
+
+static int pm8xxx_rtc_write_nvmem_offset(struct pm8xxx_rtc *rtc_dd, u32 offset)
+{
+       u8 buf[sizeof(u32)];
+       int rc;
+
+       put_unaligned_le32(offset, buf);
+
+       rc = nvmem_cell_write(rtc_dd->nvmem_cell, buf, sizeof(buf));
+       if (rc < 0) {
+               dev_dbg(rtc_dd->dev, "failed to write nvmem offset: %d\n", rc);
+               return rc;
+       }
+
+       return 0;
+}
+
+static int pm8xxx_rtc_read_offset(struct pm8xxx_rtc *rtc_dd)
+{
+       if (!rtc_dd->nvmem_cell)
+               return 0;
+
+       return pm8xxx_rtc_read_nvmem_offset(rtc_dd);
+}
+
+static int pm8xxx_rtc_read_raw(struct pm8xxx_rtc *rtc_dd, u32 *secs)
+{
+       const struct pm8xxx_rtc_regs *regs = rtc_dd->regs;
+       u8 value[NUM_8_BIT_RTC_REGS];
+       unsigned int reg;
+       int rc;
+
+       rc = regmap_bulk_read(rtc_dd->regmap, regs->read, value, sizeof(value));
+       if (rc)
+               return rc;
+
+       /*
+        * Read the LSB again and check if there has been a carry over.
+        * If there has, redo the read operation.
+        */
+       rc = regmap_read(rtc_dd->regmap, regs->read, &reg);
+       if (rc < 0)
+               return rc;
+
+       if (reg < value[0]) {
+               rc = regmap_bulk_read(rtc_dd->regmap, regs->read, value,
+                                     sizeof(value));
+               if (rc)
+                       return rc;
+       }
+
+       *secs = get_unaligned_le32(value);
+
+       return 0;
+}
+
+static int pm8xxx_rtc_update_offset(struct pm8xxx_rtc *rtc_dd, u32 secs)
+{
+       u32 raw_secs;
+       u32 offset;
+       int rc;
+
+       if (!rtc_dd->nvmem_cell)
+               return -ENODEV;
+
+       rc = pm8xxx_rtc_read_raw(rtc_dd, &raw_secs);
+       if (rc)
+               return rc;
+
+       offset = secs - raw_secs;
+
+       if (offset == rtc_dd->offset)
+               return 0;
+
+       rc = pm8xxx_rtc_write_nvmem_offset(rtc_dd, offset);
+       if (rc)
+               return rc;
+
+       rtc_dd->offset = offset;
+
+       return 0;
+}
+
 /*
  * Steps to write the RTC registers.
  * 1. Disable alarm if enabled.
@@ -74,269 +185,186 @@ struct pm8xxx_rtc {
  * 5. Enable rtc if disabled in step 2.
  * 6. Enable alarm if disabled in step 1.
  */
-static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm)
+static int __pm8xxx_rtc_set_time(struct pm8xxx_rtc *rtc_dd, u32 secs)
 {
-       int rc, i;
-       unsigned long secs, irq_flags;
-       u8 value[NUM_8_BIT_RTC_REGS], alarm_enabled = 0, rtc_disabled = 0;
-       unsigned int ctrl_reg, rtc_ctrl_reg;
-       struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev);
        const struct pm8xxx_rtc_regs *regs = rtc_dd->regs;
+       u8 value[NUM_8_BIT_RTC_REGS];
+       bool alarm_enabled;
+       int rc;
 
-       if (!rtc_dd->allow_set_time)
-               return -ENODEV;
-
-       secs = rtc_tm_to_time64(tm);
-
-       dev_dbg(dev, "Seconds value to be written to RTC = %lu\n", secs);
-
-       for (i = 0; i < NUM_8_BIT_RTC_REGS; i++) {
-               value[i] = secs & 0xFF;
-               secs >>= 8;
-       }
-
-       spin_lock_irqsave(&rtc_dd->ctrl_reg_lock, irq_flags);
+       put_unaligned_le32(secs, value);
 
-       rc = regmap_read(rtc_dd->regmap, regs->alarm_ctrl, &ctrl_reg);
+       rc = regmap_update_bits_check(rtc_dd->regmap, regs->alarm_ctrl,
+                                     regs->alarm_en, 0, &alarm_enabled);
        if (rc)
-               goto rtc_rw_fail;
-
-       if (ctrl_reg & regs->alarm_en) {
-               alarm_enabled = 1;
-               ctrl_reg &= ~regs->alarm_en;
-               rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg);
-               if (rc) {
-                       dev_err(dev, "Write to RTC Alarm control register failed\n");
-                       goto rtc_rw_fail;
-               }
-       }
+               return rc;
 
-       /* Disable RTC H/w before writing on RTC register */
-       rc = regmap_read(rtc_dd->regmap, regs->ctrl, &rtc_ctrl_reg);
+       /* Disable RTC */
+       rc = regmap_update_bits(rtc_dd->regmap, regs->ctrl, PM8xxx_RTC_ENABLE, 0);
        if (rc)
-               goto rtc_rw_fail;
-
-       if (rtc_ctrl_reg & PM8xxx_RTC_ENABLE) {
-               rtc_disabled = 1;
-               rtc_ctrl_reg &= ~PM8xxx_RTC_ENABLE;
-               rc = regmap_write(rtc_dd->regmap, regs->ctrl, rtc_ctrl_reg);
-               if (rc) {
-                       dev_err(dev, "Write to RTC control register failed\n");
-                       goto rtc_rw_fail;
-               }
-       }
+               return rc;
 
        /* Write 0 to Byte[0] */
        rc = regmap_write(rtc_dd->regmap, regs->write, 0);
-       if (rc) {
-               dev_err(dev, "Write to RTC write data register failed\n");
-               goto rtc_rw_fail;
-       }
+       if (rc)
+               return rc;
 
        /* Write Byte[1], Byte[2], Byte[3] */
        rc = regmap_bulk_write(rtc_dd->regmap, regs->write + 1,
                               &value[1], sizeof(value) - 1);
-       if (rc) {
-               dev_err(dev, "Write to RTC write data register failed\n");
-               goto rtc_rw_fail;
-       }
+       if (rc)
+               return rc;
 
        /* Write Byte[0] */
        rc = regmap_write(rtc_dd->regmap, regs->write, value[0]);
-       if (rc) {
-               dev_err(dev, "Write to RTC write data register failed\n");
-               goto rtc_rw_fail;
-       }
+       if (rc)
+               return rc;
 
-       /* Enable RTC H/w after writing on RTC register */
-       if (rtc_disabled) {
-               rtc_ctrl_reg |= PM8xxx_RTC_ENABLE;
-               rc = regmap_write(rtc_dd->regmap, regs->ctrl, rtc_ctrl_reg);
-               if (rc) {
-                       dev_err(dev, "Write to RTC control register failed\n");
-                       goto rtc_rw_fail;
-               }
-       }
+       /* Enable RTC */
+       rc = regmap_update_bits(rtc_dd->regmap, regs->ctrl, PM8xxx_RTC_ENABLE,
+                               PM8xxx_RTC_ENABLE);
+       if (rc)
+               return rc;
 
        if (alarm_enabled) {
-               ctrl_reg |= regs->alarm_en;
-               rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg);
-               if (rc) {
-                       dev_err(dev, "Write to RTC Alarm control register failed\n");
-                       goto rtc_rw_fail;
-               }
+               rc = regmap_update_bits(rtc_dd->regmap, regs->alarm_ctrl,
+                                       regs->alarm_en, regs->alarm_en);
+               if (rc)
+                       return rc;
        }
 
-rtc_rw_fail:
-       spin_unlock_irqrestore(&rtc_dd->ctrl_reg_lock, irq_flags);
-
-       return rc;
+       return 0;
 }
 
-static int pm8xxx_rtc_read_time(struct device *dev, struct rtc_time *tm)
+static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
-       int rc;
-       u8 value[NUM_8_BIT_RTC_REGS];
-       unsigned long secs;
-       unsigned int reg;
        struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev);
-       const struct pm8xxx_rtc_regs *regs = rtc_dd->regs;
+       u32 secs;
+       int rc;
 
-       rc = regmap_bulk_read(rtc_dd->regmap, regs->read, value, sizeof(value));
-       if (rc) {
-               dev_err(dev, "RTC read data register failed\n");
-               return rc;
-       }
+       secs = rtc_tm_to_time64(tm);
 
-       /*
-        * Read the LSB again and check if there has been a carry over.
-        * If there is, redo the read operation.
-        */
-       rc = regmap_read(rtc_dd->regmap, regs->read, &reg);
-       if (rc < 0) {
-               dev_err(dev, "RTC read data register failed\n");
+       if (rtc_dd->allow_set_time)
+               rc = __pm8xxx_rtc_set_time(rtc_dd, secs);
+       else
+               rc = pm8xxx_rtc_update_offset(rtc_dd, secs);
+
+       if (rc)
                return rc;
-       }
 
-       if (unlikely(reg < value[0])) {
-               rc = regmap_bulk_read(rtc_dd->regmap, regs->read,
-                                     value, sizeof(value));
-               if (rc) {
-                       dev_err(dev, "RTC read data register failed\n");
-                       return rc;
-               }
-       }
+       dev_dbg(dev, "set time: %ptRd %ptRt (%u + %u)\n", tm, tm,
+                       secs - rtc_dd->offset, rtc_dd->offset);
+       return 0;
+}
 
-       secs = value[0] | (value[1] << 8) | (value[2] << 16) |
-              ((unsigned long)value[3] << 24);
+static int pm8xxx_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+       struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev);
+       u32 secs;
+       int rc;
 
-       rtc_time64_to_tm(secs, tm);
+       rc = pm8xxx_rtc_read_raw(rtc_dd, &secs);
+       if (rc)
+               return rc;
 
-       dev_dbg(dev, "secs = %lu, h:m:s == %ptRt, y-m-d = %ptRdr\n", secs, tm, tm);
+       secs += rtc_dd->offset;
+       rtc_time64_to_tm(secs, tm);
 
+       dev_dbg(dev, "read time: %ptRd %ptRt (%u + %u)\n", tm, tm,
+                       secs - rtc_dd->offset, rtc_dd->offset);
        return 0;
 }
 
 static int pm8xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 {
-       int rc, i;
-       u8 value[NUM_8_BIT_RTC_REGS];
-       unsigned int ctrl_reg;
-       unsigned long secs, irq_flags;
        struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev);
        const struct pm8xxx_rtc_regs *regs = rtc_dd->regs;
+       u8 value[NUM_8_BIT_RTC_REGS];
+       u32 secs;
+       int rc;
 
        secs = rtc_tm_to_time64(&alarm->time);
+       secs -= rtc_dd->offset;
+       put_unaligned_le32(secs, value);
 
-       for (i = 0; i < NUM_8_BIT_RTC_REGS; i++) {
-               value[i] = secs & 0xFF;
-               secs >>= 8;
-       }
-
-       spin_lock_irqsave(&rtc_dd->ctrl_reg_lock, irq_flags);
+       rc = regmap_update_bits(rtc_dd->regmap, regs->alarm_ctrl,
+                               regs->alarm_en, 0);
+       if (rc)
+               return rc;
 
        rc = regmap_bulk_write(rtc_dd->regmap, regs->alarm_rw, value,
                               sizeof(value));
-       if (rc) {
-               dev_err(dev, "Write to RTC ALARM register failed\n");
-               goto rtc_rw_fail;
-       }
-
-       rc = regmap_read(rtc_dd->regmap, regs->alarm_ctrl, &ctrl_reg);
        if (rc)
-               goto rtc_rw_fail;
-
-       if (alarm->enabled)
-               ctrl_reg |= regs->alarm_en;
-       else
-               ctrl_reg &= ~regs->alarm_en;
+               return rc;
 
-       rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg);
-       if (rc) {
-               dev_err(dev, "Write to RTC alarm control register failed\n");
-               goto rtc_rw_fail;
+       if (alarm->enabled) {
+               rc = regmap_update_bits(rtc_dd->regmap, regs->alarm_ctrl,
+                                       regs->alarm_en, regs->alarm_en);
+               if (rc)
+                       return rc;
        }
 
-       dev_dbg(dev, "Alarm Set for h:m:s=%ptRt, y-m-d=%ptRdr\n",
-               &alarm->time, &alarm->time);
-rtc_rw_fail:
-       spin_unlock_irqrestore(&rtc_dd->ctrl_reg_lock, irq_flags);
-       return rc;
+       dev_dbg(dev, "set alarm: %ptRd %ptRt\n", &alarm->time, &alarm->time);
+
+       return 0;
 }
 
 static int pm8xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 {
-       int rc;
-       unsigned int ctrl_reg;
-       u8 value[NUM_8_BIT_RTC_REGS];
-       unsigned long secs;
        struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev);
        const struct pm8xxx_rtc_regs *regs = rtc_dd->regs;
+       u8 value[NUM_8_BIT_RTC_REGS];
+       unsigned int ctrl_reg;
+       u32 secs;
+       int rc;
 
        rc = regmap_bulk_read(rtc_dd->regmap, regs->alarm_rw, value,
                              sizeof(value));
-       if (rc) {
-               dev_err(dev, "RTC alarm time read failed\n");
+       if (rc)
                return rc;
-       }
-
-       secs = value[0] | (value[1] << 8) | (value[2] << 16) |
-              ((unsigned long)value[3] << 24);
 
+       secs = get_unaligned_le32(value);
+       secs += rtc_dd->offset;
        rtc_time64_to_tm(secs, &alarm->time);
 
        rc = regmap_read(rtc_dd->regmap, regs->alarm_ctrl, &ctrl_reg);
-       if (rc) {
-               dev_err(dev, "Read from RTC alarm control register failed\n");
+       if (rc)
                return rc;
-       }
+
        alarm->enabled = !!(ctrl_reg & PM8xxx_RTC_ALARM_ENABLE);
 
-       dev_dbg(dev, "Alarm set for - h:m:s=%ptRt, y-m-d=%ptRdr\n",
-               &alarm->time, &alarm->time);
+       dev_dbg(dev, "read alarm: %ptRd %ptRt\n", &alarm->time, &alarm->time);
 
        return 0;
 }
 
 static int pm8xxx_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
 {
-       int rc;
-       unsigned long irq_flags;
        struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev);
        const struct pm8xxx_rtc_regs *regs = rtc_dd->regs;
-       unsigned int ctrl_reg;
        u8 value[NUM_8_BIT_RTC_REGS] = {0};
-
-       spin_lock_irqsave(&rtc_dd->ctrl_reg_lock, irq_flags);
-
-       rc = regmap_read(rtc_dd->regmap, regs->alarm_ctrl, &ctrl_reg);
-       if (rc)
-               goto rtc_rw_fail;
+       unsigned int val;
+       int rc;
 
        if (enable)
-               ctrl_reg |= regs->alarm_en;
+               val = regs->alarm_en;
        else
-               ctrl_reg &= ~regs->alarm_en;
+               val = 0;
 
-       rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg);
-       if (rc) {
-               dev_err(dev, "Write to RTC control register failed\n");
-               goto rtc_rw_fail;
-       }
+       rc = regmap_update_bits(rtc_dd->regmap, regs->alarm_ctrl,
+                               regs->alarm_en, val);
+       if (rc)
+               return rc;
 
-       /* Clear Alarm register */
+       /* Clear alarm register */
        if (!enable) {
                rc = regmap_bulk_write(rtc_dd->regmap, regs->alarm_rw, value,
                                       sizeof(value));
-               if (rc) {
-                       dev_err(dev, "Clear RTC ALARM register failed\n");
-                       goto rtc_rw_fail;
-               }
+               if (rc)
+                       return rc;
        }
 
-rtc_rw_fail:
-       spin_unlock_irqrestore(&rtc_dd->ctrl_reg_lock, irq_flags);
-       return rc;
+       return 0;
 }
 
 static const struct rtc_class_ops pm8xxx_rtc_ops = {
@@ -351,69 +379,31 @@ static irqreturn_t pm8xxx_alarm_trigger(int irq, void *dev_id)
 {
        struct pm8xxx_rtc *rtc_dd = dev_id;
        const struct pm8xxx_rtc_regs *regs = rtc_dd->regs;
-       unsigned int ctrl_reg;
        int rc;
 
        rtc_update_irq(rtc_dd->rtc, 1, RTC_IRQF | RTC_AF);
 
-       spin_lock(&rtc_dd->ctrl_reg_lock);
-
-       /* Clear the alarm enable bit */
-       rc = regmap_read(rtc_dd->regmap, regs->alarm_ctrl, &ctrl_reg);
-       if (rc) {
-               spin_unlock(&rtc_dd->ctrl_reg_lock);
-               goto rtc_alarm_handled;
-       }
-
-       ctrl_reg &= ~regs->alarm_en;
-
-       rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg);
-       if (rc) {
-               spin_unlock(&rtc_dd->ctrl_reg_lock);
-               dev_err(rtc_dd->rtc_dev,
-                       "Write to alarm control register failed\n");
-               goto rtc_alarm_handled;
-       }
-
-       spin_unlock(&rtc_dd->ctrl_reg_lock);
-
-       /* Clear RTC alarm register */
-       rc = regmap_read(rtc_dd->regmap, regs->alarm_ctrl2, &ctrl_reg);
-       if (rc) {
-               dev_err(rtc_dd->rtc_dev,
-                       "RTC Alarm control2 register read failed\n");
-               goto rtc_alarm_handled;
-       }
+       /* Disable alarm */
+       rc = regmap_update_bits(rtc_dd->regmap, regs->alarm_ctrl,
+                               regs->alarm_en, 0);
+       if (rc)
+               return IRQ_NONE;
 
-       ctrl_reg |= PM8xxx_RTC_ALARM_CLEAR;
-       rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl2, ctrl_reg);
+       /* Clear alarm status */
+       rc = regmap_update_bits(rtc_dd->regmap, regs->alarm_ctrl2,
+                               PM8xxx_RTC_ALARM_CLEAR, 0);
        if (rc)
-               dev_err(rtc_dd->rtc_dev,
-                       "Write to RTC Alarm control2 register failed\n");
+               return IRQ_NONE;
 
-rtc_alarm_handled:
        return IRQ_HANDLED;
 }
 
 static int pm8xxx_rtc_enable(struct pm8xxx_rtc *rtc_dd)
 {
        const struct pm8xxx_rtc_regs *regs = rtc_dd->regs;
-       unsigned int ctrl_reg;
-       int rc;
-
-       /* Check if the RTC is on, else turn it on */
-       rc = regmap_read(rtc_dd->regmap, regs->ctrl, &ctrl_reg);
-       if (rc)
-               return rc;
 
-       if (!(ctrl_reg & PM8xxx_RTC_ENABLE)) {
-               ctrl_reg |= PM8xxx_RTC_ENABLE;
-               rc = regmap_write(rtc_dd->regmap, regs->ctrl, ctrl_reg);
-               if (rc)
-                       return rc;
-       }
-
-       return 0;
+       return regmap_update_bits(rtc_dd->regmap, regs->ctrl, PM8xxx_RTC_ENABLE,
+                                 PM8xxx_RTC_ENABLE);
 }
 
 static const struct pm8xxx_rtc_regs pm8921_regs = {
@@ -456,9 +446,6 @@ static const struct pm8xxx_rtc_regs pmk8350_regs = {
        .alarm_en       = BIT(7),
 };
 
-/*
- * Hardcoded RTC bases until IORESOURCE_REG mapping is figured out
- */
 static const struct of_device_id pm8xxx_id_table[] = {
        { .compatible = "qcom,pm8921-rtc", .data = &pm8921_regs },
        { .compatible = "qcom,pm8058-rtc", .data = &pm8058_regs },
@@ -470,9 +457,9 @@ MODULE_DEVICE_TABLE(of, pm8xxx_id_table);
 
 static int pm8xxx_rtc_probe(struct platform_device *pdev)
 {
-       int rc;
-       struct pm8xxx_rtc *rtc_dd;
        const struct of_device_id *match;
+       struct pm8xxx_rtc *rtc_dd;
+       int rc;
 
        match = of_match_node(pm8xxx_id_table, pdev->dev.of_node);
        if (!match)
@@ -482,24 +469,33 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev)
        if (rtc_dd == NULL)
                return -ENOMEM;
 
-       /* Initialise spinlock to protect RTC control register */
-       spin_lock_init(&rtc_dd->ctrl_reg_lock);
-
        rtc_dd->regmap = dev_get_regmap(pdev->dev.parent, NULL);
-       if (!rtc_dd->regmap) {
-               dev_err(&pdev->dev, "Parent regmap unavailable.\n");
+       if (!rtc_dd->regmap)
                return -ENXIO;
-       }
 
-       rtc_dd->rtc_alarm_irq = platform_get_irq(pdev, 0);
-       if (rtc_dd->rtc_alarm_irq < 0)
+       rtc_dd->alarm_irq = platform_get_irq(pdev, 0);
+       if (rtc_dd->alarm_irq < 0)
                return -ENXIO;
 
        rtc_dd->allow_set_time = of_property_read_bool(pdev->dev.of_node,
                                                      "allow-set-time");
 
+       rtc_dd->nvmem_cell = devm_nvmem_cell_get(&pdev->dev, "offset");
+       if (IS_ERR(rtc_dd->nvmem_cell)) {
+               rc = PTR_ERR(rtc_dd->nvmem_cell);
+               if (rc != -ENOENT)
+                       return rc;
+               rtc_dd->nvmem_cell = NULL;
+       }
+
        rtc_dd->regs = match->data;
-       rtc_dd->rtc_dev = &pdev->dev;
+       rtc_dd->dev = &pdev->dev;
+
+       if (!rtc_dd->allow_set_time) {
+               rc = pm8xxx_rtc_read_offset(rtc_dd);
+               if (rc)
+                       return rc;
+       }
 
        rc = pm8xxx_rtc_enable(rtc_dd);
        if (rc)
@@ -509,7 +505,6 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev)
 
        device_init_wakeup(&pdev->dev, 1);
 
-       /* Register the RTC device */
        rtc_dd->rtc = devm_rtc_allocate_device(&pdev->dev);
        if (IS_ERR(rtc_dd->rtc))
                return PTR_ERR(rtc_dd->rtc);
@@ -517,21 +512,18 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev)
        rtc_dd->rtc->ops = &pm8xxx_rtc_ops;
        rtc_dd->rtc->range_max = U32_MAX;
 
-       /* Request the alarm IRQ */
-       rc = devm_request_any_context_irq(&pdev->dev, rtc_dd->rtc_alarm_irq,
+       rc = devm_request_any_context_irq(&pdev->dev, rtc_dd->alarm_irq,
                                          pm8xxx_alarm_trigger,
                                          IRQF_TRIGGER_RISING,
                                          "pm8xxx_rtc_alarm", rtc_dd);
-       if (rc < 0) {
-               dev_err(&pdev->dev, "Request IRQ failed (%d)\n", rc);
+       if (rc < 0)
                return rc;
-       }
 
        rc = devm_rtc_register_device(rtc_dd->rtc);
        if (rc)
                return rc;
 
-       rc = dev_pm_set_wake_irq(&pdev->dev, rtc_dd->rtc_alarm_irq);
+       rc = dev_pm_set_wake_irq(&pdev->dev, rtc_dd->alarm_irq);
        if (rc)
                return rc;
 
@@ -559,3 +551,4 @@ MODULE_ALIAS("platform:rtc-pm8xxx");
 MODULE_DESCRIPTION("PMIC8xxx RTC driver");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Anirudh Ghayal <aghayal@codeaurora.org>");
+MODULE_AUTHOR("Johan Hovold <johan@kernel.org>");
index b0099e26e3b05fbfccb3927b1b96a638ef79dbca..ec5d7a614e2dd25ba5b5a5e3e76e558ca3fbcf33 100644 (file)
@@ -982,6 +982,12 @@ static int rv3028_probe(struct i2c_client *client)
        return 0;
 }
 
+static const struct acpi_device_id rv3028_i2c_acpi_match[] = {
+       { "MCRY3028" },
+       { }
+};
+MODULE_DEVICE_TABLE(acpi, rv3028_i2c_acpi_match);
+
 static const __maybe_unused struct of_device_id rv3028_of_match[] = {
        { .compatible = "microcrystal,rv3028", },
        { }
@@ -991,6 +997,7 @@ MODULE_DEVICE_TABLE(of, rv3028_of_match);
 static struct i2c_driver rv3028_driver = {
        .driver = {
                .name = "rtc-rv3028",
+               .acpi_match_table = rv3028_i2c_acpi_match,
                .of_match_table = of_match_ptr(rv3028_of_match),
        },
        .probe_new      = rv3028_probe,
index e4fdd47ae066c0d5be83086441a430035587b992..0852f6709a8591cf374056f932ff99aed9c2e769 100644 (file)
@@ -735,9 +735,14 @@ static int rv3029_probe(struct device *dev, struct regmap *regmap, int irq,
                return PTR_ERR(rv3029->rtc);
 
        if (rv3029->irq > 0) {
+               unsigned long irqflags = IRQF_TRIGGER_LOW;
+
+               if (dev_fwnode(dev))
+                       irqflags = 0;
+
                rc = devm_request_threaded_irq(dev, rv3029->irq,
                                               NULL, rv3029_handle_irq,
-                                              IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                              irqflags | IRQF_ONESHOT,
                                               "rv3029", dev);
                if (rc) {
                        dev_warn(dev, "unable to request IRQ, alarms disabled\n");
index c3bee305eacc6ba4735551017a2a4bd691915e5a..1ff4f2e6fa77e78c3f6368ee09e9d9fb2378fbe9 100644 (file)
@@ -930,9 +930,14 @@ static int rv3032_probe(struct i2c_client *client)
                return PTR_ERR(rv3032->rtc);
 
        if (client->irq > 0) {
+               unsigned long irqflags = IRQF_TRIGGER_LOW;
+
+               if (dev_fwnode(&client->dev))
+                       irqflags = 0;
+
                ret = devm_request_threaded_irq(&client->dev, client->irq,
                                                NULL, rv3032_handle_irq,
-                                               IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                               irqflags | IRQF_ONESHOT,
                                                "rv3032", rv3032);
                if (ret) {
                        dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n");
@@ -975,6 +980,12 @@ static int rv3032_probe(struct i2c_client *client)
        return 0;
 }
 
+static const struct acpi_device_id rv3032_i2c_acpi_match[] = {
+       { "MCRY3032" },
+       { }
+};
+MODULE_DEVICE_TABLE(acpi, rv3032_i2c_acpi_match);
+
 static const __maybe_unused struct of_device_id rv3032_of_match[] = {
        { .compatible = "microcrystal,rv3032", },
        { }
@@ -984,6 +995,7 @@ MODULE_DEVICE_TABLE(of, rv3032_of_match);
 static struct i2c_driver rv3032_driver = {
        .driver = {
                .name = "rtc-rv3032",
+               .acpi_match_table = rv3032_i2c_acpi_match,
                .of_match_table = of_match_ptr(rv3032_of_match),
        },
        .probe_new      = rv3032_probe,
index b581b6d5ad731730bbe1f4e18601917cf26f541e..25c3b9e4f515af3dcf6c68d7f689036203935264 100644 (file)
@@ -70,6 +70,7 @@ struct rv8803_data {
        struct mutex flags_lock;
        u8 ctrl;
        u8 backup;
+       u8 alarm_invalid:1;
        enum rv8803_type type;
 };
 
@@ -165,13 +166,13 @@ static int rv8803_regs_init(struct rv8803_data *rv8803)
 
 static int rv8803_regs_configure(struct rv8803_data *rv8803);
 
-static int rv8803_regs_reset(struct rv8803_data *rv8803)
+static int rv8803_regs_reset(struct rv8803_data *rv8803, bool full)
 {
        /*
         * The RV-8803 resets all registers to POR defaults after voltage-loss,
         * the Epson RTCs don't, so we manually reset the remainder here.
         */
-       if (rv8803->type == rx_8803 || rv8803->type == rx_8900) {
+       if (full || rv8803->type == rx_8803 || rv8803->type == rx_8900) {
                int ret = rv8803_regs_init(rv8803);
                if (ret)
                        return ret;
@@ -238,6 +239,11 @@ static int rv8803_get_time(struct device *dev, struct rtc_time *tm)
        u8 *date = date1;
        int ret, flags;
 
+       if (rv8803->alarm_invalid) {
+               dev_warn(dev, "Corruption detected, data may be invalid.\n");
+               return -EINVAL;
+       }
+
        flags = rv8803_read_reg(rv8803->client, RV8803_FLAG);
        if (flags < 0)
                return flags;
@@ -313,12 +319,19 @@ static int rv8803_set_time(struct device *dev, struct rtc_time *tm)
                return flags;
        }
 
-       if (flags & RV8803_FLAG_V2F) {
-               ret = rv8803_regs_reset(rv8803);
+       if ((flags & RV8803_FLAG_V2F) || rv8803->alarm_invalid) {
+               /*
+                * If we sense corruption in the alarm registers, but see no
+                * voltage loss flag, we can't rely on other registers having
+                * sensible values. Reset them fully.
+                */
+               ret = rv8803_regs_reset(rv8803, rv8803->alarm_invalid);
                if (ret) {
                        mutex_unlock(&rv8803->flags_lock);
                        return ret;
                }
+
+               rv8803->alarm_invalid = false;
        }
 
        ret = rv8803_write_reg(rv8803->client, RV8803_FLAG,
@@ -344,15 +357,33 @@ static int rv8803_get_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        if (flags < 0)
                return flags;
 
+       alarmvals[0] &= 0x7f;
+       alarmvals[1] &= 0x3f;
+       alarmvals[2] &= 0x3f;
+
+       if (!bcd_is_valid(alarmvals[0]) ||
+           !bcd_is_valid(alarmvals[1]) ||
+           !bcd_is_valid(alarmvals[2]))
+               goto err_invalid;
+
        alrm->time.tm_sec  = 0;
-       alrm->time.tm_min  = bcd2bin(alarmvals[0] & 0x7f);
-       alrm->time.tm_hour = bcd2bin(alarmvals[1] & 0x3f);
-       alrm->time.tm_mday = bcd2bin(alarmvals[2] & 0x3f);
+       alrm->time.tm_min  = bcd2bin(alarmvals[0]);
+       alrm->time.tm_hour = bcd2bin(alarmvals[1]);
+       alrm->time.tm_mday = bcd2bin(alarmvals[2]);
 
        alrm->enabled = !!(rv8803->ctrl & RV8803_CTRL_AIE);
        alrm->pending = (flags & RV8803_FLAG_AF) && alrm->enabled;
 
+       if ((unsigned int)alrm->time.tm_mday > 31 ||
+           (unsigned int)alrm->time.tm_hour >= 24 ||
+           (unsigned int)alrm->time.tm_min >= 60)
+               goto err_invalid;
+
        return 0;
+
+err_invalid:
+       rv8803->alarm_invalid = true;
+       return -EINVAL;
 }
 
 static int rv8803_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
@@ -641,9 +672,14 @@ static int rv8803_probe(struct i2c_client *client)
                return PTR_ERR(rv8803->rtc);
 
        if (client->irq > 0) {
+               unsigned long irqflags = IRQF_TRIGGER_LOW;
+
+               if (dev_fwnode(&client->dev))
+                       irqflags = 0;
+
                err = devm_request_threaded_irq(&client->dev, client->irq,
                                                NULL, rv8803_handle_irq,
-                                               IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                               irqflags | IRQF_ONESHOT,
                                                "rv8803", client);
                if (err) {
                        dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n");
index 76a49838014ba31643210571c98b55d4efd970ad..37608883a796d5be0349ce6d7675f84f3f5f227a 100644 (file)
@@ -10,7 +10,6 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_gpio.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
 #include <linux/of.h>
index d090565707390daf27f6411336c51606298e6692..b9c8dad2620853ff0398f477d02bad2a96bc5426 100644 (file)
@@ -394,10 +394,14 @@ static int rx8010_probe(struct i2c_client *client)
                return PTR_ERR(rx8010->rtc);
 
        if (client->irq > 0) {
-               dev_info(dev, "IRQ %d supplied\n", client->irq);
+               unsigned long irqflags = IRQF_TRIGGER_LOW;
+
+               if (dev_fwnode(&client->dev))
+                       irqflags = 0;
+
                err = devm_request_threaded_irq(dev, client->irq, NULL,
                                                rx8010_irq_1_handler,
-                                               IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                               irqflags | IRQF_ONESHOT,
                                                "rx8010", client);
                if (err) {
                        dev_err(dev, "unable to request IRQ\n");
index ed5516089e9a05986fd7ff090c6eab669115eded..7038f47d77ff4fcd79eeedd0dc52218824c42486 100644 (file)
@@ -136,7 +136,6 @@ struct sun6i_rtc_clk_data {
        unsigned int fixed_prescaler : 16;
        unsigned int has_prescaler : 1;
        unsigned int has_out_clk : 1;
-       unsigned int export_iosc : 1;
        unsigned int has_losc_en : 1;
        unsigned int has_auto_swt : 1;
 };
@@ -271,10 +270,8 @@ static void __init sun6i_rtc_clk_init(struct device_node *node,
        /* Yes, I know, this is ugly. */
        sun6i_rtc = rtc;
 
-       /* Only read IOSC name from device tree if it is exported */
-       if (rtc->data->export_iosc)
-               of_property_read_string_index(node, "clock-output-names", 2,
-                                             &iosc_name);
+       of_property_read_string_index(node, "clock-output-names", 2,
+                                     &iosc_name);
 
        rtc->int_osc = clk_hw_register_fixed_rate_with_accuracy(NULL,
                                                                iosc_name,
@@ -315,13 +312,10 @@ static void __init sun6i_rtc_clk_init(struct device_node *node,
                goto err_register;
        }
 
-       clk_data->num = 2;
+       clk_data->num = 3;
        clk_data->hws[0] = &rtc->hw;
        clk_data->hws[1] = __clk_get_hw(rtc->ext_losc);
-       if (rtc->data->export_iosc) {
-               clk_data->hws[2] = rtc->int_osc;
-               clk_data->num = 3;
-       }
+       clk_data->hws[2] = rtc->int_osc;
        of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data);
        return;
 
@@ -361,7 +355,6 @@ static const struct sun6i_rtc_clk_data sun8i_h3_rtc_data = {
        .fixed_prescaler = 32,
        .has_prescaler = 1,
        .has_out_clk = 1,
-       .export_iosc = 1,
 };
 
 static void __init sun8i_h3_rtc_clk_init(struct device_node *node)
@@ -379,7 +372,6 @@ static const struct sun6i_rtc_clk_data sun50i_h6_rtc_data = {
        .fixed_prescaler = 32,
        .has_prescaler = 1,
        .has_out_clk = 1,
-       .export_iosc = 1,
        .has_losc_en = 1,
        .has_auto_swt = 1,
 };
index a32457b4cbb8a9125f0dbcd29dda90a6d28c0437..2637fe1df72775205f922b0a8e4d31e2aacbbd0d 100644 (file)
@@ -29,8 +29,8 @@ static void __ap_flush_queue(struct ap_queue *aq);
  */
 static int ap_queue_enable_irq(struct ap_queue *aq, void *ind)
 {
+       union ap_qirq_ctrl qirqctrl = { .value = 0 };
        struct ap_queue_status status;
-       struct ap_qirq_ctrl qirqctrl = { 0 };
 
        qirqctrl.ir = 1;
        qirqctrl.isc = AP_ISC;
index 28a36e016ea9105aca425ab6e176ffbb07051a02..72e10abb103a09cc194ae3ccbd1d084f4b47ff83 100644 (file)
@@ -301,7 +301,7 @@ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
  */
 static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
 {
-       struct ap_qirq_ctrl aqic_gisa = {};
+       union ap_qirq_ctrl aqic_gisa = { .value = 0 };
        struct ap_queue_status status;
        int retries = 5;
 
@@ -384,7 +384,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
                                                 int isc,
                                                 struct kvm_vcpu *vcpu)
 {
-       struct ap_qirq_ctrl aqic_gisa = {};
+       union ap_qirq_ctrl aqic_gisa = { .value = 0 };
        struct ap_queue_status status = {};
        struct kvm_s390_gisa *gisa;
        struct page *h_page;
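
Both s390/ap hunks turn ap_qirq_ctrl from a struct into a union, so the bitfields alias one 64-bit word that can be cleared with { .value = 0 } and handed to hardware as a whole. A compilable sketch of that aliasing trick; the field layout is illustrative (bitfield order is implementation-defined), not the real s390 format:

#include <stdio.h>
#include <stdint.h>

union qirq_ctrl {
	struct {
		uint64_t ir  : 1;	/* interruption request */
		uint64_t isc : 3;	/* interruption subclass */
		uint64_t     : 60;
	};
	uint64_t value;			/* the whole register image */
};

int main(void)
{
	union qirq_ctrl q = { .value = 0 };	/* zero every field at once */

	q.ir = 1;
	q.isc = 3;
	printf("raw=0x%llx\n", (unsigned long long)q.value);
	return 0;
}
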
index cbc3b62cd9e5987a30c84e40176afca266e42c69..d904625afd4087536c86381126035198df5e4495 100644 (file)
@@ -4,7 +4,7 @@
  *
  * Debug traces for zfcp.
  *
- * Copyright IBM Corp. 2002, 2020
+ * Copyright IBM Corp. 2002, 2023
  */
 
 #define KMSG_COMPONENT "zfcp"
@@ -145,6 +145,48 @@ void zfcp_dbf_hba_fsf_fces(char *tag, const struct zfcp_fsf_req *req, u64 wwpn,
        spin_unlock_irqrestore(&dbf->hba_lock, flags);
 }
 
+/**
+ * zfcp_dbf_hba_fsf_reqid - trace only the tag and a request ID
+ * @tag: tag documenting the source
+ * @level: trace level
+ * @adapter: adapter instance the request ID belongs to
+ * @req_id: the request ID to trace
+ */
+void zfcp_dbf_hba_fsf_reqid(const char *const tag, const int level,
+                           struct zfcp_adapter *const adapter,
+                           const u64 req_id)
+{
+       struct zfcp_dbf *const dbf = adapter->dbf;
+       struct zfcp_dbf_hba *const rec = &dbf->hba_buf;
+       struct zfcp_dbf_hba_res *const res = &rec->u.res;
+       unsigned long flags;
+
+       if (unlikely(!debug_level_enabled(dbf->hba, level)))
+               return;
+
+       spin_lock_irqsave(&dbf->hba_lock, flags);
+       memset(rec, 0, sizeof(*rec));
+
+       memcpy(rec->tag, tag, ZFCP_DBF_TAG_LEN);
+
+       rec->id = ZFCP_DBF_HBA_RES;
+       rec->fsf_req_id = req_id;
+       rec->fsf_req_status = ~0u;
+       rec->fsf_cmd = ~0u;
+       rec->fsf_seq_no = ~0u;
+
+       res->req_issued = ~0ull;
+       res->prot_status = ~0u;
+       memset(res->prot_status_qual, 0xff, sizeof(res->prot_status_qual));
+       res->fsf_status = ~0u;
+       memset(res->fsf_status_qual, 0xff, sizeof(res->fsf_status_qual));
+       res->port_handle = ~0u;
+       res->lun_handle = ~0u;
+
+       debug_event(dbf->hba, level, rec, sizeof(*rec));
+       spin_unlock_irqrestore(&dbf->hba_lock, flags);
+}
+
 /**
  * zfcp_dbf_hba_fsf_uss - trace event for an unsolicited status buffer
  * @tag: tag indicating which kind of unsolicited status has been received
@@ -649,7 +691,7 @@ void zfcp_dbf_scsi_common(char *tag, int level, struct scsi_device *sdev,
                rec->scsi_id = sc->device->id;
                rec->scsi_lun = (u32)sc->device->lun;
                rec->scsi_lun_64_hi = (u32)(sc->device->lun >> 32);
-               rec->host_scribble = (unsigned long)sc->host_scribble;
+               rec->host_scribble = (u64)sc->host_scribble;
 
                memcpy(rec->scsi_opcode, sc->cmnd,
                       min_t(int, sc->cmd_len, ZFCP_DBF_SCSI_OPCODE));
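
zfcp_dbf_hba_fsf_reqid() above fills every field it cannot know with all-ones sentinels (~0u assignments, memset with 0xff) so trace readers can tell "unknown" apart from a legitimate zero. A runnable illustration of why the two spellings produce the same bit pattern:

#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned int qual[2];
	unsigned int status = ~0u;		/* "not applicable" sentinel */

	memset(qual, 0xff, sizeof(qual));	/* all-ones, byte by byte */
	printf("0x%08x 0x%08x 0x%08x\n", status, qual[0], qual[1]);
	/* prints 0xffffffff three times: distinguishable from a real 0 */
	return 0;
}
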
index 94de55304a02b79464aef2b02d54e6ce9278eb32..6c761299a22f41de77e55e48aa9e49a78ba9c672 100644 (file)
@@ -129,7 +129,7 @@ struct zfcp_erp_action {
        struct scsi_device *sdev;
        u32             status;       /* recovery status */
        enum zfcp_erp_steps     step;   /* active step of this erp action */
-       unsigned long           fsf_req_id;
+       u64                     fsf_req_id;
        struct timer_list timer;
 };
 
@@ -163,7 +163,7 @@ struct zfcp_adapter {
        struct Scsi_Host        *scsi_host;        /* Pointer to mid-layer */
        struct list_head        port_list;         /* remote port list */
        rwlock_t                port_list_lock;    /* port list lock */
-       unsigned long           req_no;            /* unique FSF req number */
+       u64                     req_no;            /* unique FSF req number */
        struct zfcp_reqlist     *req_list;
        u32                     fsf_req_seq_no;    /* FSF cmnd seq number */
        rwlock_t                abort_lock;        /* Protects against SCSI
@@ -325,7 +325,7 @@ static inline u64 zfcp_scsi_dev_lun(struct scsi_device *sdev)
  */
 struct zfcp_fsf_req {
        struct list_head        list;
-       unsigned long           req_id;
+       u64                     req_id;
        struct zfcp_adapter     *adapter;
        struct zfcp_qdio_req    qdio_req;
        struct completion       completion;
index c302cbb18a55c604f2f922920605ef5868857cd3..9f5152b42b0ea28cc68427575a312565b8fd0cf5 100644 (file)
@@ -4,7 +4,7 @@
  *
  * External function declarations.
  *
- * Copyright IBM Corp. 2002, 2020
+ * Copyright IBM Corp. 2002, 2023
  */
 
 #ifndef ZFCP_EXT_H
@@ -46,6 +46,9 @@ extern void zfcp_dbf_hba_fsf_res(char *, int, struct zfcp_fsf_req *);
 extern void zfcp_dbf_hba_fsf_fces(char *tag, const struct zfcp_fsf_req *req,
                                  u64 wwpn, u32 fc_security_old,
                                  u32 fc_security_new);
+extern void zfcp_dbf_hba_fsf_reqid(const char *const tag, const int level,
+                                  struct zfcp_adapter *const adapter,
+                                  const u64 req_id);
 extern void zfcp_dbf_hba_bit_err(char *, struct zfcp_fsf_req *);
 extern void zfcp_dbf_hba_def_err(struct zfcp_adapter *, u64, u16, void **);
 extern void zfcp_dbf_san_req(char *, struct zfcp_fsf_req *, u32);
index ab3ea529cca709377a95e0a8f2b52589a18c779e..ceed1b6f7cb6114a50d241b522aafccf61a295b7 100644 (file)
@@ -4,7 +4,7 @@
  *
  * Implementation of FSF commands.
  *
- * Copyright IBM Corp. 2002, 2020
+ * Copyright IBM Corp. 2002, 2023
  */
 
 #define KMSG_COMPONENT "zfcp"
@@ -884,7 +884,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
        const bool is_srb = zfcp_fsf_req_is_status_read_buffer(req);
        struct zfcp_adapter *adapter = req->adapter;
        struct zfcp_qdio *qdio = adapter->qdio;
-       unsigned long req_id = req->req_id;
+       u64 req_id = req->req_id;
 
        zfcp_reqlist_add(adapter->req_list, req);
 
@@ -892,8 +892,11 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
        req->issued = get_tod_clock();
        if (zfcp_qdio_send(qdio, &req->qdio_req)) {
                del_timer_sync(&req->timer);
+
                /* lookup request again, list might have changed */
-               zfcp_reqlist_find_rm(adapter->req_list, req_id);
+               if (zfcp_reqlist_find_rm(adapter->req_list, req_id) == NULL)
+                       zfcp_dbf_hba_fsf_reqid("fsrsrmf", 1, adapter, req_id);
+
                zfcp_erp_adapter_reopen(adapter, 0, "fsrs__1");
                return -EIO;
        }
@@ -1042,7 +1045,7 @@ struct zfcp_fsf_req *zfcp_fsf_abort_fcp_cmnd(struct scsi_cmnd *scmnd)
        struct scsi_device *sdev = scmnd->device;
        struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
        struct zfcp_qdio *qdio = zfcp_sdev->port->adapter->qdio;
-       unsigned long old_req_id = (unsigned long) scmnd->host_scribble;
+       u64 old_req_id = (u64) scmnd->host_scribble;
 
        spin_lock_irq(&qdio->req_q_lock);
        if (zfcp_qdio_sbal_get(qdio))
@@ -1065,7 +1068,7 @@ struct zfcp_fsf_req *zfcp_fsf_abort_fcp_cmnd(struct scsi_cmnd *scmnd)
        req->handler = zfcp_fsf_abort_fcp_command_handler;
        req->qtcb->header.lun_handle = zfcp_sdev->lun_handle;
        req->qtcb->header.port_handle = zfcp_sdev->port->handle;
-       req->qtcb->bottom.support.req_handle = (u64) old_req_id;
+       req->qtcb->bottom.support.req_handle = old_req_id;
 
        zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT);
        if (!zfcp_fsf_req_send(req)) {
@@ -1919,7 +1922,7 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port)
 {
        struct zfcp_qdio *qdio = wka_port->adapter->qdio;
        struct zfcp_fsf_req *req;
-       unsigned long req_id = 0;
+       u64 req_id = 0;
        int retval = -EIO;
 
        spin_lock_irq(&qdio->req_q_lock);
@@ -1978,7 +1981,7 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port)
 {
        struct zfcp_qdio *qdio = wka_port->adapter->qdio;
        struct zfcp_fsf_req *req;
-       unsigned long req_id = 0;
+       u64 req_id = 0;
        int retval = -EIO;
 
        spin_lock_irq(&qdio->req_q_lock);
@@ -2587,6 +2590,7 @@ int zfcp_fsf_fcp_cmnd(struct scsi_cmnd *scsi_cmnd)
                goto out;
        }
 
+       BUILD_BUG_ON(sizeof(scsi_cmnd->host_scribble) < sizeof(req->req_id));
        scsi_cmnd->host_scribble = (unsigned char *) req->req_id;
 
        io = &req->qtcb->bottom.io;
@@ -2732,7 +2736,7 @@ void zfcp_fsf_reqid_check(struct zfcp_qdio *qdio, int sbal_idx)
        struct qdio_buffer *sbal = qdio->res_q[sbal_idx];
        struct qdio_buffer_element *sbale;
        struct zfcp_fsf_req *fsf_req;
-       unsigned long req_id;
+       u64 req_id;
        int idx;
 
        for (idx = 0; idx < QDIO_MAX_ELEMENTS_PER_BUFFER; idx++) {
@@ -2747,7 +2751,7 @@ void zfcp_fsf_reqid_check(struct zfcp_qdio *qdio, int sbal_idx)
                         * corruption and must stop the machine immediately.
                         */
                        zfcp_qdio_siosl(adapter);
-                       panic("error: unknown req_id (%lx) on adapter %s.\n",
+                       panic("error: unknown req_id (%llx) on adapter %s.\n",
                              req_id, dev_name(&adapter->ccw_device->dev));
                }
 
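
The panic() format changes from %lx to %llx to follow req_id's new type: in the kernel, u64 is unsigned long long on current architectures, so %lx would now draw a printk format warning even where the two types happen to share a size. Portable C spells the same fix with <inttypes.h>; a small sketch:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    uint64_t req_id = 0xfeedface12345678ULL;

    /* %llx matches unsigned long long; PRIx64 expands to whatever this
     * platform's uint64_t requires. Either avoids the width mismatch
     * that %lx reintroduces on 32-bit targets. */
    printf("req_id: %llx\n", (unsigned long long)req_id);
    printf("req_id: %" PRIx64 "\n", req_id);
    return 0;
}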
index 390706867df357a77e98aaa167e4a4de71e7b3df..90134d9b69a77adba7044c91042cc08e0135e631 100644 (file)
@@ -115,7 +115,7 @@ zfcp_qdio_sbale_curr(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req)
  */
 static inline
 void zfcp_qdio_req_init(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req,
-                       unsigned long req_id, u8 sbtype, void *data, u32 len)
+                       u64 req_id, u8 sbtype, void *data, u32 len)
 {
        struct qdio_buffer_element *sbale;
        int count = min(atomic_read(&qdio->req_q_free),
index 9b8ff249e31ca9aa45ba3f29206d9b6a0cb4d43d..59fbb1b128cb9aadb74178dd2251413b59f7d9ad 100644 (file)
@@ -5,14 +5,16 @@
  * Data structure and helper functions for tracking pending FSF
  * requests.
  *
- * Copyright IBM Corp. 2009, 2016
+ * Copyright IBM Corp. 2009, 2023
  */
 
 #ifndef ZFCP_REQLIST_H
 #define ZFCP_REQLIST_H
 
+#include <linux/types.h>
+
 /* number of hash buckets */
-#define ZFCP_REQ_LIST_BUCKETS 128
+#define ZFCP_REQ_LIST_BUCKETS 128u
 
 /**
  * struct zfcp_reqlist - Container for request list (reqlist)
@@ -24,7 +26,7 @@ struct zfcp_reqlist {
        struct list_head buckets[ZFCP_REQ_LIST_BUCKETS];
 };
 
-static inline int zfcp_reqlist_hash(unsigned long req_id)
+static inline size_t zfcp_reqlist_hash(u64 req_id)
 {
        return req_id % ZFCP_REQ_LIST_BUCKETS;
 }
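
With request IDs widened to u64, the bucket index also moves to an unsigned type (size_t), and the bucket count gains a u suffix so the modulo stays unsigned throughout. Because 128 is a power of two, the division is a mask in practice. A sketch of the bucket selection:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define BUCKETS 128u    /* power of two, unsigned like the new constant */

static size_t bucket_of(uint64_t req_id)
{
    /* For a power-of-two count, x % N equals x & (N - 1); compilers
     * emit the mask form, so the readable modulo costs nothing. */
    return req_id % BUCKETS;
}

int main(void)
{
    uint64_t ids[] = { 0, 127, 128, UINT64_MAX };
    size_t i;

    for (i = 0; i < sizeof(ids) / sizeof(ids[0]); i++)
        printf("id %llx -> bucket %zu\n",
               (unsigned long long)ids[i], bucket_of(ids[i]));
    return 0;
}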
@@ -37,7 +39,7 @@ static inline int zfcp_reqlist_hash(unsigned long req_id)
  */
 static inline struct zfcp_reqlist *zfcp_reqlist_alloc(void)
 {
-       unsigned int i;
+       size_t i;
        struct zfcp_reqlist *rl;
 
        rl = kzalloc(sizeof(struct zfcp_reqlist), GFP_KERNEL);
@@ -60,7 +62,7 @@ static inline struct zfcp_reqlist *zfcp_reqlist_alloc(void)
  */
 static inline int zfcp_reqlist_isempty(struct zfcp_reqlist *rl)
 {
-       unsigned int i;
+       size_t i;
 
        for (i = 0; i < ZFCP_REQ_LIST_BUCKETS; i++)
                if (!list_empty(&rl->buckets[i]))
@@ -81,10 +83,10 @@ static inline void zfcp_reqlist_free(struct zfcp_reqlist *rl)
 }
 
 static inline struct zfcp_fsf_req *
-_zfcp_reqlist_find(struct zfcp_reqlist *rl, unsigned long req_id)
+_zfcp_reqlist_find(struct zfcp_reqlist *rl, u64 req_id)
 {
        struct zfcp_fsf_req *req;
-       unsigned int i;
+       size_t i;
 
        i = zfcp_reqlist_hash(req_id);
        list_for_each_entry(req, &rl->buckets[i], list)
@@ -102,7 +104,7 @@ _zfcp_reqlist_find(struct zfcp_reqlist *rl, unsigned long req_id)
  * or NULL if there is no known FSF request with this id.
  */
 static inline struct zfcp_fsf_req *
-zfcp_reqlist_find(struct zfcp_reqlist *rl, unsigned long req_id)
+zfcp_reqlist_find(struct zfcp_reqlist *rl, u64 req_id)
 {
        unsigned long flags;
        struct zfcp_fsf_req *req;
@@ -127,7 +129,7 @@ zfcp_reqlist_find(struct zfcp_reqlist *rl, unsigned long req_id)
  * NULL if it has not been found.
  */
 static inline struct zfcp_fsf_req *
-zfcp_reqlist_find_rm(struct zfcp_reqlist *rl, unsigned long req_id)
+zfcp_reqlist_find_rm(struct zfcp_reqlist *rl, u64 req_id)
 {
        unsigned long flags;
        struct zfcp_fsf_req *req;
@@ -154,7 +156,7 @@ zfcp_reqlist_find_rm(struct zfcp_reqlist *rl, unsigned long req_id)
 static inline void zfcp_reqlist_add(struct zfcp_reqlist *rl,
                                    struct zfcp_fsf_req *req)
 {
-       unsigned int i;
+       size_t i;
        unsigned long flags;
 
        i = zfcp_reqlist_hash(req->req_id);
@@ -172,7 +174,7 @@ static inline void zfcp_reqlist_add(struct zfcp_reqlist *rl,
 static inline void zfcp_reqlist_move(struct zfcp_reqlist *rl,
                                     struct list_head *list)
 {
-       unsigned int i;
+       size_t i;
        unsigned long flags;
 
        spin_lock_irqsave(&rl->lock, flags);
@@ -200,7 +202,7 @@ zfcp_reqlist_apply_for_all(struct zfcp_reqlist *rl,
 {
        struct zfcp_fsf_req *req;
        unsigned long flags;
-       unsigned int i;
+       size_t i;
 
        spin_lock_irqsave(&rl->lock, flags);
        for (i = 0; i < ZFCP_REQ_LIST_BUCKETS; i++)
index 526ac240d9fe8e3eb93deec97b97cbf4514474cf..3dbf4b21d127636ec96de9027a87935cf61397ad 100644 (file)
@@ -170,7 +170,7 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
                (struct zfcp_adapter *) scsi_host->hostdata[0];
        struct zfcp_fsf_req *old_req, *abrt_req;
        unsigned long flags;
-       unsigned long old_reqid = (unsigned long) scpnt->host_scribble;
+       u64 old_reqid = (u64) scpnt->host_scribble;
        int retval = SUCCESS, ret;
        int retry = 3;
        char *dbf_tag;
index 3687b5c0cf905827aa4ba70fbc21d42c15e7b3f7..d8fc7beafa205a390633d6e1c223f15cabfc59e8 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/scatterlist.h>
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
-#include <linux/version.h>
 #include <scsi/scsi_device.h>
 #include <scsi/libiscsi_tcp.h>
 
index 12346e2297fdb97903e68df29d9aea9849103cb9..f7f62e56afcae6db20711357ed93242562ddcfed 100644 (file)
@@ -181,6 +181,7 @@ void scsi_remove_host(struct Scsi_Host *shost)
        scsi_forget_host(shost);
        mutex_unlock(&shost->scan_mutex);
        scsi_proc_host_rm(shost);
+       scsi_proc_hostdir_rm(shost->hostt);
 
        /*
         * New SCSI devices cannot be attached anymore because of the SCSI host
@@ -340,6 +341,7 @@ static void scsi_host_dev_release(struct device *dev)
        struct Scsi_Host *shost = dev_to_shost(dev);
        struct device *parent = dev->parent;
 
+       /* In case scsi_remove_host() has not been called. */
        scsi_proc_hostdir_rm(shost->hostt);
 
        /* Wait for functions invoked through call_rcu(&scmd->rcu, ...) */
@@ -356,7 +358,7 @@ static void scsi_host_dev_release(struct device *dev)
                /*
                 * Free the shost_dev device name here if scsi_host_alloc()
                 * and scsi_host_put() have been called but neither
-                * scsi_host_add() nor scsi_host_remove() has been called.
+                * scsi_host_add() nor scsi_remove_host() has been called.
                 * This avoids that the memory allocated for the shost_dev
                 * name is leaked.
                 */
index 198d3f20d682887cf550b2d6bfadcc95b4db7cb1..c74053f0b72f464d2cae1cf694a116d932d22501 100644 (file)
@@ -1516,23 +1516,22 @@ static void ipr_process_ccn(struct ipr_cmnd *ipr_cmd)
 }
 
 /**
- * strip_and_pad_whitespace - Strip and pad trailing whitespace.
- * @i:         index into buffer
- * @buf:               string to modify
+ * strip_whitespace - Strip trailing whitespace.
+ * @i:         size of buffer
+ * @buf:       string to modify
  *
- * This function will strip all trailing whitespace, pad the end
- * of the string with a single space, and NULL terminate the string.
+ * This function will strip all trailing whitespace and
+ * NUL terminate the string.
  *
- * Return value:
- *     new length of string
  **/
-static int strip_and_pad_whitespace(int i, char *buf)
+static void strip_whitespace(int i, char *buf)
 {
+       if (i < 1)
+               return;
+       i--;
        while (i && buf[i] == ' ')
                i--;
-       buf[i+1] = ' ';
-       buf[i+2] = '\0';
-       return i + 2;
+       buf[i+1] = '\0';
 }
 
 /**
@@ -1547,19 +1546,21 @@ static int strip_and_pad_whitespace(int i, char *buf)
 static void ipr_log_vpd_compact(char *prefix, struct ipr_hostrcb *hostrcb,
                                struct ipr_vpd *vpd)
 {
-       char buffer[IPR_VENDOR_ID_LEN + IPR_PROD_ID_LEN + IPR_SERIAL_NUM_LEN + 3];
-       int i = 0;
+       char vendor_id[IPR_VENDOR_ID_LEN + 1];
+       char product_id[IPR_PROD_ID_LEN + 1];
+       char sn[IPR_SERIAL_NUM_LEN + 1];
 
-       memcpy(buffer, vpd->vpids.vendor_id, IPR_VENDOR_ID_LEN);
-       i = strip_and_pad_whitespace(IPR_VENDOR_ID_LEN - 1, buffer);
+       memcpy(vendor_id, vpd->vpids.vendor_id, IPR_VENDOR_ID_LEN);
+       strip_whitespace(IPR_VENDOR_ID_LEN, vendor_id);
 
-       memcpy(&buffer[i], vpd->vpids.product_id, IPR_PROD_ID_LEN);
-       i = strip_and_pad_whitespace(i + IPR_PROD_ID_LEN - 1, buffer);
+       memcpy(product_id, vpd->vpids.product_id, IPR_PROD_ID_LEN);
+       strip_whitespace(IPR_PROD_ID_LEN, product_id);
 
-       memcpy(&buffer[i], vpd->sn, IPR_SERIAL_NUM_LEN);
-       buffer[IPR_SERIAL_NUM_LEN + i] = '\0';
+       memcpy(sn, vpd->sn, IPR_SERIAL_NUM_LEN);
+       strip_whitespace(IPR_SERIAL_NUM_LEN, sn);
 
-       ipr_hcam_err(hostrcb, "%s VPID/SN: %s\n", prefix, buffer);
+       ipr_hcam_err(hostrcb, "%s VPID/SN: %s %s %s\n", prefix,
+                    vendor_id, product_id, sn);
 }
 
 /**
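
The rewrite above trades the old shared pad-and-concatenate buffer for three fixed-width fields that are each copied, stripped, and NUL-terminated independently, which removes the fragile index bookkeeping. A runnable sketch of the new helper applied to a mock 8-byte vendor-ID field (field width hypothetical):

#include <stdio.h>
#include <string.h>

/* Strip trailing spaces from a buffer of size i and NUL-terminate it,
 * mirroring the reworked ipr helper above. */
static void strip_whitespace(int i, char *buf)
{
    if (i < 1)
        return;
    i--;                        /* index of the buffer's last byte */
    while (i && buf[i] == ' ')
        i--;
    buf[i + 1] = '\0';
}

int main(void)
{
    /* A fixed-width, space-padded field, as VPD data arrives. */
    char raw[8] = { 'I', 'B', 'M', ' ', ' ', ' ', ' ', ' ' };
    char vendor_id[sizeof(raw) + 1];    /* +1 leaves room for the NUL */

    memcpy(vendor_id, raw, sizeof(raw));
    strip_whitespace(sizeof(raw), vendor_id);
    printf("[%s]\n", vendor_id);        /* prints [IBM] */
    return 0;
}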
@@ -9495,11 +9496,10 @@ static pci_ers_result_t ipr_pci_error_detected(struct pci_dev *pdev,
  * This function takes care of initializing the adapter to the point
  * where it can accept new commands.
  * Return value:
- *     0 on success / -EIO on failure
+ *     none
  **/
-static int ipr_probe_ioa_part2(struct ipr_ioa_cfg *ioa_cfg)
+static void ipr_probe_ioa_part2(struct ipr_ioa_cfg *ioa_cfg)
 {
-       int rc = 0;
        unsigned long host_lock_flags = 0;
 
        ENTER;
@@ -9515,7 +9515,6 @@ static int ipr_probe_ioa_part2(struct ipr_ioa_cfg *ioa_cfg)
        spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags);
 
        LEAVE;
-       return rc;
 }
 
 /**
@@ -10558,12 +10557,7 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
                return rc;
 
        ioa_cfg = pci_get_drvdata(pdev);
-       rc = ipr_probe_ioa_part2(ioa_cfg);
-
-       if (rc) {
-               __ipr_remove(pdev);
-               return rc;
-       }
+       ipr_probe_ioa_part2(ioa_cfg);
 
        rc = scsi_add_host(ioa_cfg->host, &pdev->dev);
 
index 76c3434f897669b1be7f63c7cce5d9b2f97da9c7..22f2e046e8eb704998f5866ab1e62da7481f3226 100644 (file)
@@ -2541,7 +2541,7 @@ lpfc_sriov_hw_max_virtfn_show(struct device *dev,
 
 /**
  * lpfc_enable_bbcr_set: Sets an attribute value.
- * @phba: pointer the the adapter structure.
+ * @phba: pointer to the adapter structure.
  * @val: integer attribute value.
  *
  * Description:
@@ -2632,7 +2632,7 @@ lpfc_##attr##_show(struct device *dev, struct device_attribute *attr, \
  * takes a default argument, a minimum and maximum argument.
  *
  * lpfc_##attr##_init: Initializes an attribute.
- * @phba: pointer the the adapter structure.
+ * @phba: pointer to the adapter structure.
  * @val: integer attribute value.
  *
  * Validates the min and max values then sets the adapter config field
@@ -2665,7 +2665,7 @@ lpfc_##attr##_init(struct lpfc_hba *phba, uint val) \
  * into a function with the name lpfc_hba_queue_depth_set
  *
  * lpfc_##attr##_set: Sets an attribute value.
- * @phba: pointer the the adapter structure.
+ * @phba: pointer to the adapter structure.
  * @val: integer attribute value.
  *
  * Description:
@@ -2794,7 +2794,7 @@ lpfc_##attr##_show(struct device *dev, struct device_attribute *attr, \
  * lpfc_##attr##_init: validates the min and max values then sets the
  * adapter config field accordingly, or uses the default if out of range
  * and prints an error message.
- * @phba: pointer the the adapter structure.
+ * @phba: pointer to the adapter structure.
  * @val: integer attribute value.
  *
  * Returns:
@@ -2826,7 +2826,7 @@ lpfc_##attr##_init(struct lpfc_vport *vport, uint val) \
  * lpfc_##attr##_set: validates the min and max values then sets the
  * adapter config field if in the valid range. prints error message
  * and does not set the parameter if invalid.
- * @phba: pointer the the adapter structure.
+ * @phba: pointer to the adapter structure.
  * @val:       integer attribute value.
  *
  * Returns:
index 569639dc8b2c8e162a712b727a9762a2cc77b911..35b252f1ef7311bb790ce5d448de889d46837960 100644 (file)
@@ -8886,7 +8886,7 @@ reject_out:
  * @rrq: Pointer to the rrq struct.
  *
  * Build an ELS RRQ command and send it to the target. If the issue_iocb is
- * Successful the the completion handler will clear the RRQ.
+ * successful, the completion handler will clear the RRQ.
  *
  * Return codes
  *   0 - Successfully sent rrq els iocb.
@@ -10287,7 +10287,7 @@ lpfc_els_rcv_fpin(struct lpfc_vport *vport, void *p, u32 fpin_length)
                /* Send every descriptor individually to the upper layer */
                if (deliver)
                        fc_host_fpin_rcv(lpfc_shost_from_vport(vport),
-                                        fpin_length, (char *)fpin);
+                                        fpin_length, (char *)fpin, 0);
                desc_cnt++;
        }
 }
index a6df0a5b40060abe075e3c70e2a86838be5a356c..66cd0b1dbbd027e693133633da01a9292ce88de9 100644 (file)
@@ -2459,7 +2459,7 @@ static void lpfc_sli4_fcf_pri_list_del(struct lpfc_hba *phba,
  * @phba: pointer to lpfc hba data structure.
  * @fcf_index: the index of the fcf record to update
  * This routine acquires the hbalock and then set the LPFC_FCF_FLOGI_FAILED
- * flag so the the round robin slection for the particular priority level
+ * flag so the round robin selection for the particular priority level
  * will try a different fcf record that does not have this bit set.
  * If the fcf record is re-read for any reason this flag is cleared before
  * adding it to the priority list.
index 6eb4085a3a222d9b295f0311fc4dd658cc11561f..73b544bfbb2e64376cc0b0bd9ac90ac3b0c2a540 100644 (file)
@@ -5502,7 +5502,7 @@ lpfc_sli4_async_link_evt(struct lpfc_hba *phba,
        bf_set(lpfc_mbx_read_top_link_spd, la,
               (bf_get(lpfc_acqe_link_speed, acqe_link)));
 
-       /* Fake the the following irrelvant fields */
+       /* Fake the following irrelevant fields */
        bf_set(lpfc_mbx_read_top_topology, la, LPFC_TOPOLOGY_PT_PT);
        bf_set(lpfc_mbx_read_top_alpa_granted, la, 0);
        bf_set(lpfc_mbx_read_top_il, la, 0);
@@ -12549,7 +12549,7 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
                        /* Mark CPU as IRQ not assigned by the kernel */
                        cpup->flag |= LPFC_CPU_MAP_UNASSIGN;
 
-                       /* If so, find a new_cpup thats on the the SAME
+                       /* If so, find a new_cpup that is on the SAME
                         * phys_id as cpup. start_cpu will start where we
                         * left off so all unassigned entries don't get assigned
                         * the IRQ of the first entry.
@@ -12563,7 +12563,7 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
                                        goto found_same;
                                new_cpu = cpumask_next(
                                        new_cpu, cpu_present_mask);
-                               if (new_cpu == nr_cpumask_bits)
+                               if (new_cpu >= nr_cpu_ids)
                                        new_cpu = first_cpu;
                        }
                        /* At this point, we leave the CPU as unassigned */
@@ -12577,7 +12577,7 @@ found_same:
                         * selecting the same IRQ.
                         */
                        start_cpu = cpumask_next(new_cpu, cpu_present_mask);
-                       if (start_cpu == nr_cpumask_bits)
+                       if (start_cpu >= nr_cpu_ids)
                                start_cpu = first_cpu;
 
                        lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
@@ -12613,7 +12613,7 @@ found_same:
                                        goto found_any;
                                new_cpu = cpumask_next(
                                        new_cpu, cpu_present_mask);
-                               if (new_cpu == nr_cpumask_bits)
+                               if (new_cpu >= nr_cpu_ids)
                                        new_cpu = first_cpu;
                        }
                        /* We should never leave an entry unassigned */
@@ -12631,7 +12631,7 @@ found_any:
                         * selecting the same IRQ.
                         */
                        start_cpu = cpumask_next(new_cpu, cpu_present_mask);
-                       if (start_cpu == nr_cpumask_bits)
+                       if (start_cpu >= nr_cpu_ids)
                                start_cpu = first_cpu;
 
                        lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
@@ -12704,7 +12704,7 @@ found_any:
                                goto found_hdwq;
                        }
                        new_cpu = cpumask_next(new_cpu, cpu_present_mask);
-                       if (new_cpu == nr_cpumask_bits)
+                       if (new_cpu >= nr_cpu_ids)
                                new_cpu = first_cpu;
                }
 
@@ -12719,7 +12719,7 @@ found_any:
                                goto found_hdwq;
 
                        new_cpu = cpumask_next(new_cpu, cpu_present_mask);
-                       if (new_cpu == nr_cpumask_bits)
+                       if (new_cpu >= nr_cpu_ids)
                                new_cpu = first_cpu;
                }
 
@@ -12730,7 +12730,7 @@ found_any:
  found_hdwq:
                /* We found an available entry, copy the IRQ info */
                start_cpu = cpumask_next(new_cpu, cpu_present_mask);
-               if (start_cpu == nr_cpumask_bits)
+               if (start_cpu >= nr_cpu_ids)
                        start_cpu = first_cpu;
                cpup->hdwq = new_cpup->hdwq;
  logit:
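
Every hunk in this file makes the same correction: a cpumask_next() walk terminates at nr_cpu_ids, which may be lower than nr_cpumask_bits, so testing equality against the mask width could miss the end, and the wrap-to-first-CPU logic would never run. A user-space model of the wrap-around scan (a bitmask stands in for cpu_present_mask):

#include <stdio.h>

#define NR_CPU_IDS 8               /* stand-in for nr_cpu_ids */

static const unsigned int present = 0xb4;   /* CPUs 2, 4, 5, 7 */

/* Smallest present CPU strictly above cur, or NR_CPU_IDS at the end;
 * models cpumask_next(cur, cpu_present_mask). */
static int cpu_next(int cur)
{
    int cpu;

    for (cpu = cur + 1; cpu < NR_CPU_IDS; cpu++)
        if (present & (1u << cpu))
            return cpu;
    return NR_CPU_IDS;
}

int main(void)
{
    int first = cpu_next(-1);
    int cpu = first;
    int n;

    /* Visit each present CPU once, wrapping as lpfc does; the robust
     * end test is ">= NR_CPU_IDS", not equality with a mask width that
     * may exceed the real CPU count. */
    for (n = 0; n < 4; n++) {
        printf("cpu %d\n", cpu);
        cpu = cpu_next(cpu);
        if (cpu >= NR_CPU_IDS)
            cpu = first;
    }
    return 0;
}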
index 9858b174376970c8394997ad665c7bda4db74bcd..0dfdc0c4c08c65ef67895db061fcfc5490bbd7e6 100644 (file)
@@ -2509,7 +2509,7 @@ lpfc_sli4_dump_page_a0(struct lpfc_hba *phba, struct lpfcMboxq *mbox)
  * information via a READ_FCF mailbox command. This mailbox command also is used
  * to indicate where received unsolicited frames from this FCF will be sent. By
  * default this routine will set up the FCF to forward all unsolicited frames
- * the the RQ ID passed in the @phba. This can be overridden by the caller for
+ * to the RQ ID passed in the @phba. This can be overridden by the caller for
  * more complicated setups.
  **/
 void
@@ -2577,7 +2577,7 @@ lpfc_reg_fcfi(struct lpfc_hba *phba, struct lpfcMboxq *mbox)
  * information via a READ_FCF mailbox command. This mailbox command also is used
  * to indicate where received unsolicited frames from this FCF will be sent. By
  * default this routine will set up the FCF to forward all unsolicited frames
- * the the RQ ID passed in the @phba. This can be overridden by the caller for
+ * to the RQ ID passed in the @phba. This can be overridden by the caller for
  * more complicated setups.
  **/
 void
index f7cfac0da9b6e0779fc93fc364b7eef5b538816a..7517dd55fe91953375e91054bb72e2cb5f8ab620 100644 (file)
@@ -1469,7 +1469,7 @@ lpfc_nvmet_cleanup_io_context(struct lpfc_hba *phba)
        if (!infop)
                return;
 
-       /* Cycle the the entire CPU context list for every MRQ */
+       /* Cycle the entire CPU context list for every MRQ */
        for (i = 0; i < phba->cfg_nvmet_mrq; i++) {
                for_each_present_cpu(j) {
                        infop = lpfc_get_ctx_list(phba, j, i);
index edbd81c3b64326563cc84b906778a2318e11ee32..c5b69f313af365694f77f83fb569e427f084acd4 100644 (file)
@@ -20804,7 +20804,7 @@ lpfc_log_fw_write_cmpl(struct lpfc_hba *phba, u32 shdr_status,
  * the offset after the write object mailbox has completed. @size is used to
  * determine the end of the object and whether the eof bit should be set.
  *
- * Return 0 is successful and offset will contain the the new offset to use
+ * Return 0 if successful and offset will contain the new offset to use
  * for the next write.
  * Return negative value for error cases.
  **/
index def4c5e15cd89b65fb0ec59479f00f962c7ce24d..23de2603e71fdd3736ceef7ca1d5fb1b91d35240 100644 (file)
@@ -29,7 +29,6 @@
 #include <linux/types.h>
 #include <linux/uaccess.h>
 #include <linux/utsname.h>
-#include <linux/version.h>
 #include <linux/workqueue.h>
 #include <asm/unaligned.h>
 #include <scsi/scsi.h>
@@ -955,19 +954,16 @@ struct scmd_priv {
  * @chain_buf_count: Chain buffer count
  * @chain_buf_pool: Chain buffer pool
  * @chain_sgl_list: Chain SGL list
- * @chain_bitmap_sz: Chain buffer allocator bitmap size
  * @chain_bitmap: Chain buffer allocator bitmap
  * @chain_buf_lock: Chain buffer list lock
  * @bsg_cmds: Command tracker for BSG command
  * @host_tm_cmds: Command tracker for task management commands
  * @dev_rmhs_cmds: Command tracker for device removal commands
  * @evtack_cmds: Command tracker for event ack commands
- * @devrem_bitmap_sz: Device removal bitmap size
  * @devrem_bitmap: Device removal bitmap
- * @dev_handle_bitmap_sz: Device handle bitmap size
+ * @dev_handle_bitmap_bits: Number of bits in device handle bitmap
  * @removepend_bitmap: Remove pending bitmap
  * @delayed_rmhs_list: Delayed device removal list
- * @evtack_cmds_bitmap_sz: Event Ack bitmap size
  * @evtack_cmds_bitmap: Event Ack bitmap
  * @delayed_evtack_cmds_list: Delayed event acknowledgment list
  * @ts_update_counter: Timestamp update counter
@@ -1128,7 +1124,6 @@ struct mpi3mr_ioc {
        u32 chain_buf_count;
        struct dma_pool *chain_buf_pool;
        struct chain_element *chain_sgl_list;
-       u16  chain_bitmap_sz;
        void *chain_bitmap;
        spinlock_t chain_buf_lock;
 
@@ -1136,12 +1131,10 @@ struct mpi3mr_ioc {
        struct mpi3mr_drv_cmd host_tm_cmds;
        struct mpi3mr_drv_cmd dev_rmhs_cmds[MPI3MR_NUM_DEVRMCMD];
        struct mpi3mr_drv_cmd evtack_cmds[MPI3MR_NUM_EVTACKCMD];
-       u16 devrem_bitmap_sz;
        void *devrem_bitmap;
-       u16 dev_handle_bitmap_sz;
+       u16 dev_handle_bitmap_bits;
        void *removepend_bitmap;
        struct list_head delayed_rmhs_list;
-       u16 evtack_cmds_bitmap_sz;
        void *evtack_cmds_bitmap;
        struct list_head delayed_evtack_cmds_list;
 
index 9baac224b2135d399f0550598dc41867dbbbdf73..bff63770239797ce2e9a5e6be4c3d82281d42c14 100644 (file)
@@ -293,7 +293,6 @@ out:
 static long mpi3mr_get_all_tgt_info(struct mpi3mr_ioc *mrioc,
        struct bsg_job *job)
 {
-       long rval = -EINVAL;
        u16 num_devices = 0, i = 0, size;
        unsigned long flags;
        struct mpi3mr_tgt_dev *tgtdev;
@@ -304,7 +303,7 @@ static long mpi3mr_get_all_tgt_info(struct mpi3mr_ioc *mrioc,
        if (job->request_payload.payload_len < sizeof(u32)) {
                dprint_bsg_err(mrioc, "%s: invalid size argument\n",
                    __func__);
-               return rval;
+               return -EINVAL;
        }
 
        spin_lock_irqsave(&mrioc->tgtdev_lock, flags);
@@ -312,7 +311,7 @@ static long mpi3mr_get_all_tgt_info(struct mpi3mr_ioc *mrioc,
                num_devices++;
        spin_unlock_irqrestore(&mrioc->tgtdev_lock, flags);
 
-       if ((job->request_payload.payload_len == sizeof(u32)) ||
+       if ((job->request_payload.payload_len <= sizeof(u64)) ||
                list_empty(&mrioc->tgtdev_list)) {
                sg_copy_from_buffer(job->request_payload.sg_list,
                                    job->request_payload.sg_cnt,
@@ -320,14 +319,14 @@ static long mpi3mr_get_all_tgt_info(struct mpi3mr_ioc *mrioc,
                return 0;
        }
 
-       kern_entrylen = (num_devices - 1) * sizeof(*devmap_info);
-       size = sizeof(*alltgt_info) + kern_entrylen;
+       kern_entrylen = num_devices * sizeof(*devmap_info);
+       size = sizeof(u64) + kern_entrylen;
        alltgt_info = kzalloc(size, GFP_KERNEL);
        if (!alltgt_info)
                return -ENOMEM;
 
        devmap_info = alltgt_info->dmi;
-       memset((u8 *)devmap_info, 0xFF, (kern_entrylen + sizeof(*devmap_info)));
+       memset((u8 *)devmap_info, 0xFF, kern_entrylen);
        spin_lock_irqsave(&mrioc->tgtdev_lock, flags);
        list_for_each_entry(tgtdev, &mrioc->tgtdev_list, list) {
                if (i < num_devices) {
@@ -344,25 +343,18 @@ static long mpi3mr_get_all_tgt_info(struct mpi3mr_ioc *mrioc,
        num_devices = i;
        spin_unlock_irqrestore(&mrioc->tgtdev_lock, flags);
 
-       memcpy(&alltgt_info->num_devices, &num_devices, sizeof(num_devices));
+       alltgt_info->num_devices = num_devices;
 
-       usr_entrylen = (job->request_payload.payload_len - sizeof(u32)) / sizeof(*devmap_info);
+       usr_entrylen = (job->request_payload.payload_len - sizeof(u64)) /
+               sizeof(*devmap_info);
        usr_entrylen *= sizeof(*devmap_info);
        min_entrylen = min(usr_entrylen, kern_entrylen);
-       if (min_entrylen && (!memcpy(&alltgt_info->dmi, devmap_info, min_entrylen))) {
-               dprint_bsg_err(mrioc, "%s:%d: device map info copy failed\n",
-                   __func__, __LINE__);
-               rval = -EFAULT;
-               goto out;
-       }
 
        sg_copy_from_buffer(job->request_payload.sg_list,
                            job->request_payload.sg_cnt,
-                           alltgt_info, job->request_payload.payload_len);
-       rval = 0;
-out:
+                           alltgt_info, (min_entrylen + sizeof(u64)));
        kfree(alltgt_info);
-       return rval;
+       return 0;
 }
 /**
  * mpi3mr_get_change_count - Get topology change count
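
The reworked mpi3mr_get_all_tgt_info() derives every size from the payload: the entry array is preceded by a sizeof(u64) header (the u32 device count plus alignment padding, which the old sizeof(u32) accounting got wrong), the kernel buffer holds num_devices whole entries, and only min(user, kernel) entry bytes plus the header are copied out. The arithmetic, sketched with a hypothetical 24-byte entry size:

#include <stdint.h>
#include <stdio.h>

#define ENTRY_SZ 24u    /* hypothetical sizeof(*devmap_info) */

int main(void)
{
    uint32_t payload_len = 8 + 5 * ENTRY_SZ;    /* user offered 5 slots */
    uint32_t num_devices = 3;                   /* kernel tracks 3 */
    uint32_t kern_entrylen = num_devices * ENTRY_SZ;
    /* Entries follow a u64-sized header: the u32 count plus padding. */
    uint32_t usr_entrylen =
        (payload_len - sizeof(uint64_t)) / ENTRY_SZ * ENTRY_SZ;
    uint32_t min_entrylen =
        usr_entrylen < kern_entrylen ? usr_entrylen : kern_entrylen;

    printf("copy %u bytes back: %u header + %u entry bytes\n",
           (unsigned)(min_entrylen + sizeof(uint64_t)),
           (unsigned)sizeof(uint64_t), (unsigned)min_entrylen);
    return 0;
}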
index 286a44506578b40665e1a45e0e0c88da9267b3b3..758f7ca9e0ee803bad899d35fef7bbd04b222f69 100644 (file)
@@ -1128,7 +1128,6 @@ static int mpi3mr_issue_and_process_mur(struct mpi3mr_ioc *mrioc,
 static int
 mpi3mr_revalidate_factsdata(struct mpi3mr_ioc *mrioc)
 {
-       u16 dev_handle_bitmap_sz;
        void *removepend_bitmap;
 
        if (mrioc->facts.reply_sz > mrioc->reply_sz) {
@@ -1160,25 +1159,23 @@ mpi3mr_revalidate_factsdata(struct mpi3mr_ioc *mrioc)
                    "\tcontroller while sas transport support is enabled at the\n"
                    "\tdriver, please reboot the system or reload the driver\n");
 
-       dev_handle_bitmap_sz = mrioc->facts.max_devhandle / 8;
-       if (mrioc->facts.max_devhandle % 8)
-               dev_handle_bitmap_sz++;
-       if (dev_handle_bitmap_sz > mrioc->dev_handle_bitmap_sz) {
-               removepend_bitmap = krealloc(mrioc->removepend_bitmap,
-                   dev_handle_bitmap_sz, GFP_KERNEL);
+       if (mrioc->facts.max_devhandle > mrioc->dev_handle_bitmap_bits) {
+               removepend_bitmap = bitmap_zalloc(mrioc->facts.max_devhandle,
+                                                 GFP_KERNEL);
                if (!removepend_bitmap) {
                        ioc_err(mrioc,
-                           "failed to increase removepend_bitmap sz from: %d to %d\n",
-                           mrioc->dev_handle_bitmap_sz, dev_handle_bitmap_sz);
+                               "failed to increase removepend_bitmap bits from %d to %d\n",
+                               mrioc->dev_handle_bitmap_bits,
+                               mrioc->facts.max_devhandle);
                        return -EPERM;
                }
-               memset(removepend_bitmap + mrioc->dev_handle_bitmap_sz, 0,
-                   dev_handle_bitmap_sz - mrioc->dev_handle_bitmap_sz);
+               bitmap_free(mrioc->removepend_bitmap);
                mrioc->removepend_bitmap = removepend_bitmap;
                ioc_info(mrioc,
-                   "increased dev_handle_bitmap_sz from %d to %d\n",
-                   mrioc->dev_handle_bitmap_sz, dev_handle_bitmap_sz);
-               mrioc->dev_handle_bitmap_sz = dev_handle_bitmap_sz;
+                        "increased bits of dev_handle_bitmap from %d to %d\n",
+                        mrioc->dev_handle_bitmap_bits,
+                        mrioc->facts.max_devhandle);
+               mrioc->dev_handle_bitmap_bits = mrioc->facts.max_devhandle;
        }
 
        return 0;
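
This hunk and the allocation hunks below replace hand-rolled bits-to-bytes rounding plus kzalloc()/krealloc() with bitmap_zalloc()/bitmap_free(), which take a bit count and size the backing array of unsigned longs themselves. A user-space analogue of what the bitmap allocator computes:

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#define BITS_PER_LONG    (CHAR_BIT * sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* Zeroed bitmap sized for nbits bits, like bitmap_zalloc(). */
static unsigned long *bitmap_zalloc_like(unsigned int nbits)
{
    return calloc(BITS_TO_LONGS(nbits), sizeof(unsigned long));
}

int main(void)
{
    unsigned int max_devhandle = 1000;    /* not a multiple of 8 or 64 */
    unsigned long *map = bitmap_zalloc_like(max_devhandle);

    if (!map)
        return 1;
    printf("%u bits -> %zu longs\n", max_devhandle,
           BITS_TO_LONGS(max_devhandle));
    free(map);    /* the bitmap_free() counterpart */
    return 0;
}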
@@ -2957,27 +2954,18 @@ static int mpi3mr_alloc_reply_sense_bufs(struct mpi3mr_ioc *mrioc)
        if (!mrioc->pel_abort_cmd.reply)
                goto out_failed;
 
-       mrioc->dev_handle_bitmap_sz = mrioc->facts.max_devhandle / 8;
-       if (mrioc->facts.max_devhandle % 8)
-               mrioc->dev_handle_bitmap_sz++;
-       mrioc->removepend_bitmap = kzalloc(mrioc->dev_handle_bitmap_sz,
-           GFP_KERNEL);
+       mrioc->dev_handle_bitmap_bits = mrioc->facts.max_devhandle;
+       mrioc->removepend_bitmap = bitmap_zalloc(mrioc->dev_handle_bitmap_bits,
+                                                GFP_KERNEL);
        if (!mrioc->removepend_bitmap)
                goto out_failed;
 
-       mrioc->devrem_bitmap_sz = MPI3MR_NUM_DEVRMCMD / 8;
-       if (MPI3MR_NUM_DEVRMCMD % 8)
-               mrioc->devrem_bitmap_sz++;
-       mrioc->devrem_bitmap = kzalloc(mrioc->devrem_bitmap_sz,
-           GFP_KERNEL);
+       mrioc->devrem_bitmap = bitmap_zalloc(MPI3MR_NUM_DEVRMCMD, GFP_KERNEL);
        if (!mrioc->devrem_bitmap)
                goto out_failed;
 
-       mrioc->evtack_cmds_bitmap_sz = MPI3MR_NUM_EVTACKCMD / 8;
-       if (MPI3MR_NUM_EVTACKCMD % 8)
-               mrioc->evtack_cmds_bitmap_sz++;
-       mrioc->evtack_cmds_bitmap = kzalloc(mrioc->evtack_cmds_bitmap_sz,
-           GFP_KERNEL);
+       mrioc->evtack_cmds_bitmap = bitmap_zalloc(MPI3MR_NUM_EVTACKCMD,
+                                                 GFP_KERNEL);
        if (!mrioc->evtack_cmds_bitmap)
                goto out_failed;
 
@@ -3415,10 +3403,7 @@ static int mpi3mr_alloc_chain_bufs(struct mpi3mr_ioc *mrioc)
                if (!mrioc->chain_sgl_list[i].addr)
                        goto out_failed;
        }
-       mrioc->chain_bitmap_sz = num_chains / 8;
-       if (num_chains % 8)
-               mrioc->chain_bitmap_sz++;
-       mrioc->chain_bitmap = kzalloc(mrioc->chain_bitmap_sz, GFP_KERNEL);
+       mrioc->chain_bitmap = bitmap_zalloc(num_chains, GFP_KERNEL);
        if (!mrioc->chain_bitmap)
                goto out_failed;
        return retval;
@@ -4189,10 +4174,11 @@ void mpi3mr_memset_buffers(struct mpi3mr_ioc *mrioc)
                for (i = 0; i < MPI3MR_NUM_EVTACKCMD; i++)
                        memset(mrioc->evtack_cmds[i].reply, 0,
                            sizeof(*mrioc->evtack_cmds[i].reply));
-               memset(mrioc->removepend_bitmap, 0, mrioc->dev_handle_bitmap_sz);
-               memset(mrioc->devrem_bitmap, 0, mrioc->devrem_bitmap_sz);
-               memset(mrioc->evtack_cmds_bitmap, 0,
-                   mrioc->evtack_cmds_bitmap_sz);
+               bitmap_clear(mrioc->removepend_bitmap, 0,
+                            mrioc->dev_handle_bitmap_bits);
+               bitmap_clear(mrioc->devrem_bitmap, 0, MPI3MR_NUM_DEVRMCMD);
+               bitmap_clear(mrioc->evtack_cmds_bitmap, 0,
+                            MPI3MR_NUM_EVTACKCMD);
        }
 
        for (i = 0; i < mrioc->num_queues; i++) {
@@ -4318,16 +4304,16 @@ void mpi3mr_free_mem(struct mpi3mr_ioc *mrioc)
                mrioc->evtack_cmds[i].reply = NULL;
        }
 
-       kfree(mrioc->removepend_bitmap);
+       bitmap_free(mrioc->removepend_bitmap);
        mrioc->removepend_bitmap = NULL;
 
-       kfree(mrioc->devrem_bitmap);
+       bitmap_free(mrioc->devrem_bitmap);
        mrioc->devrem_bitmap = NULL;
 
-       kfree(mrioc->evtack_cmds_bitmap);
+       bitmap_free(mrioc->evtack_cmds_bitmap);
        mrioc->evtack_cmds_bitmap = NULL;
 
-       kfree(mrioc->chain_bitmap);
+       bitmap_free(mrioc->chain_bitmap);
        mrioc->chain_bitmap = NULL;
 
        kfree(mrioc->transport_cmds.reply);
@@ -4886,9 +4872,10 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc,
 
        mpi3mr_flush_delayed_cmd_lists(mrioc);
        mpi3mr_flush_drv_cmds(mrioc);
-       memset(mrioc->devrem_bitmap, 0, mrioc->devrem_bitmap_sz);
-       memset(mrioc->removepend_bitmap, 0, mrioc->dev_handle_bitmap_sz);
-       memset(mrioc->evtack_cmds_bitmap, 0, mrioc->evtack_cmds_bitmap_sz);
+       bitmap_clear(mrioc->devrem_bitmap, 0, MPI3MR_NUM_DEVRMCMD);
+       bitmap_clear(mrioc->removepend_bitmap, 0,
+                    mrioc->dev_handle_bitmap_bits);
+       bitmap_clear(mrioc->evtack_cmds_bitmap, 0, MPI3MR_NUM_EVTACKCMD);
        mpi3mr_flush_host_io(mrioc);
        mpi3mr_cleanup_fwevt_list(mrioc);
        mpi3mr_invalidate_devhandles(mrioc);
index 3306de7170f644b49436ce8d576a5286156e483e..6eaeba41072cb53e4ff2354fa4241815b2c7a24c 100644 (file)
@@ -4952,6 +4952,10 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                mpi3mr_init_drv_cmd(&mrioc->dev_rmhs_cmds[i],
                    MPI3MR_HOSTTAG_DEVRMCMD_MIN + i);
 
+       for (i = 0; i < MPI3MR_NUM_EVTACKCMD; i++)
+               mpi3mr_init_drv_cmd(&mrioc->evtack_cmds[i],
+                                   MPI3MR_HOSTTAG_EVTACKCMD_MIN + i);
+
        if (pdev->revision)
                mrioc->enable_segqueue = true;
 
index 3fc897336b5e097ac1f80eeb37546b381eb02874..3b61815979dabe5c9a1113a410f34e3333905229 100644 (file)
@@ -1280,7 +1280,7 @@ void mpi3mr_sas_host_add(struct mpi3mr_ioc *mrioc)
 
        if (mrioc->sas_hba.enclosure_handle) {
                if (!(mpi3mr_cfg_get_enclosure_pg0(mrioc, &ioc_status,
-                   &encl_pg0, sizeof(dev_pg0),
+                   &encl_pg0, sizeof(encl_pg0),
                    MPI3_ENCLOS_PGAD_FORM_HANDLE,
                    mrioc->sas_hba.enclosure_handle)) &&
                    (ioc_status == MPI3_IOCSTATUS_SUCCESS))
index 37d084086fd4341079b63d31286a73e298624f86..fdda12ef13b0fd1f69103a28b8a9e8641281e151 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/compiler.h>
 #include <linux/string.h>
-#include <linux/version.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <scsi/scsi_transport.h>
index 46e8b38603f0495f1919d51cff583eeef1772e0a..030625ebb4e653a0da4df20f62751f4926332354 100644 (file)
@@ -45,7 +45,7 @@ qla27xx_process_purex_fpin(struct scsi_qla_host *vha, struct purex_item *item)
        ql_dump_buffer(ql_dbg_init + ql_dbg_verbose, vha, 0x508f,
                       pkt, pkt_size);
 
-       fc_host_fpin_rcv(vha->host, pkt_size, (char *)pkt);
+       fc_host_fpin_rcv(vha->host, pkt_size, (char *)pkt, 0);
 }
 
 const char *const port_state_str[] = {
index abe93ec8b7d08c7fc4e0bc16d60ed746a737d1f0..b7c569a42aa477551597ff0629fc6c04888277fb 100644 (file)
@@ -229,6 +229,7 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd,
        scmd->cmd_len = COMMAND_SIZE(cmd[0]);
        memcpy(scmd->cmnd, cmd, scmd->cmd_len);
        scmd->allowed = retries;
+       scmd->flags |= args->scmd_flags;
        req->timeout = timeout;
        req->rq_flags |= RQF_QUIET;
 
index 0965f8a7134f08079dab58fccd4b1f35bcba128a..f12e9467ebb430a1d9cfbcabccc708dded0cb128 100644 (file)
@@ -137,6 +137,7 @@ static const struct {
        { FCH_EVT_PORT_FABRIC,          "port_fabric" },
        { FCH_EVT_LINK_UNKNOWN,         "link_unknown" },
        { FCH_EVT_LINK_FPIN,            "link_FPIN" },
+       { FCH_EVT_LINK_FPIN_ACK,        "link_FPIN_ACK" },
        { FCH_EVT_VENDOR_UNIQUE,        "vendor_unique" },
 };
 fc_enum_name_search(host_event_code, fc_host_event_code,
@@ -894,17 +895,20 @@ fc_fpin_congn_stats_update(struct Scsi_Host *shost,
  * @shost:             host the FPIN was received on
  * @fpin_len:          length of FPIN payload, in bytes
  * @fpin_buf:          pointer to FPIN payload
- *
+ * @event_acknowledge: 1 if the LLDD will acknowledge this event.
  * Notes:
  *     This routine assumes no locks are held on entry.
  */
 void
-fc_host_fpin_rcv(struct Scsi_Host *shost, u32 fpin_len, char *fpin_buf)
+fc_host_fpin_rcv(struct Scsi_Host *shost, u32 fpin_len, char *fpin_buf,
+               u8 event_acknowledge)
 {
        struct fc_els_fpin *fpin = (struct fc_els_fpin *)fpin_buf;
        struct fc_tlv_desc *tlv;
        u32 desc_cnt = 0, bytes_remain;
        u32 dtag;
+       enum fc_host_event_code event_code =
+               event_acknowledge ? FCH_EVT_LINK_FPIN_ACK : FCH_EVT_LINK_FPIN;
 
        /* Update Statistics */
        tlv = (struct fc_tlv_desc *)&fpin->fpin_desc[0];
@@ -934,7 +938,7 @@ fc_host_fpin_rcv(struct Scsi_Host *shost, u32 fpin_len, char *fpin_buf)
        }
 
        fc_host_post_fc_event(shost, fc_get_event_number(),
-                               FCH_EVT_LINK_FPIN, fpin_len, fpin_buf, 0);
+                               event_code, fpin_len, fpin_buf, 0);
 }
 EXPORT_SYMBOL(fc_host_fpin_rcv);
 
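
fc_host_fpin_rcv() gains an event_acknowledge flag; callers that merely deliver the FPIN (lpfc and qla2xxx in this series) pass 0 and keep posting FCH_EVT_LINK_FPIN, while a driver that will acknowledge the frame posts the new FCH_EVT_LINK_FPIN_ACK code. The dispatch reduces to choosing an enum by flag; a sketch (enumerator values here are illustrative, not the kernel's):

#include <stdio.h>

enum fc_host_event_code {        /* abridged; values illustrative */
    FCH_EVT_LINK_FPIN     = 0x501,
    FCH_EVT_LINK_FPIN_ACK = 0x502,
};

static void fpin_rcv(unsigned char event_acknowledge)
{
    enum fc_host_event_code event_code =
        event_acknowledge ? FCH_EVT_LINK_FPIN_ACK : FCH_EVT_LINK_FPIN;

    printf("posting event 0x%x\n", (unsigned int)event_code);
}

int main(void)
{
    fpin_rcv(0);    /* legacy callers: plain FPIN delivery */
    fpin_rcv(1);    /* the LLDD will acknowledge the FPIN */
    return 0;
}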
index a38c71511bc94b7a8637eb4a2d81471edc3b2ce3..4f28dd617ecadaa5636adde4b892834df7cfb1a1 100644 (file)
@@ -121,7 +121,6 @@ static void scsi_disk_release(struct device *cdev);
 
 static DEFINE_IDA(sd_index_ida);
 
-static struct kmem_cache *sd_cdb_cache;
 static mempool_t *sd_page_pool;
 static struct lock_class_key sd_bio_compl_lkclass;
 
@@ -2252,23 +2251,20 @@ static void sd_config_protection(struct scsi_disk *sdkp)
 {
        struct scsi_device *sdp = sdkp->device;
 
-       if (!sdkp->first_scan)
-               return;
-
        sd_dif_config_host(sdkp);
 
        if (!sdkp->protection_type)
                return;
 
        if (!scsi_host_dif_capable(sdp->host, sdkp->protection_type)) {
-               sd_printk(KERN_NOTICE, sdkp,
-                         "Disabling DIF Type %u protection\n",
-                         sdkp->protection_type);
+               sd_first_printk(KERN_NOTICE, sdkp,
+                               "Disabling DIF Type %u protection\n",
+                               sdkp->protection_type);
                sdkp->protection_type = 0;
        }
 
-       sd_printk(KERN_NOTICE, sdkp, "Enabling DIF Type %u protection\n",
-                 sdkp->protection_type);
+       sd_first_printk(KERN_NOTICE, sdkp, "Enabling DIF Type %u protection\n",
+                       sdkp->protection_type);
 }
 
 static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp,
@@ -3851,19 +3847,11 @@ static int __init init_sd(void)
        if (err)
                goto err_out;
 
-       sd_cdb_cache = kmem_cache_create("sd_ext_cdb", SD_EXT_CDB_SIZE,
-                                        0, 0, NULL);
-       if (!sd_cdb_cache) {
-               printk(KERN_ERR "sd: can't init extended cdb cache\n");
-               err = -ENOMEM;
-               goto err_out_class;
-       }
-
        sd_page_pool = mempool_create_page_pool(SD_MEMPOOL_SIZE, 0);
        if (!sd_page_pool) {
                printk(KERN_ERR "sd: can't init discard page pool\n");
                err = -ENOMEM;
-               goto err_out_cache;
+               goto err_out_class;
        }
 
        err = scsi_register_driver(&sd_template.gendrv);
@@ -3874,10 +3862,6 @@ static int __init init_sd(void)
 
 err_out_driver:
        mempool_destroy(sd_page_pool);
-
-err_out_cache:
-       kmem_cache_destroy(sd_cdb_cache);
-
 err_out_class:
        class_unregister(&sd_disk_class);
 err_out:
@@ -3899,7 +3883,6 @@ static void __exit exit_sd(void)
 
        scsi_unregister_driver(&sd_template.gendrv);
        mempool_destroy(sd_page_pool);
-       kmem_cache_destroy(sd_cdb_cache);
 
        class_unregister(&sd_disk_class);
 
index 968993ee6d5d30de67ef106ec59df83b7fa2f0ae..1df847b5f7476417f0290faaf92dd497a606d441 100644 (file)
@@ -39,8 +39,10 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
                dif = 0; dix = 1;
        }
 
-       if (!dix)
+       if (!dix) {
+               blk_integrity_unregister(disk);
                return;
+       }
 
        memset(&bi, 0, sizeof(bi));
 
@@ -72,9 +74,9 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
                        bi.tag_size = sizeof(u16);
        }
 
-       sd_printk(KERN_NOTICE, sdkp,
-                 "Enabling DIX %s, application tag size %u bytes\n",
-                 bi.profile->name, bi.tag_size);
+       sd_first_printk(KERN_NOTICE, sdkp,
+                       "Enabling DIX %s, application tag size %u bytes\n",
+                       bi.profile->name, bi.tag_size);
 out:
        blk_integrity_register(disk, &bi);
 }
index 869ca9c7f23f731377e38beb21e930fe2cac92ea..b11a9162e73aaeed994667a3ba04f2fa59cae240 100644 (file)
@@ -439,8 +439,8 @@ int ses_match_host(struct enclosure_device *edev, void *data)
 }
 #endif  /*  0  */
 
-static void ses_process_descriptor(struct enclosure_component *ecomp,
-                                  unsigned char *desc)
+static int ses_process_descriptor(struct enclosure_component *ecomp,
+                                  unsigned char *desc, int max_desc_len)
 {
        int eip = desc[0] & 0x10;
        int invalid = desc[0] & 0x80;
@@ -451,22 +451,32 @@ static void ses_process_descriptor(struct enclosure_component *ecomp,
        unsigned char *d;
 
        if (invalid)
-               return;
+               return 0;
 
        switch (proto) {
        case SCSI_PROTOCOL_FCP:
                if (eip) {
+                       if (max_desc_len <= 7)
+                               return 1;
                        d = desc + 4;
                        slot = d[3];
                }
                break;
        case SCSI_PROTOCOL_SAS:
+
                if (eip) {
+                       if (max_desc_len <= 27)
+                               return 1;
                        d = desc + 4;
                        slot = d[3];
                        d = desc + 8;
-               } else
+               } else {
+                       if (max_desc_len <= 23)
+                               return 1;
                        d = desc + 4;
+               }
+
+
                /* only take the phy0 addr */
                addr = (u64)d[12] << 56 |
                        (u64)d[13] << 48 |
@@ -483,6 +493,8 @@ static void ses_process_descriptor(struct enclosure_component *ecomp,
        }
        ecomp->slot = slot;
        scomp->addr = addr;
+
+       return 0;
 }
 
 struct efd {
@@ -555,7 +567,7 @@ static void ses_enclosure_data_process(struct enclosure_device *edev,
                /* skip past overall descriptor */
                desc_ptr += len + 4;
        }
-       if (ses_dev->page10)
+       if (ses_dev->page10 && ses_dev->page10_len > 9)
                addl_desc_ptr = ses_dev->page10 + 8;
        type_ptr = ses_dev->page1_types;
        components = 0;
@@ -563,17 +575,22 @@ static void ses_enclosure_data_process(struct enclosure_device *edev,
                for (j = 0; j < type_ptr[1]; j++) {
                        char *name = NULL;
                        struct enclosure_component *ecomp;
+                       int max_desc_len;
 
                        if (desc_ptr) {
-                               if (desc_ptr >= buf + page7_len) {
+                               if (desc_ptr + 3 >= buf + page7_len) {
                                        desc_ptr = NULL;
                                } else {
                                        len = (desc_ptr[2] << 8) + desc_ptr[3];
                                        desc_ptr += 4;
-                                       /* Add trailing zero - pushes into
-                                        * reserved space */
-                                       desc_ptr[len] = '\0';
-                                       name = desc_ptr;
+                                       if (desc_ptr + len > buf + page7_len)
+                                               desc_ptr = NULL;
+                                       else {
+                                               /* Add trailing zero - pushes into
+                                                * reserved space */
+                                               desc_ptr[len] = '\0';
+                                               name = desc_ptr;
+                                       }
                                }
                        }
                        if (type_ptr[0] == ENCLOSURE_COMPONENT_DEVICE ||
@@ -589,10 +606,14 @@ static void ses_enclosure_data_process(struct enclosure_device *edev,
                                        ecomp = &edev->component[components++];
 
                                if (!IS_ERR(ecomp)) {
-                                       if (addl_desc_ptr)
-                                               ses_process_descriptor(
-                                                       ecomp,
-                                                       addl_desc_ptr);
+                                       if (addl_desc_ptr) {
+                                               max_desc_len = ses_dev->page10_len -
+                                                   (addl_desc_ptr - ses_dev->page10);
+                                               if (ses_process_descriptor(ecomp,
+                                                   addl_desc_ptr,
+                                                   max_desc_len))
+                                                       addl_desc_ptr = NULL;
+                                       }
                                        if (create)
                                                enclosure_component_register(
                                                        ecomp);
@@ -609,9 +630,11 @@ static void ses_enclosure_data_process(struct enclosure_device *edev,
                             /* these elements are optional */
                             type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_TARGET_PORT ||
                             type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_INITIATOR_PORT ||
-                            type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS))
+                            type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS)) {
                                addl_desc_ptr += addl_desc_ptr[1] + 2;
-
+                               if (addl_desc_ptr + 1 >= ses_dev->page10 + ses_dev->page10_len)
+                                       addl_desc_ptr = NULL;
+                       }
                }
        }
        kfree(buf);
@@ -710,6 +733,12 @@ static int ses_intf_add(struct device *cdev,
                    type_ptr[0] == ENCLOSURE_COMPONENT_ARRAY_DEVICE)
                        components += type_ptr[1];
        }
+
+       if (components == 0) {
+               sdev_printk(KERN_WARNING, sdev, "enclosure has no enumerated components\n");
+               goto err_free;
+       }
+
        ses_dev->page1 = buf;
        ses_dev->page1_len = len;
        buf = NULL;
@@ -833,7 +862,8 @@ static void ses_intf_remove_enclosure(struct scsi_device *sdev)
        kfree(ses_dev->page2);
        kfree(ses_dev);
 
-       kfree(edev->component[0].scratch);
+       if (edev->components)
+               kfree(edev->component[0].scratch);
 
        put_device(&edev->edev);
        enclosure_unregister(edev);
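
The common thread in these ses fixes is that every read of an additional-element descriptor is now preceded by a length check against the page buffer, and the walk ends (pointer NULLed) rather than running past page 10. A compact sketch of length-checked descriptor walking over a {type, len, payload} layout (layout simplified from SES):

#include <stdio.h>

/* Walk descriptors laid out as {type, len, payload...} in buf[0..len);
 * refuse any header or payload that would overrun the buffer. */
static void walk(const unsigned char *buf, int len)
{
    int off = 0;

    while (off + 2 <= len) {            /* room for the 2-byte header? */
        int dlen = buf[off + 1];

        if (off + 2 + dlen > len) {     /* payload would overrun */
            printf("truncated descriptor at offset %d, stopping\n", off);
            return;
        }
        printf("desc type %u with %d payload bytes\n", buf[off], dlen);
        off += 2 + dlen;
    }
}

int main(void)
{
    /* The second descriptor claims 9 payload bytes; only 2 remain. */
    unsigned char page[] = { 1, 2, 0xaa, 0xbb, 3, 9, 0xcc, 0xdd };

    walk(page, (int)sizeof(page));
    return 0;
}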
index d996782a710642cd6da617457e0ed7ac682d642f..7a73f5e4a1fc70cc1288a18bd6a3dcbb5d5e0653 100644 (file)
@@ -295,7 +295,7 @@ int clk_enable(struct clk *clk)
        int ret;
 
        if (!clk)
-               return -EINVAL;
+               return 0;
 
        spin_lock_irqsave(&clock_lock, flags);
        ret = __clk_enable(clk);
index 2954c06a7f57f64a71ccd19db83c77ef3747de35..64b6a460d739b778fad3703fccfe3fb4e688103e 100644 (file)
@@ -786,7 +786,7 @@ failrd:
        writel(0, reg_base + CQSPI_REG_IRQMASK);
 
        /* Cancel the indirect read */
-       writel(CQSPI_REG_INDIRECTWR_CANCEL_MASK,
+       writel(CQSPI_REG_INDIRECTRD_CANCEL_MASK,
               reg_base + CQSPI_REG_INDIRECTRD);
        return ret;
 }
index 348c6e1edd38a67b683e4ceb873c79a4c16e779a..333b22dfd8dba879403f9d8fe0c8fdcb23017567 100644 (file)
@@ -611,7 +611,7 @@ static int f_ospi_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        ctlr->mode_bits = SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL
-               | SPI_RX_DUAL | SPI_RX_QUAD | SPI_TX_OCTAL
+               | SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL
                | SPI_MODE_0 | SPI_MODE_1 | SPI_LSB_FIRST;
        ctlr->mem_ops = &f_ospi_mem_ops;
        ctlr->bus_num = -1;
index 9f356612ba7e54f46fb5868e6675904a6ad20c6c..0b9bc3b7f53a720c5f6984d6c27642d22e6e936d 100644 (file)
@@ -1156,6 +1156,10 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
                                ret = -EIO;
                                goto exit;
                        }
+                       if (!xfer->cs_change) {
+                               tegra_qspi_transfer_end(spi);
+                               spi_transfer_delay_exec(xfer);
+                       }
                        break;
                default:
                        ret = -EINVAL;
@@ -1164,14 +1168,14 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
                msg->actual_length += xfer->len;
                transfer_phase++;
        }
-       if (!xfer->cs_change) {
-               tegra_qspi_transfer_end(spi);
-               spi_transfer_delay_exec(xfer);
-       }
        ret = 0;
 
 exit:
        msg->status = ret;
+       if (ret < 0) {
+               tegra_qspi_transfer_end(spi);
+               spi_transfer_delay_exec(xfer);
+       }
 
        return ret;
 }
@@ -1297,7 +1301,7 @@ static bool tegra_qspi_validate_cmb_seq(struct tegra_qspi *tqspi,
        if (xfer->len > 4 || xfer->len < 3)
                return false;
        xfer = list_next_entry(xfer, transfer_list);
-       if (!tqspi->soc_data->has_dma || xfer->len > (QSPI_FIFO_DEPTH << 2))
+       if (!tqspi->soc_data->has_dma && xfer->len > (QSPI_FIFO_DEPTH << 2))
                return false;
 
        return true;
@@ -1532,6 +1536,7 @@ static int tegra_qspi_probe(struct platform_device *pdev)
        master->mode_bits = SPI_MODE_0 | SPI_MODE_3 | SPI_CS_HIGH |
                            SPI_TX_DUAL | SPI_RX_DUAL | SPI_TX_QUAD | SPI_RX_QUAD;
        master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) | SPI_BPW_MASK(8);
+       master->flags = SPI_CONTROLLER_HALF_DUPLEX;
        master->setup = tegra_qspi_setup;
        master->transfer_one_message = tegra_qspi_transfer_one_message;
        master->num_chipselect = 1;
index b5808f92702df3dfc84cfd4021cb4c919eba1075..cb7e7697cf1e3774110fa7df1c18c4051c36fe12 100644 (file)
@@ -73,7 +73,8 @@ endmenu
 
 config INTEL_BXT_PMIC_THERMAL
        tristate "Intel Broxton PMIC thermal driver"
-       depends on X86 && INTEL_SOC_PMIC_BXTWC && REGMAP
+       depends on X86 && INTEL_SOC_PMIC_BXTWC
+       select REGMAP
        help
          Select this driver for Intel Broxton PMIC with ADC channels monitoring
          system temperature measurements and alerts.
index 97b843fa756805ea5dbb39aec2ad526d2a6f337e..ffdc95047838f37acc0ddc9972db7b229f129095 100644 (file)
@@ -400,22 +400,14 @@ MODULE_DEVICE_TABLE(x86cpu, qrk_thermal_ids);
 
 static int __init intel_quark_thermal_init(void)
 {
-       int err = 0;
-
        if (!x86_match_cpu(qrk_thermal_ids) || !iosf_mbi_available())
                return -ENODEV;
 
        soc_dts = alloc_soc_dts();
-       if (IS_ERR(soc_dts)) {
-               err = PTR_ERR(soc_dts);
-               goto err_free;
-       }
+       if (IS_ERR(soc_dts))
+               return PTR_ERR(soc_dts);
 
        return 0;
-
-err_free:
-       free_soc_dts(soc_dts);
-       return err;
 }
 
 static void __exit intel_quark_thermal_exit(void)
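
The deleted error path passed an IS_ERR() pointer to free_soc_dts(), i.e. it tried to free an encoded errno rather than an allocation; the fix returns PTR_ERR() directly. A user-space model of the ERR_PTR convention (helper names hypothetical):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

/* Kernel-style encoding: the top page of address space carries errnos. */
static void *err_ptr(long err) { return (void *)(intptr_t)err; }
static int is_err(const void *p)
{
    return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}
static long ptr_err(const void *p) { return (long)(intptr_t)p; }

static void *alloc_soc_dts(int fail)
{
    return fail ? err_ptr(-ENOMEM) : malloc(16);
}

int main(void)
{
    void *dts = alloc_soc_dts(1);

    if (is_err(dts)) {
        /* Nothing was allocated: freeing here would be the bug the
         * patch removes. Just propagate the encoded errno. */
        printf("alloc failed: %ld\n", ptr_err(dts));
        return 1;
    }
    free(dts);
    return 0;
}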
index f566eb1839dc501edb1942f2ea41ab0a9ed75afd..1dc07f9214d575e0ce583f7ddfbef68a06b28351 100644 (file)
@@ -403,10 +403,11 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
                unsigned int this_round, skip = 0;
                int size;
 
-               ret = -ENXIO;
                vc = vcs_vc(inode, &viewed);
-               if (!vc)
-                       goto unlock_out;
+               if (!vc) {
+                       ret = -ENXIO;
+                       break;
+               }
 
                /* Check whether we are above size each round,
                 * as copy_to_user at the end of this loop
@@ -414,10 +415,8 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
                 */
                size = vcs_size(vc, attr, uni_mode);
                if (size < 0) {
-                       if (read)
-                               break;
                        ret = size;
-                       goto unlock_out;
+                       break;
                }
                if (pos >= size)
                        break;
index 276a82b2e5ee3e691bbf0c327ab5f49387824ae6..172d25fef740d354a73f156acdbc37a1febb9b00 100644 (file)
@@ -1409,6 +1409,13 @@ static int ufshcd_devfreq_target(struct device *dev,
        struct ufs_clk_info *clki;
        unsigned long irq_flags;
 
+       /*
+        * Skip devfreq if UFS initialization is not finished.
+        * Otherwise UFS could be in an inconsistent state.
+        */
+       if (!smp_load_acquire(&hba->logical_unit_scan_finished))
+               return 0;
+
        if (!ufshcd_is_clkscaling_supported(hba))
                return -EINVAL;
 
@@ -8392,22 +8399,6 @@ static int ufshcd_add_lus(struct ufs_hba *hba)
        if (ret)
                goto out;
 
-       /* Initialize devfreq after UFS device is detected */
-       if (ufshcd_is_clkscaling_supported(hba)) {
-               memcpy(&hba->clk_scaling.saved_pwr_info.info,
-                       &hba->pwr_info,
-                       sizeof(struct ufs_pa_layer_attr));
-               hba->clk_scaling.saved_pwr_info.is_valid = true;
-               hba->clk_scaling.is_allowed = true;
-
-               ret = ufshcd_devfreq_init(hba);
-               if (ret)
-                       goto out;
-
-               hba->clk_scaling.is_enabled = true;
-               ufshcd_init_clk_scaling_sysfs(hba);
-       }
-
        ufs_bsg_probe(hba);
        ufshpb_init(hba);
        scsi_scan_host(hba->host);
@@ -8538,7 +8529,9 @@ static int ufshcd_device_init(struct ufs_hba *hba, bool init_dev_params)
                        return ret;
                if (is_mcq_supported(hba) && !hba->scsi_host_added) {
                        ret = ufshcd_alloc_mcq(hba);
-                       if (ret) {
+                       if (!ret) {
+                               ufshcd_config_mcq(hba);
+                       } else {
                                /* Continue with SDB mode */
                                use_mcq_mode = false;
                                dev_err(hba->dev, "MCQ mode is disabled, err=%d\n",
@@ -8550,10 +8543,10 @@ static int ufshcd_device_init(struct ufs_hba *hba, bool init_dev_params)
                                return ret;
                        }
                        hba->scsi_host_added = true;
-               }
-               /* MCQ may be disabled if ufshcd_alloc_mcq() fails */
-               if (is_mcq_supported(hba) && use_mcq_mode)
+               } else if (is_mcq_supported(hba)) {
+                       /* UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH is set */
                        ufshcd_config_mcq(hba);
+               }
        }
 
        ufshcd_tune_unipro_params(hba);
@@ -8677,6 +8670,12 @@ out:
        if (ret) {
                pm_runtime_put_sync(hba->dev);
                ufshcd_hba_exit(hba);
+       } else {
+               /*
+                * Make sure that when reader code sees UFS initialization has finished,
+                * all initialization steps have really been executed.
+                */
+               smp_store_release(&hba->logical_unit_scan_finished, true);
        }
 }
 
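
The new logical_unit_scan_finished flag is published with smp_store_release() only after every initialization step, and the devfreq path reads it with smp_load_acquire(), so a reader that observes the flag is guaranteed to also observe all the initialization writes that preceded it. The same pairing in portable C11 atomics:

#include <stdatomic.h>
#include <stdio.h>

static int config;                    /* plain data set up during init */
static atomic_bool scan_finished;     /* publication flag */

static void init_side(void)
{
    config = 42;                      /* stands in for all the init work */
    /* Release: no write above may be reordered past this store. */
    atomic_store_explicit(&scan_finished, 1, memory_order_release);
}

static void devfreq_side(void)
{
    /* Acquire: if we observe the flag, we also observe config = 42. */
    if (!atomic_load_explicit(&scan_finished, memory_order_acquire)) {
        printf("init not finished, skipping\n");
        return;
    }
    printf("config = %d\n", config);
}

int main(void)
{
    devfreq_side();   /* before init: skips */
    init_side();
    devfreq_side();   /* after init: sees config = 42 */
    return 0;
}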
@@ -9143,34 +9142,15 @@ static int ufshcd_execute_start_stop(struct scsi_device *sdev,
                                     enum ufs_dev_pwr_mode pwr_mode,
                                     struct scsi_sense_hdr *sshdr)
 {
-       unsigned char cdb[6] = { START_STOP, 0, 0, 0, pwr_mode << 4, 0 };
-       struct request *req;
-       struct scsi_cmnd *scmd;
-       int ret;
-
-       req = scsi_alloc_request(sdev->request_queue, REQ_OP_DRV_IN,
-                                BLK_MQ_REQ_PM);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-
-       scmd = blk_mq_rq_to_pdu(req);
-       scmd->cmd_len = COMMAND_SIZE(cdb[0]);
-       memcpy(scmd->cmnd, cdb, scmd->cmd_len);
-       scmd->allowed = 0/*retries*/;
-       scmd->flags |= SCMD_FAIL_IF_RECOVERING;
-       req->timeout = 1 * HZ;
-       req->rq_flags |= RQF_PM | RQF_QUIET;
-
-       blk_execute_rq(req, /*at_head=*/true);
-
-       if (sshdr)
-               scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len,
-                                    sshdr);
-       ret = scmd->result;
-
-       blk_mq_free_request(req);
+       const unsigned char cdb[6] = { START_STOP, 0, 0, 0, pwr_mode << 4, 0 };
+       const struct scsi_exec_args args = {
+               .sshdr = sshdr,
+               .req_flags = BLK_MQ_REQ_PM,
+               .scmd_flags = SCMD_FAIL_IF_RECOVERING,
+       };
 
-       return ret;
+       return scsi_execute_cmd(sdev, cdb, REQ_OP_DRV_IN, /*buffer=*/NULL,
+                       /*bufflen=*/0, /*timeout=*/HZ, /*retries=*/0, &args);
 }
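
scsi_execute_cmd() subsumes the request allocation, CDB copy, flag setup, execution, sense decoding and request teardown that the removed body open-coded. A hedged sketch of the same helper with a data-in payload, to show the parameters the START STOP conversion passes as NULL/0 (the command chosen here is illustrative):

    #include <scsi/scsi_proto.h>
    #include <scsi/scsi_common.h>
    #include <scsi/scsi_device.h>

    /* Illustrative: issue a 6-byte MODE SENSE through the new helper. */
    static int example_mode_sense(struct scsi_device *sdev, u8 *buf, u8 len)
    {
            const u8 cdb[6] = { MODE_SENSE, 0, 0x3f, 0, len, 0 };
            struct scsi_sense_hdr sshdr;
            const struct scsi_exec_args args = {
                    .sshdr = &sshdr,        /* decoded sense on failure */
            };

            /* buffer/bufflen carry the data-in payload this time */
            return scsi_execute_cmd(sdev, cdb, REQ_OP_DRV_IN, buf, len,
                                    10 * HZ, /*retries=*/3, &args);
    }
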
 
 /**
@@ -10336,12 +10316,30 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
         */
        ufshcd_set_ufs_dev_active(hba);
 
+       /* Initialize devfreq */
+       if (ufshcd_is_clkscaling_supported(hba)) {
+               memcpy(&hba->clk_scaling.saved_pwr_info.info,
+                       &hba->pwr_info,
+                       sizeof(struct ufs_pa_layer_attr));
+               hba->clk_scaling.saved_pwr_info.is_valid = true;
+               hba->clk_scaling.is_allowed = true;
+
+               err = ufshcd_devfreq_init(hba);
+               if (err)
+                       goto rpm_put_sync;
+
+               hba->clk_scaling.is_enabled = true;
+               ufshcd_init_clk_scaling_sysfs(hba);
+       }
+
        async_schedule(ufshcd_async_scan, hba);
        ufs_sysfs_add_nodes(hba->dev);
 
        device_enable_async_suspend(dev);
        return 0;
 
+rpm_put_sync:
+       pm_runtime_put_sync(dev);
 free_tmf_queue:
        blk_mq_destroy_queue(hba->tmf_queue);
        blk_put_queue(hba->tmf_queue);
index 66388143792187286227e9b143b38f8600b79326..8793e343358061a6f3a1166d806d126a18970b01 100644 (file)
@@ -48,7 +48,7 @@ config SCSI_UFS_CDNS_PLATFORM
 
 config SCSI_UFS_DWC_TC_PLATFORM
        tristate "DesignWare platform support using a G210 Test Chip"
-       depends on SCSI_UFSHCD_PLATFORM
+       depends on OF && SCSI_UFSHCD_PLATFORM
        help
          Synopsys Test Chip is a PHY for prototyping purposes.
 
index 21d9b047539fbf1eac38884d3ef9fdcba4c41adb..73e217260390ef5fa75002b1a9d0ba068e2779b2 100644 (file)
@@ -1613,6 +1613,7 @@ static int ufs_mtk_system_resume(struct device *dev)
 }
 #endif
 
+#ifdef CONFIG_PM
 static int ufs_mtk_runtime_suspend(struct device *dev)
 {
        struct ufs_hba *hba = dev_get_drvdata(dev);
@@ -1635,6 +1636,7 @@ static int ufs_mtk_runtime_resume(struct device *dev)
 
        return ufshcd_runtime_resume(dev);
 }
+#endif
 
 static const struct dev_pm_ops ufs_mtk_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(ufs_mtk_system_suspend,
index 4ec4174e05a3c3148f0c288d421257a7d7ac5cf4..7b4e9009f33559fe04a11c17af5433715b625893 100644 (file)
@@ -377,9 +377,26 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
                snp_dev->input.data_npages = certs_npages;
        }
 
+       /*
+        * Increment the message sequence number. There is no harm in doing
+        * this now because decryption uses the value stored in the response
+        * structure and any failure will wipe the VMPCK, preventing further
+        * use anyway.
+        */
+       snp_inc_msg_seqno(snp_dev);
+
        if (fw_err)
                *fw_err = err;
 
+       /*
+        * If an extended guest request was issued and the supplied certificate
+        * buffer was not large enough, a standard guest request was issued to
+        * prevent IV reuse. If the standard request was successful, return -EIO
+        * to the caller, as the extended request originally would have.
+        */
+       if (!rc && err == SNP_GUEST_REQ_INVALID_LEN)
+               return -EIO;
+
        if (rc) {
                dev_alert(snp_dev->dev,
                          "Detected error from ASP request. rc: %d, fw_err: %llu\n",
@@ -395,9 +412,6 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
                goto disable_vmpck;
        }
 
-       /* Increment to new message sequence after payload decryption was successful. */
-       snp_inc_msg_seqno(snp_dev);
-
        return 0;
 
 disable_vmpck:
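
Moving the increment ahead of the error handling matters because the AES-GCM IV of a guest message is derived from the sequence number: replaying a number under the same VMPCK would reuse an IV. A loose sketch of the invariant, with purely illustrative names:

    /*
     * Illustrative only -- not the driver's helpers.  The invariant the
     * hunk above enforces: once a request may have reached the ASP, its
     * sequence number is burned, success or failure alike.
     */
    struct guest_ctx { unsigned long long seqno; };

    static int transport_send(void *msg) { return 0; }     /* stand-in */

    static int send_guest_msg(struct guest_ctx *ctx, void *msg)
    {
            int rc = transport_send(msg);

            /*
             * Request and response each consume one number, which is
             * why the real snp_inc_msg_seqno() advances by two.
             */
            ctx->seqno += 2;

            return rc;
    }
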
index 5de74686f12b0224ae12490b4793d46cf610dfbf..f0872970daf9ae2ad978969919c5f1d462e7c51e 100644 (file)
@@ -1871,7 +1871,9 @@ config GXP_WATCHDOG
 config MT7621_WDT
        tristate "Mediatek SoC watchdog"
        select WATCHDOG_CORE
-       depends on SOC_MT7620 || SOC_MT7621
+       select REGMAP_MMIO
+       select MFD_SYSCON
+       depends on SOC_MT7620 || SOC_MT7621 || COMPILE_TEST
        help
          Hardware driver for the Mediatek/Ralink MT7621/8 SoC Watchdog Timer.
 
index 16aca21f13d6aec71df4f2e08cd4a219daef200a..eddeb0fede896d88e8c1ea2283cd305f076d5df8 100644 (file)
@@ -136,11 +136,6 @@ static int apple_wdt_restart(struct watchdog_device *wdd, unsigned long mode,
        return 0;
 }
 
-static void apple_wdt_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static struct watchdog_ops apple_wdt_ops = {
        .owner = THIS_MODULE,
        .start = apple_wdt_start,
@@ -162,7 +157,6 @@ static int apple_wdt_probe(struct platform_device *pdev)
        struct apple_wdt *wdt;
        struct clk *clk;
        u32 wdt_ctrl;
-       int ret;
 
        wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
        if (!wdt)
@@ -172,19 +166,9 @@ static int apple_wdt_probe(struct platform_device *pdev)
        if (IS_ERR(wdt->regs))
                return PTR_ERR(wdt->regs);
 
-       clk = devm_clk_get(dev, NULL);
+       clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(clk))
                return PTR_ERR(clk);
-
-       ret = clk_prepare_enable(clk);
-       if (ret)
-               return ret;
-
-       ret = devm_add_action_or_reset(dev, apple_wdt_clk_disable_unprepare,
-                                      clk);
-       if (ret)
-               return ret;
-
        wdt->clk_rate = clk_get_rate(clk);
        if (!wdt->clk_rate)
                return -EINVAL;
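
This apple_wdt hunk is the first of a long run of identical conversions below (armada_37xx, bcm7038, cdns, davinci, imx7ulp, lpc18xx, meson_gxbb, pic32, pnx4008, qcom, realtek-otto, rtd119x, rzn1, visconti and xlnx all follow): devm_clk_get_enabled() folds get, prepare_enable and the devres-managed disable/unprepare into one call. A minimal sketch of the resulting probe pattern, using a hypothetical driver:

    #include <linux/clk.h>
    #include <linux/device.h>

    struct foo_wdt {                    /* hypothetical driver state */
            struct clk *clk;
            unsigned long clk_rate;
    };

    static int foo_wdt_probe_clk(struct device *dev, struct foo_wdt *wdt)
    {
            /* One call: get + prepare_enable + devres-managed disable. */
            wdt->clk = devm_clk_get_enabled(dev, NULL);
            if (IS_ERR(wdt->clk))
                    return dev_err_probe(dev, PTR_ERR(wdt->clk),
                                         "failed to get clock\n");

            wdt->clk_rate = clk_get_rate(wdt->clk);
            return wdt->clk_rate ? 0 : -EINVAL;
    }
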
index ac9fed1ef681b39ea0c4bd53cc08927cc88716a8..e58652939f8a99085ca536ab61a8c662bdbfe1ec 100644 (file)
@@ -246,11 +246,6 @@ static const struct watchdog_ops armada_37xx_wdt_ops = {
        .get_timeleft = armada_37xx_wdt_get_timeleft,
 };
 
-static void armada_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int armada_37xx_wdt_probe(struct platform_device *pdev)
 {
        struct armada_37xx_watchdog *dev;
@@ -280,18 +275,10 @@ static int armada_37xx_wdt_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        /* init clock */
-       dev->clk = devm_clk_get(&pdev->dev, NULL);
+       dev->clk = devm_clk_get_enabled(&pdev->dev, NULL);
        if (IS_ERR(dev->clk))
                return PTR_ERR(dev->clk);
 
-       ret = clk_prepare_enable(dev->clk);
-       if (ret)
-               return ret;
-       ret = devm_add_action_or_reset(&pdev->dev,
-                                      armada_clk_disable_unprepare, dev->clk);
-       if (ret)
-               return ret;
-
        dev->clk_rate = clk_get_rate(dev->clk);
        if (!dev->clk_rate)
                return -EINVAL;
index 86b5331bc491166bd1eedc89796cecae624b0e7c..c1e79874a2bbc0b2e10496bcda9f74ba4651bb76 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/kstrtox.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
index 5126454bb86138116f976bd69c1ba94df24b5bcd..d57409c1a4d12f830fbe246f9ef109245063f4cd 100644 (file)
@@ -270,7 +270,7 @@ static int at91wdt_remove(struct platform_device *pdev)
        misc_deregister(&at91wdt_miscdev);
        at91wdt_miscdev.parent = NULL;
 
-       return res;
+       return 0;
 }
 
 static void at91wdt_shutdown(struct platform_device *pdev)
index 292b5a1ca8318a50040957f68effe0685095530b..fed7be24644209919bcbfd0332e294dbd62c44db 100644 (file)
@@ -206,10 +206,9 @@ static int at91_wdt_init(struct platform_device *pdev, struct at91wdt *wdt)
                         "min heartbeat and max heartbeat might be too close for the system to handle it correctly\n");
 
        if ((tmp & AT91_WDT_WDFIEN) && wdt->irq) {
-               err = request_irq(wdt->irq, wdt_interrupt,
-                                 IRQF_SHARED | IRQF_IRQPOLL |
-                                 IRQF_NO_SUSPEND,
-                                 pdev->name, wdt);
+               err = devm_request_irq(dev, wdt->irq, wdt_interrupt,
+                                      IRQF_SHARED | IRQF_IRQPOLL | IRQF_NO_SUSPEND,
+                                      pdev->name, wdt);
                if (err)
                        return err;
        }
index 9388838899aca792ef616ed77fef0dd3528d4373..e038dd66b819823c69f5e2879018acc6081999fa 100644 (file)
@@ -127,11 +127,6 @@ static const struct watchdog_ops bcm7038_wdt_ops = {
        .get_timeleft   = bcm7038_wdt_get_timeleft,
 };
 
-static void bcm7038_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int bcm7038_wdt_probe(struct platform_device *pdev)
 {
        struct bcm7038_wdt_platform_data *pdata = pdev->dev.platform_data;
@@ -153,17 +148,9 @@ static int bcm7038_wdt_probe(struct platform_device *pdev)
        if (pdata && pdata->clk_name)
                clk_name = pdata->clk_name;
 
-       wdt->clk = devm_clk_get(dev, clk_name);
+       wdt->clk = devm_clk_get_enabled(dev, clk_name);
        /* If unable to get clock, use default frequency */
        if (!IS_ERR(wdt->clk)) {
-               err = clk_prepare_enable(wdt->clk);
-               if (err)
-                       return err;
-               err = devm_add_action_or_reset(dev,
-                                              bcm7038_clk_disable_unprepare,
-                                              wdt->clk);
-               if (err)
-                       return err;
                wdt->rate = clk_get_rate(wdt->clk);
                /* Prevent divide-by-zero exception */
                if (!wdt->rate)
index bc99e916493064b371be73f09e772daa40563a30..23d41043863f6e3152498d7f977dd8478fb24cbf 100644 (file)
@@ -274,11 +274,6 @@ static const struct watchdog_ops cdns_wdt_ops = {
        .set_timeout = cdns_wdt_settimeout,
 };
 
-static void cdns_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 /************************Platform Operations*****************************/
 /**
  * cdns_wdt_probe - Probe call for the device.
@@ -333,21 +328,11 @@ static int cdns_wdt_probe(struct platform_device *pdev)
        watchdog_stop_on_reboot(cdns_wdt_device);
        watchdog_set_drvdata(cdns_wdt_device, wdt);
 
-       wdt->clk = devm_clk_get(dev, NULL);
+       wdt->clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(wdt->clk))
                return dev_err_probe(dev, PTR_ERR(wdt->clk),
                                     "input clock not found\n");
 
-       ret = clk_prepare_enable(wdt->clk);
-       if (ret) {
-               dev_err(dev, "unable to enable clock\n");
-               return ret;
-       }
-       ret = devm_add_action_or_reset(dev, cdns_clk_disable_unprepare,
-                                      wdt->clk);
-       if (ret)
-               return ret;
-
        clock_f = clk_get_rate(wdt->clk);
        if (clock_f <= CDNS_WDT_CLK_75MHZ) {
                wdt->prescaler = CDNS_WDT_PRESCALE_512;
index f02cbd530538b4b10f0326d20f637b4d141065d6..426962547df160385cb7d6093513468ecb3116cd 100644 (file)
@@ -155,11 +155,20 @@ static int da9062_wdt_restart(struct watchdog_device *wdd, unsigned long action,
 {
        struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd);
        struct i2c_client *client = to_i2c_client(wdt->hw->dev);
+       union i2c_smbus_data msg;
        int ret;
 
-       /* Don't use regmap because it is not atomic safe */
-       ret = i2c_smbus_write_byte_data(client, DA9062AA_CONTROL_F,
-                                       DA9062AA_SHUTDOWN_MASK);
+       /*
+        * Don't use regmap because it is not atomic safe. Additionally, use
+        * unlocked flavor of i2c_smbus_xfer to avoid scenario where i2c bus
+        * might previously be locked by some process unable to release the
+        * lock due to interrupts already being disabled at this late stage.
+        */
+       msg.byte = DA9062AA_SHUTDOWN_MASK;
+       ret = __i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+                              I2C_SMBUS_WRITE, DA9062AA_CONTROL_F,
+                              I2C_SMBUS_BYTE_DATA, &msg);
+
        if (ret < 0)
                dev_alert(wdt->hw->dev, "Failed to shutdown (err = %d)\n",
                          ret);
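
Restart handlers run with interrupts disabled, so an adapter lock still held by a preempted task could never be released; the unlocked __i2c_smbus_xfer() variant skips that lock entirely. A hedged sketch of the calling convention (the union member carries the payload for byte-data transfers):

    #include <linux/i2c.h>

    /* Illustrative: write one register via the unlocked SMBus path. */
    static int unlocked_reg_write(struct i2c_client *client, u8 reg, u8 val)
    {
            union i2c_smbus_data data = { .byte = val };

            /* No adapter locking: only safe where nothing else can run. */
            return __i2c_smbus_xfer(client->adapter, client->addr,
                                    client->flags, I2C_SMBUS_WRITE, reg,
                                    I2C_SMBUS_BYTE_DATA, &data);
    }
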
index 09a4af4c58fc85f6d9cc858325a1b43bf18819fc..684667469b10c0bc1db3b31a236b0444082145f4 100644 (file)
@@ -174,11 +174,20 @@ static int da9063_wdt_restart(struct watchdog_device *wdd, unsigned long action,
 {
        struct da9063 *da9063 = watchdog_get_drvdata(wdd);
        struct i2c_client *client = to_i2c_client(da9063->dev);
+       union i2c_smbus_data msg;
        int ret;
 
-       /* Don't use regmap because it is not atomic safe */
-       ret = i2c_smbus_write_byte_data(client, DA9063_REG_CONTROL_F,
-                                       DA9063_SHUTDOWN);
+       /*
+        * Don't use regmap because it is not atomic safe. Additionally, use
+        * unlocked flavor of i2c_smbus_xfer to avoid scenario where i2c bus
+        * might previously be locked by some process unable to release the
+        * lock due to interrupts already being disabled at this late stage.
+        */
+       msg.byte = DA9063_SHUTDOWN;
+       ret = __i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+                       I2C_SMBUS_WRITE, DA9063_REG_CONTROL_F,
+                       I2C_SMBUS_BYTE_DATA, &msg);
+
        if (ret < 0)
                dev_alert(da9063->dev, "Failed to shutdown (err = %d)\n",
                          ret);
index 584a56893b81c42961d6df8c8974837f61e7565c..5f2184bda7b27bc3f5e5ebc6d6afcc5f25afc49b 100644 (file)
@@ -189,14 +189,8 @@ static const struct watchdog_ops davinci_wdt_ops = {
        .restart        = davinci_wdt_restart,
 };
 
-static void davinci_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int davinci_wdt_probe(struct platform_device *pdev)
 {
-       int ret = 0;
        struct device *dev = &pdev->dev;
        struct watchdog_device *wdd;
        struct davinci_wdt_device *davinci_wdt;
@@ -205,21 +199,11 @@ static int davinci_wdt_probe(struct platform_device *pdev)
        if (!davinci_wdt)
                return -ENOMEM;
 
-       davinci_wdt->clk = devm_clk_get(dev, NULL);
+       davinci_wdt->clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(davinci_wdt->clk))
                return dev_err_probe(dev, PTR_ERR(davinci_wdt->clk),
                                     "failed to get clock node\n");
 
-       ret = clk_prepare_enable(davinci_wdt->clk);
-       if (ret) {
-               dev_err(dev, "failed to prepare clock\n");
-               return ret;
-       }
-       ret = devm_add_action_or_reset(dev, davinci_clk_disable_unprepare,
-                                      davinci_wdt->clk);
-       if (ret)
-               return ret;
-
        platform_set_drvdata(pdev, davinci_wdt);
 
        wdd                     = &davinci_wdt->wdd;
index 52962e8d11a6f9f1a23b36eb411239c1b00a9643..462f15bd5ffa611cbc51bfbb0af32beda3567fcb 100644 (file)
@@ -663,6 +663,7 @@ static int dw_wdt_drv_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, dw_wdt);
 
        watchdog_set_restart_priority(wdd, 128);
+       watchdog_stop_on_reboot(wdd);
 
        ret = watchdog_register_device(wdd);
        if (ret)
index e937b4dd28be7542564b4e602053652bd81ed70a..264857d314da8b5833f994b1907b2ef254c5f80b 100644 (file)
@@ -441,11 +441,10 @@ static bool iTCO_wdt_set_running(struct iTCO_wdt_private *p)
  *     Kernel Interfaces
  */
 
-static const struct watchdog_info ident = {
+static struct watchdog_info ident = {
        .options =              WDIOF_SETTIMEOUT |
                                WDIOF_KEEPALIVEPING |
                                WDIOF_MAGICCLOSE,
-       .firmware_version =     0,
        .identity =             DRV_NAME,
 };
 
@@ -563,6 +562,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
                break;
        }
 
+       ident.firmware_version = p->iTCO_version;
        p->wddev.info = &ident,
        p->wddev.ops = &iTCO_wdt_ops,
        p->wddev.bootstatus = 0;
index b57ff3787052d486b1b0df9f921f2fd1c8f38fe8..a55f801895d483e1a3bc9e6e7f6f1e03a73f6961 100644 (file)
@@ -175,16 +175,11 @@ static const struct watchdog_ops pdc_wdt_ops = {
        .restart        = pdc_wdt_restart,
 };
 
-static void pdc_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int pdc_wdt_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        u64 div;
-       int ret, val;
+       int val;
        unsigned long clk_rate;
        struct pdc_wdt_dev *pdc_wdt;
 
@@ -196,38 +191,18 @@ static int pdc_wdt_probe(struct platform_device *pdev)
        if (IS_ERR(pdc_wdt->base))
                return PTR_ERR(pdc_wdt->base);
 
-       pdc_wdt->sys_clk = devm_clk_get(dev, "sys");
+       pdc_wdt->sys_clk = devm_clk_get_enabled(dev, "sys");
        if (IS_ERR(pdc_wdt->sys_clk)) {
                dev_err(dev, "failed to get the sys clock\n");
                return PTR_ERR(pdc_wdt->sys_clk);
        }
 
-       pdc_wdt->wdt_clk = devm_clk_get(dev, "wdt");
+       pdc_wdt->wdt_clk = devm_clk_get_enabled(dev, "wdt");
        if (IS_ERR(pdc_wdt->wdt_clk)) {
                dev_err(dev, "failed to get the wdt clock\n");
                return PTR_ERR(pdc_wdt->wdt_clk);
        }
 
-       ret = clk_prepare_enable(pdc_wdt->sys_clk);
-       if (ret) {
-               dev_err(dev, "could not prepare or enable sys clock\n");
-               return ret;
-       }
-       ret = devm_add_action_or_reset(dev, pdc_clk_disable_unprepare,
-                                      pdc_wdt->sys_clk);
-       if (ret)
-               return ret;
-
-       ret = clk_prepare_enable(pdc_wdt->wdt_clk);
-       if (ret) {
-               dev_err(dev, "could not prepare or enable wdt clock\n");
-               return ret;
-       }
-       ret = devm_add_action_or_reset(dev, pdc_clk_disable_unprepare,
-                                      pdc_wdt->wdt_clk);
-       if (ret)
-               return ret;
-
        /* We use the clock rate to calculate the max timeout */
        clk_rate = clk_get_rate(pdc_wdt->wdt_clk);
        if (clk_rate == 0) {
index d0c5d47ddede26f1e02c34c9250a388096598669..19ab7b3d286b9d74cb0fb7cfa47ad4b4b0f7bce2 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/watchdog.h>
@@ -35,6 +36,7 @@
 
 #define IMX2_WDT_WCR           0x00            /* Control Register */
 #define IMX2_WDT_WCR_WT                (0xFF << 8)     /* -> Watchdog Timeout Field */
+#define IMX2_WDT_WCR_WDW       BIT(7)          /* -> Watchdog disable for WAIT */
 #define IMX2_WDT_WCR_WDA       BIT(5)          /* -> External Reset WDOG_B */
 #define IMX2_WDT_WCR_SRS       BIT(4)          /* -> Software Reset Signal */
 #define IMX2_WDT_WCR_WRE       BIT(3)          /* -> WDOG Reset Enable */
 
 #define WDOG_SEC_TO_COUNT(s)   ((s * 2 - 1) << 8)
 
+struct imx2_wdt_data {
+       bool wdw_supported;
+};
+
 struct imx2_wdt_device {
        struct clk *clk;
        struct regmap *regmap;
        struct watchdog_device wdog;
+       const struct imx2_wdt_data *data;
        bool ext_reset;
        bool clk_is_on;
        bool no_ping;
+       bool sleep_wait;
 };
 
 static bool nowayout = WATCHDOG_NOWAYOUT;
@@ -129,6 +137,9 @@ static inline void imx2_wdt_setup(struct watchdog_device *wdog)
 
        /* Suspend timer in low power mode, write once-only */
        val |= IMX2_WDT_WCR_WDZST;
+       /* Suspend timer in low power WAIT mode, write once-only */
+       if (wdev->sleep_wait)
+               val |= IMX2_WDT_WCR_WDW;
        /* Strip the old watchdog Time-Out value */
        val &= ~IMX2_WDT_WCR_WT;
        /* Generate internal chip-level reset if WDOG times out */
@@ -292,6 +303,8 @@ static int __init imx2_wdt_probe(struct platform_device *pdev)
        wdog->max_hw_heartbeat_ms = IMX2_WDT_MAX_TIME * 1000;
        wdog->parent            = dev;
 
+       wdev->data = of_device_get_match_data(dev);
+
        ret = platform_get_irq(pdev, 0);
        if (ret > 0)
                if (!devm_request_irq(dev, ret, imx2_wdt_isr, 0,
@@ -313,9 +326,18 @@ static int __init imx2_wdt_probe(struct platform_device *pdev)
 
        wdev->ext_reset = of_property_read_bool(dev->of_node,
                                                "fsl,ext-reset-output");
+
+       if (of_property_read_bool(dev->of_node, "fsl,suspend-in-wait")) {
+               if (!wdev->data->wdw_supported) {
+                       dev_err(dev, "suspend-in-wait not supported\n");
+                       return -EINVAL;
+               }
+               wdev->sleep_wait = true;
+       }
+
        /*
         * The i.MX7D doesn't support low power mode, so we need to ping the watchdog
-        * during suspend.
+        * during suspend. Interaction with "fsl,suspend-in-wait" is unknown!
         */
        wdev->no_ping = !of_device_is_compatible(dev->of_node, "fsl,imx7d-wdt");
        platform_set_drvdata(pdev, wdog);
@@ -417,9 +439,36 @@ static int __maybe_unused imx2_wdt_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(imx2_wdt_pm_ops, imx2_wdt_suspend,
                         imx2_wdt_resume);
 
+struct imx2_wdt_data imx_wdt = {
+       .wdw_supported = true,
+};
+
+struct imx2_wdt_data imx_wdt_legacy = {
+       .wdw_supported = false,
+};
+
 static const struct of_device_id imx2_wdt_dt_ids[] = {
-       { .compatible = "fsl,imx21-wdt", },
-       { .compatible = "fsl,imx7d-wdt", },
+       { .compatible = "fsl,imx21-wdt", .data = &imx_wdt_legacy },
+       { .compatible = "fsl,imx25-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx27-wdt", .data = &imx_wdt_legacy },
+       { .compatible = "fsl,imx31-wdt", .data = &imx_wdt_legacy },
+       { .compatible = "fsl,imx35-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx50-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx51-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx53-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx6q-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx6sl-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx6sll-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx6sx-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx6ul-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx7d-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx8mm-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx8mn-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx8mp-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,imx8mq-wdt", .data = &imx_wdt },
+       { .compatible = "fsl,ls1012a-wdt", .data = &imx_wdt_legacy },
+       { .compatible = "fsl,ls1043a-wdt", .data = &imx_wdt_legacy },
+       { .compatible = "fsl,vf610-wdt", .data = &imx_wdt },
        { /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, imx2_wdt_dt_ids);
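
of_device_get_match_data() returns the .data pointer of whichever compatible matched, which is how the probe above can gate "fsl,suspend-in-wait" per SoC. A condensed sketch of the pattern with hypothetical names:

    #include <linux/of_device.h>
    #include <linux/platform_device.h>

    struct foo_data {                   /* hypothetical per-SoC data */
            bool has_feature;
    };

    static const struct foo_data foo_v2 = { .has_feature = true };

    static const struct of_device_id foo_dt_ids[] = {
            { .compatible = "vendor,foo-v2", .data = &foo_v2 },
            { /* sentinel */ }
    };

    static int foo_probe(struct platform_device *pdev)
    {
            /* Returns the .data of whichever compatible matched. */
            const struct foo_data *data = of_device_get_match_data(&pdev->dev);

            if (!data || !data->has_feature)
                    return -EINVAL;     /* optional property unsupported */
            return 0;
    }
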
index 2897902090b3971d1fbc2bf0abc72d868a68dbd2..7ca486794ba7fede52e01a0f7f4181a46bd4061e 100644 (file)
@@ -299,11 +299,6 @@ static int imx7ulp_wdt_init(struct imx7ulp_wdt_device *wdt, unsigned int timeout
        return ret;
 }
 
-static void imx7ulp_wdt_action(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int imx7ulp_wdt_probe(struct platform_device *pdev)
 {
        struct imx7ulp_wdt_device *imx7ulp_wdt;
@@ -321,7 +316,7 @@ static int imx7ulp_wdt_probe(struct platform_device *pdev)
        if (IS_ERR(imx7ulp_wdt->base))
                return PTR_ERR(imx7ulp_wdt->base);
 
-       imx7ulp_wdt->clk = devm_clk_get(dev, NULL);
+       imx7ulp_wdt->clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(imx7ulp_wdt->clk)) {
                dev_err(dev, "Failed to get watchdog clock\n");
                return PTR_ERR(imx7ulp_wdt->clk);
@@ -336,14 +331,6 @@ static int imx7ulp_wdt_probe(struct platform_device *pdev)
                dev_info(dev, "imx7ulp wdt probe\n");
        }
 
-       ret = clk_prepare_enable(imx7ulp_wdt->clk);
-       if (ret)
-               return ret;
-
-       ret = devm_add_action_or_reset(dev, imx7ulp_wdt_action, imx7ulp_wdt->clk);
-       if (ret)
-               return ret;
-
        wdog = &imx7ulp_wdt->wdd;
        wdog->info = &imx7ulp_wdt_info;
        wdog->ops = &imx7ulp_wdt_ops;
index 60b6d74f267dd67dbd89b5346f9711295c2e93c7..1b9b5f21a0df53b64efac5552dd6130bca6dbf8e 100644 (file)
@@ -197,16 +197,10 @@ static const struct watchdog_ops lpc18xx_wdt_ops = {
        .restart        = lpc18xx_wdt_restart,
 };
 
-static void lpc18xx_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int lpc18xx_wdt_probe(struct platform_device *pdev)
 {
        struct lpc18xx_wdt_dev *lpc18xx_wdt;
        struct device *dev = &pdev->dev;
-       int ret;
 
        lpc18xx_wdt = devm_kzalloc(dev, sizeof(*lpc18xx_wdt), GFP_KERNEL);
        if (!lpc18xx_wdt)
@@ -216,38 +210,18 @@ static int lpc18xx_wdt_probe(struct platform_device *pdev)
        if (IS_ERR(lpc18xx_wdt->base))
                return PTR_ERR(lpc18xx_wdt->base);
 
-       lpc18xx_wdt->reg_clk = devm_clk_get(dev, "reg");
+       lpc18xx_wdt->reg_clk = devm_clk_get_enabled(dev, "reg");
        if (IS_ERR(lpc18xx_wdt->reg_clk)) {
                dev_err(dev, "failed to get the reg clock\n");
                return PTR_ERR(lpc18xx_wdt->reg_clk);
        }
 
-       lpc18xx_wdt->wdt_clk = devm_clk_get(dev, "wdtclk");
+       lpc18xx_wdt->wdt_clk = devm_clk_get_enabled(dev, "wdtclk");
        if (IS_ERR(lpc18xx_wdt->wdt_clk)) {
                dev_err(dev, "failed to get the wdt clock\n");
                return PTR_ERR(lpc18xx_wdt->wdt_clk);
        }
 
-       ret = clk_prepare_enable(lpc18xx_wdt->reg_clk);
-       if (ret) {
-               dev_err(dev, "could not prepare or enable sys clock\n");
-               return ret;
-       }
-       ret = devm_add_action_or_reset(dev, lpc18xx_clk_disable_unprepare,
-                                      lpc18xx_wdt->reg_clk);
-       if (ret)
-               return ret;
-
-       ret = clk_prepare_enable(lpc18xx_wdt->wdt_clk);
-       if (ret) {
-               dev_err(dev, "could not prepare or enable wdt clock\n");
-               return ret;
-       }
-       ret = devm_add_action_or_reset(dev, lpc18xx_clk_disable_unprepare,
-                                      lpc18xx_wdt->wdt_clk);
-       if (ret)
-               return ret;
-
        /* We use the clock rate to calculate timeouts */
        lpc18xx_wdt->clk_rate = clk_get_rate(lpc18xx_wdt->wdt_clk);
        if (lpc18xx_wdt->clk_rate == 0) {
index 981a2f7c3bec2641c9f551e11c9767149215f363..35d80cb39856c137095671339ab65255b749ad77 100644 (file)
@@ -146,16 +146,10 @@ static const struct of_device_id meson_gxbb_wdt_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, meson_gxbb_wdt_dt_ids);
 
-static void meson_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int meson_gxbb_wdt_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct meson_gxbb_wdt *data;
-       int ret;
        u32 ctrl_reg;
 
        data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
@@ -166,18 +160,10 @@ static int meson_gxbb_wdt_probe(struct platform_device *pdev)
        if (IS_ERR(data->reg_base))
                return PTR_ERR(data->reg_base);
 
-       data->clk = devm_clk_get(dev, NULL);
+       data->clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(data->clk))
                return PTR_ERR(data->clk);
 
-       ret = clk_prepare_enable(data->clk);
-       if (ret)
-               return ret;
-       ret = devm_add_action_or_reset(dev, meson_clk_disable_unprepare,
-                                      data->clk);
-       if (ret)
-               return ret;
-
        platform_set_drvdata(pdev, data);
 
        data->wdt_dev.parent = dev;
index a8aa3522cfda8972ce02c858cf04edb05ccadac3..442731bba19469f5e61c0e7dc7351d4703d85beb 100644 (file)
@@ -15,8 +15,8 @@
 #include <linux/moduleparam.h>
 #include <linux/platform_device.h>
 #include <linux/mod_devicetable.h>
-
-#include <asm/mach-ralink/ralink_regs.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #define SYSC_RSTSTAT                   0x38
 #define WDT_RST_CAUSE                  BIT(1)
 #define TMR1CTL_RESTART                        BIT(9)
 #define TMR1CTL_PRESCALE_SHIFT         16
 
-static void __iomem *mt7621_wdt_base;
-static struct reset_control *mt7621_wdt_reset;
+struct mt7621_wdt_data {
+       void __iomem *base;
+       struct reset_control *rst;
+       struct regmap *sysc;
+       struct watchdog_device wdt;
+};
 
 static bool nowayout = WATCHDOG_NOWAYOUT;
 module_param(nowayout, bool, 0);
@@ -40,27 +44,31 @@ MODULE_PARM_DESC(nowayout,
                 "Watchdog cannot be stopped once started (default="
                 __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
-static inline void rt_wdt_w32(unsigned reg, u32 val)
+static inline void rt_wdt_w32(void __iomem *base, unsigned int reg, u32 val)
 {
-       iowrite32(val, mt7621_wdt_base + reg);
+       iowrite32(val, base + reg);
 }
 
-static inline u32 rt_wdt_r32(unsigned reg)
+static inline u32 rt_wdt_r32(void __iomem *base, unsigned int reg)
 {
-       return ioread32(mt7621_wdt_base + reg);
+       return ioread32(base + reg);
 }
 
 static int mt7621_wdt_ping(struct watchdog_device *w)
 {
-       rt_wdt_w32(TIMER_REG_TMRSTAT, TMR1CTL_RESTART);
+       struct mt7621_wdt_data *drvdata = watchdog_get_drvdata(w);
+
+       rt_wdt_w32(drvdata->base, TIMER_REG_TMRSTAT, TMR1CTL_RESTART);
 
        return 0;
 }
 
 static int mt7621_wdt_set_timeout(struct watchdog_device *w, unsigned int t)
 {
+       struct mt7621_wdt_data *drvdata = watchdog_get_drvdata(w);
+
        w->timeout = t;
-       rt_wdt_w32(TIMER_REG_TMR1LOAD, t * 1000);
+       rt_wdt_w32(drvdata->base, TIMER_REG_TMR1LOAD, t * 1000);
        mt7621_wdt_ping(w);
 
        return 0;
@@ -68,36 +76,41 @@ static int mt7621_wdt_set_timeout(struct watchdog_device *w, unsigned int t)
 
 static int mt7621_wdt_start(struct watchdog_device *w)
 {
+       struct mt7621_wdt_data *drvdata = watchdog_get_drvdata(w);
        u32 t;
 
        /* set the prescaler to 1ms == 1000us */
-       rt_wdt_w32(TIMER_REG_TMR1CTL, 1000 << TMR1CTL_PRESCALE_SHIFT);
+       rt_wdt_w32(drvdata->base, TIMER_REG_TMR1CTL, 1000 << TMR1CTL_PRESCALE_SHIFT);
 
        mt7621_wdt_set_timeout(w, w->timeout);
 
-       t = rt_wdt_r32(TIMER_REG_TMR1CTL);
+       t = rt_wdt_r32(drvdata->base, TIMER_REG_TMR1CTL);
        t |= TMR1CTL_ENABLE;
-       rt_wdt_w32(TIMER_REG_TMR1CTL, t);
+       rt_wdt_w32(drvdata->base, TIMER_REG_TMR1CTL, t);
 
        return 0;
 }
 
 static int mt7621_wdt_stop(struct watchdog_device *w)
 {
+       struct mt7621_wdt_data *drvdata = watchdog_get_drvdata(w);
        u32 t;
 
        mt7621_wdt_ping(w);
 
-       t = rt_wdt_r32(TIMER_REG_TMR1CTL);
+       t = rt_wdt_r32(drvdata->base, TIMER_REG_TMR1CTL);
        t &= ~TMR1CTL_ENABLE;
-       rt_wdt_w32(TIMER_REG_TMR1CTL, t);
+       rt_wdt_w32(drvdata->base, TIMER_REG_TMR1CTL, t);
 
        return 0;
 }
 
-static int mt7621_wdt_bootcause(void)
+static int mt7621_wdt_bootcause(struct mt7621_wdt_data *d)
 {
-       if (rt_sysc_r32(SYSC_RSTSTAT) & WDT_RST_CAUSE)
+       u32 val;
+
+       regmap_read(d->sysc, SYSC_RSTSTAT, &val);
+       if (val & WDT_RST_CAUSE)
                return WDIOF_CARDRESET;
 
        return 0;
@@ -105,7 +118,9 @@ static int mt7621_wdt_bootcause(void)
 
 static int mt7621_wdt_is_running(struct watchdog_device *w)
 {
-       return !!(rt_wdt_r32(TIMER_REG_TMR1CTL) & TMR1CTL_ENABLE);
+       struct mt7621_wdt_data *drvdata = watchdog_get_drvdata(w);
+
+       return !!(rt_wdt_r32(drvdata->base, TIMER_REG_TMR1CTL) & TMR1CTL_ENABLE);
 }
 
 static const struct watchdog_info mt7621_wdt_info = {
@@ -121,30 +136,47 @@ static const struct watchdog_ops mt7621_wdt_ops = {
        .set_timeout = mt7621_wdt_set_timeout,
 };
 
-static struct watchdog_device mt7621_wdt_dev = {
-       .info = &mt7621_wdt_info,
-       .ops = &mt7621_wdt_ops,
-       .min_timeout = 1,
-       .max_timeout = 0xfffful / 1000,
-};
-
 static int mt7621_wdt_probe(struct platform_device *pdev)
 {
+       struct device_node *np = pdev->dev.of_node;
        struct device *dev = &pdev->dev;
-       mt7621_wdt_base = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(mt7621_wdt_base))
-               return PTR_ERR(mt7621_wdt_base);
+       struct watchdog_device *mt7621_wdt;
+       struct mt7621_wdt_data *drvdata;
+       int err;
+
+       drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+       if (!drvdata)
+               return -ENOMEM;
+
+       drvdata->sysc = syscon_regmap_lookup_by_phandle(np, "mediatek,sysctl");
+       if (IS_ERR(drvdata->sysc)) {
+               drvdata->sysc = syscon_regmap_lookup_by_compatible("mediatek,mt7621-sysc");
+               if (IS_ERR(drvdata->sysc))
+                       return PTR_ERR(drvdata->sysc);
+       }
+
+       drvdata->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(drvdata->base))
+               return PTR_ERR(drvdata->base);
+
+       drvdata->rst = devm_reset_control_get_exclusive(dev, NULL);
+       if (!IS_ERR(drvdata->rst))
+               reset_control_deassert(drvdata->rst);
+
+       mt7621_wdt = &drvdata->wdt;
+       mt7621_wdt->info = &mt7621_wdt_info;
+       mt7621_wdt->ops = &mt7621_wdt_ops;
+       mt7621_wdt->min_timeout = 1;
+       mt7621_wdt->max_timeout = 0xfffful / 1000;
+       mt7621_wdt->parent = dev;
 
-       mt7621_wdt_reset = devm_reset_control_get_exclusive(dev, NULL);
-       if (!IS_ERR(mt7621_wdt_reset))
-               reset_control_deassert(mt7621_wdt_reset);
+       mt7621_wdt->bootstatus = mt7621_wdt_bootcause(drvdata);
 
-       mt7621_wdt_dev.bootstatus = mt7621_wdt_bootcause();
+       watchdog_init_timeout(mt7621_wdt, mt7621_wdt->max_timeout, dev);
+       watchdog_set_nowayout(mt7621_wdt, nowayout);
+       watchdog_set_drvdata(mt7621_wdt, drvdata);
 
-       watchdog_init_timeout(&mt7621_wdt_dev, mt7621_wdt_dev.max_timeout,
-                             dev);
-       watchdog_set_nowayout(&mt7621_wdt_dev, nowayout);
-       if (mt7621_wdt_is_running(&mt7621_wdt_dev)) {
+       if (mt7621_wdt_is_running(mt7621_wdt)) {
                /*
                 * Make sure to apply timeout from watchdog core, taking
                 * the prescaler of this driver here into account (the
@@ -154,17 +186,25 @@ static int mt7621_wdt_probe(struct platform_device *pdev)
                 * we first disable the watchdog, set the new prescaler
                 * and timeout, and then re-enable the watchdog.
                 */
-               mt7621_wdt_stop(&mt7621_wdt_dev);
-               mt7621_wdt_start(&mt7621_wdt_dev);
-               set_bit(WDOG_HW_RUNNING, &mt7621_wdt_dev.status);
+               mt7621_wdt_stop(mt7621_wdt);
+               mt7621_wdt_start(mt7621_wdt);
+               set_bit(WDOG_HW_RUNNING, &mt7621_wdt->status);
        }
 
-       return devm_watchdog_register_device(dev, &mt7621_wdt_dev);
+       err = devm_watchdog_register_device(dev, &drvdata->wdt);
+       if (err)
+               return err;
+
+       platform_set_drvdata(pdev, drvdata);
+
+       return 0;
 }
 
 static void mt7621_wdt_shutdown(struct platform_device *pdev)
 {
-       mt7621_wdt_stop(&mt7621_wdt_dev);
+       struct mt7621_wdt_data *drvdata = platform_get_drvdata(pdev);
+
+       mt7621_wdt_stop(&drvdata->wdt);
 }
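
The probe tries the explicit "mediatek,sysctl" phandle first and only then falls back to a lookup by compatible, so device trees that predate the phandle keep working. A minimal sketch of that lookup order, assuming the same binding names:

    #include <linux/of.h>
    #include <linux/mfd/syscon.h>
    #include <linux/regmap.h>

    static struct regmap *mt7621_get_sysc(struct device_node *np)
    {
            struct regmap *sysc;

            /* Preferred: explicit phandle in the watchdog node. */
            sysc = syscon_regmap_lookup_by_phandle(np, "mediatek,sysctl");
            if (!IS_ERR(sysc))
                    return sysc;

            /* Fallback for older DTs without the phandle. */
            return syscon_regmap_lookup_by_compatible("mediatek,mt7621-sysc");
    }
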
 
 static const struct of_device_id mt7621_wdt_match[] = {
index 3e6212591e697b8e7f4f5c88a6b7395ec2701d95..a9c437598e7eba1480b62577b0b4a91bbec39f69 100644 (file)
@@ -50,6 +50,7 @@
 #define WDT_MODE_IRQ_EN                (1 << 3)
 #define WDT_MODE_AUTO_START    (1 << 4)
 #define WDT_MODE_DUAL_EN       (1 << 6)
+#define WDT_MODE_CNT_SEL       (1 << 8)
 #define WDT_MODE_KEY           0x22000000
 
 #define WDT_SWRST              0x14
@@ -70,6 +71,7 @@ struct mtk_wdt_dev {
        spinlock_t lock; /* protects WDT_SWSYSRST reg */
        struct reset_controller_dev rcdev;
        bool disable_wdt_extrst;
+       bool reset_by_toprgu;
 };
 
 struct mtk_wdt_data {
@@ -279,6 +281,8 @@ static int mtk_wdt_start(struct watchdog_device *wdt_dev)
                reg &= ~(WDT_MODE_IRQ_EN | WDT_MODE_DUAL_EN);
        if (mtk_wdt->disable_wdt_extrst)
                reg &= ~WDT_MODE_EXRST_EN;
+       if (mtk_wdt->reset_by_toprgu)
+               reg |= WDT_MODE_CNT_SEL;
        reg |= (WDT_MODE_EN | WDT_MODE_KEY);
        iowrite32(reg, wdt_base + WDT_MODE);
 
@@ -408,6 +412,9 @@ static int mtk_wdt_probe(struct platform_device *pdev)
        mtk_wdt->disable_wdt_extrst =
                of_property_read_bool(dev->of_node, "mediatek,disable-extrst");
 
+       mtk_wdt->reset_by_toprgu =
+               of_property_read_bool(dev->of_node, "mediatek,reset-by-toprgu");
+
        return 0;
 }
 
index 3318544366b894801a0a2344bc21195ccf4e694b..2a079ca04aa3bd04762a3c65a2e7d543cae54def 100644 (file)
@@ -154,11 +154,6 @@ static u32 xwdt_selftest(struct xwdt_device *xdev)
                return XWT_TIMER_FAILED;
 }
 
-static void xwdt_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int xwdt_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
@@ -193,7 +188,7 @@ static int xwdt_probe(struct platform_device *pdev)
 
        watchdog_set_nowayout(xilinx_wdt_wdd, enable_once);
 
-       xdev->clk = devm_clk_get(dev, NULL);
+       xdev->clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(xdev->clk)) {
                if (PTR_ERR(xdev->clk) != -ENOENT)
                        return PTR_ERR(xdev->clk);
@@ -211,15 +206,6 @@ static int xwdt_probe(struct platform_device *pdev)
                                 "The watchdog clock freq cannot be obtained\n");
        } else {
                pfreq = clk_get_rate(xdev->clk);
-               rc = clk_prepare_enable(xdev->clk);
-               if (rc) {
-                       dev_err(dev, "unable to enable clock\n");
-                       return rc;
-               }
-               rc = devm_add_action_or_reset(dev, xwdt_clk_disable_unprepare,
-                                             xdev->clk);
-               if (rc)
-                       return rc;
        }
 
        /*
index 1bdaf17c1d38d7d7d6610f39577033445f880270..8202f0a6b0935361efa3d00adcbdf6c3f6b9fc74 100644 (file)
@@ -325,7 +325,8 @@ static int usb_pcwd_set_heartbeat(struct usb_pcwd_private *usb_pcwd, int t)
 static int usb_pcwd_get_temperature(struct usb_pcwd_private *usb_pcwd,
                                                        int *temperature)
 {
-       unsigned char msb, lsb;
+       unsigned char msb = 0x00;
+       unsigned char lsb = 0x00;
 
        usb_pcwd_send_command(usb_pcwd, CMD_READ_TEMP, &msb, &lsb);
 
@@ -341,7 +342,8 @@ static int usb_pcwd_get_temperature(struct usb_pcwd_private *usb_pcwd,
 static int usb_pcwd_get_timeleft(struct usb_pcwd_private *usb_pcwd,
                                                                int *time_left)
 {
-       unsigned char msb, lsb;
+       unsigned char msb = 0x00;
+       unsigned char lsb = 0x00;
 
        /* Read the time that's left before rebooting */
        /* Note: if the board is not yet armed then we will read 0xFFFF */
index f43062b3c4c81cc88069ffbebaebbf9040e2c3bf..bc4ccddc75a3f0fc8bc8e5c02460d2051636551a 100644 (file)
@@ -164,11 +164,6 @@ static struct watchdog_device pic32_dmt_wdd = {
        .ops            = &pic32_dmt_fops,
 };
 
-static void pic32_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int pic32_dmt_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
@@ -184,20 +179,12 @@ static int pic32_dmt_probe(struct platform_device *pdev)
        if (IS_ERR(dmt->regs))
                return PTR_ERR(dmt->regs);
 
-       dmt->clk = devm_clk_get(dev, NULL);
+       dmt->clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(dmt->clk)) {
                dev_err(dev, "clk not found\n");
                return PTR_ERR(dmt->clk);
        }
 
-       ret = clk_prepare_enable(dmt->clk);
-       if (ret)
-               return ret;
-       ret = devm_add_action_or_reset(dev, pic32_clk_disable_unprepare,
-                                      dmt->clk);
-       if (ret)
-               return ret;
-
        wdd->timeout = pic32_dmt_get_timeout_secs(dmt);
        if (!wdd->timeout) {
                dev_err(dev, "failed to read watchdog register timeout\n");
index 41715d68d9e970f4dcd3e592c087deea27155f08..6d1a00222991f11ed8122a1a6cacc955d398151e 100644 (file)
@@ -162,11 +162,6 @@ static const struct of_device_id pic32_wdt_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, pic32_wdt_dt_ids);
 
-static void pic32_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int pic32_wdt_drv_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
@@ -186,22 +181,12 @@ static int pic32_wdt_drv_probe(struct platform_device *pdev)
        if (!wdt->rst_base)
                return -ENOMEM;
 
-       wdt->clk = devm_clk_get(dev, NULL);
+       wdt->clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(wdt->clk)) {
                dev_err(dev, "clk not found\n");
                return PTR_ERR(wdt->clk);
        }
 
-       ret = clk_prepare_enable(wdt->clk);
-       if (ret) {
-               dev_err(dev, "clk enable failed\n");
-               return ret;
-       }
-       ret = devm_add_action_or_reset(dev, pic32_clk_disable_unprepare,
-                                      wdt->clk);
-       if (ret)
-               return ret;
-
        if (pic32_wdt_is_win_enabled(wdt)) {
                dev_err(dev, "windowed-clear mode is not supported.\n");
                return -ENODEV;
index e0ea133c1690e09410a8deb8615985e8d4fb6530..87a44a5675a143d9708fce3cf6bfe4f2a564e7bf 100644 (file)
@@ -179,11 +179,6 @@ static struct watchdog_device pnx4008_wdd = {
        .max_timeout = MAX_HEARTBEAT,
 };
 
-static void pnx4008_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int pnx4008_wdt_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
@@ -195,18 +190,10 @@ static int pnx4008_wdt_probe(struct platform_device *pdev)
        if (IS_ERR(wdt_base))
                return PTR_ERR(wdt_base);
 
-       wdt_clk = devm_clk_get(dev, NULL);
+       wdt_clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(wdt_clk))
                return PTR_ERR(wdt_clk);
 
-       ret = clk_prepare_enable(wdt_clk);
-       if (ret)
-               return ret;
-       ret = devm_add_action_or_reset(dev, pnx4008_clk_disable_unprepare,
-                                      wdt_clk);
-       if (ret)
-               return ret;
-
        pnx4008_wdd.bootstatus = (readl(WDTIM_RES(wdt_base)) & WDOG_RESET) ?
                        WDIOF_CARDRESET : 0;
        pnx4008_wdd.parent = dev;
index 0d2209c5eaca746b93d177923348a0fbb0705e9d..d776474dcdf34c3953cd98884aab53e5044bc080 100644 (file)
@@ -175,11 +175,6 @@ static const struct watchdog_info qcom_wdt_pt_info = {
        .identity       = KBUILD_MODNAME,
 };
 
-static void qcom_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static const struct qcom_wdt_match_data match_data_apcs_tmr = {
        .offset = reg_offset_data_apcs_tmr,
        .pretimeout = false,
@@ -226,21 +221,12 @@ static int qcom_wdt_probe(struct platform_device *pdev)
        if (IS_ERR(wdt->base))
                return PTR_ERR(wdt->base);
 
-       clk = devm_clk_get(dev, NULL);
+       clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(clk)) {
                dev_err(dev, "failed to get input clock\n");
                return PTR_ERR(clk);
        }
 
-       ret = clk_prepare_enable(clk);
-       if (ret) {
-               dev_err(dev, "failed to setup clock\n");
-               return ret;
-       }
-       ret = devm_add_action_or_reset(dev, qcom_clk_disable_unprepare, clk);
-       if (ret)
-               return ret;
-
        /*
         * We use the clock rate to calculate the max timeout, so ensure it's
         * not zero to avoid a divide-by-zero exception.
index 2a5298c5e8e4d8ab68b1b0a24dd5c5938d99aac4..2c30ddd574c599f6c5099ca58f00ff0dea91e3d8 100644 (file)
@@ -235,27 +235,14 @@ static const struct watchdog_info otto_wdt_info = {
                WDIOF_PRETIMEOUT,
 };
 
-static void otto_wdt_clock_action(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int otto_wdt_probe_clk(struct otto_wdt_ctrl *ctrl)
 {
-       struct clk *clk = devm_clk_get(ctrl->dev, NULL);
-       int ret;
+       struct clk *clk;
 
+       clk = devm_clk_get_enabled(ctrl->dev, NULL);
        if (IS_ERR(clk))
                return dev_err_probe(ctrl->dev, PTR_ERR(clk), "Failed to get clock\n");
 
-       ret = clk_prepare_enable(clk);
-       if (ret)
-               return dev_err_probe(ctrl->dev, ret, "Failed to enable clock\n");
-
-       ret = devm_add_action_or_reset(ctrl->dev, otto_wdt_clock_action, clk);
-       if (ret)
-               return ret;
-
        ctrl->clk_rate_khz = clk_get_rate(clk) / 1000;
        if (ctrl->clk_rate_khz == 0)
                return dev_err_probe(ctrl->dev, -ENXIO, "Failed to get clock rate\n");
index 834b94ff3f903ef93862eaebe298baee6faeebf2..95c8d7abce42e6261cdb9ab53d94b7f6a8b89df5 100644 (file)
@@ -94,16 +94,10 @@ static const struct of_device_id rtd119x_wdt_dt_ids[] = {
         { }
 };
 
-static void rtd119x_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int rtd119x_wdt_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct rtd119x_watchdog_device *data;
-       int ret;
 
        data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
        if (!data)
@@ -113,18 +107,10 @@ static int rtd119x_wdt_probe(struct platform_device *pdev)
        if (IS_ERR(data->base))
                return PTR_ERR(data->base);
 
-       data->clk = devm_clk_get(dev, NULL);
+       data->clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(data->clk))
                return PTR_ERR(data->clk);
 
-       ret = clk_prepare_enable(data->clk);
-       if (ret)
-               return ret;
-       ret = devm_add_action_or_reset(dev, rtd119x_clk_disable_unprepare,
-                                      data->clk);
-       if (ret)
-               return ret;
-
        data->wdt_dev.info = &rtd119x_wdt_info;
        data->wdt_dev.ops = &rtd119x_wdt_ops;
        data->wdt_dev.timeout = 120;
index 974a4194a8fd67b940ef47263e4cb190b5a0e863..d404953d0e0f44c7ef45138420bba384eba39c5c 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
@@ -35,6 +36,8 @@
 
 #define F2CYCLE_NSEC(f)                        (1000000000 / (f))
 
+#define RZV2M_A_NSEC                   730
+
 static bool nowayout = WATCHDOG_NOWAYOUT;
 module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
@@ -51,11 +54,35 @@ struct rzg2l_wdt_priv {
        struct reset_control *rstc;
        unsigned long osc_clk_rate;
        unsigned long delay;
+       unsigned long minimum_assertion_period;
        struct clk *pclk;
        struct clk *osc_clk;
        enum rz_wdt_type devtype;
 };
 
+static int rzg2l_wdt_reset(struct rzg2l_wdt_priv *priv)
+{
+       int err, status;
+
+       if (priv->devtype == WDT_RZV2M) {
+               /* WDT needs TYPE-B reset control */
+               err = reset_control_assert(priv->rstc);
+               if (err)
+                       return err;
+               ndelay(priv->minimum_assertion_period);
+               err = reset_control_deassert(priv->rstc);
+               if (err)
+                       return err;
+               err = read_poll_timeout(reset_control_status, status,
+                                       status != 1, 0, 1000, false,
+                                       priv->rstc);
+       } else {
+               err = reset_control_reset(priv->rstc);
+       }
+
+       return err;
+}
+
 static void rzg2l_wdt_wait_delay(struct rzg2l_wdt_priv *priv)
 {
        /* delay timer when changing the setting register */
@@ -115,25 +142,23 @@ static int rzg2l_wdt_stop(struct watchdog_device *wdev)
 {
        struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev);
 
+       rzg2l_wdt_reset(priv);
        pm_runtime_put(wdev->parent);
-       reset_control_reset(priv->rstc);
 
        return 0;
 }
 
 static int rzg2l_wdt_set_timeout(struct watchdog_device *wdev, unsigned int timeout)
 {
-       struct rzg2l_wdt_priv *priv = watchdog_get_drvdata(wdev);
-
        wdev->timeout = timeout;
 
        /*
         * If the watchdog is active, reset the module for updating the WDTSET
-        * register so that it is updated with new timeout values.
+        * register by calling rzg2l_wdt_stop() (which internally calls reset_control_reset()
+        * to reset the module) so that it is updated with new timeout values.
         */
        if (watchdog_active(wdev)) {
-               pm_runtime_put(wdev->parent);
-               reset_control_reset(priv->rstc);
+               rzg2l_wdt_stop(wdev);
                rzg2l_wdt_start(wdev);
        }
 
@@ -156,6 +181,7 @@ static int rzg2l_wdt_restart(struct watchdog_device *wdev,
                rzg2l_wdt_write(priv, PEEN_FORCE, PEEN);
        } else {
                /* RZ/V2M doesn't have parity error registers */
+               rzg2l_wdt_reset(priv);
 
                wdev->timeout = 0;
 
@@ -253,6 +279,13 @@ static int rzg2l_wdt_probe(struct platform_device *pdev)
 
        priv->devtype = (uintptr_t)of_device_get_match_data(dev);
 
+       if (priv->devtype == WDT_RZV2M) {
+               priv->minimum_assertion_period = RZV2M_A_NSEC +
+                       3 * F2CYCLE_NSEC(pclk_rate) + 5 *
+                       max(F2CYCLE_NSEC(priv->osc_clk_rate),
+                           F2CYCLE_NSEC(pclk_rate));
+       }
+
        pm_runtime_enable(&pdev->dev);
 
        priv->wdev.info = &rzg2l_wdt_ident;
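
The read_poll_timeout() call in rzg2l_wdt_reset() above is dense; its argument order is op, captured value, stop condition, sleep between reads (us), total timeout (us), sleep-before-first-read, then op's own arguments. A commented restatement of that call:

    #include <linux/iopoll.h>
    #include <linux/reset.h>

    /*
     * Poll reset_control_status(rstc) until it stops reporting
     * "asserted" (status != 1), with no sleep between reads, for at
     * most 1000us and no initial sleep; -ETIMEDOUT if never met.
     */
    static int wait_reset_deasserted(struct reset_control *rstc)
    {
            int status;

            return read_poll_timeout(reset_control_status, status,
                                     status != 1, 0, 1000, false, rstc);
    }
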
index 55ab384b996570e8b23a3e856ed0bf2c5f74ac9a..980c1717adb5d09bc3141a5205c96c2547225507 100644 (file)
@@ -98,11 +98,6 @@ static const struct watchdog_ops rzn1_wdt_ops = {
        .ping = rzn1_wdt_ping,
 };
 
-static void rzn1_wdt_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int rzn1_wdt_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
@@ -132,23 +127,12 @@ static int rzn1_wdt_probe(struct platform_device *pdev)
                return ret;
        }
 
-       clk = devm_clk_get(dev, NULL);
+       clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(clk)) {
                dev_err(dev, "failed to get the clock\n");
                return PTR_ERR(clk);
        }
 
-       ret = clk_prepare_enable(clk);
-       if (ret) {
-               dev_err(dev, "failed to prepare/enable the clock\n");
-               return ret;
-       }
-
-       ret = devm_add_action_or_reset(dev, rzn1_wdt_clk_disable_unprepare,
-                                      clk);
-       if (ret)
-               return ret;
-
        clk_rate = clk_get_rate(clk);
        if (!clk_rate) {
                dev_err(dev, "failed to get the clock rate\n");
index 9791c74aebd489a3fee2efc388ace4adb24eda15..63862803421f12d656ea48c7d47f53fafb850aeb 100644 (file)
@@ -150,6 +150,7 @@ static int sbsa_gwdt_set_timeout(struct watchdog_device *wdd,
        struct sbsa_gwdt *gwdt = watchdog_get_drvdata(wdd);
 
        wdd->timeout = timeout;
+       timeout = clamp_t(unsigned int, timeout, 1, wdd->max_hw_heartbeat_ms / 1000);
 
        if (action)
                sbsa_gwdt_reg_write(gwdt->clk * timeout, gwdt);
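
Note the ordering in this hunk: wdd->timeout keeps the user's requested value and only the copy programmed into the hardware is clamped, so the watchdog core can honour larger timeouts by pinging within max_hw_heartbeat_ms. A small worked example of clamp_t(), which bounds its value after casting all three operands to the named type:

    #include <linux/minmax.h>

    /* e.g. timeout_s = 600 with a 10000ms hardware ceiling returns 10;
     * the core then re-pings often enough to cover the full 600s. */
    static unsigned int hw_timeout_s(unsigned int timeout_s,
                                     unsigned int max_hw_ms)
    {
            return clamp_t(unsigned int, timeout_s, 1, max_hw_ms / 1000);
    }
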
index 83ef55e66ca8643ea9aa4ef8e178ca20d1f250ca..cef0794708e7e761098e25e34f59aa62a408e34b 100644 (file)
@@ -112,11 +112,6 @@ static const struct watchdog_ops visconti_wdt_ops = {
        .set_timeout    = visconti_wdt_set_timeout,
 };
 
-static void visconti_clk_disable_unprepare(void *data)
-{
-       clk_disable_unprepare(data);
-}
-
 static int visconti_wdt_probe(struct platform_device *pdev)
 {
        struct watchdog_device *wdev;
@@ -134,20 +129,10 @@ static int visconti_wdt_probe(struct platform_device *pdev)
        if (IS_ERR(priv->base))
                return PTR_ERR(priv->base);
 
-       clk = devm_clk_get(dev, NULL);
+       clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(clk))
                return dev_err_probe(dev, PTR_ERR(clk), "Could not get clock\n");
 
-       ret = clk_prepare_enable(clk);
-       if (ret) {
-               dev_err(dev, "Could not enable clock\n");
-               return ret;
-       }
-
-       ret = devm_add_action_or_reset(dev, visconti_clk_disable_unprepare, clk);
-       if (ret)
-               return ret;
-
        clk_freq = clk_get_rate(clk);
        if (!clk_freq)
                return -EINVAL;
index 55574ed425042e9405bb2220697bb26c2c5f59cc..0122e8796879752bfa18c6039fe84f698529534a 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/init.h>                /* For __init/__exit/... */
 #include <linux/hrtimer.h>     /* For hrtimers */
 #include <linux/kernel.h>      /* For printk/panic/... */
+#include <linux/kstrtox.h>     /* For kstrto* */
 #include <linux/kthread.h>     /* For kthread_work */
 #include <linux/miscdevice.h>  /* For handling misc devices */
 #include <linux/module.h>      /* For module stuff/... */
@@ -546,6 +547,24 @@ static ssize_t pretimeout_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(pretimeout);
 
+static ssize_t options_show(struct device *dev, struct device_attribute *attr,
+                           char *buf)
+{
+       struct watchdog_device *wdd = dev_get_drvdata(dev);
+
+       return sysfs_emit(buf, "0x%x\n", wdd->info->options);
+}
+static DEVICE_ATTR_RO(options);
+
+static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr,
+                              char *buf)
+{
+       struct watchdog_device *wdd = dev_get_drvdata(dev);
+
+       return sysfs_emit(buf, "%d\n", wdd->info->firmware_version);
+}
+static DEVICE_ATTR_RO(fw_version);
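
The two new attributes expose watchdog_info fields under /sys/class/watchdog/watchdogN/. A short userspace sketch reading one of them (the path and the sample value are illustrative):

    #include <stdio.h>

    int main(void)
    {
            char buf[32];
            FILE *f = fopen("/sys/class/watchdog/watchdog0/options", "r");

            if (f && fgets(buf, sizeof(buf), f))
                    printf("options: %s", buf);     /* e.g. "0x8180" */
            if (f)
                    fclose(f);

            /* fw_version sits next to it: .../watchdog0/fw_version */
            return 0;
    }
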
+
 static ssize_t identity_show(struct device *dev, struct device_attribute *attr,
                                char *buf)
 {
@@ -617,6 +636,8 @@ static umode_t wdt_is_visible(struct kobject *kobj, struct attribute *attr,
 }
 static struct attribute *wdt_attrs[] = {
        &dev_attr_state.attr,
+       &dev_attr_options.attr,
+       &dev_attr_fw_version.attr,
        &dev_attr_identity.attr,
        &dev_attr_timeout.attr,
        &dev_attr_min_timeout.attr,
@@ -1061,8 +1082,8 @@ static int watchdog_cdev_register(struct watchdog_device *wdd)
                if (wdd->id == 0) {
                        misc_deregister(&watchdog_miscdev);
                        old_wd_data = NULL;
-                       put_device(&wd_data->dev);
                }
+               put_device(&wd_data->dev);
                return err;
        }
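
The fix above moves put_device() out of the id-0-only branch: once device_initialize() has run, the reference count owns the object, so every error path must drop the reference, and freeing happens in the ->release callback rather than by hand. A reduced sketch of that rule, with illustrative names:

    #include <linux/device.h>

    /* Illustrative rule, not watchdog core code. */
    static int add_child(struct device *child)
    {
            int err;

            device_initialize(child);

            err = device_add(child);
            if (err) {
                    /* never kfree() directly after device_initialize() */
                    put_device(child);
                    return err;
            }
            return 0;
    }
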
 
index ce7a4a9e4b03ca8d2ae4d70390190d4780c62a76..0ba99bed59fc4d6ef821f0494ac9daaeeb3591ea 100644 (file)
@@ -301,13 +301,12 @@ static const struct watchdog_info wdat_wdt_info = {
        .identity = "wdat_wdt",
 };
 
-static const struct watchdog_ops wdat_wdt_ops = {
+static struct watchdog_ops wdat_wdt_ops = {
        .owner = THIS_MODULE,
        .start = wdat_wdt_start,
        .stop = wdat_wdt_stop,
        .ping = wdat_wdt_ping,
        .set_timeout = wdat_wdt_set_timeout,
-       .get_timeleft = wdat_wdt_get_timeleft,
 };
 
 static int wdat_wdt_probe(struct platform_device *pdev)
@@ -436,6 +435,9 @@ static int wdat_wdt_probe(struct platform_device *pdev)
                list_add_tail(&instr->node, instructions);
        }
 
+       if (wdat->instructions[ACPI_WDAT_GET_CURRENT_COUNTDOWN])
+               wdat_wdt_ops.get_timeleft = wdat_wdt_get_timeleft;
+
        wdat_wdt_boot_status(wdat);
        wdat_wdt_set_running(wdat);
 
index d0e88875443ae9a495d84bde3fdb439a7659b409..21ca08a694ee33ceaf940c55ded80f20e5255818 100644 (file)
@@ -593,8 +593,7 @@ static int ziirave_wdt_init_duration(struct i2c_client *client)
                                         reset_duration);
 }
 
-static int ziirave_wdt_probe(struct i2c_client *client,
-                            const struct i2c_device_id *id)
+static int ziirave_wdt_probe(struct i2c_client *client)
 {
        int ret;
        struct ziirave_wdt_data *w_priv;
@@ -732,7 +731,7 @@ static struct i2c_driver ziirave_wdt_driver = {
                .name = "ziirave_wdt",
                .of_match_table = zrv_wdt_of_match,
        },
-       .probe = ziirave_wdt_probe,
+       .probe_new = ziirave_wdt_probe,
        .remove = ziirave_wdt_remove,
        .id_table = ziirave_wdt_id,
 };
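
This hunk is part of the tree-wide i2c conversion from .probe(), which received the matched i2c_device_id, to .probe_new(), which takes only the client. A hedged sketch of a converted driver (names are illustrative); a driver that still needs the matched id can look it up with i2c_match_id():

    #include <linux/i2c.h>
    #include <linux/module.h>

    static const struct i2c_device_id mydrv_id[] = {
            { "mydev", 0 },
            { }
    };
    MODULE_DEVICE_TABLE(i2c, mydrv_id);

    /* New-style probe: no i2c_device_id parameter. */
    static int mydrv_probe(struct i2c_client *client)
    {
            /* Recover the matched id only if it is actually needed. */
            const struct i2c_device_id *id = i2c_match_id(mydrv_id, client);

            return id ? 0 : -ENODEV;
    }

    static struct i2c_driver mydrv = {
            .driver = { .name = "mydrv" },
            .probe_new = mydrv_probe,
            .id_table = mydrv_id,
    };
    module_i2c_driver(mydrv);
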
index 3a9c4517265fa27d4eb00b50f3684384243f260f..61a51b90600dc4aeac24835613904b7257546d48 100644 (file)
@@ -468,7 +468,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 
 #ifdef CONFIG_9P_FSCACHE
        /* register the session for caching */
-       if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+       if (v9ses->cache == CACHE_FSCACHE) {
                rc = v9fs_cache_session_get_cookie(v9ses, dev_name);
                if (rc < 0)
                        goto err_clnt;
index 97599edbc300b0e6672f234bfdd3c3d1cd960b77..6f46d7e4c7509e05d64055eb749ff792efb29fed 100644 (file)
@@ -279,8 +279,6 @@ static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
 
        p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
 
-       BUG_ON(!v9inode->writeback_fid);
-
        /* Prefetch area to be written into the cache if we're caching this
         * file.  We need to do this before we get a lock on the page in case
         * there's more than one writer competing for the same cache block.
index 59b0e8948f7874b200dd6d04bc2dd630a3561f56..3d74b04fe0de411333d3d30a0cffbe5eb9bd8cf3 100644 (file)
@@ -197,7 +197,7 @@ static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx)
 
 
 /**
- * v9fs_dir_release - close a directory
+ * v9fs_dir_release - called on a close of a file or directory
  * @inode: inode of the directory
  * @filp: file pointer to a directory
  *
@@ -209,6 +209,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
        struct p9_fid *fid;
        __le32 version;
        loff_t i_size;
+       int retval = 0;
 
        fid = filp->private_data;
        p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
@@ -217,7 +218,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
                spin_lock(&inode->i_lock);
                hlist_del(&fid->ilist);
                spin_unlock(&inode->i_lock);
-               p9_fid_put(fid);
+               retval = p9_fid_put(fid);
        }
 
        if ((filp->f_mode & FMODE_WRITE)) {
@@ -228,7 +229,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
        } else {
                fscache_unuse_cookie(v9fs_inode_cookie(v9inode), NULL, NULL);
        }
-       return 0;
+       return retval;
 }
 
 const struct file_operations v9fs_dir_operations = {
index b6ba229757818442b2a1db65692969f8421b3bc0..44c15eb2b908b5720370d6f04575eecc05e626ed 100644 (file)
@@ -74,8 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
        }
 
        mutex_lock(&v9inode->v_mutex);
-       if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
-           !v9inode->writeback_fid &&
+       if ((v9ses->cache) && !v9inode->writeback_fid &&
            ((file->f_flags & O_ACCMODE) != O_RDONLY)) {
                /*
                 * clone a fid and add it to writeback_fid
@@ -93,9 +92,11 @@ int v9fs_file_open(struct inode *inode, struct file *file)
                v9inode->writeback_fid = (void *) writeback_fid;
        }
        mutex_unlock(&v9inode->v_mutex);
-       if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+#ifdef CONFIG_9P_FSCACHE
+       if (v9ses->cache == CACHE_FSCACHE)
                fscache_use_cookie(v9fs_inode_cookie(v9inode),
                                   file->f_mode & FMODE_WRITE);
+#endif
        v9fs_open_fid_add(inode, &fid);
        return 0;
 out_error:
index 4344e7a7865f8df3f15fde270446e6c36ba7bcd0..1d523bec0a94d03108f808b516fa793ace25f6b4 100644 (file)
@@ -843,8 +843,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
        inode = d_inode(dentry);
        v9inode = V9FS_I(inode);
        mutex_lock(&v9inode->v_mutex);
-       if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
-           !v9inode->writeback_fid &&
+       if ((v9ses->cache) && !v9inode->writeback_fid &&
            ((flags & O_ACCMODE) != O_RDONLY)) {
                /*
                 * clone a fid and add it to writeback_fid
index 3bed3eb3a0e270673289566bd4913db5079b0d1b..331ed60d8fcb571b5f791a05981fb6d353fc1b71 100644 (file)
@@ -316,8 +316,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 
        v9inode = V9FS_I(inode);
        mutex_lock(&v9inode->v_mutex);
-       if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
-           !v9inode->writeback_fid &&
+       if ((v9ses->cache) && !v9inode->writeback_fid &&
            ((flags & O_ACCMODE) != O_RDONLY)) {
                /*
                 * clone a fid and add it to writeback_fid
@@ -340,9 +339,11 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
        if (err)
                goto out;
        file->private_data = ofid;
-       if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+#ifdef CONFIG_9P_FSCACHE
+       if (v9ses->cache == CACHE_FSCACHE)
                fscache_use_cookie(v9fs_inode_cookie(v9inode),
                                   file->f_mode & FMODE_WRITE);
+#endif
        v9fs_open_fid_add(inode, &ofid);
        file->f_mode |= FMODE_CREATED;
 out:
index 5dcc62e678c480acac3dd467aab1dd55bfad18b1..f4d8bf7dec88a8eddeffbb0de171514566c6fa21 100644 (file)
@@ -2098,6 +2098,9 @@ static long ceph_fallocate(struct file *file, int mode,
        loff_t endoff = 0;
        loff_t size;
 
+       dout("%s %p %llx.%llx mode %x, offset %llu length %llu\n", __func__,
+            inode, ceph_vinop(inode), mode, offset, length);
+
        if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
                return -EOPNOTSUPP;
 
@@ -2132,6 +2135,10 @@ static long ceph_fallocate(struct file *file, int mode,
        if (ret < 0)
                goto unlock;
 
+       ret = file_modified(file);
+       if (ret)
+               goto put_caps;
+
        filemap_invalidate_lock(inode->i_mapping);
        ceph_fscache_invalidate(inode, false);
        ceph_zero_pagecache_range(inode, offset, length);
@@ -2147,6 +2154,7 @@ static long ceph_fallocate(struct file *file, int mode,
        }
        filemap_invalidate_unlock(inode->i_mapping);
 
+put_caps:
        ceph_put_cap_refs(ci, got);
 unlock:
        inode_unlock(inode);
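
The new put_caps label keeps the unwind correct: file_modified() can now fail after cap references were taken, and those references must still be dropped before unlocking. A self-contained userspace sketch of the same label-per-resource pattern (all helpers are stubs, not ceph code):

    #include <stdio.h>

    static int take_lock(void)        { puts("lock");          return 0; }
    static void drop_lock(void)       { puts("unlock"); }
    static int get_caps(void)         { puts("get caps");      return 0; }
    static void put_caps_back(void)   { puts("put caps"); }
    static int update_metadata(void)  { puts("file_modified"); return 0; }
    static int do_io(void)            { puts("io");            return 0; }

    /* Each failure point jumps to the label that releases everything
     * acquired so far, in reverse order of acquisition. */
    static int do_op(void)
    {
            int ret = take_lock();

            if (ret)
                    return ret;
            ret = get_caps();
            if (ret)
                    goto unlock;
            ret = update_metadata();    /* the step the patch adds */
            if (ret)
                    goto put_caps;
            ret = do_io();
    put_caps:
            put_caps_back();
    unlock:
            drop_lock();
            return ret;
    }

    int main(void) { return do_op(); }
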
index b7a36ebd0f2f59352dbf1041bdf5aaa39b264e10..e2eff66eefabfdd06864853203a7c67e7d057917 100644 (file)
@@ -667,11 +667,21 @@ static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
 int match_target_ip(struct TCP_Server_Info *server,
                    const char *share, size_t share_len,
                    bool *result);
-
-int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid,
-                                      struct cifs_tcon *tcon,
-                                      struct cifs_sb_info *cifs_sb,
-                                      const char *dfs_link_path);
+int cifs_inval_name_dfs_link_error(const unsigned int xid,
+                                  struct cifs_tcon *tcon,
+                                  struct cifs_sb_info *cifs_sb,
+                                  const char *full_path,
+                                  bool *islink);
+#else
+static inline int cifs_inval_name_dfs_link_error(const unsigned int xid,
+                                  struct cifs_tcon *tcon,
+                                  struct cifs_sb_info *cifs_sb,
+                                  const char *full_path,
+                                  bool *islink)
+{
+       *islink = false;
+       return 0;
+}
 #endif
 
 static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options)
@@ -684,5 +694,6 @@ static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options)
 
 struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon);
 void cifs_put_tcon_super(struct super_block *sb);
+int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry);
 
 #endif                 /* _CIFSPROTO_H */
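
The #else branch added above is the usual idiom for config-gated helpers: a static inline stub with the same signature lets call sites run unconditionally, with no #ifdef CONFIG_CIFS_DFS_UPCALL at each caller. A generic sketch of the pattern (CONFIG_FEATURE_X and feature_x_check() are made-up names):

    #include <stdbool.h>

    #ifdef CONFIG_FEATURE_X
    int feature_x_check(int arg, bool *result);
    #else
    static inline int feature_x_check(int arg, bool *result)
    {
            *result = false;        /* feature compiled out: report "no" */
            return 0;               /* succeed, so callers carry on */
    }
    #endif

Note that the stub mirrors the real function's contract: it writes a defined value through every output parameter before returning success.
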
index a24e4ddf804326ab7a06f06c9f26bca71abf6ea6..a43c78396dd8815a9b972814bb599dafd2c4c456 100644 (file)
@@ -72,7 +72,6 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
        struct cifs_ses *ses;
        struct TCP_Server_Info *server;
        struct nls_table *nls_codepage;
-       int retries;
 
        /*
         * SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for
@@ -102,45 +101,9 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
        }
        spin_unlock(&tcon->tc_lock);
 
-       retries = server->nr_targets;
-
-       /*
-        * Give demultiplex thread up to 10 seconds to each target available for
-        * reconnect -- should be greater than cifs socket timeout which is 7
-        * seconds.
-        */
-       while (server->tcpStatus == CifsNeedReconnect) {
-               rc = wait_event_interruptible_timeout(server->response_q,
-                                                     (server->tcpStatus != CifsNeedReconnect),
-                                                     10 * HZ);
-               if (rc < 0) {
-                       cifs_dbg(FYI, "%s: aborting reconnect due to a received signal by the process\n",
-                                __func__);
-                       return -ERESTARTSYS;
-               }
-
-               /* are we still trying to reconnect? */
-               spin_lock(&server->srv_lock);
-               if (server->tcpStatus != CifsNeedReconnect) {
-                       spin_unlock(&server->srv_lock);
-                       break;
-               }
-               spin_unlock(&server->srv_lock);
-
-               if (retries && --retries)
-                       continue;
-
-               /*
-                * on "soft" mounts we wait once. Hard mounts keep
-                * retrying until process is killed or server comes
-                * back on-line
-                */
-               if (!tcon->retry) {
-                       cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n");
-                       return -EHOSTDOWN;
-               }
-               retries = server->nr_targets;
-       }
+       rc = cifs_wait_for_server_reconnect(server, tcon->retry);
+       if (rc)
+               return rc;
 
        spin_lock(&ses->chan_lock);
        if (!cifs_chan_needs_reconnect(ses, server) && !tcon->need_reconnect) {
index ec020d860be328348b27ea410eec20265b332450..5233f14f0636ac0fa65ff1d1b058d186543ffada 100644 (file)
@@ -1294,7 +1294,8 @@ cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs)
        case AF_INET6: {
                struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr;
                struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)rhs;
-               return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr);
+               return (ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr)
+                       && saddr6->sin6_scope_id == vaddr6->sin6_scope_id);
        }
        default:
                WARN_ON(1);
@@ -1343,32 +1344,8 @@ match_port(struct TCP_Server_Info *server, struct sockaddr *addr)
 
 static bool match_server_address(struct TCP_Server_Info *server, struct sockaddr *addr)
 {
-       switch (addr->sa_family) {
-       case AF_INET: {
-               struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
-               struct sockaddr_in *srv_addr4 =
-                                       (struct sockaddr_in *)&server->dstaddr;
-
-               if (addr4->sin_addr.s_addr != srv_addr4->sin_addr.s_addr)
-                       return false;
-               break;
-       }
-       case AF_INET6: {
-               struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
-               struct sockaddr_in6 *srv_addr6 =
-                                       (struct sockaddr_in6 *)&server->dstaddr;
-
-               if (!ipv6_addr_equal(&addr6->sin6_addr,
-                                    &srv_addr6->sin6_addr))
-                       return false;
-               if (addr6->sin6_scope_id != srv_addr6->sin6_scope_id)
-                       return false;
-               break;
-       }
-       default:
-               WARN_ON(1);
-               return false; /* don't expect to be here */
-       }
+       if (!cifs_match_ipaddr(addr, (struct sockaddr *)&server->dstaddr))
+               return false;
 
        return true;
 }
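
match_server_address() now delegates to cifs_match_ipaddr(), which, per the first hunk in this file, also compares sin6_scope_id, since two IPv6 link-local addresses are only the same endpoint if both the address bytes and the scope (interface) id match. A hedged userspace sketch of that comparison:

    #include <stdbool.h>
    #include <string.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    static bool addr_equal(const struct sockaddr *a, const struct sockaddr *b)
    {
            if (a->sa_family != b->sa_family)
                    return false;

            switch (a->sa_family) {
            case AF_INET: {
                    const struct sockaddr_in *a4 = (const void *)a;
                    const struct sockaddr_in *b4 = (const void *)b;

                    return a4->sin_addr.s_addr == b4->sin_addr.s_addr;
            }
            case AF_INET6: {
                    const struct sockaddr_in6 *a6 = (const void *)a;
                    const struct sockaddr_in6 *b6 = (const void *)b;

                    /* Address bytes AND scope id, as in the patch. */
                    return !memcmp(&a6->sin6_addr, &b6->sin6_addr,
                                   sizeof(a6->sin6_addr)) &&
                           a6->sin6_scope_id == b6->sin6_scope_id;
            }
            default:
                    return false;
            }
    }
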
index ebfcaae8c4373f57b766754ecfd3739e98bf2c74..4d4a2d82636d2a359f785c41c5efca5ba5aa8504 100644 (file)
@@ -52,6 +52,8 @@ static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int
 
        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
+               if (xas_retry(&xas, folio))
+                       continue;
                xas_pause(&xas);
                rcu_read_unlock();
                folio_lock(folio);
@@ -81,6 +83,8 @@ void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len
 
        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each(&xas, folio, end) {
+               if (xas_retry(&xas, folio))
+                       continue;
                if (!folio_test_writeback(folio)) {
                        WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
                                  len, start, folio_index(folio), end);
@@ -112,6 +116,8 @@ void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len
 
        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each(&xas, folio, end) {
+               if (xas_retry(&xas, folio))
+                       continue;
                if (!folio_test_writeback(folio)) {
                        WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
                                  len, start, folio_index(folio), end);
@@ -2839,6 +2845,7 @@ err_xid:
        free_xid(xid);
        if (rc == 0) {
                wbc->nr_to_write = count;
+               rc = len;
        } else if (is_retryable_error(rc)) {
                cifs_pages_write_redirty(inode, start, len);
        } else {
@@ -3605,7 +3612,7 @@ static ssize_t __cifs_writev(
 
                ctx->nr_pinned_pages = rc;
                ctx->bv = (void *)ctx->iter.bvec;
-               ctx->bv_need_unpin = iov_iter_extract_will_pin(&ctx->iter);
+               ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
        } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
                   !is_sync_kiocb(iocb)) {
                /*
@@ -4141,7 +4148,7 @@ static ssize_t __cifs_readv(
 
                ctx->nr_pinned_pages = rc;
                ctx->bv = (void *)ctx->iter.bvec;
-               ctx->bv_need_unpin = iov_iter_extract_will_pin(&ctx->iter);
+               ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
                ctx->should_dirty = true;
        } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
                   !is_sync_kiocb(iocb)) {
index 2905734eb289b3603c61cce475341003ea5cc50b..a0d286ee723dda465cecbe9d9e0c4f20c3887e3e 100644 (file)
@@ -21,6 +21,7 @@
 #include "cifsfs.h"
 #ifdef CONFIG_CIFS_DFS_UPCALL
 #include "dns_resolve.h"
+#include "dfs_cache.h"
 #endif
 #include "fs_context.h"
 #include "cached_dir.h"
@@ -1198,4 +1199,114 @@ int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix)
        cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
        return 0;
 }
+
+/*
+ * Handle weird Windows SMB server behaviour. It responds with a
+ * STATUS_OBJECT_NAME_INVALID code to an SMB2 QUERY_INFO request for a
+ * "\<server>\<dfsname>\<linkpath>" DFS reference, where <dfsname> contains
+ * non-ASCII unicode symbols.
+ */
+int cifs_inval_name_dfs_link_error(const unsigned int xid,
+                                  struct cifs_tcon *tcon,
+                                  struct cifs_sb_info *cifs_sb,
+                                  const char *full_path,
+                                  bool *islink)
+{
+       struct cifs_ses *ses = tcon->ses;
+       size_t len;
+       char *path;
+       char *ref_path;
+
+       *islink = false;
+
+       /*
+        * Fast path - skip check when @full_path doesn't have a prefix path to
+        * look up or tcon is not DFS.
+        */
+       if (strlen(full_path) < 2 || !cifs_sb ||
+           (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) ||
+           !is_tcon_dfs(tcon) || !ses->server->origin_fullpath)
+               return 0;
+
+       /*
+        * Slow path - tcon is DFS and @full_path has a prefix path, so attempt
+        * to get a referral to figure out whether it is a DFS link.
+        */
+       len = strnlen(tcon->tree_name, MAX_TREE_SIZE + 1) + strlen(full_path) + 1;
+       path = kmalloc(len, GFP_KERNEL);
+       if (!path)
+               return -ENOMEM;
+
+       scnprintf(path, len, "%s%s", tcon->tree_name, full_path);
+       ref_path = dfs_cache_canonical_path(path + 1, cifs_sb->local_nls,
+                                           cifs_remap(cifs_sb));
+       kfree(path);
+
+       if (IS_ERR(ref_path)) {
+               if (PTR_ERR(ref_path) != -EINVAL)
+                       return PTR_ERR(ref_path);
+       } else {
+               struct dfs_info3_param *refs = NULL;
+               int num_refs = 0;
+
+               /*
+                * XXX: we are not using dfs_cache_find() here because we might
+                * end up filling the entire DFS cache and thus potentially
+                * removing cached DFS targets that the client would eventually
+                * need during failover.
+                */
+               if (ses->server->ops->get_dfs_refer &&
+                   !ses->server->ops->get_dfs_refer(xid, ses, ref_path, &refs,
+                                                    &num_refs, cifs_sb->local_nls,
+                                                    cifs_remap(cifs_sb)))
+                       *islink = refs[0].server_type == DFS_TYPE_LINK;
+               free_dfs_info_array(refs, num_refs);
+               kfree(ref_path);
+       }
+       return 0;
+}
 #endif
+
+int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry)
+{
+       int timeout = 10;
+       int rc;
+
+       spin_lock(&server->srv_lock);
+       if (server->tcpStatus != CifsNeedReconnect) {
+               spin_unlock(&server->srv_lock);
+               return 0;
+       }
+       timeout *= server->nr_targets;
+       spin_unlock(&server->srv_lock);
+
+       /*
+        * Give the demultiplex thread up to 10 seconds for each available
+        * target to reconnect -- this should be greater than the cifs socket
+        * timeout, which is 7 seconds.
+        *
+        * On "soft" mounts we wait once. Hard mounts keep retrying until
+        * process is killed or server comes back on-line.
+        */
+       do {
+               rc = wait_event_interruptible_timeout(server->response_q,
+                                                     (server->tcpStatus != CifsNeedReconnect),
+                                                     timeout * HZ);
+               if (rc < 0) {
+                       cifs_dbg(FYI, "%s: aborting reconnect due to received signal\n",
+                                __func__);
+                       return -ERESTARTSYS;
+               }
+
+               /* are we still trying to reconnect? */
+               spin_lock(&server->srv_lock);
+               if (server->tcpStatus != CifsNeedReconnect) {
+                       spin_unlock(&server->srv_lock);
+                       return 0;
+               }
+               spin_unlock(&server->srv_lock);
+       } while (retry);
+
+       cifs_dbg(FYI, "%s: gave up waiting on reconnect\n", __func__);
+       return -EHOSTDOWN;
+}
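
The loop above leans on the return-value contract of wait_event_interruptible_timeout(): negative (-ERESTARTSYS) if a signal interrupted the sleep, zero if the timeout elapsed with the condition still false, positive (remaining jiffies) once the condition became true. A small sketch mapping those cases to errors, assuming the caller treats a timeout as retryable:

    #include <linux/errno.h>
    #include <linux/types.h>
    #include <linux/wait.h>

    static int wait_until_ready(wait_queue_head_t *wq, bool *ready, long timeout)
    {
            long rc = wait_event_interruptible_timeout(*wq, *ready, timeout);

            if (rc < 0)
                    return -ERESTARTSYS;    /* interrupted by a signal */
            if (rc == 0)
                    return -ETIMEDOUT;      /* timed out, condition still false */
            return 0;                       /* condition satisfied */
    }
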
index 37b4cd59245d69dd369ad7f3f3f0dad2549c5098..9b956294e8643dfe57772699436ffab2bd0bcc85 100644 (file)
@@ -527,12 +527,13 @@ int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
                         struct cifs_sb_info *cifs_sb, const char *full_path,
                         struct cifs_open_info_data *data, bool *adjust_tz, bool *reparse)
 {
-       int rc;
        __u32 create_options = 0;
        struct cifsFileInfo *cfile;
        struct cached_fid *cfid = NULL;
        struct kvec err_iov[3] = {};
        int err_buftype[3] = {};
+       bool islink;
+       int rc, rc2;
 
        *adjust_tz = false;
        *reparse = false;
@@ -580,15 +581,15 @@ int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
                                              SMB2_OP_QUERY_INFO, cfile, NULL, NULL,
                                              NULL, NULL);
                        goto out;
-               } else if (rc != -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) &&
-                          hdr->Status == STATUS_OBJECT_NAME_INVALID) {
-                       /*
-                        * Handle weird Windows SMB server behaviour. It responds with
-                        * STATUS_OBJECT_NAME_INVALID code to SMB2 QUERY_INFO request
-                        * for "\<server>\<dfsname>\<linkpath>" DFS reference,
-                        * where <dfsname> contains non-ASCII unicode symbols.
-                        */
-                       rc = -EREMOTE;
+               } else if (rc != -EREMOTE && hdr->Status == STATUS_OBJECT_NAME_INVALID) {
+                       rc2 = cifs_inval_name_dfs_link_error(xid, tcon, cifs_sb,
+                                                            full_path, &islink);
+                       if (rc2) {
+                               rc = rc2;
+                               goto out;
+                       }
+                       if (islink)
+                               rc = -EREMOTE;
                }
                if (rc == -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) && cifs_sb &&
                    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS))
index f79b075f2992f83657277fdd8a12d6630a9b4ab6..6dfb865ee9d75155f1421b0a41677e32da15eb98 100644 (file)
@@ -796,7 +796,6 @@ static int
 smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
                        struct cifs_sb_info *cifs_sb, const char *full_path)
 {
-       int rc;
        __le16 *utf16_path;
        __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
        int err_buftype = CIFS_NO_BUFFER;
@@ -804,6 +803,8 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
        struct kvec err_iov = {};
        struct cifs_fid fid;
        struct cached_fid *cfid;
+       bool islink;
+       int rc, rc2;
 
        rc = open_cached_dir(xid, tcon, full_path, cifs_sb, true, &cfid);
        if (!rc) {
@@ -833,15 +834,17 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
 
                if (unlikely(!hdr || err_buftype == CIFS_NO_BUFFER))
                        goto out;
-               /*
-                * Handle weird Windows SMB server behaviour. It responds with
-                * STATUS_OBJECT_NAME_INVALID code to SMB2 QUERY_INFO request
-                * for "\<server>\<dfsname>\<linkpath>" DFS reference,
-                * where <dfsname> contains non-ASCII unicode symbols.
-                */
-               if (rc != -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) &&
-                   hdr->Status == STATUS_OBJECT_NAME_INVALID)
-                       rc = -EREMOTE;
+
+               if (rc != -EREMOTE && hdr->Status == STATUS_OBJECT_NAME_INVALID) {
+                       rc2 = cifs_inval_name_dfs_link_error(xid, tcon, cifs_sb,
+                                                            full_path, &islink);
+                       if (rc2) {
+                               rc = rc2;
+                               goto out;
+                       }
+                       if (islink)
+                               rc = -EREMOTE;
+               }
                if (rc == -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) && cifs_sb &&
                    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS))
                        rc = -EOPNOTSUPP;
index ca9d7110ddcbe0a45e51ac66a843e257eecb9a09..0e53265e1462a350d5c8bbde41893b50f3dac3ae 100644 (file)
@@ -139,66 +139,6 @@ out:
        return;
 }
 
-static int wait_for_server_reconnect(struct TCP_Server_Info *server,
-                                    __le16 smb2_command, bool retry)
-{
-       int timeout = 10;
-       int rc;
-
-       spin_lock(&server->srv_lock);
-       if (server->tcpStatus != CifsNeedReconnect) {
-               spin_unlock(&server->srv_lock);
-               return 0;
-       }
-       timeout *= server->nr_targets;
-       spin_unlock(&server->srv_lock);
-
-       /*
-        * Return to caller for TREE_DISCONNECT and LOGOFF and CLOSE
-        * here since they are implicitly done when session drops.
-        */
-       switch (smb2_command) {
-       /*
-        * BB Should we keep oplock break and add flush to exceptions?
-        */
-       case SMB2_TREE_DISCONNECT:
-       case SMB2_CANCEL:
-       case SMB2_CLOSE:
-       case SMB2_OPLOCK_BREAK:
-               return -EAGAIN;
-       }
-
-       /*
-        * Give demultiplex thread up to 10 seconds to each target available for
-        * reconnect -- should be greater than cifs socket timeout which is 7
-        * seconds.
-        *
-        * On "soft" mounts we wait once. Hard mounts keep retrying until
-        * process is killed or server comes back on-line.
-        */
-       do {
-               rc = wait_event_interruptible_timeout(server->response_q,
-                                                     (server->tcpStatus != CifsNeedReconnect),
-                                                     timeout * HZ);
-               if (rc < 0) {
-                       cifs_dbg(FYI, "%s: aborting reconnect due to received signal\n",
-                                __func__);
-                       return -ERESTARTSYS;
-               }
-
-               /* are we still trying to reconnect? */
-               spin_lock(&server->srv_lock);
-               if (server->tcpStatus != CifsNeedReconnect) {
-                       spin_unlock(&server->srv_lock);
-                       return 0;
-               }
-               spin_unlock(&server->srv_lock);
-       } while (retry);
-
-       cifs_dbg(FYI, "%s: gave up waiting on reconnect\n", __func__);
-       return -EHOSTDOWN;
-}
-
 static int
 smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
               struct TCP_Server_Info *server)
@@ -243,7 +183,27 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
            (!tcon->ses->server) || !server)
                return -EIO;
 
-       rc = wait_for_server_reconnect(server, smb2_command, tcon->retry);
+       spin_lock(&server->srv_lock);
+       if (server->tcpStatus == CifsNeedReconnect) {
+               /*
+                * Return to caller for TREE_DISCONNECT and LOGOFF and CLOSE
+                * here since they are implicitly done when session drops.
+                */
+               switch (smb2_command) {
+               /*
+                * BB Should we keep oplock break and add flush to exceptions?
+                */
+               case SMB2_TREE_DISCONNECT:
+               case SMB2_CANCEL:
+               case SMB2_CLOSE:
+               case SMB2_OPLOCK_BREAK:
+                       spin_unlock(&server->srv_lock);
+                       return -EAGAIN;
+               }
+       }
+       spin_unlock(&server->srv_lock);
+
+       rc = cifs_wait_for_server_reconnect(server, tcon->retry);
        if (rc)
                return rc;
 
index 55b6e319a61dcd309af7a31dcbff5646b70d4ebd..0362ebd4fa0fc2c50176937d04848bc62b58584c 100644 (file)
@@ -837,7 +837,7 @@ static int smbd_post_send_iter(struct smbd_connection *info,
        int data_length;
        struct smbd_request *request;
        struct smbd_data_transfer *packet;
-       int new_credits;
+       int new_credits = 0;
 
 wait_credit:
        /* Wait for send credits. A SMBD packet needs one credit */
index e3d168911dbe12ea242d5ee66c5ddc54a9aa3e55..006ef68d7ff6affe93f4679820643a57fbd26caf 100644 (file)
@@ -183,7 +183,7 @@ static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset,
                                unsigned int len)
 {
        struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
-       struct file_ra_state ra;
+       struct file_ra_state ra = {};
        struct page *pages[BLKS_PER_BUF];
        unsigned i, blocknr, buffer;
        unsigned long devsize;
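
The cramfs one-liner above fixes use of an uninitialized on-stack readahead state: "= {}" zero-initializes every member (an empty initializer, GNU C and C23), whereas the bare declaration left indeterminate stack contents for the readahead code to consume. A trivial userspace illustration with a made-up struct:

    #include <stdio.h>

    struct ra_state {               /* stand-in for file_ra_state */
            unsigned long start;
            unsigned int size;
            unsigned int async_size;
    };

    int main(void)
    {
            struct ra_state dirty;          /* indeterminate contents */
            struct ra_state clean = {};     /* every member zeroed */

            (void)dirty;    /* reading it here would be undefined */
            printf("%lu %u %u\n", clean.start, clean.size, clean.async_size);
            return 0;
    }
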
index 1dfa67f307f1737cfcd39eada98f4dbf272c4484..957574180a5e3e0e80b7830ecf8ec51b989a3e60 100644 (file)
@@ -29,14 +29,15 @@ static int exfat_extract_uni_name(struct exfat_dentry *ep,
 
 }
 
-static void exfat_get_uniname_from_ext_entry(struct super_block *sb,
+static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
                struct exfat_chain *p_dir, int entry, unsigned short *uniname)
 {
-       int i;
+       int i, err;
        struct exfat_entry_set_cache es;
 
-       if (exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES))
-               return;
+       err = exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES);
+       if (err)
+               return err;
 
        /*
         * First entry  : file entry
@@ -56,12 +57,13 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb,
        }
 
        exfat_put_dentry_set(&es, false);
+       return 0;
 }
 
 /* read a directory entry from the opened directory */
 static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_entry *dir_entry)
 {
-       int i, dentries_per_clu, num_ext;
+       int i, dentries_per_clu, num_ext, err;
        unsigned int type, clu_offset, max_dentries;
        struct exfat_chain dir, clu;
        struct exfat_uni_name uni_name;
@@ -100,7 +102,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
                        clu.dir = ei->hint_bmap.clu;
                }
 
-               while (clu_offset > 0) {
+               while (clu_offset > 0 && clu.dir != EXFAT_EOF_CLUSTER) {
                        if (exfat_get_next_cluster(sb, &(clu.dir)))
                                return -EIO;
 
@@ -146,8 +148,12 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
                                        0);
 
                        *uni_name.name = 0x0;
-                       exfat_get_uniname_from_ext_entry(sb, &clu, i,
+                       err = exfat_get_uniname_from_ext_entry(sb, &clu, i,
                                uni_name.name);
+                       if (err) {
+                               brelse(bh);
+                               continue;
+                       }
                        exfat_utf16_to_nls(sb, &uni_name,
                                dir_entry->namebuf.lfn,
                                dir_entry->namebuf.lfnbuf_len);
@@ -234,10 +240,7 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
                fake_offset = 1;
        }
 
-       if (cpos & (DENTRY_SIZE - 1)) {
-               err = -ENOENT;
-               goto unlock;
-       }
+       cpos = round_up(cpos, DENTRY_SIZE);
 
        /* name buffer should be allocated before use */
        err = exfat_alloc_namebuf(nb);
@@ -378,6 +381,12 @@ unsigned int exfat_get_entry_type(struct exfat_dentry *ep)
                        return TYPE_ACL;
                return TYPE_CRITICAL_SEC;
        }
+
+       if (ep->type == EXFAT_VENDOR_EXT)
+               return TYPE_VENDOR_EXT;
+       if (ep->type == EXFAT_VENDOR_ALLOC)
+               return TYPE_VENDOR_ALLOC;
+
        return TYPE_BENIGN_SEC;
 }
 
@@ -521,6 +530,25 @@ release_fbh:
        return ret;
 }
 
+static void exfat_free_benign_secondary_clusters(struct inode *inode,
+               struct exfat_dentry *ep)
+{
+       struct super_block *sb = inode->i_sb;
+       struct exfat_chain dir;
+       unsigned int start_clu =
+               le32_to_cpu(ep->dentry.generic_secondary.start_clu);
+       u64 size = le64_to_cpu(ep->dentry.generic_secondary.size);
+       unsigned char flags = ep->dentry.generic_secondary.flags;
+
+       if (!(flags & ALLOC_POSSIBLE) || !start_clu || !size)
+               return;
+
+       exfat_chain_set(&dir, start_clu,
+                       EXFAT_B_TO_CLU_ROUND_UP(size, EXFAT_SB(sb)),
+                       flags);
+       exfat_free_cluster(inode, &dir);
+}
+
 int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir,
                int entry, int num_entries, struct exfat_uni_name *p_uniname)
 {
@@ -553,6 +581,9 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir,
                if (!ep)
                        return -EIO;
 
+               if (exfat_get_entry_type(ep) & TYPE_BENIGN_SEC)
+                       exfat_free_benign_secondary_clusters(inode, ep);
+
                exfat_init_name_entry(ep, uniname);
                exfat_update_bh(bh, sync);
                brelse(bh);
@@ -576,6 +607,9 @@ int exfat_remove_entries(struct inode *inode, struct exfat_chain *p_dir,
                if (!ep)
                        return -EIO;
 
+               if (exfat_get_entry_type(ep) & TYPE_BENIGN_SEC)
+                       exfat_free_benign_secondary_clusters(inode, ep);
+
                exfat_set_entry_type(ep, TYPE_DELETED);
                exfat_update_bh(bh, IS_DIRSYNC(inode));
                brelse(bh);
@@ -744,6 +778,7 @@ enum exfat_validate_dentry_mode {
        ES_MODE_GET_STRM_ENTRY,
        ES_MODE_GET_NAME_ENTRY,
        ES_MODE_GET_CRITICAL_SEC_ENTRY,
+       ES_MODE_GET_BENIGN_SEC_ENTRY,
 };
 
 static bool exfat_validate_entry(unsigned int type,
@@ -757,36 +792,33 @@ static bool exfat_validate_entry(unsigned int type,
                if  (type != TYPE_FILE && type != TYPE_DIR)
                        return false;
                *mode = ES_MODE_GET_FILE_ENTRY;
-               return true;
+               break;
        case ES_MODE_GET_FILE_ENTRY:
                if (type != TYPE_STREAM)
                        return false;
                *mode = ES_MODE_GET_STRM_ENTRY;
-               return true;
+               break;
        case ES_MODE_GET_STRM_ENTRY:
                if (type != TYPE_EXTEND)
                        return false;
                *mode = ES_MODE_GET_NAME_ENTRY;
-               return true;
+               break;
        case ES_MODE_GET_NAME_ENTRY:
-               if (type == TYPE_STREAM)
-                       return false;
-               if (type != TYPE_EXTEND) {
-                       if (!(type & TYPE_CRITICAL_SEC))
-                               return false;
-                       *mode = ES_MODE_GET_CRITICAL_SEC_ENTRY;
-               }
-               return true;
-       case ES_MODE_GET_CRITICAL_SEC_ENTRY:
-               if (type == TYPE_EXTEND || type == TYPE_STREAM)
+               if (type & TYPE_BENIGN_SEC)
+                       *mode = ES_MODE_GET_BENIGN_SEC_ENTRY;
+               else if (type != TYPE_EXTEND)
                        return false;
-               if ((type & TYPE_CRITICAL_SEC) != TYPE_CRITICAL_SEC)
+               break;
+       case ES_MODE_GET_BENIGN_SEC_ENTRY:
+               /* Assume an unrecognized benign secondary entry */
+               if (!(type & TYPE_BENIGN_SEC))
                        return false;
-               return true;
+               break;
        default:
-               WARN_ON_ONCE(1);
                return false;
        }
+
+       return true;
 }
 
 struct exfat_dentry *exfat_get_dentry_cached(
@@ -1167,10 +1199,8 @@ int exfat_count_ext_entries(struct super_block *sb, struct exfat_chain *p_dir,
 
                type = exfat_get_entry_type(ext_ep);
                brelse(bh);
-               if (type == TYPE_EXTEND || type == TYPE_STREAM)
+               if (type & TYPE_CRITICAL_SEC || type & TYPE_BENIGN_SEC)
                        count++;
-               else
-                       break;
        }
        return count;
 }
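
The switch from equality tests to bitwise AND works because, per the exfat.h hunk below, TYPE_BENIGN_SEC (0x0800) acts as a class bit present in every benign secondary type, such as the new TYPE_VENDOR_EXT (0x0801) and TYPE_VENDOR_ALLOC (0x0802). A two-assert check of that arithmetic:

    #include <assert.h>

    #define TYPE_BENIGN_SEC   0x0800
    #define TYPE_VENDOR_EXT   0x0801
    #define TYPE_VENDOR_ALLOC 0x0802

    int main(void)
    {
            /* The class bit is set in each member type... */
            assert(TYPE_VENDOR_EXT & TYPE_BENIGN_SEC);
            assert(TYPE_VENDOR_ALLOC & TYPE_BENIGN_SEC);
            /* ...so "type & TYPE_BENIGN_SEC" classifies without
             * enumerating every value. */
            return 0;
    }
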
index 1bf16abe3c8431d1329b14cbe4727488e0c6de5f..729ada9e26e82ec0d7778c0379c7cbc3ca6b8c52 100644 (file)
@@ -50,7 +50,7 @@ enum {
 #define ES_IDX_LAST_FILENAME(name_len) \
        (ES_IDX_FIRST_FILENAME + EXFAT_FILENAME_ENTRY_NUM(name_len) - 1)
 
-#define DIR_DELETED            0xFFFF0321
+#define DIR_DELETED            0xFFFFFFF7
 
 /* type values */
 #define TYPE_UNUSED            0x0000
@@ -71,6 +71,8 @@ enum {
 #define TYPE_PADDING           0x0402
 #define TYPE_ACLTAB            0x0403
 #define TYPE_BENIGN_SEC                0x0800
+#define TYPE_VENDOR_EXT                0x0801
+#define TYPE_VENDOR_ALLOC      0x0802
 
 #define MAX_CHARSET_SIZE       6 /* max size of multi-byte character */
 #define MAX_NAME_LENGTH                255 /* max len of file name excluding NULL */
index 7f39b1c6469c4e6938fc855ee4cd7a51ede9ec57..0ece2e43cf492c3a0fcc3dec0821623d9dcf485b 100644 (file)
@@ -27,6 +27,7 @@
        ((sbi)->num_clusters - EXFAT_RESERVED_CLUSTERS)
 
 /* AllocationPossible and NoFatChain field in GeneralSecondaryFlags Field */
+#define ALLOC_POSSIBLE         0x01
 #define ALLOC_FAT_CHAIN                0x01
 #define ALLOC_NO_FAT_CHAIN     0x03
 
@@ -50,6 +51,8 @@
 #define EXFAT_STREAM           0xC0    /* stream entry */
 #define EXFAT_NAME             0xC1    /* file name entry */
 #define EXFAT_ACL              0xC2    /* stream entry */
+#define EXFAT_VENDOR_EXT       0xE0    /* vendor extension entry */
+#define EXFAT_VENDOR_ALLOC     0xE1    /* vendor allocation entry */
 
 #define IS_EXFAT_CRITICAL_PRI(x)       (x < 0xA0)
 #define IS_EXFAT_BENIGN_PRI(x)         (x < 0xC0)
@@ -155,6 +158,24 @@ struct exfat_dentry {
                        __le32 start_clu;
                        __le64 size;
                } __packed upcase; /* up-case table directory entry */
+               struct {
+                       __u8 flags;
+                       __u8 vendor_guid[16];
+                       __u8 vendor_defined[14];
+               } __packed vendor_ext; /* vendor extension directory entry */
+               struct {
+                       __u8 flags;
+                       __u8 vendor_guid[16];
+                       __u8 vendor_defined[2];
+                       __le32 start_clu;
+                       __le64 size;
+               } __packed vendor_alloc; /* vendor allocation directory entry */
+               struct {
+                       __u8 flags;
+                       __u8 custom_defined[18];
+                       __le32 start_clu;
+                       __le64 size;
+               } __packed generic_secondary; /* generic secondary directory entry */
        } __packed dentry;
 } __packed;
 
index 41ae4cce1f4203c1d8ecd6e4bd92e23fc42f7366..56b870d9cc0deffd26169bb915179851655d33f8 100644 (file)
@@ -307,7 +307,7 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
                struct exfat_chain *p_chain, bool sync_bmap)
 {
        int ret = -ENOSPC;
-       unsigned int num_clusters = 0, total_cnt;
+       unsigned int total_cnt;
        unsigned int hint_clu, new_clu, last_clu = EXFAT_EOF_CLUSTER;
        struct super_block *sb = inode->i_sb;
        struct exfat_sb_info *sbi = EXFAT_SB(sb);
@@ -344,17 +344,11 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
 
        /* check cluster validation */
        if (!is_valid_cluster(sbi, hint_clu)) {
-               exfat_err(sb, "hint_cluster is invalid (%u)",
-                       hint_clu);
+               if (hint_clu != sbi->num_clusters)
+                       exfat_err(sb, "hint_cluster is invalid (%u), rewind to the first cluster",
+                                       hint_clu);
                hint_clu = EXFAT_FIRST_CLUSTER;
-               if (p_chain->flags == ALLOC_NO_FAT_CHAIN) {
-                       if (exfat_chain_cont_cluster(sb, p_chain->dir,
-                                       num_clusters)) {
-                               ret = -EIO;
-                               goto unlock;
-                       }
-                       p_chain->flags = ALLOC_FAT_CHAIN;
-               }
+               p_chain->flags = ALLOC_FAT_CHAIN;
        }
 
        p_chain->dir = EXFAT_EOF_CLUSTER;
@@ -364,7 +358,7 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
                if (new_clu != hint_clu &&
                    p_chain->flags == ALLOC_NO_FAT_CHAIN) {
                        if (exfat_chain_cont_cluster(sb, p_chain->dir,
-                                       num_clusters)) {
+                                       p_chain->size)) {
                                ret = -EIO;
                                goto free_cluster;
                        }
@@ -377,8 +371,6 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
                        goto free_cluster;
                }
 
-               num_clusters++;
-
                /* update FAT table */
                if (p_chain->flags == ALLOC_FAT_CHAIN) {
                        if (exfat_ent_set(sb, new_clu, EXFAT_EOF_CLUSTER)) {
@@ -395,13 +387,14 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
                                goto free_cluster;
                        }
                }
+               p_chain->size++;
+
                last_clu = new_clu;
 
-               if (--num_alloc == 0) {
+               if (p_chain->size == num_alloc) {
                        sbi->clu_srch_ptr = hint_clu;
-                       sbi->used_clusters += num_clusters;
+                       sbi->used_clusters += num_alloc;
 
-                       p_chain->size += num_clusters;
                        mutex_unlock(&sbi->bitmap_lock);
                        return 0;
                }
@@ -412,7 +405,7 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
 
                        if (p_chain->flags == ALLOC_NO_FAT_CHAIN) {
                                if (exfat_chain_cont_cluster(sb, p_chain->dir,
-                                               num_clusters)) {
+                                               p_chain->size)) {
                                        ret = -EIO;
                                        goto free_cluster;
                                }
@@ -421,8 +414,7 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
                }
        }
 free_cluster:
-       if (num_clusters)
-               __exfat_free_cluster(inode, p_chain);
+       __exfat_free_cluster(inode, p_chain);
 unlock:
        mutex_unlock(&sbi->bitmap_lock);
        return ret;
index 1fdb0a64b91db9350b9e8b73d1536601c4a3d775..e99183a746117bd97955b6d48338207e74e50371 100644 (file)
@@ -209,8 +209,7 @@ void exfat_truncate(struct inode *inode)
        if (err)
                goto write_size;
 
-       inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
-                               inode->i_blkbits;
+       inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9;
 write_size:
        aligned_size = i_size_read(inode);
        if (aligned_size & (blocksize - 1)) {
index 5b644cb057fa8060bf5c136dfc2071f31b66ec29..481dd338f2b8e7e3ccdc9fd3f59dea9f5e7e330b 100644 (file)
@@ -220,8 +220,7 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
                num_clusters += num_to_be_allocated;
                *clu = new_clu.dir;
 
-               inode->i_blocks +=
-                       num_to_be_allocated << sbi->sect_per_clus_bits;
+               inode->i_blocks += EXFAT_CLU_TO_B(num_to_be_allocated, sbi) >> 9;
 
                /*
                 * Move *clu pointer along FAT chains (hole care) because the
@@ -576,8 +575,7 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
 
        exfat_save_attr(inode, info->attr);
 
-       inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
-                               inode->i_blkbits;
+       inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9;
        inode->i_mtime = info->mtime;
        inode->i_ctime = info->mtime;
        ei->i_crtime = info->crtime;
index 02aab4c3a5f734c0308419111ca1c972610c8f89..e0ff9d156f6f57bbc2b68132b9465bd8f9068307 100644 (file)
@@ -396,7 +396,7 @@ static int exfat_find_empty_entry(struct inode *inode,
                ei->i_size_ondisk += sbi->cluster_size;
                ei->i_size_aligned += sbi->cluster_size;
                ei->flags = p_dir->flags;
-               inode->i_blocks += 1 << sbi->sect_per_clus_bits;
+               inode->i_blocks += sbi->cluster_size >> 9;
        }
 
        return dentry;
index 35f0305cd493cc1fd961ca170e5c033c6928dd17..8c32460e031e804a8acf8136b43348fc8839a7df 100644 (file)
@@ -373,8 +373,7 @@ static int exfat_read_root(struct inode *inode)
        inode->i_op = &exfat_dir_inode_operations;
        inode->i_fop = &exfat_dir_operations;
 
-       inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
-                               inode->i_blkbits;
+       inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9;
        ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff;
        ei->i_size_aligned = i_size_read(inode);
        ei->i_size_ondisk = i_size_read(inode);
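
All of the exfat hunks replacing ">> inode->i_blkbits" with ">> 9" encode the same fact: inode->i_blocks counts 512-byte sectors, so a byte count always converts with a fixed shift of 9, independent of the filesystem block size that i_blkbits tracks. A one-assert sanity check of the arithmetic:

    #include <assert.h>

    int main(void)
    {
            unsigned long long bytes = 3ULL * 128 * 1024;   /* three 128 KiB clusters */

            /* 393216 bytes / 512 bytes per sector = 768 sectors */
            assert((bytes >> 9) == 768);
            return 0;
    }
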
index 43e26e6f6e4240f884ce08295db62f9684deda99..4eeb02d456a958b7f8809e7d884378a23d0e4c0b 100644 (file)
@@ -1529,6 +1529,7 @@ struct ext4_sb_info {
        unsigned int s_mount_opt2;
        unsigned long s_mount_flags;
        unsigned int s_def_mount_opt;
+       unsigned int s_def_mount_opt2;
        ext4_fsblk_t s_sb_block;
        atomic64_t s_resv_clusters;
        kuid_t s_resuid;
index 9de1c9d1a13d319217e6eca46fe4a351c562eea9..3559ea6b07818c2640bbfa319502054b777705c1 100644 (file)
@@ -3251,7 +3251,7 @@ static int ext4_split_extent_at(handle_t *handle,
                ext4_ext_mark_unwritten(ex2);
 
        err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
-       if (err != -ENOSPC && err != -EDQUOT)
+       if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM)
                goto out;
 
        if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
index 4594b62f147bb988d189a0b00318da7cf135699f..b06de728b3b6c9e9e56667fd58109b601034f610 100644 (file)
@@ -1332,8 +1332,14 @@ struct dentry_info_args {
        char *dname;
 };
 
+/* Same as struct ext4_fc_tl, but uses native endianness fields */
+struct ext4_fc_tl_mem {
+       u16 fc_tag;
+       u16 fc_len;
+};
+
 static inline void tl_to_darg(struct dentry_info_args *darg,
-                             struct ext4_fc_tl *tl, u8 *val)
+                             struct ext4_fc_tl_mem *tl, u8 *val)
 {
        struct ext4_fc_dentry_info fcd;
 
@@ -1345,16 +1351,18 @@ static inline void tl_to_darg(struct dentry_info_args *darg,
        darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info);
 }
 
-static inline void ext4_fc_get_tl(struct ext4_fc_tl *tl, u8 *val)
+static inline void ext4_fc_get_tl(struct ext4_fc_tl_mem *tl, u8 *val)
 {
-       memcpy(tl, val, EXT4_FC_TAG_BASE_LEN);
-       tl->fc_len = le16_to_cpu(tl->fc_len);
-       tl->fc_tag = le16_to_cpu(tl->fc_tag);
+       struct ext4_fc_tl tl_disk;
+
+       memcpy(&tl_disk, val, EXT4_FC_TAG_BASE_LEN);
+       tl->fc_len = le16_to_cpu(tl_disk.fc_len);
+       tl->fc_tag = le16_to_cpu(tl_disk.fc_tag);
 }
 
 /* Unlink replay function */
-static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
-                                u8 *val)
+static int ext4_fc_replay_unlink(struct super_block *sb,
+                                struct ext4_fc_tl_mem *tl, u8 *val)
 {
        struct inode *inode, *old_parent;
        struct qstr entry;
@@ -1451,8 +1459,8 @@ out:
 }
 
 /* Link replay function */
-static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
-                              u8 *val)
+static int ext4_fc_replay_link(struct super_block *sb,
+                              struct ext4_fc_tl_mem *tl, u8 *val)
 {
        struct inode *inode;
        struct dentry_info_args darg;
@@ -1506,8 +1514,8 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
 /*
  * Inode replay function
  */
-static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
-                               u8 *val)
+static int ext4_fc_replay_inode(struct super_block *sb,
+                               struct ext4_fc_tl_mem *tl, u8 *val)
 {
        struct ext4_fc_inode fc_inode;
        struct ext4_inode *raw_inode;
@@ -1609,8 +1617,8 @@ out:
  * inode for which we are trying to create a dentry here, should already have
  * been replayed before we start here.
  */
-static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
-                                u8 *val)
+static int ext4_fc_replay_create(struct super_block *sb,
+                                struct ext4_fc_tl_mem *tl, u8 *val)
 {
        int ret = 0;
        struct inode *inode = NULL;
@@ -1708,7 +1716,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
 
 /* Replay add range tag */
 static int ext4_fc_replay_add_range(struct super_block *sb,
-                                   struct ext4_fc_tl *tl, u8 *val)
+                                   struct ext4_fc_tl_mem *tl, u8 *val)
 {
        struct ext4_fc_add_range fc_add_ex;
        struct ext4_extent newex, *ex;
@@ -1828,8 +1836,8 @@ out:
 
 /* Replay DEL_RANGE tag */
 static int
-ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
-                        u8 *val)
+ext4_fc_replay_del_range(struct super_block *sb,
+                        struct ext4_fc_tl_mem *tl, u8 *val)
 {
        struct inode *inode;
        struct ext4_fc_del_range lrange;
@@ -2025,7 +2033,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
        struct ext4_fc_replay_state *state;
        int ret = JBD2_FC_REPLAY_CONTINUE;
        struct ext4_fc_add_range ext;
-       struct ext4_fc_tl tl;
+       struct ext4_fc_tl_mem tl;
        struct ext4_fc_tail tail;
        __u8 *start, *end, *cur, *val;
        struct ext4_fc_head head;
@@ -2144,7 +2152,7 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
 {
        struct super_block *sb = journal->j_private;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct ext4_fc_tl tl;
+       struct ext4_fc_tl_mem tl;
        __u8 *start, *end, *cur, *val;
        int ret = JBD2_FC_REPLAY_CONTINUE;
        struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
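
The point of struct ext4_fc_tl_mem is to stop converting endianness in place: the on-disk struct carries little-endian fields, and overwriting them with CPU-endian values abuses the type. Decoding into a separate native-endian struct keeps both layouts honest. A hedged userspace sketch of the same split (le16toh_ is a local helper here, not a kernel API):

    #include <stdint.h>
    #include <string.h>

    struct tl_disk { uint16_t tag_le; uint16_t len_le; };   /* on-disk, LE */
    struct tl_mem  { uint16_t tag;    uint16_t len;    };   /* in-memory   */

    static uint16_t le16toh_(uint16_t v)
    {
            const uint8_t *p = (const uint8_t *)&v;

            return (uint16_t)(p[0] | (p[1] << 8));
    }

    static void tl_get(struct tl_mem *dst, const uint8_t *src)
    {
            struct tl_disk d;

            memcpy(&d, src, sizeof(d));     /* unaligned-safe copy */
            dst->tag = le16toh_(d.tag_le);
            dst->len = le16toh_(d.len_le);
    }
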
index 6bdf61a62c79621c2617080c4f68cd07ff6bd957..0b8b4499e5ca18a7f437015dce1d9d7071b064e7 100644 (file)
@@ -202,8 +202,9 @@ ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
        return false;
 }
 
-/* Is IO overwriting allocated and initialized blocks? */
-static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
+/* Is IO overwriting allocated or initialized blocks? */
+static bool ext4_overwrite_io(struct inode *inode,
+                             loff_t pos, loff_t len, bool *unwritten)
 {
        struct ext4_map_blocks map;
        unsigned int blkbits = inode->i_blkbits;
@@ -217,12 +218,15 @@ static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
        blklen = map.m_len;
 
        err = ext4_map_blocks(NULL, inode, &map, 0);
+       if (err != blklen)
+               return false;
        /*
         * 'err==len' means that all of the blocks have been preallocated,
-        * regardless of whether they have been initialized or not. To exclude
-        * unwritten extents, we need to check m_flags.
+        * regardless of whether they have been initialized or not. We need to
+        * check m_flags to distinguish unwritten extents.
         */
-       return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
+       *unwritten = !(map.m_flags & EXT4_MAP_MAPPED);
+       return true;
 }
 
 static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
@@ -431,11 +435,16 @@ static const struct iomap_dio_ops ext4_dio_write_ops = {
  * - For extending writes case we don't take the shared lock, since it requires
  *   updating inode i_disksize and/or orphan handling with exclusive lock.
  *
- * - shared locking will only be true mostly with overwrites. Otherwise we will
- *   switch to exclusive i_rwsem lock.
+ * - shared locking will mostly be used for overwrites, of both initialized
+ *   and unwritten blocks. For overwrites of unwritten blocks, splitting of
+ *   extents is protected by i_data_sem in ext4_inode_info, so we can also
+ *   release the exclusive i_rwsem lock.
+ *
+ * - Otherwise we will switch to exclusive i_rwsem lock.
  */
 static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
-                                    bool *ilock_shared, bool *extend)
+                                    bool *ilock_shared, bool *extend,
+                                    bool *unwritten)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
@@ -459,7 +468,7 @@ restart:
         * in file_modified().
         */
        if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
-            !ext4_overwrite_io(inode, offset, count))) {
+            !ext4_overwrite_io(inode, offset, count, unwritten))) {
                if (iocb->ki_flags & IOCB_NOWAIT) {
                        ret = -EAGAIN;
                        goto out;
@@ -491,7 +500,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
        loff_t offset = iocb->ki_pos;
        size_t count = iov_iter_count(from);
        const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
-       bool extend = false, unaligned_io = false;
+       bool extend = false, unaligned_io = false, unwritten = false;
        bool ilock_shared = true;
 
        /*
@@ -534,7 +543,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
                return ext4_buffered_write_iter(iocb, from);
        }
 
-       ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
+       ret = ext4_dio_write_checks(iocb, from,
+                                   &ilock_shared, &extend, &unwritten);
        if (ret <= 0)
                return ret;
 
@@ -582,7 +592,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
                ext4_journal_stop(handle);
        }
 
-       if (ilock_shared)
+       if (ilock_shared && !unwritten)
                iomap_ops = &ext4_iomap_overwrite_ops;
        ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
                           (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0,
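
The upshot of threading "unwritten" through ext4_dio_write_checks(): a shared-lock DIO overwrite of unwritten extents is still permitted, but it must use the full ext4_iomap_ops, because ext4_iomap_overwrite_ops assume no block allocation or unwritten-extent conversion will be needed. A condensed sketch of that decision (illustrative, not ext4 code):

    #include <stdbool.h>
    #include <stdio.h>

    /* The overwrite fast path needs a shared lock AND fully written extents. */
    static const char *pick_iomap_ops(bool ilock_shared, bool unwritten)
    {
            if (ilock_shared && !unwritten)
                    return "ext4_iomap_overwrite_ops";
            return "ext4_iomap_ops";
    }

    int main(void)
    {
            printf("%s\n", pick_iomap_ops(true, false));    /* fast path */
            printf("%s\n", pick_iomap_ops(true, true));     /* unwritten */
            return 0;
    }
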
index 40579ef513b716a90729a24ec26c043728a640ae..d251d705c2763ca82aea53e7f9ab41e8f743dbb0 100644 (file)
@@ -4872,13 +4872,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
                goto bad_inode;
        raw_inode = ext4_raw_inode(&iloc);
 
-       if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
-               ext4_error_inode(inode, function, line, 0,
-                                "iget: root inode unallocated");
-               ret = -EFSCORRUPTED;
-               goto bad_inode;
-       }
-
        if ((flags & EXT4_IGET_HANDLE) &&
            (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
                ret = -ESTALE;
@@ -4951,11 +4944,16 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
         * NeilBrown 1999oct15
         */
        if (inode->i_nlink == 0) {
-               if ((inode->i_mode == 0 ||
+               if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL ||
                     !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
                    ino != EXT4_BOOT_LOADER_INO) {
-                       /* this inode is deleted */
-                       ret = -ESTALE;
+                       /* this inode is deleted or unallocated */
+                       if (flags & EXT4_IGET_SPECIAL) {
+                               ext4_error_inode(inode, function, line, 0,
+                                                "iget: special inode unallocated");
+                               ret = -EFSCORRUPTED;
+                       } else
+                               ret = -ESTALE;
                        goto bad_inode;
                }
                /* The only unlinked inodes we let through here have
@@ -5788,7 +5786,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
        ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
        int gdpblocks;
        int idxblocks;
-       int ret = 0;
+       int ret;
 
        /*
         * How many index blocks need to touch to map @lblocks logical blocks
index b0dc7212694e28a3cdf2a11d108115ad89a8a705..12435d61f09eff9d85e23907588d21fd3c0be734 100644 (file)
@@ -155,9 +155,6 @@ static int ext4_update_backup_sb(struct super_block *sb,
        set_buffer_uptodate(bh);
        unlock_buffer(bh);
 
-       if (err)
-               goto out_bh;
-
        if (handle) {
                err = ext4_handle_dirty_metadata(handle, NULL, bh);
                if (err)
index d10a508d95cd6d0241c28f67d5bee2c1cf7d2f26..94608b7df7e8a0fb01e656ec1616244d72744758 100644 (file)
@@ -3872,9 +3872,16 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
                        if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
                                goto end_rename;
                }
+               /*
+                * We need to protect against the old.inode directory being
+                * converted from inline directory format into a normal one.
+                */
+               inode_lock_nested(old.inode, I_MUTEX_NONDIR2);
                retval = ext4_rename_dir_prepare(handle, &old);
-               if (retval)
+               if (retval) {
+                       inode_unlock(old.inode);
                        goto end_rename;
+               }
        }
        /*
         * If we're renaming a file within an inline_data dir and adding or
@@ -4006,6 +4013,8 @@ end_rename:
        } else {
                ext4_journal_stop(handle);
        }
+       if (old.dir_bh)
+               inode_unlock(old.inode);
 release_bh:
        brelse(old.dir_bh);
        brelse(old.bh);
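
The two namei.c hunks pair up: the lock taken before ext4_rename_dir_prepare() is dropped immediately on failure, and otherwise at end_rename, where a non-NULL old.dir_bh records that the directory path (and hence the lock) was taken. A userspace sketch of that unlock discipline, with a pthread mutex standing in for the inode lock:

        #include <pthread.h>
        #include <stddef.h>

        /* "dir_bh" being non-NULL plays the role of old.dir_bh above,
         * marking that the lock must be dropped at the exit label. */
        static int do_rename(pthread_mutex_t *old_inode_lock, int prepare_fails)
        {
                void *dir_bh = NULL;
                int retval = 0;

                pthread_mutex_lock(old_inode_lock);
                if (prepare_fails) {            /* ext4_rename_dir_prepare() error */
                        pthread_mutex_unlock(old_inode_lock);
                        retval = -1;
                        goto end_rename;
                }
                dir_bh = &retval;               /* prepare succeeded: bh held */
                /* ... the actual rename work ... */
        end_rename:
                if (dir_bh)                     /* mirrors: if (old.dir_bh) */
                        pthread_mutex_unlock(old_inode_lock);
                return retval;
        }

        int main(void)
        {
                pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

                do_rename(&m, 0);
                do_rename(&m, 1);
                return 0;
        }
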
index faae0549347175eea372524ece2b0f7b7cf19d75..88f7b8a88c76b6d5d34d9740d58ffe26136dc5dd 100644 (file)
@@ -2146,7 +2146,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
                return 0;
        case Opt_commit:
                if (result.uint_32 == 0)
-                       ctx->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE;
+                       result.uint_32 = JBD2_DEFAULT_MAX_COMMIT_AGE;
                else if (result.uint_32 > INT_MAX / HZ) {
                        ext4_msg(NULL, KERN_ERR,
                                 "Invalid commit interval %d, "
@@ -2883,7 +2883,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
-       int def_errors, def_mount_opt = sbi->s_def_mount_opt;
+       int def_errors;
        const struct mount_opts *m;
        char sep = nodefs ? '\n' : ',';
 
@@ -2895,15 +2895,28 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 
        for (m = ext4_mount_opts; m->token != Opt_err; m++) {
                int want_set = m->flags & MOPT_SET;
+               int opt_2 = m->flags & MOPT_2;
+               unsigned int mount_opt, def_mount_opt;
+
                if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
                    m->flags & MOPT_SKIP)
                        continue;
-               if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
-                       continue; /* skip if same as the default */
+
+               if (opt_2) {
+                       mount_opt = sbi->s_mount_opt2;
+                       def_mount_opt = sbi->s_def_mount_opt2;
+               } else {
+                       mount_opt = sbi->s_mount_opt;
+                       def_mount_opt = sbi->s_def_mount_opt;
+               }
+               /* skip if same as the default */
+               if (!nodefs && !(m->mount_opt & (mount_opt ^ def_mount_opt)))
+                       continue;
+               /* select Opt_noFoo vs Opt_Foo */
                if ((want_set &&
-                    (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
-                   (!want_set && (sbi->s_mount_opt & m->mount_opt)))
-                       continue; /* select Opt_noFoo vs Opt_Foo */
+                    (mount_opt & m->mount_opt) != m->mount_opt) ||
+                   (!want_set && (mount_opt & m->mount_opt)))
+                       continue;
                SEQ_OPTS_PRINT("%s", token2str(m->token));
        }
 
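The loop above now picks the (mount_opt, def_mount_opt) pair by MOPT_2 but keeps the same skip rule: an option is printed only if its bits differ between the current and default sets, i.e. m->mount_opt & (mount_opt ^ def_mount_opt) is non-zero. A standalone sketch of that test:

        #include <stdio.h>

        /* An option mask "differs from default" iff it overlaps the XOR of
         * the current and default option bitmaps. */
        static unsigned int differs_from_default(unsigned int opt_mask,
                                                 unsigned int cur, unsigned int def)
        {
                return opt_mask & (cur ^ def);
        }

        int main(void)
        {
                unsigned int DEF = 0x5, CUR = 0x4;      /* bit 0 cleared vs default */

                printf("%u\n", differs_from_default(0x1, CUR, DEF)); /* 1: print it */
                printf("%u\n", differs_from_default(0x4, CUR, DEF)); /* 0: skip */
                return 0;
        }
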
@@ -2931,7 +2944,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
        if (nodefs || sbi->s_stripe)
                SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
        if (nodefs || EXT4_MOUNT_DATA_FLAGS &
-                       (sbi->s_mount_opt ^ def_mount_opt)) {
+                       (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
                        SEQ_OPTS_PUTS("data=journal");
                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
@@ -4727,7 +4740,6 @@ static int ext4_group_desc_init(struct super_block *sb,
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        unsigned int db_count;
        ext4_fsblk_t block;
-       int ret;
        int i;
 
        db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
@@ -4767,8 +4779,7 @@ static int ext4_group_desc_init(struct super_block *sb,
                        ext4_msg(sb, KERN_ERR,
                               "can't read group descriptor %d", i);
                        sbi->s_gdb_count = i;
-                       ret = PTR_ERR(bh);
-                       goto out;
+                       return PTR_ERR(bh);
                }
                rcu_read_lock();
                rcu_dereference(sbi->s_group_desc)[i] = bh;
@@ -4777,13 +4788,10 @@ static int ext4_group_desc_init(struct super_block *sb,
        sbi->s_gdb_count = db_count;
        if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) {
                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
-               ret = -EFSCORRUPTED;
-               goto out;
+               return -EFSCORRUPTED;
        }
+
        return 0;
-out:
-       ext4_group_desc_free(sbi);
-       return ret;
 }
 
 static int ext4_load_and_init_journal(struct super_block *sb,
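
With the out: label removed, ext4_group_desc_init() now reports failure without freeing what it already set up; cleanup belongs to the caller's error path instead (a later hunk below reorders __ext4_fill_super() so the failed_mount3 path is usable first), which avoids freeing the same state twice. A standalone sketch of that single-owner cleanup rule:

        #include <stdlib.h>

        struct sbi { int *group_desc; };

        /* On failure, return an error and leave whatever was allocated for
         * the caller's one cleanup path to free. */
        static int group_desc_init(struct sbi *s)
        {
                s->group_desc = calloc(8, sizeof(int));
                if (!s->group_desc)
                        return -1;
                /* ... a later failure here would also just "return -1" ... */
                return 0;
        }

        static void group_desc_free(struct sbi *s) { free(s->group_desc); }

        int main(void)
        {
                struct sbi s = { 0 };

                if (group_desc_init(&s) != 0)
                        goto failed_mount;      /* caller-owned cleanup label */
                group_desc_free(&s);
                return 0;
        failed_mount:
                group_desc_free(&s);            /* free(NULL) is a no-op */
                return 1;
        }
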
@@ -5075,6 +5083,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
                goto failed_mount;
 
        sbi->s_def_mount_opt = sbi->s_mount_opt;
+       sbi->s_def_mount_opt2 = sbi->s_mount_opt2;
 
        err = ext4_check_opt_consistency(fc, sb);
        if (err < 0)
@@ -5209,14 +5218,14 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
        if (ext4_geometry_check(sb, es))
                goto failed_mount;
 
-       err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
-       if (err)
-               goto failed_mount;
-
        timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
        spin_lock_init(&sbi->s_error_lock);
        INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
 
+       err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
+       if (err)
+               goto failed_mount3;
+
        /* Register extent status tree shrinker */
        if (ext4_es_register_shrinker(sbi))
                goto failed_mount3;
@@ -5937,8 +5946,11 @@ static int ext4_load_journal(struct super_block *sb,
        if (!really_read_only && journal_devnum &&
            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
                es->s_journal_dev = cpu_to_le32(journal_devnum);
-
-               /* Make sure we flush the recovery flag to disk. */
+               ext4_commit_super(sb);
+       }
+       if (!really_read_only && journal_inum &&
+           journal_inum != le32_to_cpu(es->s_journal_inum)) {
+               es->s_journal_inum = cpu_to_le32(journal_inum);
                ext4_commit_super(sb);
        }
 
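This hunk gives s_journal_inum the same treatment as s_journal_dev: when the filesystem is writable and the value actually changed, update the on-disk field and commit the superblock. A standalone sketch of the update-if-changed pattern:

        #include <stdbool.h>
        #include <stdio.h>

        /* Write back a superblock field only when we may write and the
         * in-memory value differs from what is on disk. */
        static bool sync_field(bool read_only, unsigned int want,
                               unsigned int *disk)
        {
                if (!read_only && want && want != *disk) {
                        *disk = want;   /* es->s_journal_inum = ... */
                        return true;    /* ext4_commit_super() */
                }
                return false;
        }

        int main(void)
        {
                unsigned int disk = 8;

                printf("%d\n", sync_field(false, 9, &disk));  /* 1: committed */
                printf("%d\n", sync_field(false, 9, &disk));  /* 0: unchanged */
                return 0;
        }
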
index a2f04a3808db57677377232224e56fdb10c1dc86..62f2ec599218354caf9ea8ef4600cb59a200a0a8 100644 (file)
@@ -184,27 +184,73 @@ ext4_xattr_handler(int name_index)
 }
 
 static int
-ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
-                        void *value_start)
+check_xattrs(struct inode *inode, struct buffer_head *bh,
+            struct ext4_xattr_entry *entry, void *end, void *value_start,
+            const char *function, unsigned int line)
 {
        struct ext4_xattr_entry *e = entry;
+       int err = -EFSCORRUPTED;
+       char *err_str;
+
+       if (bh) {
+               if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
+                   BHDR(bh)->h_blocks != cpu_to_le32(1)) {
+                       err_str = "invalid header";
+                       goto errout;
+               }
+               if (buffer_verified(bh))
+                       return 0;
+               if (!ext4_xattr_block_csum_verify(inode, bh)) {
+                       err = -EFSBADCRC;
+                       err_str = "invalid checksum";
+                       goto errout;
+               }
+       } else {
+               struct ext4_xattr_ibody_header *header = value_start;
+
+               header -= 1;
+               if (end - (void *)header < sizeof(*header) + sizeof(u32)) {
+                       err_str = "in-inode xattr block too small";
+                       goto errout;
+               }
+               if (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
+                       err_str = "bad magic number in in-inode xattr";
+                       goto errout;
+               }
+       }
 
        /* Find the end of the names list */
        while (!IS_LAST_ENTRY(e)) {
                struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
-               if ((void *)next >= end)
-                       return -EFSCORRUPTED;
-               if (strnlen(e->e_name, e->e_name_len) != e->e_name_len)
-                       return -EFSCORRUPTED;
+               if ((void *)next >= end) {
+                       err_str = "e_name out of bounds";
+                       goto errout;
+               }
+               if (strnlen(e->e_name, e->e_name_len) != e->e_name_len) {
+                       err_str = "bad e_name length";
+                       goto errout;
+               }
                e = next;
        }
 
        /* Check the values */
        while (!IS_LAST_ENTRY(entry)) {
                u32 size = le32_to_cpu(entry->e_value_size);
+               unsigned long ea_ino = le32_to_cpu(entry->e_value_inum);
 
-               if (size > EXT4_XATTR_SIZE_MAX)
-                       return -EFSCORRUPTED;
+               if (!ext4_has_feature_ea_inode(inode->i_sb) && ea_ino) {
+                       err_str = "ea_inode specified without ea_inode feature enabled";
+                       goto errout;
+               }
+               if (ea_ino && ((ea_ino == EXT4_ROOT_INO) ||
+                              !ext4_valid_inum(inode->i_sb, ea_ino))) {
+                       err_str = "invalid ea_ino";
+                       goto errout;
+               }
+               if (size > EXT4_XATTR_SIZE_MAX) {
+                       err_str = "e_value size too large";
+                       goto errout;
+               }
 
                if (size != 0 && entry->e_value_inum == 0) {
                        u16 offs = le16_to_cpu(entry->e_value_offs);
@@ -216,66 +262,54 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
                         * the padded and unpadded sizes, since the size may
                         * overflow to 0 when adding padding.
                         */
-                       if (offs > end - value_start)
-                               return -EFSCORRUPTED;
+                       if (offs > end - value_start) {
+                               err_str = "e_value out of bounds";
+                               goto errout;
+                       }
                        value = value_start + offs;
                        if (value < (void *)e + sizeof(u32) ||
                            size > end - value ||
-                           EXT4_XATTR_SIZE(size) > end - value)
-                               return -EFSCORRUPTED;
+                           EXT4_XATTR_SIZE(size) > end - value) {
+                               err_str = "overlapping e_value ";
+                               goto errout;
+                       }
                }
                entry = EXT4_XATTR_NEXT(entry);
        }
-
+       if (bh)
+               set_buffer_verified(bh);
        return 0;
+
+errout:
+       if (bh)
+               __ext4_error_inode(inode, function, line, 0, -err,
+                                  "corrupted xattr block %llu: %s",
+                                  (unsigned long long) bh->b_blocknr,
+                                  err_str);
+       else
+               __ext4_error_inode(inode, function, line, 0, -err,
+                                  "corrupted in-inode xattr: %s", err_str);
+       return err;
 }
 
 static inline int
 __ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh,
                         const char *function, unsigned int line)
 {
-       int error = -EFSCORRUPTED;
-
-       if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
-           BHDR(bh)->h_blocks != cpu_to_le32(1))
-               goto errout;
-       if (buffer_verified(bh))
-               return 0;
-
-       error = -EFSBADCRC;
-       if (!ext4_xattr_block_csum_verify(inode, bh))
-               goto errout;
-       error = ext4_xattr_check_entries(BFIRST(bh), bh->b_data + bh->b_size,
-                                        bh->b_data);
-errout:
-       if (error)
-               __ext4_error_inode(inode, function, line, 0, -error,
-                                  "corrupted xattr block %llu",
-                                  (unsigned long long) bh->b_blocknr);
-       else
-               set_buffer_verified(bh);
-       return error;
+       return check_xattrs(inode, bh, BFIRST(bh), bh->b_data + bh->b_size,
+                           bh->b_data, function, line);
 }
 
 #define ext4_xattr_check_block(inode, bh) \
        __ext4_xattr_check_block((inode), (bh),  __func__, __LINE__)
 
 
-static int
+static inline int
 __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
                         void *end, const char *function, unsigned int line)
 {
-       int error = -EFSCORRUPTED;
-
-       if (end - (void *)header < sizeof(*header) + sizeof(u32) ||
-           (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)))
-               goto errout;
-       error = ext4_xattr_check_entries(IFIRST(header), end, IFIRST(header));
-errout:
-       if (error)
-               __ext4_error_inode(inode, function, line, 0, -error,
-                                  "corrupted in-inode xattr");
-       return error;
+       return check_xattrs(inode, NULL, IFIRST(header), end, IFIRST(header),
+                           function, line);
 }
 
 #define xattr_check_inode(inode, header, end) \
@@ -388,6 +422,17 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
        struct inode *inode;
        int err;
 
+       /*
+        * We have to check for this corruption early as otherwise
+        * iget_locked() could wait indefinitely for the state of our
+        * parent inode.
+        */
+       if (parent->i_ino == ea_ino) {
+               ext4_error(parent->i_sb,
+                          "Parent and EA inode have the same ino %lu", ea_ino);
+               return -EFSCORRUPTED;
+       }
+
        inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
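
The guard added above exists because iget_locked() on the parent's own inode number would wait forever on inode state the caller itself holds; rejecting ea_ino == parent->i_ino up front turns a hang into -EFSCORRUPTED. A trivial standalone sketch of the self-reference check:

        #include <errno.h>
        #include <stdio.h>

        #ifndef EFSCORRUPTED
        #define EFSCORRUPTED EUCLEAN    /* kernel's alias, for this sketch */
        #endif

        /* Refuse an EA inode that names its own parent. */
        static int check_ea_ino(unsigned long parent_ino, unsigned long ea_ino)
        {
                return (parent_ino == ea_ino) ? -EFSCORRUPTED : 0;
        }

        int main(void)
        {
                printf("%d %d\n", check_ea_ino(12, 12), check_ea_ino(12, 34));
                return 0;
        }
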
@@ -1438,6 +1483,13 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
        uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) };
        int err;
 
+       if (inode->i_sb->s_root == NULL) {
+               ext4_warning(inode->i_sb,
+                            "refuse to create EA inode when umounting");
+               WARN_ON(1);
+               return ERR_PTR(-EINVAL);
+       }
+
        /*
         * Let the next inode be the goal, so we try and allocate the EA inode
         * in the same group, or nearby one.
@@ -2567,9 +2619,8 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
 
        is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
        bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
-       buffer = kvmalloc(value_size, GFP_NOFS);
        b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
-       if (!is || !bs || !buffer || !b_entry_name) {
+       if (!is || !bs || !b_entry_name) {
                error = -ENOMEM;
                goto out;
        }
@@ -2581,12 +2632,18 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
 
        /* Save the entry name and the entry value */
        if (entry->e_value_inum) {
+               buffer = kvmalloc(value_size, GFP_NOFS);
+               if (!buffer) {
+                       error = -ENOMEM;
+                       goto out;
+               }
+
                error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
                if (error)
                        goto out;
        } else {
                size_t value_offs = le16_to_cpu(entry->e_value_offs);
-               memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
+               buffer = (void *)IFIRST(header) + value_offs;
        }
 
        memcpy(b_entry_name, entry->e_name, entry->e_name_len);
@@ -2601,25 +2658,26 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
        if (error)
                goto out;
 
-       /* Remove the chosen entry from the inode */
-       error = ext4_xattr_ibody_set(handle, inode, &i, is);
-       if (error)
-               goto out;
-
        i.value = buffer;
        i.value_len = value_size;
        error = ext4_xattr_block_find(inode, &i, bs);
        if (error)
                goto out;
 
-       /* Add entry which was removed from the inode into the block */
+       /* Move ea entry from the inode into the block */
        error = ext4_xattr_block_set(handle, inode, &i, bs);
        if (error)
                goto out;
-       error = 0;
+
+       /* Remove the chosen entry from the inode */
+       i.value = NULL;
+       i.value_len = 0;
+       error = ext4_xattr_ibody_set(handle, inode, &i, is);
+
 out:
        kfree(b_entry_name);
-       kvfree(buffer);
+       if (entry->e_value_inum && buffer)
+               kvfree(buffer);
        if (is)
                brelse(is->iloc.bh);
        if (bs)
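
The ext4_xattr_move_to_block() reordering above follows a copy-then-delete rule: the entry is added to the destination block first, and removed from the inode only after that succeeded, so no failure can leave the attribute missing from both places. A standalone sketch of the ordering, using two toy stores:

        #include <stdio.h>
        #include <string.h>

        struct store { char val[16]; int used; };

        /* Insert at the destination first, delete from the source second. */
        static int move_entry(struct store *src, struct store *dst)
        {
                if (!src->used || dst->used)
                        return -1;      /* failure: src copy left untouched */
                memcpy(dst->val, src->val, sizeof(dst->val));
                dst->used = 1;
                src->used = 0;          /* only now drop the source copy */
                return 0;
        }

        int main(void)
        {
                struct store a = { "user.test", 1 }, b = { "", 0 };

                printf("%d %s\n", move_entry(&a, &b), b.val); /* 0 user.test */
                return 0;
        }
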
index 5a5515d83a1b9187e265259629c49e6e05f1d65b..c3e058e0a01882357aaa575c02c9bb903bca2b93 100644 (file)
@@ -70,7 +70,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
                .old_blkaddr = index,
                .new_blkaddr = index,
                .encrypted_page = NULL,
-               .is_por = !is_meta,
+               .is_por = !is_meta ? 1 : 0,
        };
        int err;
 
@@ -171,10 +171,8 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
 bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
                                        block_t blkaddr, int type)
 {
-       if (time_to_inject(sbi, FAULT_BLKADDR)) {
-               f2fs_show_injection_info(sbi, FAULT_BLKADDR);
+       if (time_to_inject(sbi, FAULT_BLKADDR))
                return false;
-       }
 
        switch (type) {
        case META_NAT:
@@ -239,8 +237,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
                .op = REQ_OP_READ,
                .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
                .encrypted_page = NULL,
-               .in_list = false,
-               .is_por = (type == META_POR),
+               .in_list = 0,
+               .is_por = (type == META_POR) ? 1 : 0,
        };
        struct blk_plug plug;
        int err;
@@ -625,7 +623,6 @@ int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi)
 
        if (time_to_inject(sbi, FAULT_ORPHAN)) {
                spin_unlock(&im->ino_lock);
-               f2fs_show_injection_info(sbi, FAULT_ORPHAN);
                return -ENOSPC;
        }
 
@@ -798,7 +795,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
         */
        head = &im->ino_list;
 
-       /* loop for each orphan inode entry and write them in Jornal block */
+       /* loop for each orphan inode entry and write them in journal block */
        list_for_each_entry(orphan, head, list) {
                if (!page) {
                        page = f2fs_grab_meta_page(sbi, start_blk++);
@@ -1128,7 +1125,7 @@ retry:
        } else {
                /*
                 * We should submit bio, since it exists several
-                * wribacking dentry pages in the freeing inode.
+                * writebacking dentry pages in the freeing inode.
                 */
                f2fs_submit_merged_write(sbi, DATA);
                cond_resched();
@@ -1476,20 +1473,18 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true));
        ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
        for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
-               ckpt->cur_node_segno[i] =
-                       cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
-               ckpt->cur_node_blkoff[i] =
-                       cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
-               ckpt->alloc_type[i + CURSEG_HOT_NODE] =
-                               curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
+               struct curseg_info *curseg = CURSEG_I(sbi, i + CURSEG_HOT_NODE);
+
+               ckpt->cur_node_segno[i] = cpu_to_le32(curseg->segno);
+               ckpt->cur_node_blkoff[i] = cpu_to_le16(curseg->next_blkoff);
+               ckpt->alloc_type[i + CURSEG_HOT_NODE] = curseg->alloc_type;
        }
        for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
-               ckpt->cur_data_segno[i] =
-                       cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
-               ckpt->cur_data_blkoff[i] =
-                       cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
-               ckpt->alloc_type[i + CURSEG_HOT_DATA] =
-                               curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
+               struct curseg_info *curseg = CURSEG_I(sbi, i + CURSEG_HOT_DATA);
+
+               ckpt->cur_data_segno[i] = cpu_to_le32(curseg->segno);
+               ckpt->cur_data_blkoff[i] = cpu_to_le16(curseg->next_blkoff);
+               ckpt->alloc_type[i + CURSEG_HOT_DATA] = curseg->alloc_type;
        }
 
        /* 2 cp + n data seg summary + orphan inode blocks */
index 2532f369cb10f532f2063605654edc1b57963c07..b40dec3d7f7992de985035a22086d693684a001f 100644 (file)
@@ -241,7 +241,7 @@ static int lz4_init_compress_ctx(struct compress_ctx *cc)
        unsigned int size = LZ4_MEM_COMPRESS;
 
 #ifdef CONFIG_F2FS_FS_LZ4HC
-       if (F2FS_I(cc->inode)->i_compress_flag >> COMPRESS_LEVEL_OFFSET)
+       if (F2FS_I(cc->inode)->i_compress_level)
                size = LZ4HC_MEM_COMPRESS;
 #endif
 
@@ -267,8 +267,7 @@ static void lz4_destroy_compress_ctx(struct compress_ctx *cc)
 #ifdef CONFIG_F2FS_FS_LZ4HC
 static int lz4hc_compress_pages(struct compress_ctx *cc)
 {
-       unsigned char level = F2FS_I(cc->inode)->i_compress_flag >>
-                                               COMPRESS_LEVEL_OFFSET;
+       unsigned char level = F2FS_I(cc->inode)->i_compress_level;
        int len;
 
        if (level)
@@ -340,8 +339,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc)
        zstd_cstream *stream;
        void *workspace;
        unsigned int workspace_size;
-       unsigned char level = F2FS_I(cc->inode)->i_compress_flag >>
-                                               COMPRESS_LEVEL_OFFSET;
+       unsigned char level = F2FS_I(cc->inode)->i_compress_level;
 
        if (!level)
                level = F2FS_ZSTD_DEFAULT_CLEVEL;
@@ -564,7 +562,7 @@ module_param(num_compress_pages, uint, 0444);
 MODULE_PARM_DESC(num_compress_pages,
                "Number of intermediate compress pages to preallocate");
 
-int f2fs_init_compress_mempool(void)
+int __init f2fs_init_compress_mempool(void)
 {
        compress_page_pool = mempool_create_page_pool(num_compress_pages, 0);
        return compress_page_pool ? 0 : -ENOMEM;
@@ -690,9 +688,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
        vm_unmap_ram(cc->cbuf, cc->nr_cpages);
        vm_unmap_ram(cc->rbuf, cc->cluster_size);
 
-       for (i = 0; i < cc->nr_cpages; i++) {
-               if (i < new_nr_cpages)
-                       continue;
+       for (i = new_nr_cpages; i < cc->nr_cpages; i++) {
                f2fs_compress_free_page(cc->cpages[i]);
                cc->cpages[i] = NULL;
        }
@@ -1070,7 +1066,7 @@ retry:
                if (ret)
                        goto out;
                if (bio)
-                       f2fs_submit_bio(sbi, bio, DATA);
+                       f2fs_submit_read_bio(sbi, bio, DATA);
 
                ret = f2fs_init_compress_ctx(cc);
                if (ret)
@@ -1215,10 +1211,11 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
                .page = NULL,
                .encrypted_page = NULL,
                .compressed_page = NULL,
-               .submitted = false,
+               .submitted = 0,
                .io_type = io_type,
                .io_wbc = wbc,
-               .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode),
+               .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode) ?
+                                                                       1 : 0,
        };
        struct dnode_of_data dn;
        struct node_info ni;
@@ -1228,7 +1225,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
        loff_t psize;
        int i, err;
 
-       /* we should bypass data pages to proceed the kworkder jobs */
+       /* we should bypass data pages to proceed the kworker jobs */
        if (unlikely(f2fs_cp_error(sbi))) {
                mapping_set_error(cc->rpages[0]->mapping, -EIO);
                goto out_free;
@@ -1813,6 +1810,7 @@ unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn)
 const struct address_space_operations f2fs_compress_aops = {
        .release_folio = f2fs_release_folio,
        .invalidate_folio = f2fs_invalidate_folio,
+       .migrate_folio  = filemap_migrate_folio,
 };
 
 struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi)
index 41addc605350f72b897ceb08fe4544795dc783dc..06b552a0aba23cd9b8ce6fcf205ed16e9c5a1272 100644 (file)
@@ -292,13 +292,11 @@ static void f2fs_read_end_io(struct bio *bio)
        struct bio_post_read_ctx *ctx;
        bool intask = in_task();
 
-       iostat_update_and_unbind_ctx(bio, 0);
+       iostat_update_and_unbind_ctx(bio);
        ctx = bio->bi_private;
 
-       if (time_to_inject(sbi, FAULT_READ_IO)) {
-               f2fs_show_injection_info(sbi, FAULT_READ_IO);
+       if (time_to_inject(sbi, FAULT_READ_IO))
                bio->bi_status = BLK_STS_IOERR;
-       }
 
        if (bio->bi_status) {
                f2fs_finish_read_bio(bio, intask);
@@ -332,13 +330,11 @@ static void f2fs_write_end_io(struct bio *bio)
        struct bio_vec *bvec;
        struct bvec_iter_all iter_all;
 
-       iostat_update_and_unbind_ctx(bio, 1);
+       iostat_update_and_unbind_ctx(bio);
        sbi = bio->bi_private;
 
-       if (time_to_inject(sbi, FAULT_WRITE_IO)) {
-               f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
+       if (time_to_inject(sbi, FAULT_WRITE_IO))
                bio->bi_status = BLK_STS_IOERR;
-       }
 
        bio_for_each_segment_all(bvec, bio, iter_all) {
                struct page *page = bvec->bv_page;
@@ -507,65 +503,66 @@ static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
        return fscrypt_mergeable_bio(bio, inode, next_idx);
 }
 
-static inline void __submit_bio(struct f2fs_sb_info *sbi,
-                               struct bio *bio, enum page_type type)
+void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
+                                enum page_type type)
 {
-       if (!is_read_io(bio_op(bio))) {
-               unsigned int start;
+       WARN_ON_ONCE(!is_read_io(bio_op(bio)));
+       trace_f2fs_submit_read_bio(sbi->sb, type, bio);
 
-               if (type != DATA && type != NODE)
-                       goto submit_io;
+       iostat_update_submit_ctx(bio, type);
+       submit_bio(bio);
+}
 
-               if (f2fs_lfs_mode(sbi) && current->plug)
-                       blk_finish_plug(current->plug);
+static void f2fs_align_write_bio(struct f2fs_sb_info *sbi, struct bio *bio)
+{
+       unsigned int start =
+               (bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS) % F2FS_IO_SIZE(sbi);
 
-               if (!F2FS_IO_ALIGNED(sbi))
-                       goto submit_io;
+       if (start == 0)
+               return;
 
-               start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
-               start %= F2FS_IO_SIZE(sbi);
+       /* fill dummy pages */
+       for (; start < F2FS_IO_SIZE(sbi); start++) {
+               struct page *page =
+                       mempool_alloc(sbi->write_io_dummy,
+                                     GFP_NOIO | __GFP_NOFAIL);
+               f2fs_bug_on(sbi, !page);
 
-               if (start == 0)
-                       goto submit_io;
+               lock_page(page);
 
-               /* fill dummy pages */
-               for (; start < F2FS_IO_SIZE(sbi); start++) {
-                       struct page *page =
-                               mempool_alloc(sbi->write_io_dummy,
-                                             GFP_NOIO | __GFP_NOFAIL);
-                       f2fs_bug_on(sbi, !page);
+               zero_user_segment(page, 0, PAGE_SIZE);
+               set_page_private_dummy(page);
 
-                       lock_page(page);
+               if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
+                       f2fs_bug_on(sbi, 1);
+       }
+}
 
-                       zero_user_segment(page, 0, PAGE_SIZE);
-                       set_page_private_dummy(page);
+static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio,
+                                 enum page_type type)
+{
+       WARN_ON_ONCE(is_read_io(bio_op(bio)));
+
+       if (type == DATA || type == NODE) {
+               if (f2fs_lfs_mode(sbi) && current->plug)
+                       blk_finish_plug(current->plug);
 
-                       if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
-                               f2fs_bug_on(sbi, 1);
+               if (F2FS_IO_ALIGNED(sbi)) {
+                       f2fs_align_write_bio(sbi, bio);
+                       /*
+                        * In the NODE case, we lose next block address chain.
+                        * So, we need to do checkpoint in f2fs_sync_file.
+                        */
+                       if (type == NODE)
+                               set_sbi_flag(sbi, SBI_NEED_CP);
                }
-               /*
-                * In the NODE case, we lose next block address chain. So, we
-                * need to do checkpoint in f2fs_sync_file.
-                */
-               if (type == NODE)
-                       set_sbi_flag(sbi, SBI_NEED_CP);
        }
-submit_io:
-       if (is_read_io(bio_op(bio)))
-               trace_f2fs_submit_read_bio(sbi->sb, type, bio);
-       else
-               trace_f2fs_submit_write_bio(sbi->sb, type, bio);
 
+       trace_f2fs_submit_write_bio(sbi->sb, type, bio);
        iostat_update_submit_ctx(bio, type);
        submit_bio(bio);
 }
 
-void f2fs_submit_bio(struct f2fs_sb_info *sbi,
-                               struct bio *bio, enum page_type type)
-{
-       __submit_bio(sbi, bio, type);
-}
-
 static void __submit_merged_bio(struct f2fs_bio_info *io)
 {
        struct f2fs_io_info *fio = &io->fio;
@@ -573,12 +570,13 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
        if (!io->bio)
                return;
 
-       if (is_read_io(fio->op))
+       if (is_read_io(fio->op)) {
                trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
-       else
+               f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
+       } else {
                trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
-
-       __submit_bio(io->sbi, io->bio, fio->type);
+               f2fs_submit_write_bio(io->sbi, io->bio, fio->type);
+       }
        io->bio = NULL;
 }
 
@@ -655,6 +653,9 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 
        f2fs_down_write(&io->io_rwsem);
 
+       if (!io->bio)
+               goto unlock_out;
+
        /* change META to META_FLUSH in the checkpoint procedure */
        if (type >= META_FLUSH) {
                io->fio.type = META_FLUSH;
@@ -663,6 +664,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
                        io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
        }
        __submit_merged_bio(io);
+unlock_out:
        f2fs_up_write(&io->io_rwsem);
 }
 
@@ -741,12 +743,15 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
        }
 
        if (fio->io_wbc && !is_read_io(fio->op))
-               wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
+               wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
 
        inc_page_count(fio->sbi, is_read_io(fio->op) ?
                        __read_io_type(page) : WB_DATA_TYPE(fio->page));
 
-       __submit_bio(fio->sbi, bio, fio->type);
+       if (is_read_io(bio_op(bio)))
+               f2fs_submit_read_bio(fio->sbi, bio, fio->type);
+       else
+               f2fs_submit_write_bio(fio->sbi, bio, fio->type);
        return 0;
 }
 
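These data.c hunks retire the catch-all __submit_bio() in favour of direction-specific f2fs_submit_read_bio()/f2fs_submit_write_bio(); call sites that can see both directions now dispatch on bio_op(), as f2fs_submit_page_bio() does just above. A standalone sketch of the shape of that split:

        #include <assert.h>
        #include <stdbool.h>
        #include <stdio.h>

        /* Each entry point asserts it only sees its own direction, replacing
         * one helper that branched internally. */
        static void submit_read_bio(bool is_read)  { assert(is_read);  puts("read");  }
        static void submit_write_bio(bool is_read) { assert(!is_read); puts("write"); }

        static void submit_page_bio(bool is_read)  /* a caller seeing both */
        {
                if (is_read)
                        submit_read_bio(is_read);
                else
                        submit_write_bio(is_read);
        }

        int main(void)
        {
                submit_page_bio(true);
                submit_page_bio(false);
                return 0;
        }
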
@@ -848,7 +853,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
 
                        /* page can't be merged into bio; submit the bio */
                        del_bio_entry(be);
-                       __submit_bio(sbi, *bio, DATA);
+                       f2fs_submit_write_bio(sbi, *bio, DATA);
                        break;
                }
                f2fs_up_write(&io->bio_list_lock);
@@ -911,7 +916,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
        }
 
        if (found)
-               __submit_bio(sbi, target, DATA);
+               f2fs_submit_write_bio(sbi, target, DATA);
        if (bio && *bio) {
                bio_put(*bio);
                *bio = NULL;
@@ -948,7 +953,7 @@ alloc_new:
        }
 
        if (fio->io_wbc)
-               wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
+               wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
 
        inc_page_count(fio->sbi, WB_DATA_TYPE(page));
 
@@ -991,7 +996,7 @@ next:
                bio_page = fio->page;
 
        /* set submitted = true as a return value */
-       fio->submitted = true;
+       fio->submitted = 1;
 
        inc_page_count(sbi, WB_DATA_TYPE(bio_page));
 
@@ -1007,7 +1012,7 @@ alloc_new:
                                (fio->type == DATA || fio->type == NODE) &&
                                fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
                        dec_page_count(sbi, WB_DATA_TYPE(bio_page));
-                       fio->retry = true;
+                       fio->retry = 1;
                        goto skip;
                }
                io->bio = __bio_alloc(fio, BIO_MAX_VECS);
@@ -1022,7 +1027,7 @@ alloc_new:
        }
 
        if (fio->io_wbc)
-               wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);
+               wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
 
        io->last_block_in_bio = fio->new_blkaddr;
 
@@ -1107,7 +1112,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
        }
        inc_page_count(sbi, F2FS_RD_DATA);
        f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
-       __submit_bio(sbi, bio, DATA);
+       f2fs_submit_read_bio(sbi, bio, DATA);
        return 0;
 }
 
@@ -1207,19 +1212,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
        return err;
 }
 
-int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
-{
-       struct extent_info ei = {0, };
-       struct inode *inode = dn->inode;
-
-       if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
-               dn->data_blkaddr = ei.blk + index - ei.fofs;
-               return 0;
-       }
-
-       return f2fs_reserve_block(dn, index);
-}
-
 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
                                     blk_opf_t op_flags, bool for_write,
                                     pgoff_t *next_pgofs)
@@ -1227,15 +1219,14 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
        struct address_space *mapping = inode->i_mapping;
        struct dnode_of_data dn;
        struct page *page;
-       struct extent_info ei = {0, };
        int err;
 
        page = f2fs_grab_cache_page(mapping, index, for_write);
        if (!page)
                return ERR_PTR(-ENOMEM);
 
-       if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
-               dn.data_blkaddr = ei.blk + index - ei.fofs;
+       if (f2fs_lookup_read_extent_cache_block(inode, index,
+                                               &dn.data_blkaddr)) {
                if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
                                                DATA_GENERIC_ENHANCE_READ)) {
                        err = -EFSCORRUPTED;
@@ -1432,13 +1423,12 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
                return err;
 
        dn->data_blkaddr = f2fs_data_blkaddr(dn);
-       if (dn->data_blkaddr != NULL_ADDR)
-               goto alloc;
-
-       if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
-               return err;
+       if (dn->data_blkaddr == NULL_ADDR) {
+               err = inc_valid_block_count(sbi, dn->inode, &count);
+               if (unlikely(err))
+                       return err;
+       }
 
-alloc:
        set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
        old_blkaddr = dn->data_blkaddr;
        f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
@@ -1452,19 +1442,91 @@ alloc:
        return 0;
 }
 
-void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+static void f2fs_map_lock(struct f2fs_sb_info *sbi, int flag)
 {
-       if (flag == F2FS_GET_BLOCK_PRE_AIO) {
-               if (lock)
-                       f2fs_down_read(&sbi->node_change);
-               else
-                       f2fs_up_read(&sbi->node_change);
+       if (flag == F2FS_GET_BLOCK_PRE_AIO)
+               f2fs_down_read(&sbi->node_change);
+       else
+               f2fs_lock_op(sbi);
+}
+
+static void f2fs_map_unlock(struct f2fs_sb_info *sbi, int flag)
+{
+       if (flag == F2FS_GET_BLOCK_PRE_AIO)
+               f2fs_up_read(&sbi->node_change);
+       else
+               f2fs_unlock_op(sbi);
+}
+
+int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+       int err = 0;
+
+       f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
+       if (!f2fs_lookup_read_extent_cache_block(dn->inode, index,
+                                               &dn->data_blkaddr))
+               err = f2fs_reserve_block(dn, index);
+       f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);
+
+       return err;
+}
+
+static int f2fs_map_no_dnode(struct inode *inode,
+               struct f2fs_map_blocks *map, struct dnode_of_data *dn,
+               pgoff_t pgoff)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+       /*
+        * There is one exceptional case: read_node_page() may return
+        * -ENOENT because the filesystem has been shut down or hit a
+        * cp_error; return -EIO in that case.
+        */
+       if (map->m_may_create &&
+           (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || f2fs_cp_error(sbi)))
+               return -EIO;
+
+       if (map->m_next_pgofs)
+               *map->m_next_pgofs = f2fs_get_next_page_offset(dn, pgoff);
+       if (map->m_next_extent)
+               *map->m_next_extent = f2fs_get_next_page_offset(dn, pgoff);
+       return 0;
+}
+
+static bool f2fs_map_blocks_cached(struct inode *inode,
+               struct f2fs_map_blocks *map, int flag)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       unsigned int maxblocks = map->m_len;
+       pgoff_t pgoff = (pgoff_t)map->m_lblk;
+       struct extent_info ei = {};
+
+       if (!f2fs_lookup_read_extent_cache(inode, pgoff, &ei))
+               return false;
+
+       map->m_pblk = ei.blk + pgoff - ei.fofs;
+       map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgoff);
+       map->m_flags = F2FS_MAP_MAPPED;
+       if (map->m_next_extent)
+               *map->m_next_extent = pgoff + map->m_len;
+
+       /* for hardware encryption, but to avoid potential issue in future */
+       if (flag == F2FS_GET_BLOCK_DIO)
+               f2fs_wait_on_block_writeback_range(inode,
+                                       map->m_pblk, map->m_len);
+
+       if (f2fs_allow_multi_device_dio(sbi, flag)) {
+               int bidx = f2fs_target_device_index(sbi, map->m_pblk);
+               struct f2fs_dev_info *dev = &sbi->devs[bidx];
+
+               map->m_bdev = dev->bdev;
+               map->m_pblk -= dev->start_blk;
+               map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk);
        } else {
-               if (lock)
-                       f2fs_lock_op(sbi);
-               else
-                       f2fs_unlock_op(sbi);
+               map->m_bdev = inode->i_sb->s_bdev;
        }
+       return true;
 }
 
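f2fs_map_blocks_cached() above hoists the read-extent-cache fast path out of f2fs_map_blocks(): on a hit, the physical block and mapped length come straight from the cached extent. A standalone sketch of that hit math, assuming an extent maps file offsets [fofs, fofs+len) to physical blocks starting at blk:

        #include <stdio.h>

        struct extent { unsigned long fofs, blk, len; };

        /* Returns the mapped length; *pblk gets the starting physical block,
         * as in "map->m_pblk = ei.blk + pgoff - ei.fofs" above. */
        static unsigned long map_cached(const struct extent *ei, unsigned long pgoff,
                                        unsigned long maxblocks, unsigned long *pblk)
        {
                unsigned long avail = ei->fofs + ei->len - pgoff;

                *pblk = ei->blk + (pgoff - ei->fofs);
                return maxblocks < avail ? maxblocks : avail;
        }

        int main(void)
        {
                struct extent ei = { .fofs = 10, .blk = 100, .len = 8 };
                unsigned long pblk;
                unsigned long len = map_cached(&ei, 12, 32, &pblk);

                printf("pblk=%lu len=%lu\n", pblk, len);  /* pblk=102 len=6 */
                return 0;
        }
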
 /*
@@ -1472,8 +1534,7 @@ void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
  * maps continuous logical blocks to physical blocks, and return such
  * info via f2fs_map_blocks structure.
  */
-int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
-                                               int create, int flag)
+int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
 {
        unsigned int maxblocks = map->m_len;
        struct dnode_of_data dn;
@@ -1483,14 +1544,17 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
        int err = 0, ofs = 1;
        unsigned int ofs_in_node, last_ofs_in_node;
        blkcnt_t prealloc;
-       struct extent_info ei = {0, };
        block_t blkaddr;
        unsigned int start_pgofs;
        int bidx = 0;
+       bool is_hole;
 
        if (!maxblocks)
                return 0;
 
+       if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
+               goto out;
+
        map->m_bdev = inode->i_sb->s_bdev;
        map->m_multidev_dio =
                f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
@@ -1502,42 +1566,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
        pgofs = (pgoff_t)map->m_lblk;
        end = pgofs + maxblocks;
 
-       if (!create && f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) {
-               if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
-                                                       map->m_may_create)
-                       goto next_dnode;
-
-               map->m_pblk = ei.blk + pgofs - ei.fofs;
-               map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
-               map->m_flags = F2FS_MAP_MAPPED;
-               if (map->m_next_extent)
-                       *map->m_next_extent = pgofs + map->m_len;
-
-               /* for hardware encryption, but to avoid potential issue in future */
-               if (flag == F2FS_GET_BLOCK_DIO)
-                       f2fs_wait_on_block_writeback_range(inode,
-                                               map->m_pblk, map->m_len);
-
-               if (map->m_multidev_dio) {
-                       block_t blk_addr = map->m_pblk;
-
-                       bidx = f2fs_target_device_index(sbi, map->m_pblk);
-
-                       map->m_bdev = FDEV(bidx).bdev;
-                       map->m_pblk -= FDEV(bidx).start_blk;
-                       map->m_len = min(map->m_len,
-                               FDEV(bidx).end_blk + 1 - map->m_pblk);
-
-                       if (map->m_may_create)
-                               f2fs_update_device_state(sbi, inode->i_ino,
-                                                       blk_addr, map->m_len);
-               }
-               goto out;
-       }
-
 next_dnode:
        if (map->m_may_create)
-               f2fs_do_map_lock(sbi, flag, true);
+               f2fs_map_lock(sbi, flag);
 
        /* When reading holes, we need its node page */
        set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -1545,29 +1576,8 @@ next_dnode:
        if (err) {
                if (flag == F2FS_GET_BLOCK_BMAP)
                        map->m_pblk = 0;
-
-               if (err == -ENOENT) {
-                       /*
-                        * There is one exceptional case that read_node_page()
-                        * may return -ENOENT due to filesystem has been
-                        * shutdown or cp_error, so force to convert error
-                        * number to EIO for such case.
-                        */
-                       if (map->m_may_create &&
-                               (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
-                               f2fs_cp_error(sbi))) {
-                               err = -EIO;
-                               goto unlock_out;
-                       }
-
-                       err = 0;
-                       if (map->m_next_pgofs)
-                               *map->m_next_pgofs =
-                                       f2fs_get_next_page_offset(&dn, pgofs);
-                       if (map->m_next_extent)
-                               *map->m_next_extent =
-                                       f2fs_get_next_page_offset(&dn, pgofs);
-               }
+               if (err == -ENOENT)
+                       err = f2fs_map_no_dnode(inode, map, &dn, pgofs);
                goto unlock_out;
        }
 
@@ -1578,78 +1588,76 @@ next_dnode:
 
 next_block:
        blkaddr = f2fs_data_blkaddr(&dn);
-
-       if (__is_valid_data_blkaddr(blkaddr) &&
-               !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
+       is_hole = !__is_valid_data_blkaddr(blkaddr);
+       if (!is_hole &&
+           !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
                err = -EFSCORRUPTED;
                f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
                goto sync_out;
        }
 
-       if (__is_valid_data_blkaddr(blkaddr)) {
-               /* use out-place-update for driect IO under LFS mode */
-               if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
-                                                       map->m_may_create) {
+       /* use out-place-update for direct IO under LFS mode */
+       if (map->m_may_create &&
+           (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) {
+               if (unlikely(f2fs_cp_error(sbi))) {
+                       err = -EIO;
+                       goto sync_out;
+               }
+
+               switch (flag) {
+               case F2FS_GET_BLOCK_PRE_AIO:
+                       if (blkaddr == NULL_ADDR) {
+                               prealloc++;
+                               last_ofs_in_node = dn.ofs_in_node;
+                       }
+                       break;
+               case F2FS_GET_BLOCK_PRE_DIO:
+               case F2FS_GET_BLOCK_DIO:
                        err = __allocate_data_block(&dn, map->m_seg_type);
                        if (err)
                                goto sync_out;
-                       blkaddr = dn.data_blkaddr;
+                       if (flag == F2FS_GET_BLOCK_PRE_DIO)
+                               file_need_truncate(inode);
                        set_inode_flag(inode, FI_APPEND_WRITE);
+                       break;
+               default:
+                       WARN_ON_ONCE(1);
+                       err = -EIO;
+                       goto sync_out;
                }
-       } else {
-               if (create) {
-                       if (unlikely(f2fs_cp_error(sbi))) {
-                               err = -EIO;
-                               goto sync_out;
-                       }
-                       if (flag == F2FS_GET_BLOCK_PRE_AIO) {
-                               if (blkaddr == NULL_ADDR) {
-                                       prealloc++;
-                                       last_ofs_in_node = dn.ofs_in_node;
-                               }
-                       } else {
-                               WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
-                                       flag != F2FS_GET_BLOCK_DIO);
-                               err = __allocate_data_block(&dn,
-                                                       map->m_seg_type);
-                               if (!err) {
-                                       if (flag == F2FS_GET_BLOCK_PRE_DIO)
-                                               file_need_truncate(inode);
-                                       set_inode_flag(inode, FI_APPEND_WRITE);
-                               }
-                       }
-                       if (err)
-                               goto sync_out;
+
+               blkaddr = dn.data_blkaddr;
+               if (is_hole)
                        map->m_flags |= F2FS_MAP_NEW;
-                       blkaddr = dn.data_blkaddr;
-               } else {
-                       if (f2fs_compressed_file(inode) &&
-                                       f2fs_sanity_check_cluster(&dn) &&
-                                       (flag != F2FS_GET_BLOCK_FIEMAP ||
-                                       IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
-                               err = -EFSCORRUPTED;
-                               f2fs_handle_error(sbi,
-                                               ERROR_CORRUPTED_CLUSTER);
-                               goto sync_out;
-                       }
-                       if (flag == F2FS_GET_BLOCK_BMAP) {
-                               map->m_pblk = 0;
-                               goto sync_out;
-                       }
-                       if (flag == F2FS_GET_BLOCK_PRECACHE)
-                               goto sync_out;
-                       if (flag == F2FS_GET_BLOCK_FIEMAP &&
-                                               blkaddr == NULL_ADDR) {
-                               if (map->m_next_pgofs)
-                                       *map->m_next_pgofs = pgofs + 1;
-                               goto sync_out;
-                       }
-                       if (flag != F2FS_GET_BLOCK_FIEMAP) {
-                               /* for defragment case */
+       } else if (is_hole) {
+               if (f2fs_compressed_file(inode) &&
+                   f2fs_sanity_check_cluster(&dn) &&
+                   (flag != F2FS_GET_BLOCK_FIEMAP ||
+                    IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
+                       err = -EFSCORRUPTED;
+                       f2fs_handle_error(sbi,
+                                       ERROR_CORRUPTED_CLUSTER);
+                       goto sync_out;
+               }
+
+               switch (flag) {
+               case F2FS_GET_BLOCK_PRECACHE:
+                       goto sync_out;
+               case F2FS_GET_BLOCK_BMAP:
+                       map->m_pblk = 0;
+                       goto sync_out;
+               case F2FS_GET_BLOCK_FIEMAP:
+                       if (blkaddr == NULL_ADDR) {
                                if (map->m_next_pgofs)
                                        *map->m_next_pgofs = pgofs + 1;
                                goto sync_out;
                        }
+                       break;
+               default:
+                       /* for defragment case */
+                       if (map->m_next_pgofs)
+                               *map->m_next_pgofs = pgofs + 1;
+                       goto sync_out;
                }
        }
 
@@ -1660,9 +1668,9 @@ next_block:
                bidx = f2fs_target_device_index(sbi, blkaddr);
 
        if (map->m_len == 0) {
-               /* preallocated unwritten block should be mapped for fiemap. */
+               /* reserved delalloc block should be mapped for fiemap. */
                if (blkaddr == NEW_ADDR)
-                       map->m_flags |= F2FS_MAP_UNWRITTEN;
+                       map->m_flags |= F2FS_MAP_DELALLOC;
                map->m_flags |= F2FS_MAP_MAPPED;
 
                map->m_pblk = blkaddr;
@@ -1721,7 +1729,7 @@ skip:
        f2fs_put_dnode(&dn);
 
        if (map->m_may_create) {
-               f2fs_do_map_lock(sbi, flag, false);
+               f2fs_map_unlock(sbi, flag);
                f2fs_balance_fs(sbi, dn.node_changed);
        }
        goto next_dnode;
@@ -1767,11 +1775,11 @@ sync_out:
        f2fs_put_dnode(&dn);
 unlock_out:
        if (map->m_may_create) {
-               f2fs_do_map_lock(sbi, flag, false);
+               f2fs_map_unlock(sbi, flag);
                f2fs_balance_fs(sbi, dn.node_changed);
        }
 out:
-       trace_f2fs_map_blocks(inode, map, create, flag, err);
+       trace_f2fs_map_blocks(inode, map, flag, err);
        return err;
 }
 
@@ -1793,7 +1801,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
 
        while (map.m_lblk < last_lblk) {
                map.m_len = last_lblk - map.m_lblk;
-               err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
+               err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
                if (err || map.m_len == 0)
                        return false;
                map.m_lblk += map.m_len;
@@ -1967,7 +1975,7 @@ next:
                map.m_len = cluster_size - count_in_cluster;
        }
 
-       ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
+       ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
        if (ret)
                goto out;
 
@@ -1984,7 +1992,7 @@ next:
 
        compr_appended = false;
        /* In a case of compressed cluster, append this to the last extent */
-       if (compr_cluster && ((map.m_flags & F2FS_MAP_UNWRITTEN) ||
+       if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) ||
                        !(map.m_flags & F2FS_MAP_FLAGS))) {
                compr_appended = true;
                goto skip_fill;
@@ -2030,7 +2038,7 @@ skip_fill:
                                compr_cluster = false;
                                size += blks_to_bytes(inode, 1);
                        }
-               } else if (map.m_flags & F2FS_MAP_UNWRITTEN) {
+               } else if (map.m_flags & F2FS_MAP_DELALLOC) {
                        flags = FIEMAP_EXTENT_UNWRITTEN;
                }
 
@@ -2099,7 +2107,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
        map->m_lblk = block_in_file;
        map->m_len = last_block - block_in_file;
 
-       ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
+       ret = f2fs_map_blocks(inode, map, F2FS_GET_BLOCK_DEFAULT);
        if (ret)
                goto out;
 got_it:
@@ -2136,7 +2144,7 @@ zero_out:
                                       *last_block_in_bio, block_nr) ||
                    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
 submit_and_realloc:
-               __submit_bio(F2FS_I_SB(inode), bio, DATA);
+               f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
                bio = NULL;
        }
        if (bio == NULL) {
@@ -2283,7 +2291,7 @@ skip_reading_dnode:
                                        *last_block_in_bio, blkaddr) ||
                    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
 submit_and_realloc:
-                       __submit_bio(sbi, bio, DATA);
+                       f2fs_submit_read_bio(sbi, bio, DATA);
                        bio = NULL;
                }
 
@@ -2377,7 +2385,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
                if (f2fs_compressed_file(inode)) {
-                       /* there are remained comressed pages, submit them */
+                       /* there are remained compressed pages, submit them */
                        if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
                                ret = f2fs_read_multi_pages(&cc, &bio,
                                                        max_nr_pages,
@@ -2444,7 +2452,7 @@ next_page:
 #endif
        }
        if (bio)
-               __submit_bio(F2FS_I_SB(inode), bio, DATA);
+               f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
        return ret;
 }
 
@@ -2530,34 +2538,29 @@ static inline bool check_inplace_update_policy(struct inode *inode,
                                struct f2fs_io_info *fio)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       unsigned int policy = SM_I(sbi)->ipu_policy;
 
-       if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) &&
-                       is_inode_flag_set(inode, FI_OPU_WRITE))
+       if (IS_F2FS_IPU_HONOR_OPU_WRITE(sbi) &&
+           is_inode_flag_set(inode, FI_OPU_WRITE))
                return false;
-       if (policy & (0x1 << F2FS_IPU_FORCE))
+       if (IS_F2FS_IPU_FORCE(sbi))
                return true;
-       if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
+       if (IS_F2FS_IPU_SSR(sbi) && f2fs_need_SSR(sbi))
                return true;
-       if (policy & (0x1 << F2FS_IPU_UTIL) &&
-                       utilization(sbi) > SM_I(sbi)->min_ipu_util)
+       if (IS_F2FS_IPU_UTIL(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
                return true;
-       if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
-                       utilization(sbi) > SM_I(sbi)->min_ipu_util)
+       if (IS_F2FS_IPU_SSR_UTIL(sbi) && f2fs_need_SSR(sbi) &&
+           utilization(sbi) > SM_I(sbi)->min_ipu_util)
                return true;
 
        /*
         * IPU for rewrite async pages
         */
-       if (policy & (0x1 << F2FS_IPU_ASYNC) &&
-                       fio && fio->op == REQ_OP_WRITE &&
-                       !(fio->op_flags & REQ_SYNC) &&
-                       !IS_ENCRYPTED(inode))
+       if (IS_F2FS_IPU_ASYNC(sbi) && fio && fio->op == REQ_OP_WRITE &&
+           !(fio->op_flags & REQ_SYNC) && !IS_ENCRYPTED(inode))
                return true;
 
        /* this is only set during fdatasync */
-       if (policy & (0x1 << F2FS_IPU_FSYNC) &&
-                       is_inode_flag_set(inode, FI_NEED_IPU))
+       if (IS_F2FS_IPU_FSYNC(sbi) && is_inode_flag_set(inode, FI_NEED_IPU))
                return true;
 
        if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
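
The IS_F2FS_IPU_* predicates adopted above are defined elsewhere in this
series and are not part of this hunk. As a minimal sketch, such helpers can
be generated from the ipu_policy bitmap, consistent with the open-coded
"policy & (0x1 << F2FS_IPU_*)" tests being removed; the generator name and
exact body here are assumptions, not the verbatim patch:

	#define F2FS_IPU_POLICY(name)					\
	static inline bool IS_##name(struct f2fs_sb_info *sbi)		\
	{								\
		return SM_I(sbi)->ipu_policy & BIT(name);		\
	}

	F2FS_IPU_POLICY(F2FS_IPU_FORCE);	/* generates IS_F2FS_IPU_FORCE(sbi) */
	F2FS_IPU_POLICY(F2FS_IPU_SSR);		/* generates IS_F2FS_IPU_SSR(sbi) */
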
@@ -2635,7 +2638,6 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
        struct page *page = fio->page;
        struct inode *inode = page->mapping->host;
        struct dnode_of_data dn;
-       struct extent_info ei = {0, };
        struct node_info ni;
        bool ipu_force = false;
        int err = 0;
@@ -2647,9 +2649,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
                set_new_dnode(&dn, inode, NULL, NULL, 0);
 
        if (need_inplace_update(fio) &&
-           f2fs_lookup_read_extent_cache(inode, page->index, &ei)) {
-               fio->old_blkaddr = ei.blk + page->index - ei.fofs;
-
+           f2fs_lookup_read_extent_cache_block(inode, page->index,
+                                               &fio->old_blkaddr)) {
                if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
                                                DATA_GENERIC_ENHANCE)) {
                        f2fs_handle_error(fio->sbi,
@@ -2699,7 +2700,6 @@ got_it:
                        goto out_writepage;
 
                set_page_writeback(page);
-               ClearPageError(page);
                f2fs_put_dnode(&dn);
                if (fio->need_lock == LOCK_REQ)
                        f2fs_unlock_op(fio->sbi);
@@ -2735,7 +2735,6 @@ got_it:
                goto out_writepage;
 
        set_page_writeback(page);
-       ClearPageError(page);
 
        if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
                f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
@@ -2780,10 +2779,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
                .old_blkaddr = NULL_ADDR,
                .page = page,
                .encrypted_page = NULL,
-               .submitted = false,
+               .submitted = 0,
                .compr_blocks = compr_blocks,
                .need_lock = LOCK_RETRY,
-               .post_read = f2fs_post_read_required(inode),
+               .post_read = f2fs_post_read_required(inode) ? 1 : 0,
                .io_type = io_type,
                .io_wbc = wbc,
                .bio = bio,
@@ -2792,7 +2791,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
 
        trace_f2fs_writepage(page, DATA);
 
-       /* we should bypass data pages to proceed the kworkder jobs */
+       /* we should bypass data pages to proceed with the kworker jobs */
        if (unlikely(f2fs_cp_error(sbi))) {
                mapping_set_error(page->mapping, -EIO);
                /*
@@ -2904,14 +2903,14 @@ out:
        }
 
        if (submitted)
-               *submitted = fio.submitted ? 1 : 0;
+               *submitted = fio.submitted;
 
        return 0;
 
 redirty_out:
        redirty_page_for_writepage(wbc, page);
        /*
-        * pageout() in MM traslates EAGAIN, so calls handle_write_error()
+        * pageout() in MM translates EAGAIN, so calls handle_write_error()
         * -> mapping_set_error() -> set_bit(AS_EIO, ...).
         * file_write_and_wait_range() will see EIO error, which is critical
         * to return value of fsync() followed by atomic_write failure to user.
@@ -2945,7 +2944,7 @@ out:
 }
 
 /*
- * This function was copied from write_cche_pages from mm/page-writeback.c.
+ * This function was copied from write_cache_pages from mm/page-writeback.c.
  * The major change is making the write step of cold data pages separate from
  * that of warm/hot data pages.
  */
@@ -3354,9 +3353,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
        struct dnode_of_data dn;
        struct page *ipage;
        bool locked = false;
-       struct extent_info ei = {0, };
+       int flag = F2FS_GET_BLOCK_PRE_AIO;
        int err = 0;
-       int flag;
 
        /*
         * If a whole page is being written and we already preallocated all the
@@ -3366,14 +3364,13 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
                return 0;
 
        /* f2fs_lock_op avoids race between write CP and convert_inline_page */
-       if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
-               flag = F2FS_GET_BLOCK_DEFAULT;
-       else
-               flag = F2FS_GET_BLOCK_PRE_AIO;
-
-       if (f2fs_has_inline_data(inode) ||
-                       (pos & PAGE_MASK) >= i_size_read(inode)) {
-               f2fs_do_map_lock(sbi, flag, true);
+       if (f2fs_has_inline_data(inode)) {
+               if (pos + len > MAX_INLINE_DATA(inode))
+                       flag = F2FS_GET_BLOCK_DEFAULT;
+               f2fs_map_lock(sbi, flag);
+               locked = true;
+       } else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
+               f2fs_map_lock(sbi, flag);
                locked = true;
        }
 
@@ -3393,40 +3390,40 @@ restart:
                        set_inode_flag(inode, FI_DATA_EXIST);
                        if (inode->i_nlink)
                                set_page_private_inline(ipage);
-               } else {
-                       err = f2fs_convert_inline_page(&dn, page);
-                       if (err)
-                               goto out;
-                       if (dn.data_blkaddr == NULL_ADDR)
-                               err = f2fs_get_block(&dn, index);
-               }
-       } else if (locked) {
-               err = f2fs_get_block(&dn, index);
-       } else {
-               if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
-                       dn.data_blkaddr = ei.blk + index - ei.fofs;
-               } else {
-                       /* hole case */
-                       err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
-                       if (err || dn.data_blkaddr == NULL_ADDR) {
-                               f2fs_put_dnode(&dn);
-                               f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
-                                                               true);
-                               WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
-                               locked = true;
-                               goto restart;
-                       }
+                       goto out;
                }
+               err = f2fs_convert_inline_page(&dn, page);
+               if (err || dn.data_blkaddr != NULL_ADDR)
+                       goto out;
        }
 
-       /* convert_inline_page can make node_changed */
-       *blk_addr = dn.data_blkaddr;
-       *node_changed = dn.node_changed;
+       if (!f2fs_lookup_read_extent_cache_block(inode, index,
+                                                &dn.data_blkaddr)) {
+               if (locked) {
+                       err = f2fs_reserve_block(&dn, index);
+                       goto out;
+               }
+
+               /* hole case */
+               err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+               if (!err && dn.data_blkaddr != NULL_ADDR)
+                       goto out;
+               f2fs_put_dnode(&dn);
+               f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
+               WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
+               locked = true;
+               goto restart;
+       }
 out:
+       if (!err) {
+               /* convert_inline_page can make node_changed */
+               *blk_addr = dn.data_blkaddr;
+               *node_changed = dn.node_changed;
+       }
        f2fs_put_dnode(&dn);
 unlock_out:
        if (locked)
-               f2fs_do_map_lock(sbi, flag, false);
+               f2fs_map_unlock(sbi, flag);
        return err;
 }
 
@@ -3435,7 +3432,6 @@ static int __find_data_block(struct inode *inode, pgoff_t index,
 {
        struct dnode_of_data dn;
        struct page *ipage;
-       struct extent_info ei = {0, };
        int err = 0;
 
        ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
@@ -3444,9 +3440,8 @@ static int __find_data_block(struct inode *inode, pgoff_t index,
 
        set_new_dnode(&dn, inode, ipage, ipage, 0);
 
-       if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
-               dn.data_blkaddr = ei.blk + index - ei.fofs;
-       } else {
+       if (!f2fs_lookup_read_extent_cache_block(inode, index,
+                                                &dn.data_blkaddr)) {
                /* hole case */
                err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
                if (err) {
@@ -3467,7 +3462,7 @@ static int __reserve_data_block(struct inode *inode, pgoff_t index,
        struct page *ipage;
        int err = 0;
 
-       f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+       f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
 
        ipage = f2fs_get_node_page(sbi, inode->i_ino);
        if (IS_ERR(ipage)) {
@@ -3476,14 +3471,16 @@ static int __reserve_data_block(struct inode *inode, pgoff_t index,
        }
        set_new_dnode(&dn, inode, ipage, ipage, 0);
 
-       err = f2fs_get_block(&dn, index);
+       if (!f2fs_lookup_read_extent_cache_block(dn.inode, index,
+                                               &dn.data_blkaddr))
+               err = f2fs_reserve_block(&dn, index);
 
        *blk_addr = dn.data_blkaddr;
        *node_changed = dn.node_changed;
        f2fs_put_dnode(&dn);
 
 unlock_out:
-       f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+       f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);
        return err;
 }
 
@@ -3729,6 +3726,7 @@ void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
                }
        }
 
+       clear_page_private_reference(&folio->page);
        clear_page_private_gcing(&folio->page);
 
        if (test_opt(sbi, COMPRESS_CACHE) &&
@@ -3754,6 +3752,7 @@ bool f2fs_release_folio(struct folio *folio, gfp_t wait)
                        clear_page_private_data(&folio->page);
        }
 
+       clear_page_private_reference(&folio->page);
        clear_page_private_gcing(&folio->page);
 
        folio_detach_private(folio);
@@ -3835,7 +3834,7 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
                map.m_next_pgofs = NULL;
                map.m_seg_type = NO_CHECK_TYPE;
 
-               if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP))
+               if (!f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_BMAP))
                        blknr = map.m_pblk;
        }
 out:
@@ -3943,7 +3942,7 @@ retry:
                map.m_seg_type = NO_CHECK_TYPE;
                map.m_may_create = false;
 
-               ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
+               ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
                if (ret)
                        goto out;
 
@@ -4168,8 +4167,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
        if (flags & IOMAP_WRITE)
                map.m_may_create = true;
 
-       err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
-                             F2FS_GET_BLOCK_DIO);
+       err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO);
        if (err)
                return err;
 
@@ -4182,20 +4180,24 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
         */
        map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
 
-       if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
-               iomap->length = blks_to_bytes(inode, map.m_len);
-               if (map.m_flags & F2FS_MAP_MAPPED) {
-                       iomap->type = IOMAP_MAPPED;
-                       iomap->flags |= IOMAP_F_MERGED;
-               } else {
-                       iomap->type = IOMAP_UNWRITTEN;
-               }
-               if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
-                       return -EINVAL;
+       /*
+        * We should never see delalloc or compressed extents here based on
+        * prior flushing and checks.
+        */
+       if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
+               return -EINVAL;
+       if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
+               return -EINVAL;
 
+       if (map.m_pblk != NULL_ADDR) {
+               iomap->length = blks_to_bytes(inode, map.m_len);
+               iomap->type = IOMAP_MAPPED;
+               iomap->flags |= IOMAP_F_MERGED;
                iomap->bdev = map.m_bdev;
                iomap->addr = blks_to_bytes(inode, map.m_pblk);
        } else {
+               if (flags & IOMAP_WRITE)
+                       return -ENOTBLK;
                iomap->length = blks_to_bytes(inode, next_pgofs) -
                                iomap->offset;
                iomap->type = IOMAP_HOLE;
index 32af4f0c573572a5be6cf8f00a25228b08fad068..30a77936e3c594a431c261ef5c9a76cd923bd8f7 100644
@@ -354,6 +354,17 @@ static char *s_flag[] = {
        [SBI_IS_FREEZING]       = " freezefs",
 };
 
+static const char *ipu_mode_names[F2FS_IPU_MAX] = {
+       [F2FS_IPU_FORCE]        = "FORCE",
+       [F2FS_IPU_SSR]          = "SSR",
+       [F2FS_IPU_UTIL]         = "UTIL",
+       [F2FS_IPU_SSR_UTIL]     = "SSR_UTIL",
+       [F2FS_IPU_FSYNC]        = "FSYNC",
+       [F2FS_IPU_ASYNC]        = "ASYNC",
+       [F2FS_IPU_NOCACHE]      = "NOCACHE",
+       [F2FS_IPU_HONOR_OPU_WRITE]      = "HONOR_OPU_WRITE",
+};
+
 static int stat_show(struct seq_file *s, void *v)
 {
        struct f2fs_stat_info *si;
@@ -362,16 +373,18 @@ static int stat_show(struct seq_file *s, void *v)
 
        raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
        list_for_each_entry(si, &f2fs_stat_list, stat_list) {
-               update_general_status(si->sbi);
+               struct f2fs_sb_info *sbi = si->sbi;
+
+               update_general_status(sbi);
 
                seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
-                       si->sbi->sb->s_bdev, i++,
-                       f2fs_readonly(si->sbi->sb) ? "RO" : "RW",
-                       is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ?
-                       "Disabled" : (f2fs_cp_error(si->sbi) ? "Error" : "Good"));
-               if (si->sbi->s_flag) {
+                       sbi->sb->s_bdev, i++,
+                       f2fs_readonly(sbi->sb) ? "RO" : "RW",
+                       is_set_ckpt_flags(sbi, CP_DISABLED_FLAG) ?
+                       "Disabled" : (f2fs_cp_error(sbi) ? "Error" : "Good"));
+               if (sbi->s_flag) {
                        seq_puts(s, "[SBI:");
-                       for_each_set_bit(j, &si->sbi->s_flag, 32)
+                       for_each_set_bit(j, &sbi->s_flag, 32)
                                seq_puts(s, s_flag[j]);
                        seq_puts(s, "]\n");
                }
@@ -383,8 +396,21 @@ static int stat_show(struct seq_file *s, void *v)
                           si->overp_segs, si->rsvd_segs);
                seq_printf(s, "Current Time Sec: %llu / Mounted Time Sec: %llu\n\n",
                                        ktime_get_boottime_seconds(),
-                                       SIT_I(si->sbi)->mounted_time);
-               if (test_opt(si->sbi, DISCARD))
+                                       SIT_I(sbi)->mounted_time);
+
+               seq_puts(s, "Policy:\n");
+               seq_puts(s, "  - IPU: [");
+               if (IS_F2FS_IPU_DISABLE(sbi)) {
+                       seq_puts(s, " DISABLE");
+               } else {
+                       unsigned long policy = SM_I(sbi)->ipu_policy;
+
+                       for_each_set_bit(j, &policy, F2FS_IPU_MAX)
+                               seq_printf(s, " %s", ipu_mode_names[j]);
+               }
+               seq_puts(s, " ]\n\n");
+
+               if (test_opt(sbi, DISCARD))
                        seq_printf(s, "Utilization: %u%% (%u valid blocks, %u discard blocks)\n",
                                si->utilization, si->valid_count, si->discard_blks);
                else
@@ -491,15 +517,15 @@ static int stat_show(struct seq_file *s, void *v)
                seq_printf(s, "  - node segments : %d (%d)\n",
                                si->node_segs, si->bg_node_segs);
                seq_puts(s, "  - Reclaimed segs :\n");
-               seq_printf(s, "    - Normal : %d\n", si->sbi->gc_reclaimed_segs[GC_NORMAL]);
-               seq_printf(s, "    - Idle CB : %d\n", si->sbi->gc_reclaimed_segs[GC_IDLE_CB]);
+               seq_printf(s, "    - Normal : %d\n", sbi->gc_reclaimed_segs[GC_NORMAL]);
+               seq_printf(s, "    - Idle CB : %d\n", sbi->gc_reclaimed_segs[GC_IDLE_CB]);
                seq_printf(s, "    - Idle Greedy : %d\n",
-                               si->sbi->gc_reclaimed_segs[GC_IDLE_GREEDY]);
-               seq_printf(s, "    - Idle AT : %d\n", si->sbi->gc_reclaimed_segs[GC_IDLE_AT]);
+                               sbi->gc_reclaimed_segs[GC_IDLE_GREEDY]);
+               seq_printf(s, "    - Idle AT : %d\n", sbi->gc_reclaimed_segs[GC_IDLE_AT]);
                seq_printf(s, "    - Urgent High : %d\n",
-                               si->sbi->gc_reclaimed_segs[GC_URGENT_HIGH]);
-               seq_printf(s, "    - Urgent Mid : %d\n", si->sbi->gc_reclaimed_segs[GC_URGENT_MID]);
-               seq_printf(s, "    - Urgent Low : %d\n", si->sbi->gc_reclaimed_segs[GC_URGENT_LOW]);
+                               sbi->gc_reclaimed_segs[GC_URGENT_HIGH]);
+               seq_printf(s, "    - Urgent Mid : %d\n", sbi->gc_reclaimed_segs[GC_URGENT_MID]);
+               seq_printf(s, "    - Urgent Low : %d\n", sbi->gc_reclaimed_segs[GC_URGENT_LOW]);
                seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks,
                                si->bg_data_blks + si->bg_node_blks);
                seq_printf(s, "  - data blocks : %d (%d)\n", si->data_blks,
@@ -565,7 +591,7 @@ static int stat_show(struct seq_file *s, void *v)
                           si->ndirty_imeta);
                seq_printf(s, "  - fsync mark: %4lld\n",
                           percpu_counter_sum_positive(
-                                       &si->sbi->rf_node_block_count));
+                                       &sbi->rf_node_block_count));
                seq_printf(s, "  - NATs: %9d/%9d\n  - SITs: %9d/%9d\n",
                           si->dirty_nats, si->nats, si->dirty_sits, si->sits);
                seq_printf(s, "  - free_nids: %9d/%9d\n  - alloc_nids: %9d\n",
@@ -592,12 +618,12 @@ static int stat_show(struct seq_file *s, void *v)
                           si->block_count[LFS], si->segment_count[LFS]);
 
                /* segment usage info */
-               f2fs_update_sit_info(si->sbi);
+               f2fs_update_sit_info(sbi);
                seq_printf(s, "\nBDF: %u, avg. vblocks: %u\n",
                           si->bimodal, si->avg_vblocks);
 
                /* memory footprint */
-               update_mem_info(si->sbi);
+               update_mem_info(sbi);
                seq_printf(s, "\nMemory: %llu KB\n",
                        (si->base_mem + si->cache_mem + si->page_mem) >> 10);
                seq_printf(s, "  - static: %llu KB\n",
index 8e025157f35c9550885b5d9c7eb8506871f560bb..9ccdbe120425e8c5b212c66cba920acb1f737739 100644
@@ -732,10 +732,8 @@ int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname,
        }
 
 start:
-       if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
-               f2fs_show_injection_info(F2FS_I_SB(dir), FAULT_DIR_DEPTH);
+       if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH))
                return -ENOSPC;
-       }
 
        if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
                return -ENOSPC;
index 342af24b2f8cf7d8237ff05b0db6cd92e51f5e8c..28b12553f2b34e08cc937c960339821e0f5230ef 100644
 #include "node.h"
 #include <trace/events/f2fs.h>
 
+bool sanity_check_extent_cache(struct inode *inode)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       struct f2fs_inode_info *fi = F2FS_I(inode);
+       struct extent_info *ei;
+
+       if (!fi->extent_tree[EX_READ])
+               return true;
+
+       ei = &fi->extent_tree[EX_READ]->largest;
+
+       if (ei->len &&
+               (!f2fs_is_valid_blkaddr(sbi, ei->blk,
+                                       DATA_GENERIC_ENHANCE) ||
+               !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
+                                       DATA_GENERIC_ENHANCE))) {
+               set_sbi_flag(sbi, SBI_NEED_FSCK);
+               f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
+                         __func__, inode->i_ino,
+                         ei->blk, ei->fofs, ei->len);
+               return false;
+       }
+       return true;
+}
+
 static void __set_extent_info(struct extent_info *ei,
                                unsigned int fofs, unsigned int len,
                                block_t blk, bool keep_clen,
@@ -233,7 +258,7 @@ struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
  * @prev_ex: extent before ofs
  * @next_ex: extent after ofs
  * @insert_p: insert point for new extent at ofs
- * in order to simpfy the insertion after.
+ * in order to simplify the insertion afterwards.
  * tree must stay unchanged between lookup and insertion.
  */
 struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
@@ -718,7 +743,7 @@ static void __update_extent_tree_range(struct inode *inode,
        if (!en)
                en = next_en;
 
-       /* 2. invlidate all extent nodes in range [fofs, fofs + len - 1] */
+       /* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
        while (en && en->ei.fofs < end) {
                unsigned int org_end;
                int parts = 0;  /* # of parts current extent split into */
@@ -871,14 +896,23 @@ unlock_out:
 }
 #endif
 
-static unsigned long long __calculate_block_age(unsigned long long new,
+static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
+                                               unsigned long long new,
                                                unsigned long long old)
 {
-       unsigned long long diff;
+       unsigned int rem_old, rem_new;
+       unsigned long long res;
+       unsigned int weight = sbi->last_age_weight;
+
+       res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
+               + div_u64_rem(old, 100, &rem_old) * weight;
 
-       diff = (new >= old) ? new - (new - old) : new + (old - new);
+       if (rem_new)
+               res += rem_new * (100 - weight) / 100;
+       if (rem_old)
+               res += rem_old * weight / 100;
 
-       return div_u64(diff * LAST_AGE_WEIGHT, 100);
+       return res;
 }
 
 /* This returns a new age and allocated blocks in ei */
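
Note on the __calculate_block_age() rewrite above: the removed expression
"(new >= old) ? new - (new - old) : new + (old - new)" reduces to plain old
in both branches, so the new age was effectively ignored. The replacement
computes a genuine weighted average,
res ~= new * (100 - weight) / 100 + old * weight / 100, with weight taken
from the new sbi->last_age_weight percentage, and uses div_u64_rem() so no
intermediate multiplication overflows 64 bits. A standalone sketch of the
same arithmetic (plain C, outside the kernel), with a worked example:

	/* weight is a percentage in [0, 100] */
	static unsigned long long weighted_age(unsigned long long new,
					       unsigned long long old,
					       unsigned int weight)
	{
		unsigned long long res;

		res = (new / 100) * (100 - weight) + (old / 100) * weight;
		res += (new % 100) * (100 - weight) / 100; /* remainder of new */
		res += (old % 100) * weight / 100;         /* remainder of old */
		return res;
	}

	/* e.g. weighted_age(200, 100, 30) == 200*70/100 + 100*30/100 == 170 */
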
@@ -910,7 +944,7 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
                        cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;
 
                if (tei.age)
-                       ei->age = __calculate_block_age(cur_age, tei.age);
+                       ei->age = __calculate_block_age(sbi, cur_age, tei.age);
                else
                        ei->age = cur_age;
                ei->last_blocks = cur_blocks;
@@ -1047,6 +1081,17 @@ bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
        return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
 }
 
+bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
+                               block_t *blkaddr)
+{
+       struct extent_info ei = {};
+
+       if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
+               return false;
+       *blkaddr = ei.blk + index - ei.fofs;
+       return true;
+}
+
 void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
 {
        return __update_extent_cache(dn, EX_READ);
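
f2fs_lookup_read_extent_cache_block() packages the recurring two-step
lookup-then-compute pattern of the callers converted in this series;
schematically (caller shapes taken from the data.c hunks earlier in this
diff):

	/* before: */
	struct extent_info ei = {0, };

	if (f2fs_lookup_read_extent_cache(inode, index, &ei))
		dn.data_blkaddr = ei.blk + index - ei.fofs;

	/* after: the helper does the ei.blk + index - ei.fofs math and
	 * returns false on a cache miss */
	if (!f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr))
		/* fall back to a dnode lookup */;
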
@@ -1226,6 +1271,7 @@ void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
        atomic64_set(&sbi->allocated_data_blocks, 0);
        sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
        sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
+       sbi->last_age_weight = LAST_AGE_WEIGHT;
 }
 
 int __init f2fs_create_extent_cache(void)
index 9a3ffa39ad30ede1dff0ac55d1b95cdd61abab20..b0ab2062038a09ca22e7bc02ae6b1997d0af8b33 100644
@@ -402,7 +402,6 @@ struct discard_cmd_control {
        struct list_head wait_list;             /* store on-flushing entries */
        struct list_head fstrim_list;           /* in-flight discard from fstrim */
        wait_queue_head_t discard_wait_queue;   /* waiting queue for wake-up */
-       unsigned int discard_wake;              /* to wake up discard thread */
        struct mutex cmd_lock;
        unsigned int nr_discards;               /* # of discards in the list */
        unsigned int max_discards;              /* max. discards to be issued */
@@ -410,6 +409,7 @@ struct discard_cmd_control {
        unsigned int min_discard_issue_time;    /* min. interval between discard issue */
        unsigned int mid_discard_issue_time;    /* mid. interval between discard issue */
        unsigned int max_discard_issue_time;    /* max. interval between discard issue */
+       unsigned int discard_io_aware_gran;     /* minimum discard granularity not to be aware of I/O */
        unsigned int discard_urgent_util;       /* utilization which issue discard proactively */
        unsigned int discard_granularity;       /* discard granularity */
        unsigned int max_ordered_discard;       /* maximum discard granularity issued by lba order */
@@ -420,6 +420,7 @@ struct discard_cmd_control {
        atomic_t discard_cmd_cnt;               /* # of cached cmd count */
        struct rb_root_cached root;             /* root of discard rb-tree */
        bool rbtree_check;                      /* config for consistency check */
+       bool discard_wake;                      /* to wake up discard thread */
 };
 
 /* for the list of fsync inodes, used only during recovery */
@@ -692,15 +693,13 @@ struct extent_tree_info {
 };
 
 /*
- * This structure is taken from ext4_map_blocks.
- *
- * Note that, however, f2fs uses NEW and MAPPED flags for f2fs_map_blocks().
+ * State of block returned by f2fs_map_blocks.
  */
-#define F2FS_MAP_NEW           (1 << BH_New)
-#define F2FS_MAP_MAPPED                (1 << BH_Mapped)
-#define F2FS_MAP_UNWRITTEN     (1 << BH_Unwritten)
+#define F2FS_MAP_NEW           (1U << 0)
+#define F2FS_MAP_MAPPED                (1U << 1)
+#define F2FS_MAP_DELALLOC      (1U << 2)
 #define F2FS_MAP_FLAGS         (F2FS_MAP_NEW | F2FS_MAP_MAPPED |\
-                               F2FS_MAP_UNWRITTEN)
+                               F2FS_MAP_DELALLOC)
 
 struct f2fs_map_blocks {
        struct block_device *m_bdev;    /* for multi-device dio */
@@ -870,7 +869,7 @@ struct f2fs_inode_info {
        unsigned char i_compress_algorithm;     /* algorithm type */
        unsigned char i_log_cluster_size;       /* log of cluster size */
        unsigned char i_compress_level;         /* compress level (lz4hc,zstd) */
-       unsigned short i_compress_flag;         /* compress flag */
+       unsigned char i_compress_flag;          /* compress flag */
        unsigned int i_cluster_size;            /* cluster size */
 
        unsigned int atomic_write_cnt;
@@ -1193,7 +1192,8 @@ enum iostat_type {
        FS_META_READ_IO,                /* meta read IOs */
 
        /* other */
-       FS_DISCARD,                     /* discard */
+       FS_DISCARD_IO,                  /* discard */
+       FS_FLUSH_IO,                    /* flush */
        NR_IO_TYPE,
 };
 
@@ -1210,19 +1210,19 @@ struct f2fs_io_info {
        struct page *encrypted_page;    /* encrypted page */
        struct page *compressed_page;   /* compressed page */
        struct list_head list;          /* serialize IOs */
-       bool submitted;         /* indicate IO submission */
-       int need_lock;          /* indicate we need to lock cp_rwsem */
-       bool in_list;           /* indicate fio is in io_list */
-       bool is_por;            /* indicate IO is from recovery or not */
-       bool retry;             /* need to reallocate block address */
-       int compr_blocks;       /* # of compressed block addresses */
-       bool encrypted;         /* indicate file is encrypted */
-       bool post_read;         /* require post read */
+       unsigned int compr_blocks;      /* # of compressed block addresses */
+       unsigned int need_lock:8;       /* indicate we need to lock cp_rwsem */
+       unsigned int version:8;         /* version of the node */
+       unsigned int submitted:1;       /* indicate IO submission */
+       unsigned int in_list:1;         /* indicate fio is in io_list */
+       unsigned int is_por:1;          /* indicate IO is from recovery or not */
+       unsigned int retry:1;           /* need to reallocate block address */
+       unsigned int encrypted:1;       /* indicate file is encrypted */
+       unsigned int post_read:1;       /* require post read */
        enum iostat_type io_type;       /* io type */
        struct writeback_control *io_wbc; /* writeback control */
        struct bio **bio;               /* bio for ipu */
        sector_t *last_block;           /* last block number in bio */
-       unsigned char version;          /* version of the node */
 };
 
 struct bio_entry {
@@ -1384,8 +1384,6 @@ enum {
        MEMORY_MODE_LOW,        /* memory mode for low memory devices */
 };
 
-
-
 static inline int f2fs_test_bit(unsigned int nr, char *addr);
 static inline void f2fs_set_bit(unsigned int nr, char *addr);
 static inline void f2fs_clear_bit(unsigned int nr, char *addr);
@@ -1396,19 +1394,17 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr);
  * Layout A: lowest bit should be 1
  * | bit0 = 1 | bit1 | bit2 | ... | bit MAX | private data .... |
  * bit 0       PAGE_PRIVATE_NOT_POINTER
- * bit 1       PAGE_PRIVATE_ATOMIC_WRITE
- * bit 2       PAGE_PRIVATE_DUMMY_WRITE
- * bit 3       PAGE_PRIVATE_ONGOING_MIGRATION
- * bit 4       PAGE_PRIVATE_INLINE_INODE
- * bit 5       PAGE_PRIVATE_REF_RESOURCE
- * bit 6-      f2fs private data
+ * bit 1       PAGE_PRIVATE_DUMMY_WRITE
+ * bit 2       PAGE_PRIVATE_ONGOING_MIGRATION
+ * bit 3       PAGE_PRIVATE_INLINE_INODE
+ * bit 4       PAGE_PRIVATE_REF_RESOURCE
+ * bit 5-      f2fs private data
  *
  * Layout B: lowest bit should be 0
  * page.private is a wrapped pointer.
  */
 enum {
        PAGE_PRIVATE_NOT_POINTER,               /* private contains non-pointer data */
-       PAGE_PRIVATE_ATOMIC_WRITE,              /* data page from atomic write path */
        PAGE_PRIVATE_DUMMY_WRITE,               /* data page for padding aligned IO */
        PAGE_PRIVATE_ONGOING_MIGRATION,         /* data page which is on-going migrating */
        PAGE_PRIVATE_INLINE_INODE,              /* inode page contains inline data */
@@ -1450,22 +1446,18 @@ static inline void clear_page_private_##name(struct page *page) \
 }
 
 PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER);
-PAGE_PRIVATE_GET_FUNC(reference, REF_RESOURCE);
 PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE);
 PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION);
-PAGE_PRIVATE_GET_FUNC(atomic, ATOMIC_WRITE);
 PAGE_PRIVATE_GET_FUNC(dummy, DUMMY_WRITE);
 
 PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE);
 PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE);
 PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION);
-PAGE_PRIVATE_SET_FUNC(atomic, ATOMIC_WRITE);
 PAGE_PRIVATE_SET_FUNC(dummy, DUMMY_WRITE);
 
 PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE);
 PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE);
 PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION);
-PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE);
 PAGE_PRIVATE_CLEAR_FUNC(dummy, DUMMY_WRITE);
 
 static inline unsigned long get_page_private_data(struct page *page)
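
For orientation: the PAGE_PRIVATE_{GET,SET,CLEAR}_FUNC generators
instantiated above are defined just before this hunk in f2fs.h and are
untouched by it. Based on the Layout A description above, the getter
presumably expands along these lines (a sketch, not the verbatim macro):

	#define PAGE_PRIVATE_GET_FUNC(name, flagname) \
	static inline bool page_private_##name(struct page *page) \
	{ \
		return PagePrivate(page) && \
			test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \
			test_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
	}
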
@@ -1679,6 +1671,7 @@ struct f2fs_sb_info {
        /* The threshold used for hot and warm data separation */
        unsigned int hot_data_age_threshold;
        unsigned int warm_data_age_threshold;
+       unsigned int last_age_weight;
 
        /* basic filesystem units */
        unsigned int log_sectors_per_block;     /* log2 sectors per block */
@@ -1864,8 +1857,9 @@ struct f2fs_sb_info {
 #ifdef CONFIG_F2FS_IOSTAT
        /* For app/fs IO statistics */
        spinlock_t iostat_lock;
-       unsigned long long rw_iostat[NR_IO_TYPE];
-       unsigned long long prev_rw_iostat[NR_IO_TYPE];
+       unsigned long long iostat_count[NR_IO_TYPE];
+       unsigned long long iostat_bytes[NR_IO_TYPE];
+       unsigned long long prev_iostat_bytes[NR_IO_TYPE];
        bool iostat_enable;
        unsigned long iostat_next_period;
        unsigned int iostat_period_ms;
@@ -1877,12 +1871,10 @@ struct f2fs_sb_info {
 };
 
 #ifdef CONFIG_F2FS_FAULT_INJECTION
-#define f2fs_show_injection_info(sbi, type)                                    \
-       printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \
-               KERN_INFO, sbi->sb->s_id,                               \
-               f2fs_fault_name[type],                                  \
-               __func__, __builtin_return_address(0))
-static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
+#define time_to_inject(sbi, type) __time_to_inject(sbi, type, __func__,        \
+                                                                       __builtin_return_address(0))
+static inline bool __time_to_inject(struct f2fs_sb_info *sbi, int type,
+                               const char *func, const char *parent_func)
 {
        struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info;
 
@@ -1895,12 +1887,14 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
        atomic_inc(&ffi->inject_ops);
        if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) {
                atomic_set(&ffi->inject_ops, 0);
+               printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n",
+                       KERN_INFO, sbi->sb->s_id, f2fs_fault_name[type],
+                       func, parent_func);
                return true;
        }
        return false;
 }
 #else
-#define f2fs_show_injection_info(sbi, type) do { } while (0)
 static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
 {
        return false;
@@ -2233,10 +2227,8 @@ static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 
 static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
 {
-       if (time_to_inject(sbi, FAULT_LOCK_OP)) {
-               f2fs_show_injection_info(sbi, FAULT_LOCK_OP);
+       if (time_to_inject(sbi, FAULT_LOCK_OP))
                return 0;
-       }
        return f2fs_down_read_trylock(&sbi->cp_rwsem);
 }
 
@@ -2324,7 +2316,6 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
                return ret;
 
        if (time_to_inject(sbi, FAULT_BLOCK)) {
-               f2fs_show_injection_info(sbi, FAULT_BLOCK);
                release = *count;
                goto release_quota;
        }
@@ -2604,10 +2595,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
                        return err;
        }
 
-       if (time_to_inject(sbi, FAULT_BLOCK)) {
-               f2fs_show_injection_info(sbi, FAULT_BLOCK);
+       if (time_to_inject(sbi, FAULT_BLOCK))
                goto enospc;
-       }
 
        spin_lock(&sbi->stat_lock);
 
@@ -2731,11 +2720,8 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
                if (page)
                        return page;
 
-               if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
-                       f2fs_show_injection_info(F2FS_M_SB(mapping),
-                                                       FAULT_PAGE_ALLOC);
+               if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC))
                        return NULL;
-               }
        }
 
        if (!for_write)
@@ -2752,10 +2738,8 @@ static inline struct page *f2fs_pagecache_get_page(
                                struct address_space *mapping, pgoff_t index,
                                int fgp_flags, gfp_t gfp_mask)
 {
-       if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) {
-               f2fs_show_injection_info(F2FS_M_SB(mapping), FAULT_PAGE_GET);
+       if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET))
                return NULL;
-       }
 
        return pagecache_get_page(mapping, index, fgp_flags, gfp_mask);
 }
@@ -2805,10 +2789,8 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
        if (nofail)
                return f2fs_kmem_cache_alloc_nofail(cachep, flags);
 
-       if (time_to_inject(sbi, FAULT_SLAB_ALLOC)) {
-               f2fs_show_injection_info(sbi, FAULT_SLAB_ALLOC);
+       if (time_to_inject(sbi, FAULT_SLAB_ALLOC))
                return NULL;
-       }
 
        return kmem_cache_alloc(cachep, flags);
 }
@@ -3382,10 +3364,8 @@ static inline bool is_dot_dotdot(const u8 *name, size_t len)
 static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
                                        size_t size, gfp_t flags)
 {
-       if (time_to_inject(sbi, FAULT_KMALLOC)) {
-               f2fs_show_injection_info(sbi, FAULT_KMALLOC);
+       if (time_to_inject(sbi, FAULT_KMALLOC))
                return NULL;
-       }
 
        return kmalloc(size, flags);
 }
@@ -3399,10 +3379,8 @@ static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi,
 static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi,
                                        size_t size, gfp_t flags)
 {
-       if (time_to_inject(sbi, FAULT_KVMALLOC)) {
-               f2fs_show_injection_info(sbi, FAULT_KVMALLOC);
+       if (time_to_inject(sbi, FAULT_KVMALLOC))
                return NULL;
-       }
 
        return kvmalloc(size, flags);
 }
@@ -3788,8 +3766,8 @@ int __init f2fs_init_bioset(void);
 void f2fs_destroy_bioset(void);
 int f2fs_init_bio_entry_cache(void);
 void f2fs_destroy_bio_entry_cache(void);
-void f2fs_submit_bio(struct f2fs_sb_info *sbi,
-                               struct bio *bio, enum page_type type);
+void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
+                         enum page_type type);
 int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
@@ -3808,7 +3786,7 @@ void f2fs_set_data_blkaddr(struct dnode_of_data *dn);
 void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
 int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
 int f2fs_reserve_new_block(struct dnode_of_data *dn);
-int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
+int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index);
 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
                        blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs);
@@ -3819,9 +3797,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
 struct page *f2fs_get_new_data_page(struct inode *inode,
                        struct page *ipage, pgoff_t index, bool new_i_size);
 int f2fs_do_write_data_page(struct f2fs_io_info *fio);
-void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
-int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
-                       int create, int flag);
+int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag);
 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        u64 start, u64 len);
 int f2fs_encrypt_one_page(struct f2fs_io_info *fio);
@@ -4161,6 +4137,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
 /*
  * extent_cache.c
  */
+bool sanity_check_extent_cache(struct inode *inode);
 struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
                                struct rb_entry *cached_re, unsigned int ofs);
 struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
@@ -4190,6 +4167,8 @@ void f2fs_destroy_extent_cache(void);
 void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage);
 bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
                        struct extent_info *ei);
+bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
+                       block_t *blkaddr);
 void f2fs_update_read_extent_cache(struct dnode_of_data *dn);
 void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
                        pgoff_t fofs, block_t blkaddr, unsigned int len);
@@ -4259,7 +4238,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
 int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock);
 void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
 bool f2fs_is_compress_backend_ready(struct inode *inode);
-int f2fs_init_compress_mempool(void);
+int __init f2fs_init_compress_mempool(void);
 void f2fs_destroy_compress_mempool(void);
 void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task);
 void f2fs_end_read_compressed_page(struct page *page, bool failed,
@@ -4328,7 +4307,7 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
        WARN_ON_ONCE(1);
        return ERR_PTR(-EINVAL);
 }
-static inline int f2fs_init_compress_mempool(void) { return 0; }
+static inline int __init f2fs_init_compress_mempool(void) { return 0; }
 static inline void f2fs_destroy_compress_mempool(void) { }
 static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic,
                                bool in_task) { }
@@ -4381,9 +4360,8 @@ static inline int set_compress_context(struct inode *inode)
        if ((F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 ||
                F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) &&
                        F2FS_OPTION(sbi).compress_level)
-               F2FS_I(inode)->i_compress_flag |=
-                               F2FS_OPTION(sbi).compress_level <<
-                               COMPRESS_LEVEL_OFFSET;
+               F2FS_I(inode)->i_compress_level =
+                               F2FS_OPTION(sbi).compress_level;
        F2FS_I(inode)->i_flags |= F2FS_COMPR_FL;
        set_inode_flag(inode, FI_COMPRESSED_FILE);
        stat_inc_compr_inode(inode);
index b906176397436a1fefe4cbaeee6326847039f644..15dabeac4690500f8f6e6d05d64b5cba18729781 100644
@@ -113,10 +113,8 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 
        if (need_alloc) {
                /* block allocation */
-               f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
                set_new_dnode(&dn, inode, NULL, NULL, 0);
-               err = f2fs_get_block(&dn, page->index);
-               f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+               err = f2fs_get_block_locked(&dn, page->index);
        }
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
@@ -305,7 +303,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
                 * for OPU case, during fsync(), node can be persisted before
                 * data when lower device doesn't support write barrier, resulting
                 * in data corruption after SPO.
-                * So for strict fsync mode, force to use atomic write sematics
+                * So for strict fsync mode, force to use atomic write semantics
                 * to keep write order in between data/node and last node to
                 * avoid potential data corruption.
                 */
@@ -619,7 +617,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
                fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
                                                        dn->inode) + ofs;
                f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
-               f2fs_update_age_extent_cache_range(dn, fofs, nr_free);
+               f2fs_update_age_extent_cache_range(dn, fofs, len);
                dec_valid_block_count(sbi, dn->inode, nr_free);
        }
        dn->ofs_in_node = ofs;
@@ -784,10 +782,8 @@ int f2fs_truncate(struct inode *inode)
 
        trace_f2fs_truncate(inode);
 
-       if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
-               f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_TRUNCATE);
+       if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE))
                return -EIO;
-       }
 
        err = f2fs_dquot_initialize(inode);
        if (err)
@@ -1112,7 +1108,7 @@ int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
        return 0;
 }
 
-static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
+static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 {
        pgoff_t pg_start, pg_end;
        loff_t off_start, off_end;
@@ -1498,6 +1494,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
        }
 
        f2fs_update_read_extent_cache_range(dn, start, 0, index - start);
+       f2fs_update_age_extent_cache_range(dn, start, index - start);
 
        return ret;
 }
@@ -1684,7 +1681,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
        return ret;
 }
 
-static int expand_inode_data(struct inode *inode, loff_t offset,
+static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
                                        loff_t len, int mode)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -1697,7 +1694,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
                        .err_gc_skipped = true,
                        .nr_free_secs = 0 };
        pgoff_t pg_start, pg_end;
-       loff_t new_size = i_size_read(inode);
+       loff_t new_size;
        loff_t off_end;
        block_t expanded = 0;
        int err;
@@ -1745,7 +1742,7 @@ next_alloc:
                f2fs_unlock_op(sbi);
 
                map.m_seg_type = CURSEG_COLD_DATA_PINNED;
-               err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+               err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
                file_dont_truncate(inode);
 
                f2fs_up_write(&sbi->pin_sem);
@@ -1758,7 +1755,7 @@ next_alloc:
 
                map.m_len = expanded;
        } else {
-               err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+               err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO);
                expanded = map.m_len;
        }
 out_err:
@@ -1809,7 +1806,7 @@ static long f2fs_fallocate(struct file *file, int mode,
                return -EOPNOTSUPP;
 
        /*
-        * Pinned file should not support partial trucation since the block
+        * Pinned file should not support partial truncation since the block
         * can be used by applications.
         */
        if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
@@ -1832,7 +1829,7 @@ static long f2fs_fallocate(struct file *file, int mode,
                if (offset >= inode->i_size)
                        goto out;
 
-               ret = punch_hole(inode, offset, len);
+               ret = f2fs_punch_hole(inode, offset, len);
        } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
                ret = f2fs_collapse_range(inode, offset, len);
        } else if (mode & FALLOC_FL_ZERO_RANGE) {
@@ -1840,7 +1837,7 @@ static long f2fs_fallocate(struct file *file, int mode,
        } else if (mode & FALLOC_FL_INSERT_RANGE) {
                ret = f2fs_insert_range(inode, offset, len);
        } else {
-               ret = expand_inode_data(inode, offset, len, mode);
+               ret = f2fs_expand_inode_data(inode, offset, len, mode);
        }
 
        if (!ret) {
@@ -1859,14 +1856,17 @@ out:
 static int f2fs_release_file(struct inode *inode, struct file *filp)
 {
        /*
-        * f2fs_relase_file is called at every close calls. So we should
+        * f2fs_release_file is called at every close call. So we should
         * not drop any in-memory pages by a close called by another process.
         */
        if (!(filp->f_mode & FMODE_WRITE) ||
                        atomic_read(&inode->i_writecount) != 1)
                return 0;
 
+       inode_lock(inode);
        f2fs_abort_atomic_write(inode, true);
+       inode_unlock(inode);
+
        return 0;
 }
 
@@ -1880,8 +1880,13 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id)
         * until all the writers close its file. Since this should be done
         * before dropping the file lock, it needs to be done in ->flush.
         */
-       if (F2FS_I(inode)->atomic_write_task == current)
+       if (F2FS_I(inode)->atomic_write_task == current &&
+                               (current->flags & PF_EXITING)) {
+               inode_lock(inode);
                f2fs_abort_atomic_write(inode, true);
+               inode_unlock(inode);
+       }
+
        return 0;
 }
 
@@ -2087,19 +2092,28 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
                goto out;
        }
 
-       /* Create a COW inode for atomic write */
-       pinode = f2fs_iget(inode->i_sb, fi->i_pino);
-       if (IS_ERR(pinode)) {
-               f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
-               ret = PTR_ERR(pinode);
-               goto out;
-       }
+       /* Check if the inode already has a COW inode */
+       if (fi->cow_inode == NULL) {
+               /* Create a COW inode for atomic write */
+               pinode = f2fs_iget(inode->i_sb, fi->i_pino);
+               if (IS_ERR(pinode)) {
+                       f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
+                       ret = PTR_ERR(pinode);
+                       goto out;
+               }
 
-       ret = f2fs_get_tmpfile(idmap, pinode, &fi->cow_inode);
-       iput(pinode);
-       if (ret) {
-               f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
-               goto out;
+               ret = f2fs_get_tmpfile(idmap, pinode, &fi->cow_inode);
+               iput(pinode);
+               if (ret) {
+                       f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
+                       goto out;
+               }
+
+               set_inode_flag(fi->cow_inode, FI_COW_FILE);
+               clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
+       } else {
+               /* Reuse the already created COW inode */
+               f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
        }
 
        f2fs_write_inode(inode, NULL);
@@ -2107,8 +2121,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
        stat_inc_atomic_inode(inode);
 
        set_inode_flag(inode, FI_ATOMIC_FILE);
-       set_inode_flag(fi->cow_inode, FI_COW_FILE);
-       clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
 
        isize = i_size_read(inode);
        fi->original_i_size = isize;
@@ -2338,6 +2350,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       u8 encrypt_pw_salt[16];
        int err;
 
        if (!f2fs_sb_has_encrypt(sbi))
@@ -2362,12 +2375,14 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
                goto out_err;
        }
 got_it:
-       if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt,
-                                                                       16))
-               err = -EFAULT;
+       memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16);
 out_err:
        f2fs_up_write(&sbi->sb_lock);
        mnt_drop_write_file(filp);
+
+       if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16))
+               err = -EFAULT;
+
        return err;
 }
 
@@ -2524,7 +2539,7 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
        return __f2fs_ioc_gc_range(filp, &range);
 }
 
-static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
+static int f2fs_ioc_write_checkpoint(struct file *filp)
 {
        struct inode *inode = file_inode(filp);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -2606,7 +2621,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
         */
        while (map.m_lblk < pg_end) {
                map.m_len = pg_end - map.m_lblk;
-               err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
+               err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
                if (err)
                        goto out;
 
@@ -2653,7 +2668,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 
 do_map:
                map.m_len = pg_end - map.m_lblk;
-               err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
+               err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
                if (err)
                        goto clear_out;
 
@@ -3227,7 +3242,7 @@ int f2fs_precache_extents(struct inode *inode)
                map.m_len = end - map.m_lblk;
 
                f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
-               err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE);
+               err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE);
                f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
                if (err)
                        return err;
@@ -3238,7 +3253,7 @@ int f2fs_precache_extents(struct inode *inode)
        return 0;
 }
 
-static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg)
+static int f2fs_ioc_precache_extents(struct file *filp)
 {
        return f2fs_precache_extents(file_inode(filp));
 }
@@ -3942,7 +3957,7 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
                goto out;
        }
 
-       if (inode->i_size != 0) {
+       if (F2FS_HAS_BLOCKS(inode)) {
                ret = -EFBIG;
                goto out;
        }
@@ -3995,7 +4010,7 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
        return ret;
 }
 
-static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg)
+static int f2fs_ioc_decompress_file(struct file *filp)
 {
        struct inode *inode = file_inode(filp);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -4068,7 +4083,7 @@ out:
        return ret;
 }
 
-static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg)
+static int f2fs_ioc_compress_file(struct file *filp)
 {
        struct inode *inode = file_inode(filp);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -4184,7 +4199,7 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        case F2FS_IOC_GARBAGE_COLLECT_RANGE:
                return f2fs_ioc_gc_range(filp, arg);
        case F2FS_IOC_WRITE_CHECKPOINT:
-               return f2fs_ioc_write_checkpoint(filp, arg);
+               return f2fs_ioc_write_checkpoint(filp);
        case F2FS_IOC_DEFRAGMENT:
                return f2fs_ioc_defragment(filp, arg);
        case F2FS_IOC_MOVE_RANGE:
@@ -4198,7 +4213,7 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        case F2FS_IOC_SET_PIN_FILE:
                return f2fs_ioc_set_pin_file(filp, arg);
        case F2FS_IOC_PRECACHE_EXTENTS:
-               return f2fs_ioc_precache_extents(filp, arg);
+               return f2fs_ioc_precache_extents(filp);
        case F2FS_IOC_RESIZE_FS:
                return f2fs_ioc_resize_fs(filp, arg);
        case FS_IOC_ENABLE_VERITY:
@@ -4224,9 +4239,9 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        case F2FS_IOC_SET_COMPRESS_OPTION:
                return f2fs_ioc_set_compress_option(filp, arg);
        case F2FS_IOC_DECOMPRESS_FILE:
-               return f2fs_ioc_decompress_file(filp, arg);
+               return f2fs_ioc_decompress_file(filp);
        case F2FS_IOC_COMPRESS_FILE:
-               return f2fs_ioc_compress_file(filp, arg);
+               return f2fs_ioc_compress_file(filp);
        default:
                return -ENOTTY;
        }
@@ -4341,6 +4356,27 @@ out:
        return ret;
 }
 
+static void f2fs_trace_rw_file_path(struct kiocb *iocb, size_t count, int rw)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
+       char *buf, *path;
+
+       buf = f2fs_kmalloc(F2FS_I_SB(inode), PATH_MAX, GFP_KERNEL);
+       if (!buf)
+               return;
+       path = dentry_path_raw(file_dentry(iocb->ki_filp), buf, PATH_MAX);
+       if (IS_ERR(path))
+               goto free_buf;
+       if (rw == WRITE)
+               trace_f2fs_datawrite_start(inode, iocb->ki_pos, count,
+                               current->pid, path, current->comm);
+       else
+               trace_f2fs_dataread_start(inode, iocb->ki_pos, count,
+                               current->pid, path, current->comm);
+free_buf:
+       kfree(buf);
+}
+
 static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
        struct inode *inode = file_inode(iocb->ki_filp);
@@ -4350,24 +4386,9 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        if (!f2fs_is_compress_backend_ready(inode))
                return -EOPNOTSUPP;
 
-       if (trace_f2fs_dataread_start_enabled()) {
-               char *p = f2fs_kmalloc(F2FS_I_SB(inode), PATH_MAX, GFP_KERNEL);
-               char *path;
-
-               if (!p)
-                       goto skip_read_trace;
+       if (trace_f2fs_dataread_start_enabled())
+               f2fs_trace_rw_file_path(iocb, iov_iter_count(to), READ);
 
-               path = dentry_path_raw(file_dentry(iocb->ki_filp), p, PATH_MAX);
-               if (IS_ERR(path)) {
-                       kfree(p);
-                       goto skip_read_trace;
-               }
-
-               trace_f2fs_dataread_start(inode, pos, iov_iter_count(to),
-                                       current->pid, path, current->comm);
-               kfree(p);
-       }
-skip_read_trace:
        if (f2fs_should_use_dio(inode, iocb, to)) {
                ret = f2fs_dio_read_iter(iocb, to);
        } else {
@@ -4466,7 +4487,7 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
                flag = F2FS_GET_BLOCK_PRE_AIO;
        }
 
-       ret = f2fs_map_blocks(inode, &map, 1, flag);
+       ret = f2fs_map_blocks(inode, &map, flag);
        /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */
        if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
                return ret;
@@ -4673,24 +4694,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        if (preallocated < 0) {
                ret = preallocated;
        } else {
-               if (trace_f2fs_datawrite_start_enabled()) {
-                       char *p = f2fs_kmalloc(F2FS_I_SB(inode),
-                                               PATH_MAX, GFP_KERNEL);
-                       char *path;
-
-                       if (!p)
-                               goto skip_write_trace;
-                       path = dentry_path_raw(file_dentry(iocb->ki_filp),
-                                                               p, PATH_MAX);
-                       if (IS_ERR(path)) {
-                               kfree(p);
-                               goto skip_write_trace;
-                       }
-                       trace_f2fs_datawrite_start(inode, orig_pos, orig_count,
-                                       current->pid, path, current->comm);
-                       kfree(p);
-               }
-skip_write_trace:
+               if (trace_f2fs_datawrite_start_enabled())
+                       f2fs_trace_rw_file_path(iocb, orig_count, WRITE);
+
                /* Do the actual write. */
                ret = dio ?
                        f2fs_dio_write_iter(iocb, from, &may_need_sync) :
@@ -4823,6 +4829,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case F2FS_IOC32_MOVE_RANGE:
                return f2fs_compat_ioc_move_range(file, arg);
        case F2FS_IOC_START_ATOMIC_WRITE:
+       case F2FS_IOC_START_ATOMIC_REPLACE:
        case F2FS_IOC_COMMIT_ATOMIC_WRITE:
        case F2FS_IOC_START_VOLATILE_WRITE:
        case F2FS_IOC_RELEASE_VOLATILE_WRITE:
index 6e2cae3d2e717ae1d042f48bd748b9645f68e2c7..0a9dfa4598606709c3ceba02d22fcba6c1328212 100644 (file)
@@ -57,7 +57,7 @@ static int gc_thread_func(void *data)
 
                /* give it a try one time */
                if (gc_th->gc_wake)
-                       gc_th->gc_wake = 0;
+                       gc_th->gc_wake = false;
 
                if (try_to_freeze()) {
                        stat_other_skip_bggc_count(sbi);
@@ -72,11 +72,9 @@ static int gc_thread_func(void *data)
                        continue;
                }
 
-               if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
-                       f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
+               if (time_to_inject(sbi, FAULT_CHECKPOINT))
                        f2fs_stop_checkpoint(sbi, false,
                                        STOP_CP_REASON_FAULT_INJECT);
-               }
 
                if (!sb_start_write_trylock(sbi->sb)) {
                        stat_other_skip_bggc_count(sbi);
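
This hunk and the matching ones below (f2fs_alloc_nid, f2fs_balance_fs, __submit_discard_cmd, f2fs_evict_inode) drop the explicit f2fs_show_injection_info() call, which suggests the logging was folded into time_to_inject() itself so every call site collapses to a single condition. A hedged userspace model of that consolidation, not the kernel helper:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* model: the predicate both decides and reports, so callers stay one-liners */
    bool time_to_inject_model(const char *fault)
    {
            bool hit = (rand() % 100) == 0;     /* stand-in for the real ratio check */

            if (hit)
                    fprintf(stderr, "inject %s\n", fault);
            return hit;
    }

Call sites then read `if (time_to_inject_model("FAULT_CHECKPOINT")) ...` with no separate logging step.
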
@@ -185,7 +183,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
        gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
        gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
 
-       gc_th->gc_wake = 0;
+       gc_th->gc_wake = false;
 
        sbi->gc_thread = gc_th;
        init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
@@ -1150,7 +1148,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
        struct address_space *mapping = inode->i_mapping;
        struct dnode_of_data dn;
        struct page *page;
-       struct extent_info ei = {0, };
        struct f2fs_io_info fio = {
                .sbi = sbi,
                .ino = inode->i_ino,
@@ -1159,8 +1156,8 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
                .op = REQ_OP_READ,
                .op_flags = 0,
                .encrypted_page = NULL,
-               .in_list = false,
-               .retry = false,
+               .in_list = 0,
+               .retry = 0,
        };
        int err;
 
@@ -1168,8 +1165,8 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
        if (!page)
                return -ENOMEM;
 
-       if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
-               dn.data_blkaddr = ei.blk + index - ei.fofs;
+       if (f2fs_lookup_read_extent_cache_block(inode, index,
+                                               &dn.data_blkaddr)) {
                if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
                                                DATA_GENERIC_ENHANCE_READ))) {
                        err = -EFSCORRUPTED;
@@ -1248,8 +1245,8 @@ static int move_data_block(struct inode *inode, block_t bidx,
                .op = REQ_OP_READ,
                .op_flags = 0,
                .encrypted_page = NULL,
-               .in_list = false,
-               .retry = false,
+               .in_list = 0,
+               .retry = 0,
        };
        struct dnode_of_data dn;
        struct f2fs_summary sum;
@@ -1365,7 +1362,6 @@ static int move_data_block(struct inode *inode, block_t bidx,
                dec_page_count(fio.sbi, F2FS_DIRTY_META);
 
        set_page_writeback(fio.encrypted_page);
-       ClearPageError(page);
 
        fio.op = REQ_OP_WRITE;
        fio.op_flags = REQ_SYNC;
index 19b956c2d697a48336559c04b05e703011110420..15bd1d680f67789e665426e62acf9283c653e11d 100644 (file)
@@ -41,7 +41,7 @@ struct f2fs_gc_kthread {
        unsigned int no_gc_sleep_time;
 
        /* for changing gc mode */
-       unsigned int gc_wake;
+       bool gc_wake;
 
        /* for GC_MERGE mount option */
        wait_queue_head_t fggc_wq;              /*
index 21a495234ffd7f22bc63f8bc05b02acd6e4ee4f7..72269e7efd260a78b8c380f0a372c539c94a366d 100644 (file)
@@ -174,7 +174,6 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
 
        /* write data page to try to make data consistent */
        set_page_writeback(page);
-       ClearPageError(page);
        fio.old_blkaddr = dn->data_blkaddr;
        set_inode_flag(dn->inode, FI_HOT_DATA);
        f2fs_outplace_write_data(dn, &fio);
@@ -422,18 +421,17 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
 
        dentry_blk = page_address(page);
 
+       /*
+        * Start by zeroing the full block, to ensure that all unused space is
+        * zeroed and no uninitialized memory is leaked to disk.
+        */
+       memset(dentry_blk, 0, F2FS_BLKSIZE);
+
        make_dentry_ptr_inline(dir, &src, inline_dentry);
        make_dentry_ptr_block(dir, &dst, dentry_blk);
 
        /* copy data from inline dentry block to new dentry block */
        memcpy(dst.bitmap, src.bitmap, src.nr_bitmap);
-       memset(dst.bitmap + src.nr_bitmap, 0, dst.nr_bitmap - src.nr_bitmap);
-       /*
-        * we do not need to zero out remainder part of dentry and filename
-        * field, since we have used bitmap for marking the usage status of
-        * them, besides, we can also ignore copying/zeroing reserved space
-        * of dentry block, because them haven't been used so far.
-        */
        memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max);
        memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN);
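
The rewritten conversion zeroes the whole destination block up front instead of selectively clearing regions afterwards, so no uninitialized memory can reach the disk even for fields the bitmap does not cover. The same defensive pattern in a standalone sketch (BLKSIZE and the function name are illustrative):

    #include <stdlib.h>
    #include <string.h>

    #define BLKSIZE 4096

    /* caller guarantees src_len <= BLKSIZE */
    unsigned char *convert_to_block(const void *src, size_t src_len)
    {
            unsigned char *blk = malloc(BLKSIZE);

            if (!blk)
                    return NULL;
            memset(blk, 0, BLKSIZE);        /* zero everything first ...     */
            memcpy(blk, src, src_len);      /* ... then copy only live data  */
            return blk;
    }
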
 
index ff6cf66ed46b22778bd03d8f370bbc5782cf99b4..7d2e2c0dba65cc3e4c26f9473dace273d25b3068 100644 (file)
@@ -262,22 +262,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
                return false;
        }
 
-       if (fi->extent_tree[EX_READ]) {
-               struct extent_info *ei = &fi->extent_tree[EX_READ]->largest;
-
-               if (ei->len &&
-                       (!f2fs_is_valid_blkaddr(sbi, ei->blk,
-                                               DATA_GENERIC_ENHANCE) ||
-                       !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
-                                               DATA_GENERIC_ENHANCE))) {
-                       set_sbi_flag(sbi, SBI_NEED_FSCK);
-                       f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
-                                 __func__, inode->i_ino,
-                                 ei->blk, ei->fofs, ei->len);
-                       return false;
-               }
-       }
-
        if (f2fs_sanity_check_inline_data(inode)) {
                set_sbi_flag(sbi, SBI_NEED_FSCK);
                f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
@@ -413,12 +397,6 @@ static int do_read_inode(struct inode *inode)
                fi->i_inline_xattr_size = 0;
        }
 
-       if (!sanity_check_inode(inode, node_page)) {
-               f2fs_put_page(node_page, 1);
-               f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
-               return -EFSCORRUPTED;
-       }
-
        /* check data exist */
        if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
                __recover_inline_status(inode, node_page);
@@ -466,11 +444,17 @@ static int do_read_inode(struct inode *inode)
                                        (fi->i_flags & F2FS_COMPR_FL)) {
                if (F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
                                        i_log_cluster_size)) {
+                       unsigned short compress_flag;
+
                        atomic_set(&fi->i_compr_blocks,
                                        le64_to_cpu(ri->i_compr_blocks));
                        fi->i_compress_algorithm = ri->i_compress_algorithm;
                        fi->i_log_cluster_size = ri->i_log_cluster_size;
-                       fi->i_compress_flag = le16_to_cpu(ri->i_compress_flag);
+                       compress_flag = le16_to_cpu(ri->i_compress_flag);
+                       fi->i_compress_level = compress_flag >>
+                                               COMPRESS_LEVEL_OFFSET;
+                       fi->i_compress_flag = compress_flag &
+                                       (BIT(COMPRESS_LEVEL_OFFSET) - 1);
                        fi->i_cluster_size = 1 << fi->i_log_cluster_size;
                        set_inode_flag(inode, FI_COMPRESSED_FILE);
                }
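
Both this hunk and the matching one in f2fs_update_inode() below treat the on-disk i_compress_flag as a packed field: the compression level occupies the bits from COMPRESS_LEVEL_OFFSET upward and the flag bits sit below it, and the in-memory inode now keeps the two halves separately. A small self-checking model of that packing; the offset value of 8 is an assumption mirroring the kernel header:

    #include <assert.h>
    #include <stdint.h>

    #define COMPRESS_LEVEL_OFFSET 8         /* assumed to match fs/f2fs/f2fs.h */

    static uint16_t pack(uint16_t flag, uint16_t level)
    {
            return flag | (uint16_t)(level << COMPRESS_LEVEL_OFFSET);
    }

    static void unpack(uint16_t raw, uint16_t *flag, uint16_t *level)
    {
            *level = raw >> COMPRESS_LEVEL_OFFSET;
            *flag  = raw & (uint16_t)((1u << COMPRESS_LEVEL_OFFSET) - 1);
    }

    int main(void)
    {
            uint16_t flag, level;

            unpack(pack(0x3, 6), &flag, &level);
            assert(flag == 0x3 && level == 6);
            return 0;
    }
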
@@ -482,6 +466,18 @@ static int do_read_inode(struct inode *inode)
        f2fs_init_read_extent_tree(inode, node_page);
        f2fs_init_age_extent_tree(inode);
 
+       if (!sanity_check_inode(inode, node_page)) {
+               f2fs_put_page(node_page, 1);
+               f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
+               return -EFSCORRUPTED;
+       }
+
+       if (!sanity_check_extent_cache(inode)) {
+               f2fs_put_page(node_page, 1);
+               f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
+               return -EFSCORRUPTED;
+       }
+
        f2fs_put_page(node_page, 1);
 
        stat_inc_inline_xattr(inode);
@@ -686,13 +682,17 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
                if (f2fs_sb_has_compression(F2FS_I_SB(inode)) &&
                        F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
                                                        i_log_cluster_size)) {
+                       unsigned short compress_flag;
+
                        ri->i_compr_blocks =
                                cpu_to_le64(atomic_read(
                                        &F2FS_I(inode)->i_compr_blocks));
                        ri->i_compress_algorithm =
                                F2FS_I(inode)->i_compress_algorithm;
-                       ri->i_compress_flag =
-                               cpu_to_le16(F2FS_I(inode)->i_compress_flag);
+                       compress_flag = F2FS_I(inode)->i_compress_flag |
+                               F2FS_I(inode)->i_compress_level <<
+                                               COMPRESS_LEVEL_OFFSET;
+                       ri->i_compress_flag = cpu_to_le16(compress_flag);
                        ri->i_log_cluster_size =
                                F2FS_I(inode)->i_log_cluster_size;
                }
@@ -714,18 +714,19 @@ void f2fs_update_inode_page(struct inode *inode)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct page *node_page;
+       int count = 0;
 retry:
        node_page = f2fs_get_node_page(sbi, inode->i_ino);
        if (IS_ERR(node_page)) {
                int err = PTR_ERR(node_page);
 
-               if (err == -ENOMEM) {
-                       cond_resched();
+               /* The node block was truncated. */
+               if (err == -ENOENT)
+                       return;
+
+               if (err == -ENOMEM || ++count <= DEFAULT_RETRY_IO_COUNT)
                        goto retry;
-               } else if (err != -ENOENT) {
-                       f2fs_stop_checkpoint(sbi, false,
-                                       STOP_CP_REASON_UPDATE_INODE);
-               }
+               f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_UPDATE_INODE);
                return;
        }
        f2fs_update_inode(inode, node_page);
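
The retry policy above distinguishes three outcomes: -ENOENT means the node block was truncated and the update is simply skipped, -ENOMEM retries indefinitely (the short-circuit keeps it from consuming the retry budget), and any other error retries a bounded number of times before the checkpoint is stopped. A compact model of that control flow; DEFAULT_RETRY_IO_COUNT of 8 is an assumption, and do_io()/escalate() are stand-ins:

    #include <errno.h>

    #define DEFAULT_RETRY_IO_COUNT 8        /* assumed to match the kernel macro */

    static int do_io(void)     { return 0; }   /* stand-in for f2fs_get_node_page() */
    static void escalate(void) { }             /* stand-in for f2fs_stop_checkpoint() */

    void update_with_retry(void)
    {
            int count = 0, err;
    retry:
            err = do_io();
            if (!err || err == -ENOENT)     /* success, or the object is gone */
                    return;
            if (err == -ENOMEM || ++count <= DEFAULT_RETRY_IO_COUNT)
                    goto retry;
            escalate();                     /* bounded retries exhausted */
    }
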
@@ -766,11 +767,18 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
 void f2fs_evict_inode(struct inode *inode)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       nid_t xnid = F2FS_I(inode)->i_xattr_nid;
+       struct f2fs_inode_info *fi = F2FS_I(inode);
+       nid_t xnid = fi->i_xattr_nid;
        int err = 0;
 
        f2fs_abort_atomic_write(inode, true);
 
+       if (fi->cow_inode) {
+               clear_inode_flag(fi->cow_inode, FI_COW_FILE);
+               iput(fi->cow_inode);
+               fi->cow_inode = NULL;
+       }
+
        trace_f2fs_evict_inode(inode);
        truncate_inode_pages_final(&inode->i_data);
 
@@ -809,10 +817,8 @@ retry:
        if (F2FS_HAS_BLOCKS(inode))
                err = f2fs_truncate(inode);
 
-       if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
-               f2fs_show_injection_info(sbi, FAULT_EVICT_INODE);
+       if (time_to_inject(sbi, FAULT_EVICT_INODE))
                err = -EIO;
-       }
 
        if (!err) {
                f2fs_lock_op(sbi);
@@ -857,7 +863,7 @@ no_delete:
        stat_dec_inline_inode(inode);
        stat_dec_compr_inode(inode);
        stat_sub_compr_blocks(inode,
-                       atomic_read(&F2FS_I(inode)->i_compr_blocks));
+                       atomic_read(&fi->i_compr_blocks));
 
        if (likely(!f2fs_cp_error(sbi) &&
                                !is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
index 3166a8939ed4f4f3ffae033adc4d462ca9cf26b6..3d5bfb1ad585d9be86a5fd527615d3896f939230 100644 (file)
 #include "iostat.h"
 #include <trace/events/f2fs.h>
 
-#define NUM_PREALLOC_IOSTAT_CTXS       128
 static struct kmem_cache *bio_iostat_ctx_cache;
 static mempool_t *bio_iostat_ctx_pool;
 
+static inline unsigned long long iostat_get_avg_bytes(struct f2fs_sb_info *sbi,
+       enum iostat_type type)
+{
+       return sbi->iostat_count[type] ? div64_u64(sbi->iostat_bytes[type],
+               sbi->iostat_count[type]) : 0;
+}
+
+#define IOSTAT_INFO_SHOW(name, type)                                   \
+       seq_printf(seq, "%-23s %-16llu %-16llu %-16llu\n",              \
+                       name":", sbi->iostat_bytes[type],               \
+                       sbi->iostat_count[type],                        \
+                       iostat_get_avg_bytes(sbi, type))
+
 int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
 {
        struct super_block *sb = seq->private;
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
-       time64_t now = ktime_get_real_seconds();
 
        if (!sbi->iostat_enable)
                return 0;
 
-       seq_printf(seq, "time:          %-16llu\n", now);
+       seq_printf(seq, "time:          %-16llu\n", ktime_get_real_seconds());
+       seq_printf(seq, "\t\t\t%-16s %-16s %-16s\n",
+                               "io_bytes", "count", "avg_bytes");
 
        /* print app write IOs */
        seq_puts(seq, "[WRITE]\n");
-       seq_printf(seq, "app buffered data:     %-16llu\n",
-                               sbi->rw_iostat[APP_BUFFERED_IO]);
-       seq_printf(seq, "app direct data:       %-16llu\n",
-                               sbi->rw_iostat[APP_DIRECT_IO]);
-       seq_printf(seq, "app mapped data:       %-16llu\n",
-                               sbi->rw_iostat[APP_MAPPED_IO]);
-       seq_printf(seq, "app buffered cdata:    %-16llu\n",
-                               sbi->rw_iostat[APP_BUFFERED_CDATA_IO]);
-       seq_printf(seq, "app mapped cdata:      %-16llu\n",
-                               sbi->rw_iostat[APP_MAPPED_CDATA_IO]);
+       IOSTAT_INFO_SHOW("app buffered data", APP_BUFFERED_IO);
+       IOSTAT_INFO_SHOW("app direct data", APP_DIRECT_IO);
+       IOSTAT_INFO_SHOW("app mapped data", APP_MAPPED_IO);
+       IOSTAT_INFO_SHOW("app buffered cdata", APP_BUFFERED_CDATA_IO);
+       IOSTAT_INFO_SHOW("app mapped cdata", APP_MAPPED_CDATA_IO);
 
        /* print fs write IOs */
-       seq_printf(seq, "fs data:               %-16llu\n",
-                               sbi->rw_iostat[FS_DATA_IO]);
-       seq_printf(seq, "fs cdata:              %-16llu\n",
-                               sbi->rw_iostat[FS_CDATA_IO]);
-       seq_printf(seq, "fs node:               %-16llu\n",
-                               sbi->rw_iostat[FS_NODE_IO]);
-       seq_printf(seq, "fs meta:               %-16llu\n",
-                               sbi->rw_iostat[FS_META_IO]);
-       seq_printf(seq, "fs gc data:            %-16llu\n",
-                               sbi->rw_iostat[FS_GC_DATA_IO]);
-       seq_printf(seq, "fs gc node:            %-16llu\n",
-                               sbi->rw_iostat[FS_GC_NODE_IO]);
-       seq_printf(seq, "fs cp data:            %-16llu\n",
-                               sbi->rw_iostat[FS_CP_DATA_IO]);
-       seq_printf(seq, "fs cp node:            %-16llu\n",
-                               sbi->rw_iostat[FS_CP_NODE_IO]);
-       seq_printf(seq, "fs cp meta:            %-16llu\n",
-                               sbi->rw_iostat[FS_CP_META_IO]);
+       IOSTAT_INFO_SHOW("fs data", FS_DATA_IO);
+       IOSTAT_INFO_SHOW("fs cdata", FS_CDATA_IO);
+       IOSTAT_INFO_SHOW("fs node", FS_NODE_IO);
+       IOSTAT_INFO_SHOW("fs meta", FS_META_IO);
+       IOSTAT_INFO_SHOW("fs gc data", FS_GC_DATA_IO);
+       IOSTAT_INFO_SHOW("fs gc node", FS_GC_NODE_IO);
+       IOSTAT_INFO_SHOW("fs cp data", FS_CP_DATA_IO);
+       IOSTAT_INFO_SHOW("fs cp node", FS_CP_NODE_IO);
+       IOSTAT_INFO_SHOW("fs cp meta", FS_CP_META_IO);
 
        /* print app read IOs */
        seq_puts(seq, "[READ]\n");
-       seq_printf(seq, "app buffered data:     %-16llu\n",
-                               sbi->rw_iostat[APP_BUFFERED_READ_IO]);
-       seq_printf(seq, "app direct data:       %-16llu\n",
-                               sbi->rw_iostat[APP_DIRECT_READ_IO]);
-       seq_printf(seq, "app mapped data:       %-16llu\n",
-                               sbi->rw_iostat[APP_MAPPED_READ_IO]);
-       seq_printf(seq, "app buffered cdata:    %-16llu\n",
-                               sbi->rw_iostat[APP_BUFFERED_CDATA_READ_IO]);
-       seq_printf(seq, "app mapped cdata:      %-16llu\n",
-                               sbi->rw_iostat[APP_MAPPED_CDATA_READ_IO]);
+       IOSTAT_INFO_SHOW("app buffered data", APP_BUFFERED_READ_IO);
+       IOSTAT_INFO_SHOW("app direct data", APP_DIRECT_READ_IO);
+       IOSTAT_INFO_SHOW("app mapped data", APP_MAPPED_READ_IO);
+       IOSTAT_INFO_SHOW("app buffered cdata", APP_BUFFERED_CDATA_READ_IO);
+       IOSTAT_INFO_SHOW("app mapped cdata", APP_MAPPED_CDATA_READ_IO);
 
        /* print fs read IOs */
-       seq_printf(seq, "fs data:               %-16llu\n",
-                               sbi->rw_iostat[FS_DATA_READ_IO]);
-       seq_printf(seq, "fs gc data:            %-16llu\n",
-                               sbi->rw_iostat[FS_GDATA_READ_IO]);
-       seq_printf(seq, "fs cdata:              %-16llu\n",
-                               sbi->rw_iostat[FS_CDATA_READ_IO]);
-       seq_printf(seq, "fs node:               %-16llu\n",
-                               sbi->rw_iostat[FS_NODE_READ_IO]);
-       seq_printf(seq, "fs meta:               %-16llu\n",
-                               sbi->rw_iostat[FS_META_READ_IO]);
+       IOSTAT_INFO_SHOW("fs data", FS_DATA_READ_IO);
+       IOSTAT_INFO_SHOW("fs gc data", FS_GDATA_READ_IO);
+       IOSTAT_INFO_SHOW("fs cdata", FS_CDATA_READ_IO);
+       IOSTAT_INFO_SHOW("fs node", FS_NODE_READ_IO);
+       IOSTAT_INFO_SHOW("fs meta", FS_META_READ_IO);
 
        /* print other IOs */
        seq_puts(seq, "[OTHER]\n");
-       seq_printf(seq, "fs discard:            %-16llu\n",
-                               sbi->rw_iostat[FS_DISCARD]);
+       IOSTAT_INFO_SHOW("fs discard", FS_DISCARD_IO);
+       IOSTAT_INFO_SHOW("fs flush", FS_FLUSH_IO);
 
        return 0;
 }
 
 static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
 {
-       int io, idx = 0;
-       unsigned int cnt;
+       int io, idx;
        struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE];
        struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
        unsigned long flags;
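
iostat_info_seq_show() now reports three columns per counter (io_bytes, count, avg_bytes), with iostat_get_avg_bytes() guarding the division so a counter that never fired averages to 0 instead of dividing by zero. The guard in isolation, as a userspace model:

    #include <stdint.h>

    /* model of iostat_get_avg_bytes(): a zero count yields 0, never a fault */
    static uint64_t avg_bytes(uint64_t total_bytes, uint64_t count)
    {
            return count ? total_bytes / count : 0;
    }
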
@@ -106,12 +94,11 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
        spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
        for (idx = 0; idx < MAX_IO_TYPE; idx++) {
                for (io = 0; io < NR_PAGE_TYPE; io++) {
-                       cnt = io_lat->bio_cnt[idx][io];
                        iostat_lat[idx][io].peak_lat =
                           jiffies_to_msecs(io_lat->peak_lat[idx][io]);
-                       iostat_lat[idx][io].cnt = cnt;
-                       iostat_lat[idx][io].avg_lat = cnt ?
-                          jiffies_to_msecs(io_lat->sum_lat[idx][io]) / cnt : 0;
+                       iostat_lat[idx][io].cnt = io_lat->bio_cnt[idx][io];
+                       iostat_lat[idx][io].avg_lat = iostat_lat[idx][io].cnt ?
+                          jiffies_to_msecs(io_lat->sum_lat[idx][io]) / iostat_lat[idx][io].cnt : 0;
                        io_lat->sum_lat[idx][io] = 0;
                        io_lat->peak_lat[idx][io] = 0;
                        io_lat->bio_cnt[idx][io] = 0;
@@ -141,9 +128,9 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
                                msecs_to_jiffies(sbi->iostat_period_ms);
 
        for (i = 0; i < NR_IO_TYPE; i++) {
-               iostat_diff[i] = sbi->rw_iostat[i] -
-                               sbi->prev_rw_iostat[i];
-               sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
+               iostat_diff[i] = sbi->iostat_bytes[i] -
+                               sbi->prev_iostat_bytes[i];
+               sbi->prev_iostat_bytes[i] = sbi->iostat_bytes[i];
        }
        spin_unlock_irqrestore(&sbi->iostat_lock, flags);
 
@@ -159,8 +146,9 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
 
        spin_lock_irq(&sbi->iostat_lock);
        for (i = 0; i < NR_IO_TYPE; i++) {
-               sbi->rw_iostat[i] = 0;
-               sbi->prev_rw_iostat[i] = 0;
+               sbi->iostat_count[i] = 0;
+               sbi->iostat_bytes[i] = 0;
+               sbi->prev_iostat_bytes[i] = 0;
        }
        spin_unlock_irq(&sbi->iostat_lock);
 
@@ -169,6 +157,13 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
        spin_unlock_irq(&sbi->iostat_lat_lock);
 }
 
+static inline void __f2fs_update_iostat(struct f2fs_sb_info *sbi,
+                       enum iostat_type type, unsigned long long io_bytes)
+{
+       sbi->iostat_bytes[type] += io_bytes;
+       sbi->iostat_count[type]++;
+}
+
 void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
                        enum iostat_type type, unsigned long long io_bytes)
 {
@@ -178,33 +173,33 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
                return;
 
        spin_lock_irqsave(&sbi->iostat_lock, flags);
-       sbi->rw_iostat[type] += io_bytes;
+       __f2fs_update_iostat(sbi, type, io_bytes);
 
        if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO)
-               sbi->rw_iostat[APP_WRITE_IO] += io_bytes;
+               __f2fs_update_iostat(sbi, APP_WRITE_IO, io_bytes);
 
        if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO)
-               sbi->rw_iostat[APP_READ_IO] += io_bytes;
+               __f2fs_update_iostat(sbi, APP_READ_IO, io_bytes);
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
        if (inode && f2fs_compressed_file(inode)) {
                if (type == APP_BUFFERED_IO)
-                       sbi->rw_iostat[APP_BUFFERED_CDATA_IO] += io_bytes;
+                       __f2fs_update_iostat(sbi, APP_BUFFERED_CDATA_IO, io_bytes);
 
                if (type == APP_BUFFERED_READ_IO)
-                       sbi->rw_iostat[APP_BUFFERED_CDATA_READ_IO] += io_bytes;
+                       __f2fs_update_iostat(sbi, APP_BUFFERED_CDATA_READ_IO, io_bytes);
 
                if (type == APP_MAPPED_READ_IO)
-                       sbi->rw_iostat[APP_MAPPED_CDATA_READ_IO] += io_bytes;
+                       __f2fs_update_iostat(sbi, APP_MAPPED_CDATA_READ_IO, io_bytes);
 
                if (type == APP_MAPPED_IO)
-                       sbi->rw_iostat[APP_MAPPED_CDATA_IO] += io_bytes;
+                       __f2fs_update_iostat(sbi, APP_MAPPED_CDATA_IO, io_bytes);
 
                if (type == FS_DATA_READ_IO)
-                       sbi->rw_iostat[FS_CDATA_READ_IO] += io_bytes;
+                       __f2fs_update_iostat(sbi, FS_CDATA_READ_IO, io_bytes);
 
                if (type == FS_DATA_IO)
-                       sbi->rw_iostat[FS_CDATA_IO] += io_bytes;
+                       __f2fs_update_iostat(sbi, FS_CDATA_IO, io_bytes);
        }
 #endif
 
@@ -214,49 +209,48 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
 }
 
 static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
-                               int rw, bool is_sync)
+                               enum iostat_lat_type lat_type)
 {
        unsigned long ts_diff;
-       unsigned int iotype = iostat_ctx->type;
+       unsigned int page_type = iostat_ctx->type;
        struct f2fs_sb_info *sbi = iostat_ctx->sbi;
        struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
-       int idx;
        unsigned long flags;
 
        if (!sbi->iostat_enable)
                return;
 
        ts_diff = jiffies - iostat_ctx->submit_ts;
-       if (iotype >= META_FLUSH)
-               iotype = META;
-
-       if (rw == 0) {
-               idx = READ_IO;
-       } else {
-               if (is_sync)
-                       idx = WRITE_SYNC_IO;
-               else
-                       idx = WRITE_ASYNC_IO;
+       if (page_type == META_FLUSH) {
+               page_type = META;
+       } else if (page_type >= NR_PAGE_TYPE) {
+               f2fs_warn(sbi, "%s: %d over NR_PAGE_TYPE", __func__, page_type);
+               return;
        }
 
        spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
-       io_lat->sum_lat[idx][iotype] += ts_diff;
-       io_lat->bio_cnt[idx][iotype]++;
-       if (ts_diff > io_lat->peak_lat[idx][iotype])
-               io_lat->peak_lat[idx][iotype] = ts_diff;
+       io_lat->sum_lat[lat_type][page_type] += ts_diff;
+       io_lat->bio_cnt[lat_type][page_type]++;
+       if (ts_diff > io_lat->peak_lat[lat_type][page_type])
+               io_lat->peak_lat[lat_type][page_type] = ts_diff;
        spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
 }
 
-void iostat_update_and_unbind_ctx(struct bio *bio, int rw)
+void iostat_update_and_unbind_ctx(struct bio *bio)
 {
        struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
-       bool is_sync = bio->bi_opf & REQ_SYNC;
+       enum iostat_lat_type lat_type;
 
-       if (rw == 0)
-               bio->bi_private = iostat_ctx->post_read_ctx;
-       else
+       if (op_is_write(bio_op(bio))) {
+               lat_type = bio->bi_opf & REQ_SYNC ?
+                               WRITE_SYNC_IO : WRITE_ASYNC_IO;
                bio->bi_private = iostat_ctx->sbi;
-       __update_iostat_latency(iostat_ctx, rw, is_sync);
+       } else {
+               lat_type = READ_IO;
+               bio->bi_private = iostat_ctx->post_read_ctx;
+       }
+
+       __update_iostat_latency(iostat_ctx, lat_type);
        mempool_free(iostat_ctx, bio_iostat_ctx_pool);
 }
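
With the rw/is_sync parameters gone, iostat_update_and_unbind_ctx() derives the latency bucket directly from the bio: reads land in READ_IO, and writes split on REQ_SYNC into sync and async buckets. The same decision as a standalone classifier; the flag encodings here are stand-ins, not the real block-layer values:

    enum lat_type { READ_IO_M, WRITE_SYNC_IO_M, WRITE_ASYNC_IO_M };

    #define OPF_WRITE 0x1   /* stand-in for op_is_write(bio_op(bio)) */
    #define OPF_SYNC  0x2   /* stand-in for REQ_SYNC */

    enum lat_type classify(unsigned int opf)
    {
            if (!(opf & OPF_WRITE))
                    return READ_IO_M;
            return (opf & OPF_SYNC) ? WRITE_SYNC_IO_M : WRITE_ASYNC_IO_M;
    }
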
 
index 2c048307b6e0b5ebba099872a046f81c0f8fe87a..eb99d05cf27273be29a53078788c4ba3dd4c9a85 100644 (file)
@@ -8,20 +8,21 @@
 
 struct bio_post_read_ctx;
 
+enum iostat_lat_type {
+       READ_IO = 0,
+       WRITE_SYNC_IO,
+       WRITE_ASYNC_IO,
+       MAX_IO_TYPE,
+};
+
 #ifdef CONFIG_F2FS_IOSTAT
 
+#define NUM_PREALLOC_IOSTAT_CTXS       128
 #define DEFAULT_IOSTAT_PERIOD_MS       3000
 #define MIN_IOSTAT_PERIOD_MS           100
 /* maximum period of iostat tracing is 1 day */
 #define MAX_IOSTAT_PERIOD_MS           8640000
 
-enum {
-       READ_IO,
-       WRITE_SYNC_IO,
-       WRITE_ASYNC_IO,
-       MAX_IO_TYPE,
-};
-
 struct iostat_lat_info {
        unsigned long sum_lat[MAX_IO_TYPE][NR_PAGE_TYPE];       /* sum of io latencies */
        unsigned long peak_lat[MAX_IO_TYPE][NR_PAGE_TYPE];      /* peak io latency */
@@ -57,7 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
        return iostat_ctx->post_read_ctx;
 }
 
-extern void iostat_update_and_unbind_ctx(struct bio *bio, int rw);
+extern void iostat_update_and_unbind_ctx(struct bio *bio);
 extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
                struct bio *bio, struct bio_post_read_ctx *ctx);
 extern int f2fs_init_iostat_processing(void);
@@ -67,7 +68,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
 #else
 static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
                enum iostat_type type, unsigned long long io_bytes) {}
-static inline void iostat_update_and_unbind_ctx(struct bio *bio, int rw) {}
+static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
 static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
                struct bio *bio, struct bio_post_read_ctx *ctx) {}
 static inline void iostat_update_submit_ctx(struct bio *bio,
index d8e01bbbf27fb4a6a63b89897570872d5786c6de..11fc4c8036a9df064562e9d68065912a381a6c87 100644 (file)
@@ -926,9 +926,6 @@ static int f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
 static int f2fs_create_whiteout(struct mnt_idmap *idmap,
                                struct inode *dir, struct inode **whiteout)
 {
-       if (unlikely(f2fs_cp_error(F2FS_I_SB(dir))))
-               return -EIO;
-
        return __f2fs_tmpfile(idmap, dir, NULL,
                                S_IFCHR | WHITEOUT_MODE, true, whiteout);
 }
@@ -966,7 +963,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 
        /*
         * If new_inode is null, the below renaming flow will
-        * add a link in old_dir which can conver inline_dir.
+        * add a link in old_dir which can convert inline_dir.
         * After then, if we failed to get the entry due to other
         * reasons like ENOMEM, we had to remove the new entry.
         * Instead of adding such the error handling routine, let's
index cf997356d9f9294379ba9daf0d660ebd5e144c2a..bd1dad5237967686a3d5359423c5e43157b3abe7 100644 (file)
@@ -1587,7 +1587,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
                .op_flags = wbc_to_write_flags(wbc),
                .page = page,
                .encrypted_page = NULL,
-               .submitted = false,
+               .submitted = 0,
                .io_type = io_type,
                .io_wbc = wbc,
        };
@@ -1651,7 +1651,6 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
        }
 
        set_page_writeback(page);
-       ClearPageError(page);
 
        fio.old_blkaddr = ni.blk_addr;
        f2fs_do_write_node_page(nid, &fio);
@@ -2083,8 +2082,6 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
                spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
 
                f2fs_wait_on_page_writeback(page, NODE, true, false);
-               if (TestClearPageError(page))
-                       ret = -EIO;
 
                put_page(page);
 
@@ -2548,10 +2545,8 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct free_nid *i = NULL;
 retry:
-       if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
-               f2fs_show_injection_info(sbi, FAULT_ALLOC_NID);
+       if (time_to_inject(sbi, FAULT_ALLOC_NID))
                return false;
-       }
 
        spin_lock(&nm_i->nid_list_lock);
 
index ae3c4e5474efa2d2869bddbb03da4b8da42bc8b4..227e258361734f0f2e78dbe0e301ed82062d0005 100644 (file)
@@ -192,18 +192,18 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean)
        if (!f2fs_is_atomic_file(inode))
                return;
 
-       clear_inode_flag(fi->cow_inode, FI_COW_FILE);
-       iput(fi->cow_inode);
-       fi->cow_inode = NULL;
        release_atomic_write_cnt(inode);
        clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
        clear_inode_flag(inode, FI_ATOMIC_REPLACE);
        clear_inode_flag(inode, FI_ATOMIC_FILE);
        stat_dec_atomic_inode(inode);
 
+       F2FS_I(inode)->atomic_write_task = NULL;
+
        if (clean) {
                truncate_inode_pages_final(inode->i_mapping);
                f2fs_i_size_write(inode, fi->original_i_size);
+               fi->original_i_size = 0;
        }
 }
 
@@ -255,6 +255,9 @@ retry:
        }
 
        f2fs_put_dnode(&dn);
+
+       trace_f2fs_replace_atomic_write_block(inode, F2FS_I(inode)->cow_inode,
+                                       index, *old_addr, new_addr, recover);
        return 0;
 }
 
@@ -262,19 +265,24 @@ static void __complete_revoke_list(struct inode *inode, struct list_head *head,
                                        bool revoke)
 {
        struct revoke_entry *cur, *tmp;
+       pgoff_t start_index = 0;
        bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);
 
        list_for_each_entry_safe(cur, tmp, head, list) {
-               if (revoke)
+               if (revoke) {
                        __replace_atomic_write_block(inode, cur->index,
                                                cur->old_addr, NULL, true);
+               } else if (truncate) {
+                       f2fs_truncate_hole(inode, start_index, cur->index);
+                       start_index = cur->index + 1;
+               }
 
                list_del(&cur->list);
                kmem_cache_free(revoke_entry_slab, cur);
        }
 
        if (!revoke && truncate)
-               f2fs_do_truncate_blocks(inode, 0, false);
+               f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
 }
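
For an atomic-replace commit, __complete_revoke_list() now punches holes only between the committed entries instead of truncating the whole file from offset zero: start_index trails one past each entry, so assuming f2fs_truncate_hole() clears the half-open range [start, end), entries at page indexes 3 and 7 yield holes [0,3) and [4,7) followed by a truncate from index 8. A runnable model of that interval walk:

    #include <stdio.h>

    static void punch_hole(long from, long to) { printf("hole [%ld,%ld)\n", from, to); }
    static void truncate_from(long idx)        { printf("truncate from %ld\n", idx); }

    int main(void)
    {
            long entries[] = { 3, 7 };      /* hypothetical committed page indexes */
            long start = 0;

            for (unsigned long i = 0; i < sizeof(entries) / sizeof(*entries); i++) {
                    punch_hole(start, entries[i]);  /* keep entries[i] itself */
                    start = entries[i] + 1;
            }
            truncate_from(start);                   /* drop everything past the last */
            return 0;
    }
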
 
 static int __f2fs_commit_atomic_write(struct inode *inode)
@@ -384,10 +392,8 @@ int f2fs_commit_atomic_write(struct inode *inode)
  */
 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 {
-       if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
-               f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
+       if (time_to_inject(sbi, FAULT_CHECKPOINT))
                f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
-       }
 
        /* balance_fs_bg is able to be pending */
        if (need && excess_cached_nats(sbi))
@@ -508,6 +514,8 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi,
 
        trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
                                test_opt(sbi, FLUSH_MERGE), ret);
+       if (!ret)
+               f2fs_update_iostat(sbi, NULL, FS_FLUSH_IO, 0);
        return ret;
 }
 
@@ -1059,7 +1067,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
        dpolicy->granularity = granularity;
 
        dpolicy->max_requests = dcc->max_discard_request;
-       dpolicy->io_aware_gran = MAX_PLIST_NUM;
+       dpolicy->io_aware_gran = dcc->discard_io_aware_gran;
        dpolicy->timeout = false;
 
        if (discard_type == DPOLICY_BG) {
@@ -1095,9 +1103,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
                                block_t start, block_t len);
 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
-                                               struct discard_policy *dpolicy,
-                                               struct discard_cmd *dc,
-                                               unsigned int *issued)
+                               struct discard_policy *dpolicy,
+                               struct discard_cmd *dc, int *issued)
 {
        struct block_device *bdev = dc->bdev;
        unsigned int max_discard_blocks =
@@ -1141,7 +1148,6 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
                dc->len += len;
 
                if (time_to_inject(sbi, FAULT_DISCARD)) {
-                       f2fs_show_injection_info(sbi, FAULT_DISCARD);
                        err = -EIO;
                } else {
                        err = __blkdev_issue_discard(bdev,
@@ -1186,7 +1192,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
 
                atomic_inc(&dcc->issued_discard);
 
-               f2fs_update_iostat(sbi, NULL, FS_DISCARD, len * F2FS_BLKSIZE);
+               f2fs_update_iostat(sbi, NULL, FS_DISCARD_IO, len * F2FS_BLKSIZE);
 
                lstart += len;
                start += len;
@@ -1378,8 +1384,8 @@ static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
        mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
 }
 
-static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
-                                       struct discard_policy *dpolicy)
+static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
+               struct discard_policy *dpolicy, int *issued)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
@@ -1387,7 +1393,6 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
        struct discard_cmd *dc;
        struct blk_plug plug;
        unsigned int pos = dcc->next_pos;
-       unsigned int issued = 0;
        bool io_interrupted = false;
 
        mutex_lock(&dcc->cmd_lock);
@@ -1414,9 +1419,9 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
                }
 
                dcc->next_pos = dc->lstart + dc->len;
-               err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+               err = __submit_discard_cmd(sbi, dpolicy, dc, issued);
 
-               if (issued >= dpolicy->max_requests)
+               if (*issued >= dpolicy->max_requests)
                        break;
 next:
                node = rb_next(&dc->rb_node);
@@ -1432,10 +1437,8 @@ next:
 
        mutex_unlock(&dcc->cmd_lock);
 
-       if (!issued && io_interrupted)
-               issued = -1;
-
-       return issued;
+       if (!(*issued) && io_interrupted)
+               *issued = -1;
 }
 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
                                        struct discard_policy *dpolicy);
@@ -1463,8 +1466,10 @@ retry:
                if (i + 1 < dpolicy->granularity)
                        break;
 
-               if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered)
-                       return __issue_discard_cmd_orderly(sbi, dpolicy);
+               if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) {
+                       __issue_discard_cmd_orderly(sbi, dpolicy, &issued);
+                       return issued;
+               }
 
                pend_list = &dcc->pend_list[i];
 
@@ -1609,9 +1614,9 @@ static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
                return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
 
        /* wait all */
-       __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
+       __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, MIN_DISCARD_GRANULARITY);
        discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
-       __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
+       __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, MIN_DISCARD_GRANULARITY);
        discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
 
        return discard_blks;
@@ -1653,7 +1658,14 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
        }
 }
 
-/* This comes from f2fs_put_super */
+/**
+ * f2fs_issue_discard_timeout() - Issue all pending discard commands within UMOUNT_DISCARD_TIMEOUT
+ * @sbi: the f2fs_sb_info to issue discard commands for
+ *
+ * When UMOUNT_DISCARD_TIMEOUT is exceeded, all remaining discard commands are dropped.
+ *
+ * Return: true if all discard commands were issued or none needed issuing, otherwise false.
+ */
 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
@@ -1661,7 +1673,7 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
        bool dropped;
 
        if (!atomic_read(&dcc->discard_cmd_cnt))
-               return false;
+               return true;
 
        __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
                                        dcc->discard_granularity);
@@ -1672,7 +1684,7 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
        __wait_all_discard_cmd(sbi, NULL);
 
        f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
-       return dropped;
+       return !dropped;
 }
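
The return value is inverted along with the empty-queue case: the function now answers "did every discard command get issued?", so having nothing to issue counts as success. A hedged sketch of the new contract; the shape of the caller is an assumption, not quoted from this series:

    #include <stdbool.h>

    /* model: previously returned 'dropped'; now returns 'issued everything' */
    bool issue_discard_timeout_model(int pending, bool dropped)
    {
            if (!pending)
                    return true;    /* nothing to issue counts as success */
            return !dropped;
    }
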
 
 static int issue_discard_thread(void *data)
@@ -1694,13 +1706,14 @@ static int issue_discard_thread(void *data)
 
                if (sbi->gc_mode == GC_URGENT_HIGH ||
                        !f2fs_available_free_memory(sbi, DISCARD_CACHE))
-                       __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
+                       __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE,
+                                               MIN_DISCARD_GRANULARITY);
                else
                        __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
                                                dcc->discard_granularity);
 
                if (dcc->discard_wake)
-                       dcc->discard_wake = 0;
+                       dcc->discard_wake = false;
 
                /* clean up pending candidates before going to sleep */
                if (atomic_read(&dcc->queued_discard))
@@ -2065,6 +2078,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
        if (!dcc)
                return -ENOMEM;
 
+       dcc->discard_io_aware_gran = MAX_PLIST_NUM;
        dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
        dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
        if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
@@ -2327,17 +2341,13 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
        return is_cp;
 }
 
-/*
- * This function should be resided under the curseg_mutex lock
- */
-static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
-                                       struct f2fs_summary *sum)
+static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int type)
 {
        struct curseg_info *curseg = CURSEG_I(sbi, type);
-       void *addr = curseg->sum_blk;
 
-       addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
-       memcpy(addr, sum, sizeof(struct f2fs_summary));
+       if (sbi->ckpt->alloc_type[type] == SSR)
+               return sbi->blocks_per_seg;
+       return curseg->next_blkoff;
 }
 
 /*
@@ -2349,15 +2359,11 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
        int i, sum_in_page;
 
        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
-               if (sbi->ckpt->alloc_type[i] == SSR)
-                       valid_sum_count += sbi->blocks_per_seg;
-               else {
-                       if (for_ra)
-                               valid_sum_count += le16_to_cpu(
-                                       F2FS_CKPT(sbi)->cur_data_blkoff[i]);
-                       else
-                               valid_sum_count += curseg_blkoff(sbi, i);
-               }
+               if (sbi->ckpt->alloc_type[i] != SSR && for_ra)
+                       valid_sum_count +=
+                               le16_to_cpu(F2FS_CKPT(sbi)->cur_data_blkoff[i]);
+               else
+                       valid_sum_count += f2fs_curseg_valid_blocks(sbi, i);
        }
 
        sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
@@ -2628,30 +2634,10 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi,
        return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
 }
 
-/*
- * If a segment is written by LFS manner, next block offset is just obtained
- * by increasing the current block offset. However, if a segment is written by
- * SSR manner, next block offset obtained by calling __next_free_blkoff
- */
-static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
-                               struct curseg_info *seg)
+static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
+               struct curseg_info *seg)
 {
-       if (seg->alloc_type == SSR) {
-               seg->next_blkoff =
-                       __next_free_blkoff(sbi, seg->segno,
-                                               seg->next_blkoff + 1);
-       } else {
-               seg->next_blkoff++;
-               if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) {
-                       /* To allocate block chunks in different sizes, use random number */
-                       if (--seg->fragment_remained_chunk <= 0) {
-                               seg->fragment_remained_chunk =
-                                  get_random_u32_inclusive(1, sbi->max_fragment_chunk);
-                               seg->next_blkoff +=
-                                  get_random_u32_inclusive(1, sbi->max_fragment_hole);
-                       }
-               }
-       }
+       return __next_free_blkoff(sbi, seg->segno, seg->next_blkoff + 1);
 }
 
 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
@@ -2909,33 +2895,23 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
        struct curseg_info *curseg = CURSEG_I(sbi, type);
        unsigned int old_segno;
 
-       if (!curseg->inited)
-               goto alloc;
-
-       if (force || curseg->next_blkoff ||
-               get_valid_blocks(sbi, curseg->segno, new_sec))
-               goto alloc;
-
-       if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
+       if (!force && curseg->inited &&
+           !curseg->next_blkoff &&
+           !get_valid_blocks(sbi, curseg->segno, new_sec) &&
+           !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
                return;
-alloc:
+
        old_segno = curseg->segno;
        new_curseg(sbi, type, true);
        stat_inc_seg_type(sbi, curseg);
        locate_dirty_segment(sbi, old_segno);
 }
 
-static void __allocate_new_section(struct f2fs_sb_info *sbi,
-                                               int type, bool force)
-{
-       __allocate_new_segment(sbi, type, true, force);
-}
-
 void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
 {
        f2fs_down_read(&SM_I(sbi)->curseg_lock);
        down_write(&SIT_I(sbi)->sentry_lock);
-       __allocate_new_section(sbi, type, force);
+       __allocate_new_segment(sbi, type, true, force);
        up_write(&SIT_I(sbi)->sentry_lock);
        f2fs_up_read(&SM_I(sbi)->curseg_lock);
 }
@@ -3113,13 +3089,6 @@ out:
        return err;
 }
 
-static bool __has_curseg_space(struct f2fs_sb_info *sbi,
-                                       struct curseg_info *curseg)
-{
-       return curseg->next_blkoff < f2fs_usable_blks_in_seg(sbi,
-                                                       curseg->segno);
-}
-
 int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
 {
        switch (hint) {
@@ -3238,6 +3207,19 @@ static int __get_segment_type(struct f2fs_io_info *fio)
        return type;
 }
 
+static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
+               struct curseg_info *seg)
+{
+       /* To allocate block chunks in different sizes, use random number */
+       if (--seg->fragment_remained_chunk > 0)
+               return;
+
+       seg->fragment_remained_chunk =
+               get_random_u32_inclusive(1, sbi->max_fragment_chunk);
+       seg->next_blkoff +=
+               get_random_u32_inclusive(1, sbi->max_fragment_hole);
+}
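
f2fs_randomize_chunk() factors out the FS_MODE_FRAGMENT_BLK behavior that __refresh_next_blkoff() used to inline: allocate a random-sized run of consecutive blocks, then skip a random-sized hole before starting the next run. A userspace model of the resulting block-offset walk; the tuning values are hypothetical:

    #include <stdlib.h>

    static int chunk_left;
    static const int max_chunk = 4, max_hole = 4;   /* hypothetical tuning values */

    int next_blkoff(int blkoff)
    {
            blkoff++;                               /* LFS: normally just advance */
            if (--chunk_left <= 0) {                /* chunk exhausted: start a new one */
                    chunk_left = 1 + rand() % max_chunk;
                    blkoff    += 1 + rand() % max_hole;     /* leave a hole behind */
            }
            return blkoff;
    }
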
+
 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
                block_t old_blkaddr, block_t *new_blkaddr,
                struct f2fs_summary *sum, int type,
@@ -3248,6 +3230,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
        unsigned long long old_mtime;
        bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
        struct seg_entry *se = NULL;
+       bool segment_full = false;
 
        f2fs_down_read(&SM_I(sbi)->curseg_lock);
 
@@ -3266,15 +3249,16 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 
        f2fs_wait_discard_bio(sbi, *new_blkaddr);
 
-       /*
-        * __add_sum_entry should be resided under the curseg_mutex
-        * because, this function updates a summary entry in the
-        * current summary block.
-        */
-       __add_sum_entry(sbi, type, sum);
-
-       __refresh_next_blkoff(sbi, curseg);
-
+       curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
+       if (curseg->alloc_type == SSR) {
+               curseg->next_blkoff = f2fs_find_next_ssr_block(sbi, curseg);
+       } else {
+               curseg->next_blkoff++;
+               if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
+                       f2fs_randomize_chunk(sbi, curseg);
+       }
+       if (curseg->next_blkoff >= f2fs_usable_blks_in_seg(sbi, curseg->segno))
+               segment_full = true;
        stat_inc_block_count(sbi, curseg);
 
        if (from_gc) {
@@ -3293,10 +3277,11 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
        if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
                update_sit_entry(sbi, old_blkaddr, -1);
 
-       if (!__has_curseg_space(sbi, curseg)) {
-               /*
-                * Flush out current segment and replace it with new segment.
-                */
+       /*
+        * If the current segment is full, flush it out and replace it with a
+        * new segment.
+        */
+       if (segment_full) {
                if (from_gc) {
                        get_atssr_segment(sbi, type, se->type,
                                                AT_SSR, se->mtime);
@@ -3331,10 +3316,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
                struct f2fs_bio_info *io;
 
                if (F2FS_IO_ALIGNED(sbi))
-                       fio->retry = false;
+                       fio->retry = 0;
 
                INIT_LIST_HEAD(&fio->list);
-               fio->in_list = true;
+               fio->in_list = 1;
                io = sbi->write_io[fio->type] + fio->temp;
                spin_lock(&io->io_lock);
                list_add_tail(&fio->list, &io->io_list);
@@ -3415,14 +3400,13 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
                .new_blkaddr = page->index,
                .page = page,
                .encrypted_page = NULL,
-               .in_list = false,
+               .in_list = 0,
        };
 
        if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
                fio.op_flags &= ~REQ_META;
 
        set_page_writeback(page);
-       ClearPageError(page);
        f2fs_submit_page_write(&fio);
 
        stat_inc_meta_count(sbi, page->index);
@@ -3487,7 +3471,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
 
        stat_inc_inplace_blocks(fio->sbi);
 
-       if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE)))
+       if (fio->bio && !IS_F2FS_IPU_NOCACHE(sbi))
                err = f2fs_merge_page_bio(fio);
        else
                err = f2fs_submit_page_bio(fio);
@@ -3576,7 +3560,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
        }
 
        curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
-       __add_sum_entry(sbi, type, sum);
+       curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
 
        if (!recover_curseg || recover_newaddr) {
                if (!from_gc)
@@ -3634,7 +3618,7 @@ void f2fs_wait_on_page_writeback(struct page *page,
 
                /* submit cached LFS IO */
                f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
-               /* sbumit cached IPU IO */
+               /* submit cached IPU IO */
                f2fs_submit_merged_ipu_write(sbi, NULL, page);
                if (ordered) {
                        wait_on_page_writeback(page);
@@ -3885,15 +3869,8 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
 
        /* Step 3: write summary entries */
        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
-               unsigned short blkoff;
-
                seg_i = CURSEG_I(sbi, i);
-               if (sbi->ckpt->alloc_type[i] == SSR)
-                       blkoff = sbi->blocks_per_seg;
-               else
-                       blkoff = curseg_blkoff(sbi, i);
-
-               for (j = 0; j < blkoff; j++) {
+               for (j = 0; j < f2fs_curseg_valid_blocks(sbi, i); j++) {
                        if (!page) {
                                page = f2fs_grab_meta_page(sbi, blkaddr++);
                                kaddr = (unsigned char *)page_address(page);
@@ -5126,7 +5103,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
                sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
 
        if (!f2fs_lfs_mode(sbi))
-               sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
+               sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC);
        sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
        sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
        sm_info->min_seq_blocks = sbi->blocks_per_seg;
index 3ad1b7b6fa9467430fbfeaa1a66c51259974e489..efdb7fc3b7975b1184812afb15e5751beea7b22c 100644 (file)
@@ -670,6 +670,9 @@ static inline int utilization(struct f2fs_sb_info *sbi)
 
 #define SMALL_VOLUME_SEGMENTS  (16 * 512)      /* 16GB */
 
+#define F2FS_IPU_DISABLE       0
+
+/* Modifications to this enum must be kept in sync with the ipu_mode_names array */
 enum {
        F2FS_IPU_FORCE,
        F2FS_IPU_SSR,
@@ -679,8 +682,29 @@ enum {
        F2FS_IPU_ASYNC,
        F2FS_IPU_NOCACHE,
        F2FS_IPU_HONOR_OPU_WRITE,
+       F2FS_IPU_MAX,
 };
 
+static inline bool IS_F2FS_IPU_DISABLE(struct f2fs_sb_info *sbi)
+{
+       return SM_I(sbi)->ipu_policy == F2FS_IPU_DISABLE;
+}
+
+#define F2FS_IPU_POLICY(name)                                  \
+static inline bool IS_##name(struct f2fs_sb_info *sbi)         \
+{                                                              \
+       return SM_I(sbi)->ipu_policy & BIT(name);               \
+}
+
+F2FS_IPU_POLICY(F2FS_IPU_FORCE);
+F2FS_IPU_POLICY(F2FS_IPU_SSR);
+F2FS_IPU_POLICY(F2FS_IPU_UTIL);
+F2FS_IPU_POLICY(F2FS_IPU_SSR_UTIL);
+F2FS_IPU_POLICY(F2FS_IPU_FSYNC);
+F2FS_IPU_POLICY(F2FS_IPU_ASYNC);
+F2FS_IPU_POLICY(F2FS_IPU_NOCACHE);
+F2FS_IPU_POLICY(F2FS_IPU_HONOR_OPU_WRITE);
+
 static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi,
                int type)
 {
@@ -695,15 +719,10 @@ static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi,
        return curseg->alloc_type;
 }
 
-static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
-{
-       struct curseg_info *curseg = CURSEG_I(sbi, type);
-       return curseg->next_blkoff;
-}
-
-static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
+static inline bool valid_main_segno(struct f2fs_sb_info *sbi,
+               unsigned int segno)
 {
-       f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
+       return segno <= (MAIN_SEGS(sbi) - 1);
 }
 
 static inline void verify_fio_blkaddr(struct f2fs_io_info *fio)
@@ -758,7 +777,7 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
 
        /* check segment usage, and check boundary of a given segment number */
        if (unlikely(GET_SIT_VBLOCKS(raw_sit) > usable_blks_per_seg
-                                       || segno > TOTAL_SEGS(sbi) - 1)) {
+                                       || !valid_main_segno(sbi, segno))) {
                f2fs_err(sbi, "Wrong valid blocks %d or segno %u",
                         GET_SIT_VBLOCKS(raw_sit), segno);
                set_sbi_flag(sbi, SBI_NEED_FSCK);
@@ -775,7 +794,7 @@ static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
        unsigned int offset = SIT_BLOCK_OFFSET(start);
        block_t blk_addr = sit_i->sit_base_addr + offset;
 
-       check_seg_range(sbi, start);
+       f2fs_bug_on(sbi, !valid_main_segno(sbi, start));
 
 #ifdef CONFIG_F2FS_CHECK_FS
        if (f2fs_test_bit(offset, sit_i->sit_bitmap) !=
@@ -924,6 +943,6 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
        if (!wakeup || !is_idle(sbi, DISCARD_TIME))
                return;
 wake_up:
-       dcc->discard_wake = 1;
+       dcc->discard_wake = true;
        wake_up_interruptible_all(&dcc->discard_wait_queue);
 }
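
The F2FS_IPU_POLICY() helpers above treat ipu_policy as a bitmask: a value of 0 is F2FS_IPU_DISABLE, and each BIT(F2FS_IPU_*) can be set independently, so several policies may be active at once. A minimal, self-contained sketch of the same pattern in plain C (simplified stand-in types, not the kernel's):

    #include <stdbool.h>
    #include <stdio.h>

    #define BIT(n) (1U << (n))

    /* Simplified stand-ins for the f2fs types; illustration only. */
    enum { IPU_FORCE, IPU_SSR, IPU_FSYNC, IPU_MAX };

    struct sm_info { unsigned int ipu_policy; };

    /* Same macro-generated predicate pattern as F2FS_IPU_POLICY() above. */
    #define IPU_POLICY(name) \
    static bool is_##name(const struct sm_info *sm) \
    { return sm->ipu_policy & BIT(name); }

    IPU_POLICY(IPU_FORCE)
    IPU_POLICY(IPU_FSYNC)

    int main(void)
    {
            struct sm_info sm = { .ipu_policy = BIT(IPU_FSYNC) };

            /* ipu_policy == 0 would correspond to F2FS_IPU_DISABLE. */
            printf("force=%d fsync=%d\n", is_IPU_FORCE(&sm), is_IPU_FSYNC(&sm));
            return 0;
    }

This is also why the sysfs store hook later in this series rejects any value t >= BIT(F2FS_IPU_MAX): such a value would set bits outside the known policies.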
index 64d3556d61a55440027990068fd72f0baa0791c7..fbaaabbcd6de7aec74e20eb5940c762b76c918d6 100644 (file)
@@ -1288,19 +1288,18 @@ default_check:
         * zone alignment optimization. This is optional for host-aware
         * devices, but mandatory for host-managed zoned block devices.
         */
-#ifndef CONFIG_BLK_DEV_ZONED
-       if (f2fs_sb_has_blkzoned(sbi)) {
-               f2fs_err(sbi, "Zoned block device support is not enabled");
-               return -EINVAL;
-       }
-#endif
        if (f2fs_sb_has_blkzoned(sbi)) {
+#ifdef CONFIG_BLK_DEV_ZONED
                if (F2FS_OPTION(sbi).discard_unit !=
                                                DISCARD_UNIT_SECTION) {
                        f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default");
                        F2FS_OPTION(sbi).discard_unit =
                                        DISCARD_UNIT_SECTION;
                }
+#else
+               f2fs_err(sbi, "Zoned block device support is not enabled");
+               return -EINVAL;
+#endif
        }
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
@@ -1341,12 +1340,12 @@ default_check:
        }
 
        if (test_opt(sbi, DISABLE_CHECKPOINT) && f2fs_lfs_mode(sbi)) {
-               f2fs_err(sbi, "LFS not compatible with checkpoint=disable");
+               f2fs_err(sbi, "LFS is not compatible with checkpoint=disable");
                return -EINVAL;
        }
 
        if (test_opt(sbi, ATGC) && f2fs_lfs_mode(sbi)) {
-               f2fs_err(sbi, "LFS not compatible with ATGC");
+               f2fs_err(sbi, "LFS is not compatible with ATGC");
                return -EINVAL;
        }
 
@@ -1366,10 +1365,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 {
        struct f2fs_inode_info *fi;
 
-       if (time_to_inject(F2FS_SB(sb), FAULT_SLAB_ALLOC)) {
-               f2fs_show_injection_info(F2FS_SB(sb), FAULT_SLAB_ALLOC);
+       if (time_to_inject(F2FS_SB(sb), FAULT_SLAB_ALLOC))
                return NULL;
-       }
 
        fi = alloc_inode_sb(sb, f2fs_inode_cachep, GFP_F2FS_ZERO);
        if (!fi)
@@ -1424,8 +1421,6 @@ static int f2fs_drop_inode(struct inode *inode)
                        atomic_inc(&inode->i_count);
                        spin_unlock(&inode->i_lock);
 
-                       f2fs_abort_atomic_write(inode, true);
-
                        /* should remain fi->extent_tree for writepage */
                        f2fs_destroy_extent_node(inode);
 
@@ -1543,7 +1538,7 @@ static void f2fs_put_super(struct super_block *sb)
 {
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
        int i;
-       bool dropped;
+       bool done;
 
        /* unregister procfs/sysfs entries in advance to avoid race case */
        f2fs_unregister_sysfs(sbi);
@@ -1573,9 +1568,8 @@ static void f2fs_put_super(struct super_block *sb)
        }
 
        /* be sure to wait for any on-going discard commands */
-       dropped = f2fs_issue_discard_timeout(sbi);
-
-       if (f2fs_realtime_discard_enable(sbi) && !sbi->discard_blks && !dropped) {
+       done = f2fs_issue_discard_timeout(sbi);
+       if (f2fs_realtime_discard_enable(sbi) && !sbi->discard_blks && done) {
                struct cp_control cpc = {
                        .reason = CP_UMOUNT | CP_TRIMMED,
                };
@@ -1900,15 +1894,24 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 
        if (test_opt(sbi, GC_MERGE))
                seq_puts(seq, ",gc_merge");
+       else
+               seq_puts(seq, ",nogc_merge");
 
        if (test_opt(sbi, DISABLE_ROLL_FORWARD))
                seq_puts(seq, ",disable_roll_forward");
        if (test_opt(sbi, NORECOVERY))
                seq_puts(seq, ",norecovery");
-       if (test_opt(sbi, DISCARD))
+       if (test_opt(sbi, DISCARD)) {
                seq_puts(seq, ",discard");
-       else
+               if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK)
+                       seq_printf(seq, ",discard_unit=%s", "block");
+               else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
+                       seq_printf(seq, ",discard_unit=%s", "segment");
+               else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
+                       seq_printf(seq, ",discard_unit=%s", "section");
+       } else {
                seq_puts(seq, ",nodiscard");
+       }
        if (test_opt(sbi, NOHEAP))
                seq_puts(seq, ",no_heap");
        else
@@ -2032,13 +2035,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
        if (test_opt(sbi, ATGC))
                seq_puts(seq, ",atgc");
 
-       if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK)
-               seq_printf(seq, ",discard_unit=%s", "block");
-       else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
-               seq_printf(seq, ",discard_unit=%s", "segment");
-       else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
-               seq_printf(seq, ",discard_unit=%s", "section");
-
        if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_NORMAL)
                seq_printf(seq, ",memory=%s", "normal");
        else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
@@ -2300,6 +2296,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                }
        }
 #endif
+       if (f2fs_lfs_mode(sbi) && !IS_F2FS_IPU_DISABLE(sbi)) {
+               err = -EINVAL;
+               f2fs_warn(sbi, "LFS is not compatible with IPU");
+               goto restore_opts;
+       }
+
        /* disallow enable atgc dynamically */
        if (no_atgc == !!test_opt(sbi, ATGC)) {
                err = -EINVAL;
@@ -2589,10 +2591,8 @@ retry:
 
 int f2fs_dquot_initialize(struct inode *inode)
 {
-       if (time_to_inject(F2FS_I_SB(inode), FAULT_DQUOT_INIT)) {
-               f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_DQUOT_INIT);
+       if (time_to_inject(F2FS_I_SB(inode), FAULT_DQUOT_INIT))
                return -ESRCH;
-       }
 
        return dquot_initialize(inode);
 }
@@ -4083,8 +4083,9 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
                if (f2fs_block_unit_discard(sbi))
                        SM_I(sbi)->dcc_info->discard_granularity =
                                                MIN_DISCARD_GRANULARITY;
-               SM_I(sbi)->ipu_policy = 1 << F2FS_IPU_FORCE |
-                                       1 << F2FS_IPU_HONOR_OPU_WRITE;
+               if (!f2fs_lfs_mode(sbi))
+                       SM_I(sbi)->ipu_policy = BIT(F2FS_IPU_FORCE) |
+                                               BIT(F2FS_IPU_HONOR_OPU_WRITE);
        }
 
        sbi->readdir_ra = true;
index 83a366f3ee80ec493cfb36e1c19977387aa7f460..0b19163c90d412297aa08e62c904772bba2de81d 100644 (file)
@@ -473,6 +473,17 @@ out:
                return count;
        }
 
+       if (!strcmp(a->attr.name, "discard_io_aware_gran")) {
+               if (t > MAX_PLIST_NUM)
+                       return -EINVAL;
+               if (!f2fs_block_unit_discard(sbi))
+                       return -EINVAL;
+               if (t == *ui)
+                       return count;
+               *ui = t;
+               return count;
+       }
+
        if (!strcmp(a->attr.name, "discard_granularity")) {
                if (t == 0 || t > MAX_PLIST_NUM)
                        return -EINVAL;
@@ -511,7 +522,7 @@ out:
                } else if (t == 1) {
                        sbi->gc_mode = GC_URGENT_HIGH;
                        if (sbi->gc_thread) {
-                               sbi->gc_thread->gc_wake = 1;
+                               sbi->gc_thread->gc_wake = true;
                                wake_up_interruptible_all(
                                        &sbi->gc_thread->gc_wait_queue_head);
                                wake_up_discard_thread(sbi, true);
@@ -521,7 +532,7 @@ out:
                } else if (t == 3) {
                        sbi->gc_mode = GC_URGENT_MID;
                        if (sbi->gc_thread) {
-                               sbi->gc_thread->gc_wake = 1;
+                               sbi->gc_thread->gc_wake = true;
                                wake_up_interruptible_all(
                                        &sbi->gc_thread->gc_wait_queue_head);
                        }
@@ -678,7 +689,16 @@ out:
        }
 
        if (!strcmp(a->attr.name, "warm_data_age_threshold")) {
-               if (t == 0 || t <= sbi->hot_data_age_threshold)
+               if (t <= sbi->hot_data_age_threshold)
+                       return -EINVAL;
+               if (t == *ui)
+                       return count;
+               *ui = (unsigned int)t;
+               return count;
+       }
+
+       if (!strcmp(a->attr.name, "last_age_weight")) {
+               if (t > 100)
                        return -EINVAL;
                if (t == *ui)
                        return count;
@@ -686,6 +706,15 @@ out:
                return count;
        }
 
+       if (!strcmp(a->attr.name, "ipu_policy")) {
+               if (t >= BIT(F2FS_IPU_MAX))
+                       return -EINVAL;
+               if (t && f2fs_lfs_mode(sbi))
+                       return -EINVAL;
+               SM_I(sbi)->ipu_policy = (unsigned int)t;
+               return count;
+       }
+
        *ui = (unsigned int)t;
 
        return count;
@@ -825,6 +854,7 @@ F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_request, max_discard_req
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, min_discard_issue_time, min_discard_issue_time);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_issue_time);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_io_aware_gran, discard_io_aware_gran);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_urgent_util, discard_urgent_util);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_ordered_discard, max_ordered_discard);
@@ -944,6 +974,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block)
 /* For block age extent cache */
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, hot_data_age_threshold, hot_data_age_threshold);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, warm_data_age_threshold, warm_data_age_threshold);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, last_age_weight, last_age_weight);
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -960,6 +991,7 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(min_discard_issue_time),
        ATTR_LIST(mid_discard_issue_time),
        ATTR_LIST(max_discard_issue_time),
+       ATTR_LIST(discard_io_aware_gran),
        ATTR_LIST(discard_urgent_util),
        ATTR_LIST(discard_granularity),
        ATTR_LIST(max_ordered_discard),
@@ -1042,6 +1074,7 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(revoked_atomic_block),
        ATTR_LIST(hot_data_age_threshold),
        ATTR_LIST(warm_data_age_threshold),
+       ATTR_LIST(last_age_weight),
        NULL,
 };
 ATTRIBUTE_GROUPS(f2fs);
@@ -1129,13 +1162,13 @@ static const struct sysfs_ops f2fs_attr_ops = {
        .store  = f2fs_attr_store,
 };
 
-static struct kobj_type f2fs_sb_ktype = {
+static const struct kobj_type f2fs_sb_ktype = {
        .default_groups = f2fs_groups,
        .sysfs_ops      = &f2fs_attr_ops,
        .release        = f2fs_sb_release,
 };
 
-static struct kobj_type f2fs_ktype = {
+static const struct kobj_type f2fs_ktype = {
        .sysfs_ops      = &f2fs_attr_ops,
 };
 
@@ -1143,7 +1176,7 @@ static struct kset f2fs_kset = {
        .kobj   = {.ktype = &f2fs_ktype},
 };
 
-static struct kobj_type f2fs_feat_ktype = {
+static const struct kobj_type f2fs_feat_ktype = {
        .default_groups = f2fs_feat_groups,
        .sysfs_ops      = &f2fs_attr_ops,
 };
@@ -1184,7 +1217,7 @@ static const struct sysfs_ops f2fs_stat_attr_ops = {
        .store  = f2fs_stat_attr_store,
 };
 
-static struct kobj_type f2fs_stat_ktype = {
+static const struct kobj_type f2fs_stat_ktype = {
        .default_groups = f2fs_stat_groups,
        .sysfs_ops      = &f2fs_stat_attr_ops,
        .release        = f2fs_stat_kobj_release,
@@ -1211,7 +1244,7 @@ static const struct sysfs_ops f2fs_feature_list_attr_ops = {
        .show   = f2fs_sb_feat_attr_show,
 };
 
-static struct kobj_type f2fs_feature_list_ktype = {
+static const struct kobj_type f2fs_feature_list_ktype = {
        .default_groups = f2fs_sb_feat_groups,
        .sysfs_ops      = &f2fs_feature_list_attr_ops,
        .release        = f2fs_feature_list_kobj_release,
index f320ed8172ecd49d10125427c93a3ddf738c3227..4fc95f353a7a96c6cc67c6ea5dfabea0cc591e9c 100644 (file)
@@ -81,7 +81,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
                size_t n = min_t(size_t, count,
                                 PAGE_SIZE - offset_in_page(pos));
                struct page *page;
-               void *fsdata;
+               void *fsdata = NULL;
                int res;
 
                res = aops->write_begin(NULL, mapping, pos, n, &page, &fsdata);
index 122ed89ebf9f22901a7580a0470e5a0d8636cc0b..1986b4f18a9013ee27f056b7c871df215f05f862 100644 (file)
@@ -295,11 +295,11 @@ static void hfsplus_put_super(struct super_block *sb)
                hfsplus_sync_fs(sb, 1);
        }
 
+       iput(sbi->alloc_file);
+       iput(sbi->hidden_dir);
        hfs_btree_close(sbi->attr_tree);
        hfs_btree_close(sbi->cat_tree);
        hfs_btree_close(sbi->ext_tree);
-       iput(sbi->alloc_file);
-       iput(sbi->hidden_dir);
        kfree(sbi->s_vhdr_buf);
        kfree(sbi->s_backup_vhdr_buf);
        unload_nls(sbi->nls);
index c18bb50c31b6f5cf4229cbf5499e169282058c50..28b4f15c19eb03af3b5b598dac01ed70c2cb923e 100644 (file)
@@ -412,7 +412,7 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
        if (page->index >= end_index)
                count = inode->i_size & (PAGE_SIZE-1);
 
-       buffer = kmap(page);
+       buffer = kmap_local_page(page);
 
        err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count);
        if (err != count) {
@@ -428,9 +428,9 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
        err = 0;
 
  out:
-       kunmap(page);
-
+       kunmap_local(buffer);
        unlock_page(page);
+
        return err;
 }
 
@@ -441,7 +441,7 @@ static int hostfs_read_folio(struct file *file, struct folio *folio)
        loff_t start = page_offset(page);
        int bytes_read, ret = 0;
 
-       buffer = kmap(page);
+       buffer = kmap_local_page(page);
        bytes_read = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
                        PAGE_SIZE);
        if (bytes_read < 0) {
@@ -458,8 +458,9 @@ static int hostfs_read_folio(struct file *file, struct folio *folio)
 
  out:
        flush_dcache_page(page);
-       kunmap(page);
+       kunmap_local(buffer);
        unlock_page(page);
+
        return ret;
 }
 
@@ -484,9 +485,9 @@ static int hostfs_write_end(struct file *file, struct address_space *mapping,
        unsigned from = pos & (PAGE_SIZE - 1);
        int err;
 
-       buffer = kmap(page);
+       buffer = kmap_local_page(page);
        err = write_file(FILE_HOSTFS_I(file)->fd, &pos, buffer + from, copied);
-       kunmap(page);
+       kunmap_local(buffer);
 
        if (!PageUptodate(page) && err == PAGE_SIZE)
                SetPageUptodate(page);
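
The hostfs hunks above convert kmap()/kunmap() to kmap_local_page()/kunmap_local(). Local mappings are cheaper, but the returned address is only valid in the acquiring context and nested mappings must be released in reverse order. A minimal sketch of the calling pattern (hypothetical helper, not hostfs code):

    /*
     * Sketch: copy a page's contents into a buffer via a local mapping.
     * Note that kunmap_local() takes the address returned by
     * kmap_local_page(), not the page, which is why the converted
     * functions above keep the buffer pointer around for the unmap.
     */
    static void copy_page_to_buf(struct page *page, void *dst, size_t len)
    {
            void *src = kmap_local_page(page);      /* map */

            memcpy(dst, src, len);  /* use only in this context */
            kunmap_local(src);      /* unmap via the saved address */
    }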
index 6a404ac1c178f03652d7c8f8acc1b1191441c9b3..15de1385012eb4c6de0b6886d5029858b0f50684 100644 (file)
@@ -1010,36 +1010,28 @@ repeat:
         * ie. locked but not dirty) or tune2fs (which may actually have
         * the buffer dirtied, ugh.)  */
 
-       if (buffer_dirty(bh)) {
+       if (buffer_dirty(bh) && jh->b_transaction) {
+               warn_dirty_buffer(bh);
                /*
-                * First question: is this buffer already part of the current
-                * transaction or the existing committing transaction?
-                */
-               if (jh->b_transaction) {
-                       J_ASSERT_JH(jh,
-                               jh->b_transaction == transaction ||
-                               jh->b_transaction ==
-                                       journal->j_committing_transaction);
-                       if (jh->b_next_transaction)
-                               J_ASSERT_JH(jh, jh->b_next_transaction ==
-                                                       transaction);
-                       warn_dirty_buffer(bh);
-               }
-               /*
-                * In any case we need to clean the dirty flag and we must
-                * do it under the buffer lock to be sure we don't race
-                * with running write-out.
+                * We need to clear the dirty flag and we must do it under the
+                * buffer lock to be sure we don't race with running write-out.
                 */
                JBUFFER_TRACE(jh, "Journalling dirty buffer");
                clear_buffer_dirty(bh);
+               /*
+                * The buffer is going to be added to BJ_Reserved list now and
+                * nothing guarantees jbd2_journal_dirty_metadata() will ever
+                * be called for it. So we need to set the jbddirty bit here to
+                * make sure the buffer is dirtied and written out when the
+                * journaling machinery is done with it.
+                */
                set_buffer_jbddirty(bh);
        }
 
-       unlock_buffer(bh);
-
        error = -EROFS;
        if (is_handle_aborted(handle)) {
                spin_unlock(&jh->b_state_lock);
+               unlock_buffer(bh);
                goto out;
        }
        error = 0;
@@ -1049,8 +1041,10 @@ repeat:
         * b_next_transaction points to it
         */
        if (jh->b_transaction == transaction ||
-           jh->b_next_transaction == transaction)
+           jh->b_next_transaction == transaction) {
+               unlock_buffer(bh);
                goto done;
+       }
 
        /*
         * this is the first time this transaction is touching this buffer,
@@ -1074,10 +1068,24 @@ repeat:
                 */
                smp_wmb();
                spin_lock(&journal->j_list_lock);
+               if (test_clear_buffer_dirty(bh)) {
+                       /*
+                        * Clear the buffer's dirty flag and assign
+                        * jh->b_transaction under journal->j_list_lock to
+                        * prevent the bh from being removed from the
+                        * checkpoint list while it is in an intermediate
+                        * state (not dirty and jh->b_transaction is NULL).
+                        */
+                       JBUFFER_TRACE(jh, "Journalling dirty buffer");
+                       set_buffer_jbddirty(bh);
+               }
                __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
                spin_unlock(&journal->j_list_lock);
+               unlock_buffer(bh);
                goto done;
        }
+       unlock_buffer(bh);
+
        /*
         * If there is already a copy-out version of this buffer, then we don't
         * need to make another one
index 4849a4c9a0e24f1b8b4202630d68e491eac113dc..764f19dec3f0f04fc3aa5b444f6f6659136c03a4 100644 (file)
@@ -364,20 +364,25 @@ void jffs2_free_comprbuf(unsigned char *comprbuf, unsigned char *orig)
 
 int __init jffs2_compressors_init(void)
 {
+       int ret = 0;
 /* Registering compressors */
-#ifdef CONFIG_JFFS2_ZLIB
-       jffs2_zlib_init();
-#endif
-#ifdef CONFIG_JFFS2_RTIME
-       jffs2_rtime_init();
-#endif
-#ifdef CONFIG_JFFS2_RUBIN
-       jffs2_rubinmips_init();
-       jffs2_dynrubin_init();
-#endif
-#ifdef CONFIG_JFFS2_LZO
-       jffs2_lzo_init();
-#endif
+       ret = jffs2_zlib_init();
+       if (ret)
+               goto exit;
+       ret = jffs2_rtime_init();
+       if (ret)
+               goto exit_zlib;
+       ret = jffs2_rubinmips_init();
+       if (ret)
+               goto exit_rtime;
+       ret = jffs2_dynrubin_init();
+       if (ret)
+               goto exit_rubinmips;
+       ret = jffs2_lzo_init();
+       if (ret)
+               goto exit_dynrubin;
+
+
 /* Setting default compression mode */
 #ifdef CONFIG_JFFS2_CMODE_NONE
        jffs2_compression_mode = JFFS2_COMPR_MODE_NONE;
@@ -396,23 +401,26 @@ int __init jffs2_compressors_init(void)
 #endif
 #endif
        return 0;
+
+exit_dynrubin:
+       jffs2_dynrubin_exit();
+exit_rubinmips:
+       jffs2_rubinmips_exit();
+exit_rtime:
+       jffs2_rtime_exit();
+exit_zlib:
+       jffs2_zlib_exit();
+exit:
+       return ret;
 }
 
 int jffs2_compressors_exit(void)
 {
 /* Unregistering compressors */
-#ifdef CONFIG_JFFS2_LZO
        jffs2_lzo_exit();
-#endif
-#ifdef CONFIG_JFFS2_RUBIN
        jffs2_dynrubin_exit();
        jffs2_rubinmips_exit();
-#endif
-#ifdef CONFIG_JFFS2_RTIME
        jffs2_rtime_exit();
-#endif
-#ifdef CONFIG_JFFS2_ZLIB
        jffs2_zlib_exit();
-#endif
        return 0;
 }
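
The rewritten jffs2_compressors_init() uses the kernel's staged-init idiom: each stage that fails jumps to a label that unwinds, in reverse order, only the stages that already succeeded. A self-contained sketch of the idiom with hypothetical stages (not the jffs2 functions):

    #include <stdio.h>

    static int a_init(void) { return 0; }
    static void a_exit(void) { }
    static int b_init(void) { return 0; }
    static void b_exit(void) { }
    static int c_init(void) { return -1; } /* pretend the last stage fails */

    static int init_all(void)
    {
            int ret;

            ret = a_init();
            if (ret)
                    goto out;
            ret = b_init();
            if (ret)
                    goto out_a;
            ret = c_init();
            if (ret)
                    goto out_b;
            return 0;

    out_b:  /* unwind in reverse order of initialization */
            b_exit();
    out_a:
            a_exit();
    out:
            return ret;
    }

    int main(void)
    {
            printf("init_all() = %d\n", init_all());        /* prints -1 */
            return 0;
    }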
index 5e91d578f4ed858c2ac575f375619000c5c6814c..3716b6b7924c05333cd6f0b63fd7175eaa41b9d2 100644 (file)
@@ -88,18 +88,32 @@ int jffs2_rubinmips_init(void);
 void jffs2_rubinmips_exit(void);
 int jffs2_dynrubin_init(void);
 void jffs2_dynrubin_exit(void);
+#else
+static inline int jffs2_rubinmips_init(void) { return 0; }
+static inline void jffs2_rubinmips_exit(void) {}
+static inline int jffs2_dynrubin_init(void) { return 0; }
+static inline void jffs2_dynrubin_exit(void) {}
 #endif
 #ifdef CONFIG_JFFS2_RTIME
-int jffs2_rtime_init(void);
-void jffs2_rtime_exit(void);
+extern int jffs2_rtime_init(void);
+extern void jffs2_rtime_exit(void);
+#else
+static inline int jffs2_rtime_init(void) { return 0; }
+static inline void jffs2_rtime_exit(void) {}
 #endif
 #ifdef CONFIG_JFFS2_ZLIB
-int jffs2_zlib_init(void);
-void jffs2_zlib_exit(void);
+extern int jffs2_zlib_init(void);
+extern void jffs2_zlib_exit(void);
+#else
+static inline int jffs2_zlib_init(void) { return 0; }
+static inline void jffs2_zlib_exit(void) {}
 #endif
 #ifdef CONFIG_JFFS2_LZO
-int jffs2_lzo_init(void);
-void jffs2_lzo_exit(void);
+extern int jffs2_lzo_init(void);
+extern void jffs2_lzo_exit(void);
+#else
+static inline int jffs2_lzo_init(void) { return 0; }
+static inline void jffs2_lzo_exit(void) {}
 #endif
 
 #endif /* __JFFS2_COMPR_H__ */
index 3cf71befa47546c8c9184e1b675dcee85b570f26..96b0275ce95747f3377dcd6ac0240735048ab70b 100644 (file)
@@ -137,19 +137,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
        struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
        struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
        pgoff_t index = pos >> PAGE_SHIFT;
-       uint32_t pageofs = index << PAGE_SHIFT;
        int ret = 0;
 
        jffs2_dbg(1, "%s()\n", __func__);
 
-       if (pageofs > inode->i_size) {
-               /* Make new hole frag from old EOF to new page */
+       if (pos > inode->i_size) {
+               /* Make new hole frag from old EOF to new position */
                struct jffs2_raw_inode ri;
                struct jffs2_full_dnode *fn;
                uint32_t alloc_len;
 
-               jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
-                         (unsigned int)inode->i_size, pageofs);
+               jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new position\n",
+                         (unsigned int)inode->i_size, (uint32_t)pos);
 
                ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
                                          ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
@@ -169,10 +168,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
                ri.mode = cpu_to_jemode(inode->i_mode);
                ri.uid = cpu_to_je16(i_uid_read(inode));
                ri.gid = cpu_to_je16(i_gid_read(inode));
-               ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs));
+               ri.isize = cpu_to_je32((uint32_t)pos);
                ri.atime = ri.ctime = ri.mtime = cpu_to_je32(JFFS2_NOW());
                ri.offset = cpu_to_je32(inode->i_size);
-               ri.dsize = cpu_to_je32(pageofs - inode->i_size);
+               ri.dsize = cpu_to_je32((uint32_t)pos - inode->i_size);
                ri.csize = cpu_to_je32(0);
                ri.compr = JFFS2_COMPR_ZERO;
                ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
@@ -202,7 +201,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
                        goto out_err;
                }
                jffs2_complete_reservation(c);
-               inode->i_size = pageofs;
+               inode->i_size = pos;
                mutex_unlock(&f->sem);
        }
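
The switch from pageofs to pos above changes how far the hole frag extends when writing past EOF. A worked example, assuming PAGE_SIZE = 4096, i_size = 100, and a write starting at pos = 5000 (page index 1, so the old pageofs = 4096):

    old: hole covers [100, 4096), i_size becomes max(100, 4096) = 4096
    new: hole covers [100, 5000), i_size becomes 5000

With the new logic the hole ends exactly where the write begins, instead of at a page boundary short of it.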
 
index 09174898efd008d1b9a2939ee48a3a736931aeff..038516bee1abaac6b8acb0ff8652cc12f474e848 100644 (file)
@@ -403,7 +403,7 @@ int jffs2_do_remount_fs(struct super_block *sb, struct fs_context *fc)
        /* We stop if it was running, then restart if it needs to.
           This also catches the case where it was stopped and this
           is just a remount to restart it.
-          Flush the writebuffer, if neccecary, else we loose it */
+          Flush the writebuffer, if necessary, else we lose it */
        if (!sb_rdonly(sb)) {
                jffs2_stop_garbage_collect_thread(c);
                mutex_lock(&c->alloc_sem);
index 765838578a722ca034b151f9ce6596123422752f..a3eb1e82694778e907658fd6ea79f0d8d2eee724 100644 (file)
@@ -193,7 +193,8 @@ int dbMount(struct inode *ipbmap)
        bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
        bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
        bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
-       if (bmp->db_agl2size > L2MAXL2SIZE - L2MAXAG) {
+       if (bmp->db_agl2size > L2MAXL2SIZE - L2MAXAG ||
+           bmp->db_agl2size < 0) {
                err = -EINVAL;
                goto err_release_metapage;
        }
index f00d43b8ac0a8c6c51ebfadebb359aa1152fdfbf..e9a45dea748a2494707e851735deeafa1b89fdd2 100644 (file)
@@ -134,7 +134,7 @@ static ssize_t netfs_extract_user_to_sg(struct iov_iter *iter,
                npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
                sg_max -= npages;
 
-               for (; npages < 0; npages--) {
+               for (; npages > 0; npages--) {
                        struct page *page = *pages;
                        size_t seg = min_t(size_t, PAGE_SIZE - off, len);
 
index 192cad0662d8b942124df25ab0dfcccd3a4401c9..b1e32ec4a9d4188d807c0e3a8d09c7992f9c6eda 100644 (file)
@@ -105,14 +105,6 @@ static int __ocfs2_move_extent(handle_t *handle,
         */
        replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED;
 
-       ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
-                                     context->et.et_root_bh,
-                                     OCFS2_JOURNAL_ACCESS_WRITE);
-       if (ret) {
-               mlog_errno(ret);
-               goto out;
-       }
-
        ret = ocfs2_split_extent(handle, &context->et, path, index,
                                 &replace_rec, context->meta_ac,
                                 &context->dealloc);
@@ -121,8 +113,6 @@ static int __ocfs2_move_extent(handle_t *handle,
                goto out;
        }
 
-       ocfs2_journal_dirty(handle, context->et.et_root_bh);
-
        context->new_phys_cpos = new_p_cpos;
 
        /*
@@ -444,7 +434,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
                        bg = (struct ocfs2_group_desc *)gd_bh->b_data;
 
                        if (vict_blkno < (le64_to_cpu(bg->bg_blkno) +
-                                               le16_to_cpu(bg->bg_bits))) {
+                                               (le16_to_cpu(bg->bg_bits) << bits_per_unit))) {
 
                                *ret_bh = gd_bh;
                                *vict_bit = (vict_blkno - blkno) >>
@@ -559,6 +549,7 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh,
                        last_free_bits++;
 
                if (last_free_bits == move_len) {
+                       i -= move_len;
                        *goal_bit = i;
                        *phys_cpos = base_cpos + i;
                        break;
@@ -1030,18 +1021,19 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
 
        context->range = &range;
 
+       /*
+        * The default threshold for the defragmentation is 1M, since
+        * our maximum cluster size is also 1M.
+        * Any thoughts?
+        */
+       if (!range.me_threshold)
+               range.me_threshold = 1024 * 1024;
+
+       if (range.me_threshold > i_size_read(inode))
+               range.me_threshold = i_size_read(inode);
+
        if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
                context->auto_defrag = 1;
-               /*
-                * ok, the default theshold for the defragmentation
-                * is 1M, since our maximum clustersize was 1M also.
-                * any thought?
-                */
-               if (!range.me_threshold)
-                       range.me_threshold = 1024 * 1024;
-
-               if (range.me_threshold > i_size_read(inode))
-                       range.me_threshold = i_size_read(inode);
 
                if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG)
                        context->partial = 1;
index 8038cf6525831c9e6f9c0789694e5ae27fb6467d..4401a73d4032d856958a7164b9efa133466688a3 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -368,7 +368,37 @@ COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, compat_arg_u64_dual(offset
  * access() needs to use the real uid/gid, not the effective uid/gid.
  * We do this by temporarily clearing all FS-related capabilities and
  * switching the fsuid/fsgid around to the real ones.
+ *
+ * Creating new credentials is expensive, so we try to skip doing it,
+ * which we can if the result would match what we already got.
  */
+static bool access_need_override_creds(int flags)
+{
+       const struct cred *cred;
+
+       if (flags & AT_EACCESS)
+               return false;
+
+       cred = current_cred();
+       if (!uid_eq(cred->fsuid, cred->uid) ||
+           !gid_eq(cred->fsgid, cred->gid))
+               return true;
+
+       if (!issecure(SECURE_NO_SETUID_FIXUP)) {
+               kuid_t root_uid = make_kuid(cred->user_ns, 0);
+               if (!uid_eq(cred->uid, root_uid)) {
+                       if (!cap_isclear(cred->cap_effective))
+                               return true;
+               } else {
+                       if (!cap_isidentical(cred->cap_effective,
+                           cred->cap_permitted))
+                               return true;
+               }
+       }
+
+       return false;
+}
+
 static const struct cred *access_override_creds(void)
 {
        const struct cred *old_cred;
@@ -378,6 +408,12 @@ static const struct cred *access_override_creds(void)
        if (!override_cred)
                return NULL;
 
+       /*
+        * XXX: access_need_override_creds() performs checks in hopes of
+        * skipping this work. Make sure it stays in sync when making any
+        * changes to this routine.
+        */
+
        override_cred->fsuid = override_cred->uid;
        override_cred->fsgid = override_cred->gid;
 
@@ -437,7 +473,7 @@ static long do_faccessat(int dfd, const char __user *filename, int mode, int fla
        if (flags & AT_EMPTY_PATH)
                lookup_flags |= LOOKUP_EMPTY;
 
-       if (!(flags & AT_EACCESS)) {
+       if (access_need_override_creds(flags)) {
                old_cred = access_override_creds();
                if (!old_cred)
                        return -ENOMEM;
index 49283b8103c7ea28809bc1ea4026272a0eebff59..9b0315d34c58fb3d1780901cd420d02d1d53f269 100644 (file)
@@ -300,13 +300,8 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
 static void render_cap_t(struct seq_file *m, const char *header,
                        kernel_cap_t *a)
 {
-       unsigned __capi;
-
        seq_puts(m, header);
-       CAP_FOR_EACH_U32(__capi) {
-               seq_put_hex_ll(m, NULL,
-                          a->cap[CAP_LAST_U32 - __capi], 8);
-       }
+       seq_put_hex_ll(m, NULL, a->val, 16);
        seq_putc(m, '\n');
 }
 
index e8b9b756f0acaa488ed2c8739a58d05a985213c9..d76eb7b39f56419a703fa28472e1868b97a3c81c 100644 (file)
@@ -209,11 +209,10 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
        subtract_lebs += 1;
 
        /*
-        * The GC journal head LEB is not really accessible. And since
-        * different write types go to different heads, we may count only on
-        * one head's space.
+        * Since different write types go to different heads, we should
+        * reserve one LEB for each head.
         */
-       subtract_lebs += c->jhead_cnt - 1;
+       subtract_lebs += c->jhead_cnt;
 
        /* We also reserve one LEB for deletions, which bypass budgeting */
        subtract_lebs += 1;
@@ -400,7 +399,7 @@ static int calc_dd_growth(const struct ubifs_info *c,
        dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
 
        if (req->dirtied_ino)
-               dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
+               dd_growth += c->bi.inode_budget * req->dirtied_ino;
        if (req->mod_dent)
                dd_growth += c->bi.dent_budget;
        dd_growth += req->dirtied_ino_d;
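
The dirtied-inode fix above replaces a shift with a multiplication, which matters as soon as more than two inodes are dirtied in one request. A worked example, assuming a hypothetical inode_budget of 160 bytes and req->dirtied_ino == 3:

    old: 160 << (3 - 1) = 640   /* doubles with each extra inode */
    new: 160 * 3        = 480   /* one inode budget per dirtied inode */

The two formulas only coincide for dirtied_ino values of 1 and 2.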
index 1e92c1730c16540cf804fa74299a7b1228eda45e..1505539f6fe974ae73990e57773a8c96628136f4 100644 (file)
@@ -1151,7 +1151,6 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir,
        int err, sz_change, len = strlen(symname);
        struct fscrypt_str disk_link;
        struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
-                                       .new_ino_d = ALIGN(len, 8),
                                        .dirtied_ino = 1 };
        struct fscrypt_name nm;
 
@@ -1167,6 +1166,7 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir,
         * Budget request settings: new inode, new direntry and changing parent
         * directory inode.
         */
+       req.new_ino_d = ALIGN(disk_link.len - 1, 8);
        err = ubifs_budget_space(c, &req);
        if (err)
                return err;
@@ -1324,6 +1324,8 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (unlink) {
                ubifs_assert(c, inode_is_locked(new_inode));
 
+               /* Budget for old inode's data when its nlink > 1. */
+               req.dirtied_ino_d = ALIGN(ubifs_inode(new_inode)->data_len, 8);
                err = ubifs_purge_xattrs(new_inode);
                if (err)
                        return err;
@@ -1566,6 +1568,15 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
 
        ubifs_assert(c, fst_inode && snd_inode);
 
+       /*
+        * Budget request settings: changing two direntries, changing the two
+        * parent directory inodes.
+        */
+
+       dbg_gen("dent '%pd' ino %lu in dir ino %lu exchange dent '%pd' ino %lu in dir ino %lu",
+               old_dentry, fst_inode->i_ino, old_dir->i_ino,
+               new_dentry, snd_inode->i_ino, new_dir->i_ino);
+
        err = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &fst_nm);
        if (err)
                return err;
@@ -1576,6 +1587,10 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
                return err;
        }
 
+       err = ubifs_budget_space(c, &req);
+       if (err)
+               goto out;
+
        lock_4_inodes(old_dir, new_dir, NULL, NULL);
 
        time = current_time(old_dir);
@@ -1601,6 +1616,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
        unlock_4_inodes(old_dir, new_dir, NULL, NULL);
        ubifs_release_budget(c, &req);
 
+out:
        fscrypt_free_filename(&fst_nm);
        fscrypt_free_filename(&snd_nm);
        return err;
index 8cb5d76b301ccde611be04b7662acdcbdb1ead82..979ab1d9d0c39ddf56ea9d9f8e1f5c2e4c30a3d3 100644 (file)
@@ -1032,7 +1032,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
                if (page->index >= synced_i_size >> PAGE_SHIFT) {
                        err = inode->i_sb->s_op->write_inode(inode, NULL);
                        if (err)
-                               goto out_unlock;
+                               goto out_redirty;
                        /*
                         * The inode has been written, but the write-buffer has
                         * not been synchronized, so in case of an unclean
@@ -1060,11 +1060,17 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
        if (i_size > synced_i_size) {
                err = inode->i_sb->s_op->write_inode(inode, NULL);
                if (err)
-                       goto out_unlock;
+                       goto out_redirty;
        }
 
        return do_writepage(page, len);
-
+out_redirty:
+       /*
+        * redirty_page_for_writepage() won't call ubifs_dirty_inode() because
+        * it passes I_DIRTY_PAGES flag while calling __mark_inode_dirty(), so
+        * there is no need to do space budget for dirty inode.
+        */
+       redirty_page_for_writepage(wbc, page);
 out_unlock:
        unlock_page(page);
        return err;
@@ -1466,14 +1472,23 @@ static bool ubifs_release_folio(struct folio *folio, gfp_t unused_gfp_flags)
        struct inode *inode = folio->mapping->host;
        struct ubifs_info *c = inode->i_sb->s_fs_info;
 
-       /*
-        * An attempt to release a dirty page without budgeting for it - should
-        * not happen.
-        */
        if (folio_test_writeback(folio))
                return false;
+
+       /*
+        * Page is private but not dirty, weird? There is one sequence of
+        * events that makes it happen: ubifs_writepage() skipped the page
+        * because its index is beyond i_size (for example, the file was
+        * truncated by another process A), and the page was then invalidated
+        * by an fadvise64 syscall before process A's truncate completed.
+        */
        ubifs_assert(c, folio_test_private(folio));
-       ubifs_assert(c, 0);
+       if (folio_test_checked(folio))
+               release_new_page_budget(c);
+       else
+               release_existing_page_budget(c);
+
+       atomic_long_dec(&c->dirty_pg_cnt);
        folio_detach_private(folio);
        folio_clear_checked(folio);
        return true;
index 1607a3c76681a287e1c9c81f79ab06f19f96efc5..01d8eb1703820c0f55c4c7b9a8ca1a2d3c205dcd 100644 (file)
@@ -488,7 +488,7 @@ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
 }
 
 /**
- * wbuf_timer_callback - write-buffer timer callback function.
+ * wbuf_timer_callback_nolock - write-buffer timer callback function.
  * @timer: timer data (write-buffer descriptor)
  *
  * This function is called when the write-buffer timer expires.
@@ -505,7 +505,7 @@ static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
 }
 
 /**
- * new_wbuf_timer - start new write-buffer timer.
+ * new_wbuf_timer_nolock - start new write-buffer timer.
  * @c: UBIFS file-system description object
  * @wbuf: write-buffer descriptor
  */
@@ -531,7 +531,7 @@ static void new_wbuf_timer_nolock(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
 }
 
 /**
- * cancel_wbuf_timer - cancel write-buffer timer.
+ * cancel_wbuf_timer_nolock - cancel write-buffer timer.
  * @wbuf: write-buffer descriptor
  */
 static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
index d02509920bafadd9897c3c4fece885d3bc1efed9..dc52ac0f4a345f30d43fc69b7642d76014aeadc0 100644 (file)
@@ -1201,9 +1201,13 @@ out_free:
  * ubifs_jnl_rename - rename a directory entry.
  * @c: UBIFS file-system description object
  * @old_dir: parent inode of directory entry to rename
- * @old_dentry: directory entry to rename
+ * @old_inode: directory entry's inode to rename
+ * @old_nm: name of the old directory entry to rename
  * @new_dir: parent inode of directory entry to rename
- * @new_dentry: new directory entry (or directory entry to replace)
+ * @new_inode: new directory entry's inode (or directory entry's inode to
+ *             replace)
+ * @new_nm: new name of the new directory entry
+ * @whiteout: whiteout inode
  * @sync: non-zero if the write-buffer has to be synchronized
  *
  * This function implements the re-name operation which may involve writing up
index d0c9a09988bc7ba59d78391fe10cc9e15dbdaa28..32cb147597960ca2aa20aab79a3b14152a304900 100644 (file)
@@ -833,7 +833,7 @@ static int alloc_wbufs(struct ubifs_info *c)
                INIT_LIST_HEAD(&c->jheads[i].buds_list);
                err = ubifs_wbuf_init(c, &c->jheads[i].wbuf);
                if (err)
-                       return err;
+                       goto out_wbuf;
 
                c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
                c->jheads[i].wbuf.jhead = i;
@@ -841,7 +841,7 @@ static int alloc_wbufs(struct ubifs_info *c)
                c->jheads[i].log_hash = ubifs_hash_get_desc(c);
                if (IS_ERR(c->jheads[i].log_hash)) {
                        err = PTR_ERR(c->jheads[i].log_hash);
-                       goto out;
+                       goto out_log_hash;
                }
        }
 
@@ -854,9 +854,18 @@ static int alloc_wbufs(struct ubifs_info *c)
 
        return 0;
 
-out:
-       while (i--)
+out_log_hash:
+       kfree(c->jheads[i].wbuf.buf);
+       kfree(c->jheads[i].wbuf.inodes);
+
+out_wbuf:
+       while (i--) {
+               kfree(c->jheads[i].wbuf.buf);
+               kfree(c->jheads[i].wbuf.inodes);
                kfree(c->jheads[i].log_hash);
+       }
+       kfree(c->jheads);
+       c->jheads = NULL;
 
        return err;
 }
index 06ad8fa1fcfb08b7c69aec565db4a7186b2629a9..1c958148bb877f7ba6d9c562945798b363d6bd55 100644 (file)
@@ -74,13 +74,13 @@ static const struct sysfs_ops ubifs_attr_ops = {
        .show   = ubifs_attr_show,
 };
 
-static struct kobj_type ubifs_sb_ktype = {
+static const struct kobj_type ubifs_sb_ktype = {
        .default_groups = ubifs_groups,
        .sysfs_ops      = &ubifs_attr_ops,
        .release        = ubifs_sb_release,
 };
 
-static struct kobj_type ubifs_ktype = {
+static const struct kobj_type ubifs_ktype = {
        .sysfs_ops      = &ubifs_attr_ops,
 };
 
@@ -144,6 +144,8 @@ int __init ubifs_sysfs_init(void)
        kobject_set_name(&ubifs_kset.kobj, "ubifs");
        ubifs_kset.kobj.parent = fs_kobj;
        ret = kset_register(&ubifs_kset);
+       if (ret)
+               kset_put(&ubifs_kset);
 
        return ret;
 }
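
The ubifs_sysfs_init() fix follows the kobject rule that once the embedded kobject has been initialized, a failed registration must be answered with a put so the reference is not leaked. A sketch of the resulting pattern, assuming a statically embedded kset as in ubifs:

    static struct kset my_kset;     /* hypothetical embedded kset */

    static int my_sysfs_init(void)
    {
            int ret;

            kobject_set_name(&my_kset.kobj, "my_fs");
            my_kset.kobj.parent = fs_kobj;

            ret = kset_register(&my_kset);
            if (ret)
                    kset_put(&my_kset);     /* drop the initialization ref */
            return ret;
    }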
index 488f3da7a6c6ceef0e81a6323605249422c0b5af..2469f72eeaabb16867c6485b8acb745c36f969f5 100644 (file)
@@ -267,11 +267,18 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
        if (zbr->len) {
                err = insert_old_idx(c, zbr->lnum, zbr->offs);
                if (unlikely(err))
-                       return ERR_PTR(err);
+                       /*
+                        * Obsolete znodes will be freed by tnc_destroy_cnext()
+                        * or free_obsolete_znodes(); copied-up znodes should
+                        * be added back to the tnc and freed by
+                        * ubifs_destroy_tnc_subtree().
+                        */
+                       goto out;
                err = add_idx_dirt(c, zbr->lnum, zbr->len);
        } else
                err = 0;
 
+out:
        zbr->znode = zn;
        zbr->lnum = 0;
        zbr->offs = 0;
@@ -3053,6 +3060,21 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
                cnext = cnext->cnext;
                if (ubifs_zn_obsolete(znode))
                        kfree(znode);
+               else if (!ubifs_zn_cow(znode)) {
+                       /*
+                        * Don't forget to update the clean znode count
+                        * after a failed commit, because ubifs checks this
+                        * count while closing the tnc. A non-obsolete znode
+                        * could be re-dirtied during the commit, so the
+                        * dirty flag is untrustworthy. The 'COW_ZNODE' flag
+                        * is set for each dirty znode before committing,
+                        * and it is cleared once the znode becomes clean,
+                        * so clean znodes can be counted according to this
+                        * flag.
+                        */
+                       atomic_long_inc(&c->clean_zn_cnt);
+                       atomic_long_inc(&ubifs_clean_zn_cnt);
+               }
        } while (cnext && cnext != c->cnext);
 }
 
index 9063b73536f805cffecd8e5e25bdd8fa48d29fb8..4c36044140e7eba9234b87c3a4bc6ab4b73ec64b 100644 (file)
@@ -1623,8 +1623,13 @@ static inline int ubifs_check_hmac(const struct ubifs_info *c,
        return crypto_memneq(expected, got, c->hmac_desc_len);
 }
 
+#ifdef CONFIG_UBIFS_FS_AUTHENTICATION
 void ubifs_bad_hash(const struct ubifs_info *c, const void *node,
                    const u8 *hash, int lnum, int offs);
+#else
+static inline void ubifs_bad_hash(const struct ubifs_info *c, const void *node,
+                                 const u8 *hash, int lnum, int offs) {}
+#endif
 
 int __ubifs_node_check_hash(const struct ubifs_info *c, const void *buf,
                          const u8 *expected);
index f7a9607c2b9578ce459dee3ebc87360ea834f327..2210e5eb1ea06e9b16b824c0662f976f919e8d05 100644 (file)
@@ -193,7 +193,7 @@ static int udf_adinicb_writepage(struct folio *folio,
        struct udf_inode_info *iinfo = UDF_I(inode);
 
        BUG_ON(!PageLocked(page));
-       memcpy_to_page(page, 0, iinfo->i_data + iinfo->i_lenEAttr,
+       memcpy_from_page(iinfo->i_data + iinfo->i_lenEAttr, page, 0,
                       i_size_read(inode));
        unlock_page(page);
        mark_inode_dirty(inode);
@@ -241,6 +241,15 @@ static int udf_read_folio(struct file *file, struct folio *folio)
 
 static void udf_readahead(struct readahead_control *rac)
 {
+       struct udf_inode_info *iinfo = UDF_I(rac->mapping->host);
+
+       /*
+        * No readahead is needed for in-ICB files, and udf_get_block() would
+        * get confused by such a file anyway.
+        */
+       if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
+               return;
+
        mpage_readahead(rac, udf_get_block);
 }
 
@@ -407,6 +416,9 @@ static int udf_map_block(struct inode *inode, struct udf_map_rq *map)
        int err;
        struct udf_inode_info *iinfo = UDF_I(inode);
 
+       if (WARN_ON_ONCE(iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB))
+               return -EFSCORRUPTED;
+
        map->oflags = 0;
        if (!(map->iflags & UDF_MAP_CREATE)) {
                struct kernel_lb_addr eloc;
index bb0c700afe3cb19d3daac50d274af8d0941fb8a0..86696a1c6891b779340afcc1a0bb53ac910459e1 100644 (file)
@@ -44,16 +44,15 @@ xfs_perag_get(
        xfs_agnumber_t          agno)
 {
        struct xfs_perag        *pag;
-       int                     ref = 0;
 
        rcu_read_lock();
        pag = radix_tree_lookup(&mp->m_perag_tree, agno);
        if (pag) {
+               trace_xfs_perag_get(pag, _RET_IP_);
                ASSERT(atomic_read(&pag->pag_ref) >= 0);
-               ref = atomic_inc_return(&pag->pag_ref);
+               atomic_inc(&pag->pag_ref);
        }
        rcu_read_unlock();
-       trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
        return pag;
 }
 
@@ -68,7 +67,6 @@ xfs_perag_get_tag(
 {
        struct xfs_perag        *pag;
        int                     found;
-       int                     ref;
 
        rcu_read_lock();
        found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
@@ -77,9 +75,9 @@ xfs_perag_get_tag(
                rcu_read_unlock();
                return NULL;
        }
-       ref = atomic_inc_return(&pag->pag_ref);
+       trace_xfs_perag_get_tag(pag, _RET_IP_);
+       atomic_inc(&pag->pag_ref);
        rcu_read_unlock();
-       trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
        return pag;
 }
 
@@ -87,11 +85,68 @@ void
 xfs_perag_put(
        struct xfs_perag        *pag)
 {
-       int     ref;
-
+       trace_xfs_perag_put(pag, _RET_IP_);
        ASSERT(atomic_read(&pag->pag_ref) > 0);
-       ref = atomic_dec_return(&pag->pag_ref);
-       trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
+       atomic_dec(&pag->pag_ref);
+}
+
+/*
+ * Active references for perag structures. This is for short-term access to
+ * the per-AG structures for walking trees or accessing state. If an AG is
+ * being shrunk or is offline, then this will fail to find that AG and
+ * return NULL instead.
+ */
+struct xfs_perag *
+xfs_perag_grab(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno)
+{
+       struct xfs_perag        *pag;
+
+       rcu_read_lock();
+       pag = radix_tree_lookup(&mp->m_perag_tree, agno);
+       if (pag) {
+               trace_xfs_perag_grab(pag, _RET_IP_);
+               if (!atomic_inc_not_zero(&pag->pag_active_ref))
+                       pag = NULL;
+       }
+       rcu_read_unlock();
+       return pag;
+}
+
+/*
+ * Search from @first to find the next perag with the given tag set.
+ */
+struct xfs_perag *
+xfs_perag_grab_tag(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          first,
+       int                     tag)
+{
+       struct xfs_perag        *pag;
+       int                     found;
+
+       rcu_read_lock();
+       found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+                                       (void **)&pag, first, 1, tag);
+       if (found <= 0) {
+               rcu_read_unlock();
+               return NULL;
+       }
+       trace_xfs_perag_grab_tag(pag, _RET_IP_);
+       if (!atomic_inc_not_zero(&pag->pag_active_ref))
+               pag = NULL;
+       rcu_read_unlock();
+       return pag;
+}
+
+void
+xfs_perag_rele(
+       struct xfs_perag        *pag)
+{
+       trace_xfs_perag_rele(pag, _RET_IP_);
+       if (atomic_dec_and_test(&pag->pag_active_ref))
+               wake_up(&pag->pag_active_wq);
 }
 
 /*
@@ -196,6 +251,10 @@ xfs_free_perag(
                cancel_delayed_work_sync(&pag->pag_blockgc_work);
                xfs_buf_hash_destroy(pag);
 
+               /* drop the mount's active reference */
+               xfs_perag_rele(pag);
+               XFS_IS_CORRUPT(pag->pag_mount,
+                               atomic_read(&pag->pag_active_ref) != 0);
                call_rcu(&pag->rcu_head, __xfs_free_perag);
        }
 }
@@ -314,6 +373,7 @@ xfs_initialize_perag(
                INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
                INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
                init_waitqueue_head(&pag->pagb_wait);
+               init_waitqueue_head(&pag->pag_active_wq);
                pag->pagb_count = 0;
                pag->pagb_tree = RB_ROOT;
 #endif /* __KERNEL__ */
@@ -322,6 +382,9 @@ xfs_initialize_perag(
                if (error)
                        goto out_remove_pag;
 
+               /* Active ref owned by mount indicates AG is online. */
+               atomic_set(&pag->pag_active_ref, 1);
+
                /* first new pag is fully initialized */
                if (first_initialised == NULLAGNUMBER)
                        first_initialised = index;
@@ -824,7 +887,7 @@ xfs_ag_shrink_space(
        struct xfs_alloc_arg    args = {
                .tp     = *tpp,
                .mp     = mp,
-               .type   = XFS_ALLOCTYPE_THIS_BNO,
+               .pag    = pag,
                .minlen = delta,
                .maxlen = delta,
                .oinfo  = XFS_RMAP_OINFO_SKIP_UPDATE,
@@ -856,14 +919,11 @@ xfs_ag_shrink_space(
        if (delta >= aglen)
                return -EINVAL;
 
-       args.fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta);
-
        /*
         * Make sure that the last inode cluster cannot overlap with the new
         * end of the AG, even if it's sparse.
         */
-       error = xfs_ialloc_check_shrink(*tpp, pag->pag_agno, agibp,
-                       aglen - delta);
+       error = xfs_ialloc_check_shrink(pag, *tpp, agibp, aglen - delta);
        if (error)
                return error;
 
@@ -876,7 +936,8 @@ xfs_ag_shrink_space(
                return error;
 
        /* internal log shouldn't also show up in the free space btrees */
-       error = xfs_alloc_vextent(&args);
+       error = xfs_alloc_vextent_exact_bno(&args,
+                       XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta));
        if (!error && args.agbno == NULLAGBLOCK)
                error = -ENOSPC;
 
index 191b22b9a35bfd67981de4f05c97703ad2371f7f..5e18536dfdcecce70572565cac837b20e59b4131 100644 (file)
@@ -32,14 +32,12 @@ struct xfs_ag_resv {
 struct xfs_perag {
        struct xfs_mount *pag_mount;    /* owner filesystem */
        xfs_agnumber_t  pag_agno;       /* AG this structure belongs to */
-       atomic_t        pag_ref;        /* perag reference count */
-       char            pagf_init;      /* this agf's entry is initialized */
-       char            pagi_init;      /* this agi's entry is initialized */
-       char            pagf_metadata;  /* the agf is preferred to be metadata */
-       char            pagi_inodeok;   /* The agi is ok for inodes */
+       atomic_t        pag_ref;        /* passive reference count */
+       atomic_t        pag_active_ref; /* active reference count */
+       wait_queue_head_t pag_active_wq;/* woken when active_ref falls to zero */
+       unsigned long   pag_opstate;
        uint8_t         pagf_levels[XFS_BTNUM_AGF];
                                        /* # of levels in bno & cnt btree */
-       bool            pagf_agflreset; /* agfl requires reset before use */
        uint32_t        pagf_flcount;   /* count of blocks in freelist */
        xfs_extlen_t    pagf_freeblks;  /* total free blocks */
        xfs_extlen_t    pagf_longest;   /* longest free space */
@@ -106,16 +104,44 @@ struct xfs_perag {
 #endif /* __KERNEL__ */
 };
 
+/*
+ * Per-AG operational state. These are atomic flag bits.
+ */
+#define XFS_AGSTATE_AGF_INIT           0
+#define XFS_AGSTATE_AGI_INIT           1
+#define XFS_AGSTATE_PREFERS_METADATA   2
+#define XFS_AGSTATE_ALLOWS_INODES      3
+#define XFS_AGSTATE_AGFL_NEEDS_RESET   4
+
+#define __XFS_AG_OPSTATE(name, NAME) \
+static inline bool xfs_perag_ ## name (struct xfs_perag *pag) \
+{ \
+       return test_bit(XFS_AGSTATE_ ## NAME, &pag->pag_opstate); \
+}
+
+__XFS_AG_OPSTATE(initialised_agf, AGF_INIT)
+__XFS_AG_OPSTATE(initialised_agi, AGI_INIT)
+__XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA)
+__XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES)
+__XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET)
+
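For reference, each __XFS_AG_OPSTATE() invocation above generates a test_bit()
predicate; expanded by hand, the first one is equivalent to:

	static inline bool
	xfs_perag_initialised_agf(
		struct xfs_perag	*pag)
	{
		return test_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
	}

Writers flip the bits directly with set_bit() and clear_bit(), as the
xfs_alloc_read_agf() and xfs_agfl_reset() hunks further down show.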
 int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
                        xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi);
 int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno);
 void xfs_free_perag(struct xfs_mount *mp);
 
+/* Passive AG references */
 struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
 struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
                unsigned int tag);
 void xfs_perag_put(struct xfs_perag *pag);
 
+/* Active AG references */
+struct xfs_perag *xfs_perag_grab(struct xfs_mount *, xfs_agnumber_t);
+struct xfs_perag *xfs_perag_grab_tag(struct xfs_mount *, xfs_agnumber_t,
+                                  int tag);
+void xfs_perag_rele(struct xfs_perag *pag);
+
 /*
 * Per-ag geometry information and validation
  */
@@ -193,31 +219,86 @@ xfs_perag_next(
        struct xfs_mount        *mp = pag->pag_mount;
 
        *agno = pag->pag_agno + 1;
-       xfs_perag_put(pag);
-       if (*agno > end_agno)
-               return NULL;
-       return xfs_perag_get(mp, *agno);
+       xfs_perag_rele(pag);
+       while (*agno <= end_agno) {
+               pag = xfs_perag_grab(mp, *agno);
+               if (pag)
+                       return pag;
+               (*agno)++;
+       }
+       return NULL;
 }
 
 #define for_each_perag_range(mp, agno, end_agno, pag) \
-       for ((pag) = xfs_perag_get((mp), (agno)); \
+       for ((pag) = xfs_perag_grab((mp), (agno)); \
                (pag) != NULL; \
                (pag) = xfs_perag_next((pag), &(agno), (end_agno)))
 
 #define for_each_perag_from(mp, agno, pag) \
        for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag))
 
-
 #define for_each_perag(mp, agno, pag) \
        (agno) = 0; \
        for_each_perag_from((mp), (agno), (pag))
 
 #define for_each_perag_tag(mp, agno, pag, tag) \
-       for ((agno) = 0, (pag) = xfs_perag_get_tag((mp), 0, (tag)); \
+       for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \
                (pag) != NULL; \
                (agno) = (pag)->pag_agno + 1, \
-               xfs_perag_put(pag), \
-               (pag) = xfs_perag_get_tag((mp), (agno), (tag)))
+               xfs_perag_rele(pag), \
+               (pag) = xfs_perag_grab_tag((mp), (agno), (tag)))
+
+static inline struct xfs_perag *
+xfs_perag_next_wrap(
+       struct xfs_perag        *pag,
+       xfs_agnumber_t          *agno,
+       xfs_agnumber_t          stop_agno,
+       xfs_agnumber_t          restart_agno,
+       xfs_agnumber_t          wrap_agno)
+{
+       struct xfs_mount        *mp = pag->pag_mount;
+
+       *agno = pag->pag_agno + 1;
+       xfs_perag_rele(pag);
+       while (*agno != stop_agno) {
+               if (*agno >= wrap_agno) {
+                       if (restart_agno >= stop_agno)
+                               break;
+                       *agno = restart_agno;
+               }
+
+               pag = xfs_perag_grab(mp, *agno);
+               if (pag)
+                       return pag;
+               (*agno)++;
+       }
+       return NULL;
+}
+
+/*
+ * Iterate all AGs from start_agno up to (but not including) wrap_agno, then
+ * restart_agno through (start_agno - 1).
+ */
+#define for_each_perag_wrap_range(mp, start_agno, restart_agno, wrap_agno, agno, pag) \
+       for ((agno) = (start_agno), (pag) = xfs_perag_grab((mp), (agno)); \
+               (pag) != NULL; \
+               (pag) = xfs_perag_next_wrap((pag), &(agno), (start_agno), \
+                               (restart_agno), (wrap_agno)))
+/*
+ * Iterate all AGs from start_agno up to (but not including) wrap_agno, then 0
+ * through (start_agno - 1).
+ */
+#define for_each_perag_wrap_at(mp, start_agno, wrap_agno, agno, pag) \
+       for_each_perag_wrap_range((mp), (start_agno), 0, (wrap_agno), (agno), (pag))
+
+/*
+ * Iterate all AGs from start_agno through to the end of the filesystem, then 0
+ * through (start_agno - 1).
+ */
+#define for_each_perag_wrap(mp, start_agno, agno, pag) \
+       for_each_perag_wrap_at((mp), (start_agno), (mp)->m_sb.sb_agcount, \
+                               (agno), (pag))
+
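
A sketch of how the wrapping iterators are meant to be used (not from the patch;
ag_is_suitable() is a hypothetical predicate). Because each pass grabs an active
reference, a caller that breaks out early still owns the last reference and must
release it, as the xfs_bmap hunk later in this diff does:

	xfs_agnumber_t		agno;
	struct xfs_perag	*pag;

	for_each_perag_wrap(mp, start_agno, agno, pag) {
		if (ag_is_suitable(pag))
			break;		/* exit still holds the reference */
	}
	if (pag)
		xfs_perag_rele(pag);	/* drop the ref held across break */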
 
 struct aghdr_init_data {
        /* per ag data */
index 5af123d13a637c318d5ef4fd174addb8b89e5984..7fd1fea95552f163ab341f9bf193118ab0587b8c 100644 (file)
@@ -264,7 +264,7 @@ xfs_ag_resv_init(
                if (error)
                        goto out;
 
-               error = xfs_finobt_calc_reserves(mp, tp, pag, &ask, &used);
+               error = xfs_finobt_calc_reserves(pag, tp, &ask, &used);
                if (error)
                        goto out;
 
index f8ff81c3de761fc8e5b644ccb266113ea7e768cd..6a037173d20d99e60239ad51e15c2a66822ae5df 100644 (file)
@@ -36,10 +36,6 @@ struct workqueue_struct *xfs_alloc_wq;
 #define        XFSA_FIXUP_BNO_OK       1
 #define        XFSA_FIXUP_CNT_OK       2
 
-STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
-STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
-STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
-
 /*
 * Size of the AGFL.  For CRC-enabled filesystems we steal a couple of slots in
  * the beginning of the block for a proper header with the location information
@@ -772,8 +768,6 @@ xfs_alloc_cur_setup(
        int                     error;
        int                     i;
 
-       ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO);
-
        acur->cur_len = args->maxlen;
        acur->rec_bno = 0;
        acur->rec_len = 0;
@@ -887,7 +881,6 @@ xfs_alloc_cur_check(
         * We have an aligned record that satisfies minlen and beats or matches
         * the candidate extent size. Compare locality for near allocation mode.
         */
-       ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
        diff = xfs_alloc_compute_diff(args->agbno, args->len,
                                      args->alignment, args->datatype,
                                      bnoa, lena, &bnew);
@@ -1132,78 +1125,6 @@ error:
        return error;
 }
 
-/*
- * Allocate a variable extent in the allocation group agno.
- * Type and bno are used to determine where in the allocation group the
- * extent will start.
- * Extent's length (returned in *len) will be between minlen and maxlen,
- * and of the form k * prod + mod unless there's nothing that large.
- * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
- */
-STATIC int                     /* error */
-xfs_alloc_ag_vextent(
-       xfs_alloc_arg_t *args)  /* argument structure for allocation */
-{
-       int             error=0;
-
-       ASSERT(args->minlen > 0);
-       ASSERT(args->maxlen > 0);
-       ASSERT(args->minlen <= args->maxlen);
-       ASSERT(args->mod < args->prod);
-       ASSERT(args->alignment > 0);
-
-       /*
-        * Branch to correct routine based on the type.
-        */
-       args->wasfromfl = 0;
-       switch (args->type) {
-       case XFS_ALLOCTYPE_THIS_AG:
-               error = xfs_alloc_ag_vextent_size(args);
-               break;
-       case XFS_ALLOCTYPE_NEAR_BNO:
-               error = xfs_alloc_ag_vextent_near(args);
-               break;
-       case XFS_ALLOCTYPE_THIS_BNO:
-               error = xfs_alloc_ag_vextent_exact(args);
-               break;
-       default:
-               ASSERT(0);
-               /* NOTREACHED */
-       }
-
-       if (error || args->agbno == NULLAGBLOCK)
-               return error;
-
-       ASSERT(args->len >= args->minlen);
-       ASSERT(args->len <= args->maxlen);
-       ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
-       ASSERT(args->agbno % args->alignment == 0);
-
-       /* if not file data, insert new block into the reverse map btree */
-       if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
-               error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
-                                      args->agbno, args->len, &args->oinfo);
-               if (error)
-                       return error;
-       }
-
-       if (!args->wasfromfl) {
-               error = xfs_alloc_update_counters(args->tp, args->agbp,
-                                                 -((long)(args->len)));
-               if (error)
-                       return error;
-
-               ASSERT(!xfs_extent_busy_search(args->mp, args->pag,
-                                             args->agbno, args->len));
-       }
-
-       xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
-
-       XFS_STATS_INC(args->mp, xs_allocx);
-       XFS_STATS_ADD(args->mp, xs_allocb, args->len);
-       return error;
-}
-
 /*
  * Allocate a variable extent at exactly agno/bno.
  * Extent's length (returned in *len) will be between minlen and maxlen,
@@ -1389,7 +1310,6 @@ xfs_alloc_ag_vextent_locality(
        bool                    fbinc;
 
        ASSERT(acur->len == 0);
-       ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
 
        *stat = 0;
 
@@ -2435,7 +2355,7 @@ xfs_agfl_reset(
        struct xfs_mount        *mp = tp->t_mountp;
        struct xfs_agf          *agf = agbp->b_addr;
 
-       ASSERT(pag->pagf_agflreset);
+       ASSERT(xfs_perag_agfl_needs_reset(pag));
        trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
 
        xfs_warn(mp,
@@ -2450,7 +2370,7 @@ xfs_agfl_reset(
                                    XFS_AGF_FLCOUNT);
 
        pag->pagf_flcount = 0;
-       pag->pagf_agflreset = false;
+       clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
 }
 
 /*
@@ -2605,7 +2525,7 @@ xfs_alloc_fix_freelist(
        /* deferred ops (AGFL block frees) require permanent transactions */
        ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
 
-       if (!pag->pagf_init) {
+       if (!xfs_perag_initialised_agf(pag)) {
                error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
                if (error) {
                        /* Couldn't lock the AGF so skip this AG. */
@@ -2620,7 +2540,8 @@ xfs_alloc_fix_freelist(
         * somewhere else if we are not being asked to try harder at this
         * point
         */
-       if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) &&
+       if (xfs_perag_prefers_metadata(pag) &&
+           (args->datatype & XFS_ALLOC_USERDATA) &&
            (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
                ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
                goto out_agbp_relse;
@@ -2646,7 +2567,7 @@ xfs_alloc_fix_freelist(
        }
 
        /* reset a padding mismatched agfl before final free space check */
-       if (pag->pagf_agflreset)
+       if (xfs_perag_agfl_needs_reset(pag))
                xfs_agfl_reset(tp, agbp, pag);
 
        /* If there isn't enough total space or single-extent, reject it. */
@@ -2707,7 +2628,6 @@ xfs_alloc_fix_freelist(
        targs.agbp = agbp;
        targs.agno = args->agno;
        targs.alignment = targs.minlen = targs.prod = 1;
-       targs.type = XFS_ALLOCTYPE_THIS_AG;
        targs.pag = pag;
        error = xfs_alloc_read_agfl(pag, tp, &agflbp);
        if (error)
@@ -2720,7 +2640,7 @@ xfs_alloc_fix_freelist(
                targs.resv = XFS_AG_RESV_AGFL;
 
                /* Allocate as many blocks as possible at once. */
-               error = xfs_alloc_ag_vextent(&targs);
+               error = xfs_alloc_ag_vextent_size(&targs);
                if (error)
                        goto out_agflbp_relse;
 
@@ -2734,6 +2654,18 @@ xfs_alloc_fix_freelist(
                                break;
                        goto out_agflbp_relse;
                }
+
+               if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
+                       error = xfs_rmap_alloc(tp, agbp, pag,
+                                      targs.agbno, targs.len, &targs.oinfo);
+                       if (error)
+                               goto out_agflbp_relse;
+               }
+               error = xfs_alloc_update_counters(tp, agbp,
+                                                 -((long)(targs.len)));
+               if (error)
+                       goto out_agflbp_relse;
+
                /*
                 * Put each allocated block on the list.
                 */
@@ -2803,7 +2735,7 @@ xfs_alloc_get_freelist(
        if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
                agf->agf_flfirst = 0;
 
-       ASSERT(!pag->pagf_agflreset);
+       ASSERT(!xfs_perag_agfl_needs_reset(pag));
        be32_add_cpu(&agf->agf_flcount, -1);
        pag->pagf_flcount--;
 
@@ -2892,7 +2824,7 @@ xfs_alloc_put_freelist(
        if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
                agf->agf_fllast = 0;
 
-       ASSERT(!pag->pagf_agflreset);
+       ASSERT(!xfs_perag_agfl_needs_reset(pag));
        be32_add_cpu(&agf->agf_flcount, 1);
        pag->pagf_flcount++;
 
@@ -3099,7 +3031,7 @@ xfs_alloc_read_agf(
                return error;
 
        agf = agfbp->b_addr;
-       if (!pag->pagf_init) {
+       if (!xfs_perag_initialised_agf(pag)) {
                pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
                pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
                pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
@@ -3111,8 +3043,8 @@ xfs_alloc_read_agf(
                pag->pagf_levels[XFS_BTNUM_RMAPi] =
                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
                pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
-               pag->pagf_init = 1;
-               pag->pagf_agflreset = xfs_agfl_needs_reset(pag->pag_mount, agf);
+               if (xfs_agfl_needs_reset(pag->pag_mount, agf))
+                       set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
 
                /*
                 * Update the in-core allocbt counter. Filter out the rmapbt
@@ -3127,6 +3059,8 @@ xfs_alloc_read_agf(
                if (allocbt_blks > 0)
                        atomic64_add(allocbt_blks,
                                        &pag->pag_mount->m_allocbt_blks);
+
+               set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
        }
 #ifdef DEBUG
        else if (!xfs_is_shutdown(pag->pag_mount)) {
@@ -3148,26 +3082,25 @@ xfs_alloc_read_agf(
 }
 
 /*
- * Allocate an extent (variable-size).
- * Depending on the allocation type, we either look in a single allocation
- * group or loop over the allocation groups to find the result.
+ * Pre-process allocation arguments to set initial state that we don't require
+ * callers to set up correctly, as well as bounds-check the allocation args
+ * that are set up.
  */
-int                            /* error */
-xfs_alloc_vextent(
-       struct xfs_alloc_arg    *args)  /* allocation argument structure */
+static int
+xfs_alloc_vextent_check_args(
+       struct xfs_alloc_arg    *args,
+       xfs_fsblock_t           target,
+       xfs_agnumber_t          *minimum_agno)
 {
-       xfs_agblock_t           agsize; /* allocation group size */
-       int                     error;
-       int                     flags;  /* XFS_ALLOC_FLAG_... locking flags */
-       struct xfs_mount        *mp;    /* mount structure pointer */
-       xfs_agnumber_t          sagno;  /* starting allocation group number */
-       xfs_alloctype_t         type;   /* input allocation type */
-       int                     bump_rotor = 0;
-       xfs_agnumber_t          rotorstep = xfs_rotorstep; /* inode32 agf stepper */
-
-       mp = args->mp;
-       type = args->otype = args->type;
-       args->agbno = NULLAGBLOCK;
+       struct xfs_mount        *mp = args->mp;
+       xfs_agblock_t           agsize;
+
+       args->fsbno = NULLFSBLOCK;
+
+       *minimum_agno = 0;
+       if (args->tp->t_highest_agno != NULLAGNUMBER)
+               *minimum_agno = args->tp->t_highest_agno;
+
        /*
         * Just fix this up, for the case where the last a.g. is shorter
         * (or there's only one a.g.) and the caller couldn't easily figure
@@ -3178,168 +3111,414 @@ xfs_alloc_vextent(
                args->maxlen = agsize;
        if (args->alignment == 0)
                args->alignment = 1;
-       ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
-       ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
+
+       ASSERT(args->minlen > 0);
+       ASSERT(args->maxlen > 0);
+       ASSERT(args->alignment > 0);
+       ASSERT(args->resv != XFS_AG_RESV_AGFL);
+
+       ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
+       ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
        ASSERT(args->minlen <= args->maxlen);
        ASSERT(args->minlen <= agsize);
        ASSERT(args->mod < args->prod);
-       if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
-           XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
+
+       if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
+           XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
            args->minlen > args->maxlen || args->minlen > agsize ||
            args->mod >= args->prod) {
-               args->fsbno = NULLFSBLOCK;
                trace_xfs_alloc_vextent_badargs(args);
+               return -ENOSPC;
+       }
+
+       if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
+               trace_xfs_alloc_vextent_skip_deadlock(args);
+               return -ENOSPC;
+       }
+       return 0;
+}
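
A worked example of the deadlock guard above, with assumed numbers: if an
earlier allocation in this transaction locked AGF 5, then
args->tp->t_highest_agno is 5 and *minimum_agno becomes 5, so a later
allocation constrained to args->agno == 3 is refused with -ENOSPC rather than
risking an out-of-order (3 < 5) AGF lock.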
+
+/*
+ * Prepare an AG for allocation. If the AG is not prepared to accept the
+ * allocation, return failure.
+ *
+ * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
+ * modified to hold their own perag references.
+ */
+static int
+xfs_alloc_vextent_prepare_ag(
+       struct xfs_alloc_arg    *args)
+{
+       bool                    need_pag = !args->pag;
+       int                     error;
+
+       if (need_pag)
+               args->pag = xfs_perag_get(args->mp, args->agno);
+
+       args->agbp = NULL;
+       error = xfs_alloc_fix_freelist(args, 0);
+       if (error) {
+               trace_xfs_alloc_vextent_nofix(args);
+               if (need_pag)
+                       xfs_perag_put(args->pag);
+               args->agbno = NULLAGBLOCK;
+               return error;
+       }
+       if (!args->agbp) {
+               /* cannot allocate in this AG at all */
+               trace_xfs_alloc_vextent_noagbp(args);
+               args->agbno = NULLAGBLOCK;
                return 0;
        }
+       args->wasfromfl = 0;
+       return 0;
+}
 
-       switch (type) {
-       case XFS_ALLOCTYPE_THIS_AG:
-       case XFS_ALLOCTYPE_NEAR_BNO:
-       case XFS_ALLOCTYPE_THIS_BNO:
-               /*
-                * These three force us into a single a.g.
-                */
-               args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
-               args->pag = xfs_perag_get(mp, args->agno);
-               error = xfs_alloc_fix_freelist(args, 0);
-               if (error) {
-                       trace_xfs_alloc_vextent_nofix(args);
-                       goto error0;
-               }
-               if (!args->agbp) {
-                       trace_xfs_alloc_vextent_noagbp(args);
+/*
+ * Post-process allocation results to account for the allocation if it
+ * succeeded, and set the allocated block number correctly for the caller.
+ *
+ * XXX: we should really be returning ENOSPC for ENOSPC, not
+ * hiding it behind a "successful" NULLFSBLOCK allocation.
+ */
+static int
+xfs_alloc_vextent_finish(
+       struct xfs_alloc_arg    *args,
+       xfs_agnumber_t          minimum_agno,
+       int                     alloc_error,
+       bool                    drop_perag)
+{
+       struct xfs_mount        *mp = args->mp;
+       int                     error = 0;
+
+       /*
+        * We can end up here with a locked AGF. If we failed, the caller is
+        * likely going to try to allocate again with different parameters, and
+        * that can widen the AGs that are searched for free space. If we have
+        * to do BMBT block allocation, we have to do a new allocation.
+        *
+        * Hence leaving this function with the AGF locked opens up potential
+        * ABBA AGF deadlocks because a future allocation attempt in this
+        * transaction may attempt to lock a lower-numbered AGF.
+        *
+        * We can't release the AGF until the transaction is committed, so at
+        * this point we must update the "first allocation" tracker to point at
+        * this AG if the tracker is empty or points to a lower AG. This allows
+        * the next allocation attempt to be modified appropriately to avoid
+        * deadlocks.
+        */
+       if (args->agbp &&
+           (args->tp->t_highest_agno == NULLAGNUMBER ||
+            args->agno > minimum_agno))
+               args->tp->t_highest_agno = args->agno;
+
+       /*
+        * If the allocation failed with an error or we had an ENOSPC result,
+        * preserve the returned error whilst also marking the allocation result
+        * as "no extent allocated". This ensures that callers that fail to
+        * capture the error will still treat it as a failed allocation.
+        */
+       if (alloc_error || args->agbno == NULLAGBLOCK) {
+               args->fsbno = NULLFSBLOCK;
+               error = alloc_error;
+               goto out_drop_perag;
+       }
+
+       args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
+
+       ASSERT(args->len >= args->minlen);
+       ASSERT(args->len <= args->maxlen);
+       ASSERT(args->agbno % args->alignment == 0);
+       XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);
+
+       /* if not file data, insert new block into the reverse map btree */
+       if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
+               error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
+                                      args->agbno, args->len, &args->oinfo);
+               if (error)
+                       goto out_drop_perag;
+       }
+
+       if (!args->wasfromfl) {
+               error = xfs_alloc_update_counters(args->tp, args->agbp,
+                                                 -((long)(args->len)));
+               if (error)
+                       goto out_drop_perag;
+
+               ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
+                               args->len));
+       }
+
+       xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
+
+       XFS_STATS_INC(mp, xs_allocx);
+       XFS_STATS_ADD(mp, xs_allocb, args->len);
+
+out_drop_perag:
+       if (drop_perag && args->pag) {
+               xfs_perag_rele(args->pag);
+               args->pag = NULL;
+       }
+       return error;
+}
+
+/*
+ * Allocate within a single AG only. This uses a best-fit length algorithm so
+ * if you need an exact-sized allocation without locality constraints, this is
+ * the fastest way to do it.
+ *
+ * Caller is expected to hold a perag reference in args->pag.
+ */
+int
+xfs_alloc_vextent_this_ag(
+       struct xfs_alloc_arg    *args,
+       xfs_agnumber_t          agno)
+{
+       struct xfs_mount        *mp = args->mp;
+       xfs_agnumber_t          minimum_agno;
+       int                     error;
+
+       args->agno = agno;
+       args->agbno = 0;
+       error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
+                       &minimum_agno);
+       if (error) {
+               if (error == -ENOSPC)
+                       return 0;
+               return error;
+       }
+
+       error = xfs_alloc_vextent_prepare_ag(args);
+       if (!error && args->agbp)
+               error = xfs_alloc_ag_vextent_size(args);
+
+       return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
+}
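
A hedged sketch of a caller (not from the patch; the field values are
illustrative). The caller is assumed to hold an active perag reference in
args->pag, as the comment above requires, and to convert the NULLFSBLOCK
"success" that really means out of space:

	struct xfs_alloc_arg	args = {
		.tp		= tp,
		.mp		= mp,
		.pag		= pag,		/* caller's active reference */
		.oinfo		= XFS_RMAP_OINFO_ANY_OWNER,
		.minlen		= len,
		.maxlen		= len,
		.prod		= 1,
		.alignment	= 1,
		.resv		= XFS_AG_RESV_NONE,
	};
	int			error;

	error = xfs_alloc_vextent_this_ag(&args, pag->pag_agno);
	if (!error && args.fsbno == NULLFSBLOCK)
		error = -ENOSPC;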
+
+/*
+ * Iterate all AGs trying to allocate an extent starting from @start_ag.
+ *
+ * If @target_agbno is non-zero, the allocation attempt in @start_agno has
+ * locality information. If we fail to allocate in that AG, then we revert to
+ * anywhere-in-AG for all the other AGs we attempt to allocate in, as there is
+ * no locality optimisation possible for those allocations.
+ *
+ * On return, args->pag may be left referenced if we finish before the "all
+ * failed" return point. The allocation finish still needs the perag, and
+ * so the caller will release it once they've finished the allocation.
+ *
+ * When we wrap the AG iteration at the end of the filesystem, we have to be
+ * careful not to wrap into AGs below ones we already have locked in the
+ * transaction if we are doing a blocking iteration. Doing so would result in
+ * out-of-order locking of AGFs and hence can cause deadlocks.
+ */
+static int
+xfs_alloc_vextent_iterate_ags(
+       struct xfs_alloc_arg    *args,
+       xfs_agnumber_t          minimum_agno,
+       xfs_agnumber_t          start_agno,
+       xfs_agblock_t           target_agbno,
+       uint32_t                flags)
+{
+       struct xfs_mount        *mp = args->mp;
+       xfs_agnumber_t          agno;
+       int                     error = 0;
+
+restart:
+       for_each_perag_wrap_range(mp, start_agno, minimum_agno,
+                       mp->m_sb.sb_agcount, agno, args->pag) {
+               args->agno = agno;
+               error = xfs_alloc_vextent_prepare_ag(args);
+               if (error)
                        break;
+               if (!args->agbp) {
+                       trace_xfs_alloc_vextent_loopfailed(args);
+                       continue;
                }
-               args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
-               if ((error = xfs_alloc_ag_vextent(args)))
-                       goto error0;
-               break;
-       case XFS_ALLOCTYPE_START_BNO:
-               /*
-                * Try near allocation first, then anywhere-in-ag after
-                * the first a.g. fails.
-                */
-               if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
-                   xfs_is_inode32(mp)) {
-                       args->fsbno = XFS_AGB_TO_FSB(mp,
-                                       ((mp->m_agfrotor / rotorstep) %
-                                       mp->m_sb.sb_agcount), 0);
-                       bump_rotor = 1;
-               }
-               args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
-               args->type = XFS_ALLOCTYPE_NEAR_BNO;
-               fallthrough;
-       case XFS_ALLOCTYPE_FIRST_AG:
+
                /*
-                * Rotate through the allocation groups looking for a winner.
+                * Allocation is supposed to succeed now, so break out of the
+                * loop regardless of whether we succeed or not.
                 */
-               if (type == XFS_ALLOCTYPE_FIRST_AG) {
-                       /*
-                        * Start with allocation group given by bno.
-                        */
-                       args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
-                       args->type = XFS_ALLOCTYPE_THIS_AG;
-                       sagno = 0;
-                       flags = 0;
+               if (args->agno == start_agno && target_agbno) {
+                       args->agbno = target_agbno;
+                       error = xfs_alloc_ag_vextent_near(args);
                } else {
-                       /*
-                        * Start with the given allocation group.
-                        */
-                       args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
-                       flags = XFS_ALLOC_FLAG_TRYLOCK;
-               }
-               /*
-                * Loop over allocation groups twice; first time with
-                * trylock set, second time without.
-                */
-               for (;;) {
-                       args->pag = xfs_perag_get(mp, args->agno);
-                       error = xfs_alloc_fix_freelist(args, flags);
-                       if (error) {
-                               trace_xfs_alloc_vextent_nofix(args);
-                               goto error0;
-                       }
-                       /*
-                        * If we get a buffer back then the allocation will fly.
-                        */
-                       if (args->agbp) {
-                               if ((error = xfs_alloc_ag_vextent(args)))
-                                       goto error0;
-                               break;
-                       }
-
-                       trace_xfs_alloc_vextent_loopfailed(args);
-
-                       /*
-                        * Didn't work, figure out the next iteration.
-                        */
-                       if (args->agno == sagno &&
-                           type == XFS_ALLOCTYPE_START_BNO)
-                               args->type = XFS_ALLOCTYPE_THIS_AG;
-                       /*
-                       * For the first allocation, we can try any AG to get
-                       * space.  However, if we already have allocated a
-                       * block, we don't want to try AGs whose number is below
-                       * sagno. Otherwise, we may end up with out-of-order
-                       * locking of AGF, which might cause deadlock.
-                       */
-                       if (++(args->agno) == mp->m_sb.sb_agcount) {
-                               if (args->tp->t_firstblock != NULLFSBLOCK)
-                                       args->agno = sagno;
-                               else
-                                       args->agno = 0;
-                       }
-                       /*
-                        * Reached the starting a.g., must either be done
-                        * or switch to non-trylock mode.
-                        */
-                       if (args->agno == sagno) {
-                               if (flags == 0) {
-                                       args->agbno = NULLAGBLOCK;
-                                       trace_xfs_alloc_vextent_allfailed(args);
-                                       break;
-                               }
-
-                               flags = 0;
-                               if (type == XFS_ALLOCTYPE_START_BNO) {
-                                       args->agbno = XFS_FSB_TO_AGBNO(mp,
-                                               args->fsbno);
-                                       args->type = XFS_ALLOCTYPE_NEAR_BNO;
-                               }
-                       }
-                       xfs_perag_put(args->pag);
-               }
-               if (bump_rotor) {
-                       if (args->agno == sagno)
-                               mp->m_agfrotor = (mp->m_agfrotor + 1) %
-                                       (mp->m_sb.sb_agcount * rotorstep);
-                       else
-                               mp->m_agfrotor = (args->agno * rotorstep + 1) %
-                                       (mp->m_sb.sb_agcount * rotorstep);
+                       args->agbno = 0;
+                       error = xfs_alloc_ag_vextent_size(args);
                }
                break;
-       default:
-               ASSERT(0);
-               /* NOTREACHED */
        }
-       if (args->agbno == NULLAGBLOCK)
-               args->fsbno = NULLFSBLOCK;
-       else {
-               args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
-#ifdef DEBUG
-               ASSERT(args->len >= args->minlen);
-               ASSERT(args->len <= args->maxlen);
-               ASSERT(args->agbno % args->alignment == 0);
-               XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
-                       args->len);
-#endif
+       if (error) {
+               xfs_perag_rele(args->pag);
+               args->pag = NULL;
+               return error;
+       }
+       if (args->agbp)
+               return 0;
 
+       /*
+        * We didn't find an AG we can allocate from. If we were given
+        * constraining flags by the caller, drop them and retry the allocation
+        * without any constraints being set.
+        */
+       if (flags) {
+               flags = 0;
+               goto restart;
        }
-       xfs_perag_put(args->pag);
+
+       ASSERT(args->pag == NULL);
+       trace_xfs_alloc_vextent_allfailed(args);
        return 0;
-error0:
-       xfs_perag_put(args->pag);
-       return error;
+}
+
+/*
+ * Iterate the AGs from the start AG to the end of the filesystem, trying
+ * to allocate blocks. It starts with a near allocation attempt in the initial
+ * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
+ * back to zero if allowed by previous allocations in this transaction,
+ * otherwise will wrap back to the start AG and run a second blocking pass to
+ * the end of the filesystem.
+ */
+int
+xfs_alloc_vextent_start_ag(
+       struct xfs_alloc_arg    *args,
+       xfs_fsblock_t           target)
+{
+       struct xfs_mount        *mp = args->mp;
+       xfs_agnumber_t          minimum_agno;
+       xfs_agnumber_t          start_agno;
+       xfs_agnumber_t          rotorstep = xfs_rotorstep;
+       bool                    bump_rotor = false;
+       int                     error;
+
+       args->agno = NULLAGNUMBER;
+       args->agbno = NULLAGBLOCK;
+       error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
+       if (error) {
+               if (error == -ENOSPC)
+                       return 0;
+               return error;
+       }
+
+       if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
+           xfs_is_inode32(mp)) {
+               target = XFS_AGB_TO_FSB(mp,
+                               ((mp->m_agfrotor / rotorstep) %
+                               mp->m_sb.sb_agcount), 0);
+               bump_rotor = true;
+       }
+
+       start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
+       error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
+                       XFS_FSB_TO_AGBNO(mp, target), XFS_ALLOC_FLAG_TRYLOCK);
+
+       if (bump_rotor) {
+               if (args->agno == start_agno)
+                       mp->m_agfrotor = (mp->m_agfrotor + 1) %
+                               (mp->m_sb.sb_agcount * rotorstep);
+               else
+                       mp->m_agfrotor = (args->agno * rotorstep + 1) %
+                               (mp->m_sb.sb_agcount * rotorstep);
+       }
+
+       return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
+}
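
A worked example of the inode32 rotor above, with assumed values
sb_agcount = 4 and xfs_rotorstep = 2: m_agfrotor cycles through 0..7, and the
initial-user-data target AG is (m_agfrotor / 2) % 4, so the rotor points at
each AG for two consecutive allocations: 0, 0, 1, 1, 2, 2, 3, 3, then wraps.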
+
+/*
+ * Iterate from the AG indicated by @target through to the end of the
+ * filesystem attempting blocking allocation. This does not wrap or try a
+ * second pass, so it will not visit AGs lower than the target's AG.
+ */
+int
+xfs_alloc_vextent_first_ag(
+       struct xfs_alloc_arg    *args,
+       xfs_fsblock_t           target)
+{
+       struct xfs_mount        *mp = args->mp;
+       xfs_agnumber_t          minimum_agno;
+       xfs_agnumber_t          start_agno;
+       int                     error;
+
+       args->agno = NULLAGNUMBER;
+       args->agbno = NULLAGBLOCK;
+       error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
+       if (error) {
+               if (error == -ENOSPC)
+                       return 0;
+               return error;
+       }
+
+       start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
+       error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
+                       XFS_FSB_TO_AGBNO(mp, target), 0);
+       return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
+}
+
+/*
+ * Allocate at the exact block target or fail. Caller is expected to hold a
+ * perag reference in args->pag.
+ */
+int
+xfs_alloc_vextent_exact_bno(
+       struct xfs_alloc_arg    *args,
+       xfs_fsblock_t           target)
+{
+       struct xfs_mount        *mp = args->mp;
+       xfs_agnumber_t          minimum_agno;
+       int                     error;
+
+       args->agno = XFS_FSB_TO_AGNO(mp, target);
+       args->agbno = XFS_FSB_TO_AGBNO(mp, target);
+       error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
+       if (error) {
+               if (error == -ENOSPC)
+                       return 0;
+               return error;
+       }
+
+       error = xfs_alloc_vextent_prepare_ag(args);
+       if (!error && args->agbp)
+               error = xfs_alloc_ag_vextent_exact(args);
+
+       return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
+}
+
+/*
+ * Allocate an extent as close to the target as possible. If there are no
+ * viable candidates in the AG, then fail the allocation.
+ *
+ * Caller may or may not have a per-ag reference in args->pag.
+ */
+int
+xfs_alloc_vextent_near_bno(
+       struct xfs_alloc_arg    *args,
+       xfs_fsblock_t           target)
+{
+       struct xfs_mount        *mp = args->mp;
+       xfs_agnumber_t          minimum_agno;
+       bool                    needs_perag = args->pag == NULL;
+       int                     error;
+
+       args->agno = XFS_FSB_TO_AGNO(mp, target);
+       args->agbno = XFS_FSB_TO_AGBNO(mp, target);
+       error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
+       if (error) {
+               if (error == -ENOSPC)
+                       return 0;
+               return error;
+       }
+
+       if (needs_perag)
+               args->pag = xfs_perag_grab(mp, args->agno);
+
+       error = xfs_alloc_vextent_prepare_ag(args);
+       if (!error && args->agbp)
+               error = xfs_alloc_ag_vextent_near(args);
+
+       return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
 }
 
 /* Ensure that the freelist is at full capacity. */
index 2c3f762dfb581871bdf6955e19a5d76c926a3c3c..2b246d74c18901051e3ae97c84e18ab3674ed1c4 100644 (file)
@@ -16,25 +16,6 @@ extern struct workqueue_struct *xfs_alloc_wq;
 
 unsigned int xfs_agfl_size(struct xfs_mount *mp);
 
-/*
- * Freespace allocation types.  Argument to xfs_alloc_[v]extent.
- */
-#define XFS_ALLOCTYPE_FIRST_AG 0x02    /* ... start at ag 0 */
-#define XFS_ALLOCTYPE_THIS_AG  0x08    /* anywhere in this a.g. */
-#define XFS_ALLOCTYPE_START_BNO        0x10    /* near this block else anywhere */
-#define XFS_ALLOCTYPE_NEAR_BNO 0x20    /* in this a.g. and near this block */
-#define XFS_ALLOCTYPE_THIS_BNO 0x40    /* at exactly this block */
-
-/* this should become an enum again when the tracing code is fixed */
-typedef unsigned int xfs_alloctype_t;
-
-#define XFS_ALLOC_TYPES \
-       { XFS_ALLOCTYPE_FIRST_AG,       "FIRST_AG" }, \
-       { XFS_ALLOCTYPE_THIS_AG,        "THIS_AG" }, \
-       { XFS_ALLOCTYPE_START_BNO,      "START_BNO" }, \
-       { XFS_ALLOCTYPE_NEAR_BNO,       "NEAR_BNO" }, \
-       { XFS_ALLOCTYPE_THIS_BNO,       "THIS_BNO" }
-
 /*
  * Flags for xfs_alloc_fix_freelist.
  */
@@ -68,8 +49,6 @@ typedef struct xfs_alloc_arg {
        xfs_agblock_t   min_agbno;      /* set an agbno range for NEAR allocs */
        xfs_agblock_t   max_agbno;      /* ... */
        xfs_extlen_t    len;            /* output: actual size of extent */
-       xfs_alloctype_t type;           /* allocation type XFS_ALLOCTYPE_... */
-       xfs_alloctype_t otype;          /* original allocation type */
        int             datatype;       /* mask defining data type treatment */
        char            wasdel;         /* set if allocation was prev delayed */
        char            wasfromfl;      /* set if allocation is from freelist */
@@ -118,11 +97,43 @@ xfs_alloc_log_agf(
        uint32_t        fields);/* mask of fields to be logged (XFS_AGF_...) */
 
 /*
- * Allocate an extent (variable-size).
+ * Allocate an extent anywhere in the specific AG given. If there is no
+ * space matching the requirements in that AG, then the allocation will fail.
  */
-int                            /* error */
-xfs_alloc_vextent(
-       xfs_alloc_arg_t *args); /* allocation argument structure */
+int xfs_alloc_vextent_this_ag(struct xfs_alloc_arg *args, xfs_agnumber_t agno);
+
+/*
+ * Allocate an extent as close to the target as possible. If there are no
+ * viable candidates in the AG, then fail the allocation.
+ */
+int xfs_alloc_vextent_near_bno(struct xfs_alloc_arg *args,
+               xfs_fsblock_t target);
+
+/*
+ * Allocate an extent exactly at the target given. If this is not possible
+ * then the allocation fails.
+ */
+int xfs_alloc_vextent_exact_bno(struct xfs_alloc_arg *args,
+               xfs_fsblock_t target);
+
+/*
+ * Best-effort full-filesystem allocation scan.
+ *
+ * Locality-aware allocation will be attempted in the initial AG, but on
+ * failure non-localised attempts will be made. The AGs are constrained by
+ * previous allocations in the current transaction. Two passes will be made:
+ * the first non-blocking, the second blocking.
+ */
+int xfs_alloc_vextent_start_ag(struct xfs_alloc_arg *args,
+               xfs_fsblock_t target);
+
+/*
+ * Iterate from the AG indicated by @target through to the end of the
+ * filesystem attempting blocking allocation. This is for use in last-resort
+ * allocation attempts when everything else has failed.
+ */
+int xfs_alloc_vextent_first_ag(struct xfs_alloc_arg *args,
+               xfs_fsblock_t target);
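
A hedged sketch (not from the patch) of how a caller might choose between the
variants declared above; "target", "args" and the inode are assumptions for
illustration only, mirroring the xfs_bmap conversions later in this diff:

	if (target != NULLFSBLOCK)
		/* locality hint available: allocate close to it */
		error = xfs_alloc_vextent_near_bno(&args, target);
	else
		/* no hint: near-then-anywhere scan of the whole fs */
		error = xfs_alloc_vextent_start_ag(&args,
				XFS_INO_TO_FSB(mp, ip->i_ino));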
 
 /*
  * Free an extent.
index 549a3cba0234de698ec7035c07d6170a034f88fe..0f29c7b1b39f32fc4fa87812a5fff553b2699f42 100644 (file)
@@ -315,7 +315,7 @@ xfs_allocbt_verify(
        level = be16_to_cpu(block->bb_level);
        if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC))
                btnum = XFS_BTNUM_CNTi;
-       if (pag && pag->pagf_init) {
+       if (pag && xfs_perag_initialised_agf(pag)) {
                if (level >= pag->pagf_levels[btnum])
                        return __this_address;
        } else if (level >= mp->m_alloc_maxlevels)
index c8c65387136c9dd66faf6e8d2af6052c5bb9e77c..34de6e6898c48029760c91279aede0debd3b9234 100644 (file)
@@ -645,34 +645,23 @@ xfs_bmap_extents_to_btree(
        args.tp = tp;
        args.mp = mp;
        xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
-       if (tp->t_firstblock == NULLFSBLOCK) {
-               args.type = XFS_ALLOCTYPE_START_BNO;
-               args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
-       } else if (tp->t_flags & XFS_TRANS_LOWMODE) {
-               args.type = XFS_ALLOCTYPE_START_BNO;
-               args.fsbno = tp->t_firstblock;
-       } else {
-               args.type = XFS_ALLOCTYPE_NEAR_BNO;
-               args.fsbno = tp->t_firstblock;
-       }
+
        args.minlen = args.maxlen = args.prod = 1;
        args.wasdel = wasdel;
        *logflagsp = 0;
-       error = xfs_alloc_vextent(&args);
+       error = xfs_alloc_vextent_start_ag(&args,
+                               XFS_INO_TO_FSB(mp, ip->i_ino));
        if (error)
                goto out_root_realloc;
 
+       /*
+        * Allocation can't fail, the space was reserved.
+        */
        if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
                error = -ENOSPC;
                goto out_root_realloc;
        }
 
-       /*
-        * Allocation can't fail, the space was reserved.
-        */
-       ASSERT(tp->t_firstblock == NULLFSBLOCK ||
-              args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
-       tp->t_firstblock = args.fsbno;
        cur->bc_ino.allocated++;
        ip->i_nblocks++;
        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
@@ -799,28 +788,24 @@ xfs_bmap_local_to_extents(
        memset(&args, 0, sizeof(args));
        args.tp = tp;
        args.mp = ip->i_mount;
+       args.total = total;
+       args.minlen = args.maxlen = args.prod = 1;
        xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
+
        /*
         * Allocate a block.  We know we need only one, since the
         * file currently fits in an inode.
         */
-       if (tp->t_firstblock == NULLFSBLOCK) {
-               args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
-               args.type = XFS_ALLOCTYPE_START_BNO;
-       } else {
-               args.fsbno = tp->t_firstblock;
-               args.type = XFS_ALLOCTYPE_NEAR_BNO;
-       }
        args.total = total;
        args.minlen = args.maxlen = args.prod = 1;
-       error = xfs_alloc_vextent(&args);
+       error = xfs_alloc_vextent_start_ag(&args,
+                       XFS_INO_TO_FSB(args.mp, ip->i_ino));
        if (error)
                goto done;
 
        /* Can't fail, the space was reserved. */
        ASSERT(args.fsbno != NULLFSBLOCK);
        ASSERT(args.len == 1);
-       tp->t_firstblock = args.fsbno;
        error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
                        XFS_FSB_TO_DADDR(args.mp, args.fsbno),
                        args.mp->m_bsize, 0, &bp);
@@ -854,8 +839,7 @@ xfs_bmap_local_to_extents(
 
        ifp->if_nextents = 1;
        ip->i_nblocks = 1;
-       xfs_trans_mod_dquot_byino(tp, ip,
-               XFS_TRANS_DQ_BCOUNT, 1L);
+       xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
        flags |= xfs_ilog_fext(whichfork);
 
 done:
@@ -3025,9 +3009,7 @@ xfs_bmap_adjacent(
        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
 {
        xfs_fsblock_t   adjust;         /* adjustment to block numbers */
-       xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
        xfs_mount_t     *mp;            /* mount point structure */
-       int             nullfb;         /* true if ap->firstblock isn't set */
        int             rt;             /* true if inode is realtime */
 
 #define        ISVALID(x,y)    \
@@ -3038,11 +3020,8 @@ xfs_bmap_adjacent(
                XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
 
        mp = ap->ip->i_mount;
-       nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
        rt = XFS_IS_REALTIME_INODE(ap->ip) &&
                (ap->datatype & XFS_ALLOC_USERDATA);
-       fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
-                                                       ap->tp->t_firstblock);
        /*
         * If allocating at eof, and there's a previous real block,
         * try to use its last block as our starting point.
@@ -3101,13 +3080,6 @@ xfs_bmap_adjacent(
                                prevbno += adjust;
                        else
                                prevdiff += adjust;
-                       /*
-                        * If the firstblock forbids it, can't use it,
-                        * must use default.
-                        */
-                       if (!rt && !nullfb &&
-                           XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
-                               prevbno = NULLFSBLOCK;
                }
                /*
                 * No previous block or can't follow it, just default.
@@ -3143,13 +3115,6 @@ xfs_bmap_adjacent(
                                gotdiff += adjust - ap->length;
                        } else
                                gotdiff += adjust;
-                       /*
-                        * If the firstblock forbids it, can't use it,
-                        * must use default.
-                        */
-                       if (!rt && !nullfb &&
-                           XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
-                               gotbno = NULLFSBLOCK;
                }
                /*
                 * No next block, just default.
@@ -3170,147 +3135,91 @@ xfs_bmap_adjacent(
 #undef ISVALID
 }
 
-static int
+int
 xfs_bmap_longest_free_extent(
+       struct xfs_perag        *pag,
        struct xfs_trans        *tp,
-       xfs_agnumber_t          ag,
-       xfs_extlen_t            *blen,
-       int                     *notinit)
+       xfs_extlen_t            *blen)
 {
-       struct xfs_mount        *mp = tp->t_mountp;
-       struct xfs_perag        *pag;
        xfs_extlen_t            longest;
        int                     error = 0;
 
-       pag = xfs_perag_get(mp, ag);
-       if (!pag->pagf_init) {
+       if (!xfs_perag_initialised_agf(pag)) {
                error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_TRYLOCK,
                                NULL);
-               if (error) {
-                       /* Couldn't lock the AGF, so skip this AG. */
-                       if (error == -EAGAIN) {
-                               *notinit = 1;
-                               error = 0;
-                       }
-                       goto out;
-               }
+               if (error)
+                       return error;
        }
 
        longest = xfs_alloc_longest_free_extent(pag,
-                               xfs_alloc_min_freelist(mp, pag),
+                               xfs_alloc_min_freelist(pag->pag_mount, pag),
                                xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
        if (*blen < longest)
                *blen = longest;
 
-out:
-       xfs_perag_put(pag);
-       return error;
+       return 0;
 }
 
-static void
+static xfs_extlen_t
 xfs_bmap_select_minlen(
        struct xfs_bmalloca     *ap,
        struct xfs_alloc_arg    *args,
-       xfs_extlen_t            *blen,
-       int                     notinit)
+       xfs_extlen_t            blen)
 {
-       if (notinit || *blen < ap->minlen) {
-               /*
-                * Since we did a BUF_TRYLOCK above, it is possible that
-                * there is space for this request.
-                */
-               args->minlen = ap->minlen;
-       } else if (*blen < args->maxlen) {
-               /*
-                * If the best seen length is less than the request length,
-                * use the best as the minimum.
-                */
-               args->minlen = *blen;
-       } else {
-               /*
-                * Otherwise we've seen an extent as big as maxlen, use that
-                * as the minimum.
-                */
-               args->minlen = args->maxlen;
-       }
-}
-
-STATIC int
-xfs_bmap_btalloc_nullfb(
-       struct xfs_bmalloca     *ap,
-       struct xfs_alloc_arg    *args,
-       xfs_extlen_t            *blen)
-{
-       struct xfs_mount        *mp = ap->ip->i_mount;
-       xfs_agnumber_t          ag, startag;
-       int                     notinit = 0;
-       int                     error;
 
-       args->type = XFS_ALLOCTYPE_START_BNO;
-       args->total = ap->total;
-
-       startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
-       if (startag == NULLAGNUMBER)
-               startag = ag = 0;
-
-       while (*blen < args->maxlen) {
-               error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
-                                                    &notinit);
-               if (error)
-                       return error;
-
-               if (++ag == mp->m_sb.sb_agcount)
-                       ag = 0;
-               if (ag == startag)
-                       break;
-       }
+       /*
+        * Since we used XFS_ALLOC_FLAG_TRYLOCK in _longest_free_extent(), it is
+        * possible that there is enough contiguous free space for this request.
+        */
+       if (blen < ap->minlen)
+               return ap->minlen;
 
-       xfs_bmap_select_minlen(ap, args, blen, notinit);
-       return 0;
+       /*
+        * If the best seen length is less than the request length,
+        * use the best as the minimum, otherwise we've got the maxlen we
+        * were asked for.
+        */
+       if (blen < args->maxlen)
+               return blen;
+       return args->maxlen;
 }
 
-STATIC int
-xfs_bmap_btalloc_filestreams(
+static int
+xfs_bmap_btalloc_select_lengths(
        struct xfs_bmalloca     *ap,
        struct xfs_alloc_arg    *args,
        xfs_extlen_t            *blen)
 {
-       struct xfs_mount        *mp = ap->ip->i_mount;
-       xfs_agnumber_t          ag;
-       int                     notinit = 0;
-       int                     error;
-
-       args->type = XFS_ALLOCTYPE_NEAR_BNO;
-       args->total = ap->total;
-
-       ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
-       if (ag == NULLAGNUMBER)
-               ag = 0;
-
-       error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
-       if (error)
-               return error;
+       struct xfs_mount        *mp = args->mp;
+       struct xfs_perag        *pag;
+       xfs_agnumber_t          agno, startag;
+       int                     error = 0;
 
-       if (*blen < args->maxlen) {
-               error = xfs_filestream_new_ag(ap, &ag);
-               if (error)
-                       return error;
+       if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
+               args->total = ap->minlen;
+               args->minlen = ap->minlen;
+               return 0;
+       }
 
-               error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
-                                                    &notinit);
-               if (error)
-                       return error;
+       args->total = ap->total;
+       startag = XFS_FSB_TO_AGNO(mp, ap->blkno);
+       if (startag == NULLAGNUMBER)
+               startag = 0;
 
+       *blen = 0;
+       for_each_perag_wrap(mp, startag, agno, pag) {
+               error = xfs_bmap_longest_free_extent(pag, args->tp, blen);
+               if (error && error != -EAGAIN)
+                       break;
+               error = 0;
+               if (*blen >= args->maxlen)
+                       break;
        }
+       if (pag)
+               xfs_perag_rele(pag);
 
-       xfs_bmap_select_minlen(ap, args, blen, notinit);
-
-       /*
-        * Set the failure fallback case to look in the selected AG as stream
-        * may have moved.
-        */
-       ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
-       return 0;
+       args->minlen = xfs_bmap_select_minlen(ap, args, *blen);
+       return error;
 }
 
 /* Update all inode and quota accounting for the allocation we just did. */
@@ -3413,21 +3322,7 @@ xfs_bmap_process_allocated_extent(
        xfs_fileoff_t           orig_offset,
        xfs_extlen_t            orig_length)
 {
-       int                     nullfb;
-
-       nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
-
-       /*
-        * check the allocation happened at the same or higher AG than
-        * the first block that was allocated.
-        */
-       ASSERT(nullfb ||
-               XFS_FSB_TO_AGNO(args->mp, ap->tp->t_firstblock) <=
-               XFS_FSB_TO_AGNO(args->mp, args->fsbno));
-
        ap->blkno = args->fsbno;
-       if (nullfb)
-               ap->tp->t_firstblock = args->fsbno;
        ap->length = args->len;
        /*
         * If the extent size hint is active, we tried to round the
@@ -3474,23 +3369,17 @@ xfs_bmap_exact_minlen_extent_alloc(
 
        xfs_bmap_compute_alignments(ap, &args);
 
-       if (ap->tp->t_firstblock == NULLFSBLOCK) {
-               /*
-                * Unlike the longest extent available in an AG, we don't track
-                * the length of an AG's shortest extent.
-                * XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a debug only knob and
-                * hence we can afford to start traversing from the 0th AG since
-                * we need not be concerned about a drop in performance in
-                * "debug only" code paths.
-                */
-               ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
-       } else {
-               ap->blkno = ap->tp->t_firstblock;
-       }
+       /*
+        * Unlike the longest extent available in an AG, we don't track
+        * the length of an AG's shortest extent.
+        * XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a debug only knob and
+        * hence we can afford to start traversing from the 0th AG since
+        * we need not be concerned about a drop in performance in
+        * "debug only" code paths.
+        */
+       ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
 
-       args.fsbno = ap->blkno;
        args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
-       args.type = XFS_ALLOCTYPE_FIRST_AG;
        args.minlen = args.maxlen = ap->minlen;
        args.total = ap->total;
 
@@ -3502,7 +3391,7 @@ xfs_bmap_exact_minlen_extent_alloc(
        args.resv = XFS_AG_RESV_NONE;
        args.datatype = ap->datatype;
 
-       error = xfs_alloc_vextent(&args);
+       error = xfs_alloc_vextent_first_ag(&args, ap->blkno);
        if (error)
                return error;
 
@@ -3522,193 +3411,270 @@ xfs_bmap_exact_minlen_extent_alloc(
 
 #endif
 
-STATIC int
-xfs_bmap_btalloc(
-       struct xfs_bmalloca     *ap)
+/*
+ * If we are not low on available data blocks and we are allocating at
+ * EOF, optimise allocation for contiguous file extension and/or stripe
+ * alignment of the new extent.
+ *
+ * NOTE: ap->aeof is only set if the allocation length is >= the
+ * stripe unit and the allocation offset is at the end of file.
+ */
+static int
+xfs_bmap_btalloc_at_eof(
+       struct xfs_bmalloca     *ap,
+       struct xfs_alloc_arg    *args,
+       xfs_extlen_t            blen,
+       int                     stripe_align,
+       bool                    ag_only)
 {
-       struct xfs_mount        *mp = ap->ip->i_mount;
-       struct xfs_alloc_arg    args = { .tp = ap->tp, .mp = mp };
-       xfs_alloctype_t         atype = 0;
-       xfs_agnumber_t          fb_agno;        /* ag number of ap->firstblock */
-       xfs_agnumber_t          ag;
-       xfs_fileoff_t           orig_offset;
-       xfs_extlen_t            orig_length;
-       xfs_extlen_t            blen;
-       xfs_extlen_t            nextminlen = 0;
-       int                     nullfb; /* true if ap->firstblock isn't set */
-       int                     isaligned;
-       int                     tryagain;
+       struct xfs_mount        *mp = args->mp;
+       struct xfs_perag        *caller_pag = args->pag;
        int                     error;
-       int                     stripe_align;
-
-       ASSERT(ap->length);
-       orig_offset = ap->offset;
-       orig_length = ap->length;
-
-       stripe_align = xfs_bmap_compute_alignments(ap, &args);
-
-       nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
-       fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
-                                                       ap->tp->t_firstblock);
-       if (nullfb) {
-               if ((ap->datatype & XFS_ALLOC_USERDATA) &&
-                   xfs_inode_is_filestream(ap->ip)) {
-                       ag = xfs_filestream_lookup_ag(ap->ip);
-                       ag = (ag != NULLAGNUMBER) ? ag : 0;
-                       ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
-               } else {
-                       ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
-               }
-       } else
-               ap->blkno = ap->tp->t_firstblock;
 
-       xfs_bmap_adjacent(ap);
-
-       /*
-        * If allowed, use ap->blkno; otherwise must use firstblock since
-        * it's in the right allocation group.
-        */
-       if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
-               ;
-       else
-               ap->blkno = ap->tp->t_firstblock;
        /*
-        * Normal allocation, done through xfs_alloc_vextent.
+        * If there are already extents in the file, try an exact EOF block
+        * allocation to extend the file as a contiguous extent. If that fails,
+        * or it's the first allocation in a file, just try for a stripe aligned
+        * allocation.
         */
-       tryagain = isaligned = 0;
-       args.fsbno = ap->blkno;
-       args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
+       if (ap->offset) {
+               xfs_extlen_t    nextminlen = 0;
 
-       /* Trim the allocation back to the maximum an AG can fit. */
-       args.maxlen = min(ap->length, mp->m_ag_max_usable);
-       blen = 0;
-       if (nullfb) {
                /*
-                * Search for an allocation group with a single extent large
-                * enough for the request.  If one isn't found, then adjust
-                * the minimum allocation size to the largest space found.
+                * Compute the minlen+alignment for the next case.  Set slop so
+                * that the value of minlen+alignment+slop doesn't go up between
+                * the calls.
                 */
-               if ((ap->datatype & XFS_ALLOC_USERDATA) &&
-                   xfs_inode_is_filestream(ap->ip))
-                       error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
+               args->alignment = 1;
+               if (blen > stripe_align && blen <= args->maxlen)
+                       nextminlen = blen - stripe_align;
+               else
+                       nextminlen = args->minlen;
+               if (nextminlen + stripe_align > args->minlen + 1)
+                       args->minalignslop = nextminlen + stripe_align -
+                                       args->minlen - 1;
                else
-                       error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
+                       args->minalignslop = 0;
+
+               if (!caller_pag)
+                       args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
+               error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
+               if (!caller_pag)
+                       xfs_perag_put(args->pag);
                if (error)
                        return error;
-       } else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
-               if (xfs_inode_is_filestream(ap->ip))
-                       args.type = XFS_ALLOCTYPE_FIRST_AG;
-               else
-                       args.type = XFS_ALLOCTYPE_START_BNO;
-               args.total = args.minlen = ap->minlen;
+
+               if (args->fsbno != NULLFSBLOCK)
+                       return 0;
+               /*
+                * Exact allocation failed. Reset to try an aligned allocation
+                * according to the original allocation specification.
+                */
+               args->pag = NULL;
+               args->alignment = stripe_align;
+               args->minlen = nextminlen;
+               args->minalignslop = 0;
        } else {
-               args.type = XFS_ALLOCTYPE_NEAR_BNO;
-               args.total = ap->total;
-               args.minlen = ap->minlen;
+               /*
+                * Adjust minlen to try and preserve alignment if we
+                * can't guarantee an aligned maxlen extent.
+                */
+               args->alignment = stripe_align;
+               if (blen > args->alignment &&
+                   blen <= args->maxlen + args->alignment)
+                       args->minlen = blen - args->alignment;
+               args->minalignslop = 0;
        }
 
-       /*
-        * If we are not low on available data blocks, and the underlying
-        * logical volume manager is a stripe, and the file offset is zero then
-        * try to allocate data blocks on stripe unit boundary. NOTE: ap->aeof
-        * is only set if the allocation length is >= the stripe unit and the
-        * allocation offset is at the end of file.
-        */
-       if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
-               if (!ap->offset) {
-                       args.alignment = stripe_align;
-                       atype = args.type;
-                       isaligned = 1;
-                       /*
-                        * Adjust minlen to try and preserve alignment if we
-                        * can't guarantee an aligned maxlen extent.
-                        */
-                       if (blen > args.alignment &&
-                           blen <= args.maxlen + args.alignment)
-                               args.minlen = blen - args.alignment;
-                       args.minalignslop = 0;
-               } else {
-                       /*
-                        * First try an exact bno allocation.
-                        * If it fails then do a near or start bno
-                        * allocation with alignment turned on.
-                        */
-                       atype = args.type;
-                       tryagain = 1;
-                       args.type = XFS_ALLOCTYPE_THIS_BNO;
-                       args.alignment = 1;
-                       /*
-                        * Compute the minlen+alignment for the
-                        * next case.  Set slop so that the value
-                        * of minlen+alignment+slop doesn't go up
-                        * between the calls.
-                        */
-                       if (blen > stripe_align && blen <= args.maxlen)
-                               nextminlen = blen - stripe_align;
-                       else
-                               nextminlen = args.minlen;
-                       if (nextminlen + stripe_align > args.minlen + 1)
-                               args.minalignslop =
-                                       nextminlen + stripe_align -
-                                       args.minlen - 1;
-                       else
-                               args.minalignslop = 0;
-               }
+       if (ag_only) {
+               error = xfs_alloc_vextent_near_bno(args, ap->blkno);
        } else {
-               args.alignment = 1;
-               args.minalignslop = 0;
+               args->pag = NULL;
+               error = xfs_alloc_vextent_start_ag(args, ap->blkno);
+               ASSERT(args->pag == NULL);
+               args->pag = caller_pag;
        }
-       args.minleft = ap->minleft;
-       args.wasdel = ap->wasdel;
-       args.resv = XFS_AG_RESV_NONE;
-       args.datatype = ap->datatype;
-
-       error = xfs_alloc_vextent(&args);
        if (error)
                return error;
 
-       if (tryagain && args.fsbno == NULLFSBLOCK) {
-               /*
-                * Exact allocation failed. Now try with alignment
-                * turned on.
-                */
-               args.type = atype;
-               args.fsbno = ap->blkno;
-               args.alignment = stripe_align;
-               args.minlen = nextminlen;
-               args.minalignslop = 0;
-               isaligned = 1;
-               if ((error = xfs_alloc_vextent(&args)))
-                       return error;
-       }
-       if (isaligned && args.fsbno == NULLFSBLOCK) {
-               /*
-                * allocation failed, so turn off alignment and
-                * try again.
-                */
-               args.type = atype;
-               args.fsbno = ap->blkno;
-               args.alignment = 0;
-               if ((error = xfs_alloc_vextent(&args)))
+       if (args->fsbno != NULLFSBLOCK)
+               return 0;
+
+       /*
+        * Allocation failed, so return the allocation args to their original
+        * non-aligned state so the caller can proceed on allocation failure as
+        * if this function was never called.
+        */
+       args->fsbno = ap->blkno;
+       args->alignment = 1;
+       return 0;
+}
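
The nextminlen/minalignslop arithmetic preserved above is easiest to follow with concrete numbers. A worked example under assumed values (stripe_align = 8, maxlen = 64, minlen = 16, blen = 40), with plain unsigned ints standing in for xfs_extlen_t:

    #include <stdio.h>

    int main(void)
    {
            unsigned int stripe_align = 8, maxlen = 64, minlen = 16;
            unsigned int blen = 40;         /* longest free extent seen */
            unsigned int nextminlen, minalignslop;

            /* minlen for the aligned retry: keep alignment achievable */
            if (blen > stripe_align && blen <= maxlen)
                    nextminlen = blen - stripe_align;       /* 32 */
            else
                    nextminlen = minlen;

            /*
             * Slop so that minlen + alignment + slop does not grow
             * between the exact attempt (alignment 1) and the retry.
             */
            if (nextminlen + stripe_align > minlen + 1)
                    minalignslop = nextminlen + stripe_align - minlen - 1;
            else
                    minalignslop = 0;

            printf("nextminlen=%u minalignslop=%u\n",
                   nextminlen, minalignslop);      /* 32 and 23 */
            return 0;
    }

The slop reserves enough headroom during the exact-bno attempt that the later stripe-aligned retry can never require more space than the first attempt accounted for.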
+
+/*
+ * We have failed multiple allocation attempts so we are now in a low space
+ * allocation situation. Try a locality-first, full-filesystem, minimum-length
+ * allocation whilst still maintaining the necessary total block reservation
+ * requirements.
+ *
+ * If that fails, we are now critically low on space, so perform a last resort
+ * allocation attempt: no reserve, no locality, blocking, minimum length, full
+ * filesystem free space scan. We also indicate to future allocations in this
+ * transaction that we are critically low on space so they don't waste time on
+ * allocation modes that are unlikely to succeed.
+ */
+int
+xfs_bmap_btalloc_low_space(
+       struct xfs_bmalloca     *ap,
+       struct xfs_alloc_arg    *args)
+{
+       int                     error;
+
+       if (args->minlen > ap->minlen) {
+               args->minlen = ap->minlen;
+               error = xfs_alloc_vextent_start_ag(args, ap->blkno);
+               if (error || args->fsbno != NULLFSBLOCK)
                        return error;
        }
-       if (args.fsbno == NULLFSBLOCK && nullfb &&
-           args.minlen > ap->minlen) {
-               args.minlen = ap->minlen;
-               args.type = XFS_ALLOCTYPE_START_BNO;
-               args.fsbno = ap->blkno;
-               if ((error = xfs_alloc_vextent(&args)))
-                       return error;
+
+       /* Last-ditch attempt before failure is declared. */
+       args->total = ap->minlen;
+       error = xfs_alloc_vextent_first_ag(args, 0);
+       if (error)
+               return error;
+       ap->tp->t_flags |= XFS_TRANS_LOWMODE;
+       return 0;
+}
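
A rough sketch of the two-stage fallback this helper implements, with a stub allocator standing in for the xfs_alloc_vextent_*() calls (the failure behaviour is invented for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    /* Pretend free space remains only once reservations are ignored. */
    static bool try_alloc(unsigned int minlen, bool no_reserve)
    {
            return no_reserve && minlen <= 1;
    }

    int main(void)
    {
            unsigned int args_minlen = 8, ap_minlen = 1;
            bool lowmode = false;   /* stands in for XFS_TRANS_LOWMODE */

            /* Stage 1: locality-first retry at the caller's minimum. */
            if (args_minlen > ap_minlen && try_alloc(ap_minlen, false)) {
                    printf("stage 1 succeeded\n");
                    return 0;
            }

            /* Stage 2: last-ditch no-reserve scan; flag low space mode. */
            if (try_alloc(ap_minlen, true))
                    printf("stage 2 succeeded\n");
            lowmode = true;
            printf("lowmode=%d\n", lowmode);
            return 0;
    }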
+
+static int
+xfs_bmap_btalloc_filestreams(
+       struct xfs_bmalloca     *ap,
+       struct xfs_alloc_arg    *args,
+       int                     stripe_align)
+{
+       xfs_extlen_t            blen = 0;
+       int                     error = 0;
+
+       error = xfs_filestream_select_ag(ap, args, &blen);
+       if (error)
+               return error;
+       ASSERT(args->pag);
+
+       /*
+        * If we are in low space mode, then optimal allocation will fail, so
+        * prepare for minimal allocation and jump to the low space algorithm
+        * immediately.
+        */
+       if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
+               args->minlen = ap->minlen;
+               ASSERT(args->fsbno == NULLFSBLOCK);
+               goto out_low_space;
        }
-       if (args.fsbno == NULLFSBLOCK && nullfb) {
-               args.fsbno = 0;
-               args.type = XFS_ALLOCTYPE_FIRST_AG;
-               args.total = ap->minlen;
-               if ((error = xfs_alloc_vextent(&args)))
+
+       args->minlen = xfs_bmap_select_minlen(ap, args, blen);
+       if (ap->aeof)
+               error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
+                               true);
+
+       if (!error && args->fsbno == NULLFSBLOCK)
+               error = xfs_alloc_vextent_near_bno(args, ap->blkno);
+
+out_low_space:
+       /*
+        * We are now done with the perag reference for the filestreams
+        * association provided by xfs_filestream_select_ag(). Release it now as
+        * we've either succeeded, had a fatal error, or we are out of space and
+        * need to do a full filesystem scan for free space which will take its
+        * own references.
+        */
+       xfs_perag_rele(args->pag);
+       args->pag = NULL;
+       if (error || args->fsbno != NULLFSBLOCK)
+               return error;
+
+       return xfs_bmap_btalloc_low_space(ap, args);
+}
+
+static int
+xfs_bmap_btalloc_best_length(
+       struct xfs_bmalloca     *ap,
+       struct xfs_alloc_arg    *args,
+       int                     stripe_align)
+{
+       xfs_extlen_t            blen = 0;
+       int                     error;
+
+       ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino);
+       xfs_bmap_adjacent(ap);
+
+       /*
+        * Search for an allocation group with a single extent large enough for
+        * the request.  If one isn't found, then adjust the minimum allocation
+        * size to the largest space found.
+        */
+       error = xfs_bmap_btalloc_select_lengths(ap, args, &blen);
+       if (error)
+               return error;
+
+       /*
+        * Don't attempt optimal EOF allocation if previous allocations barely
+        * succeeded due to being near ENOSPC. It is highly unlikely we'll get
+        * optimal or even aligned allocations in this case, so don't waste time
+        * trying.
+        */
+       if (ap->aeof && !(ap->tp->t_flags & XFS_TRANS_LOWMODE)) {
+               error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
+                               false);
+               if (error || args->fsbno != NULLFSBLOCK)
                        return error;
-               ap->tp->t_flags |= XFS_TRANS_LOWMODE;
        }
 
+       error = xfs_alloc_vextent_start_ag(args, ap->blkno);
+       if (error || args->fsbno != NULLFSBLOCK)
+               return error;
+
+       return xfs_bmap_btalloc_low_space(ap, args);
+}
+
+static int
+xfs_bmap_btalloc(
+       struct xfs_bmalloca     *ap)
+{
+       struct xfs_mount        *mp = ap->ip->i_mount;
+       struct xfs_alloc_arg    args = {
+               .tp             = ap->tp,
+               .mp             = mp,
+               .fsbno          = NULLFSBLOCK,
+               .oinfo          = XFS_RMAP_OINFO_SKIP_UPDATE,
+               .minleft        = ap->minleft,
+               .wasdel         = ap->wasdel,
+               .resv           = XFS_AG_RESV_NONE,
+               .datatype       = ap->datatype,
+               .alignment      = 1,
+               .minalignslop   = 0,
+       };
+       xfs_fileoff_t           orig_offset;
+       xfs_extlen_t            orig_length;
+       int                     error;
+       int                     stripe_align;
+
+       ASSERT(ap->length);
+       orig_offset = ap->offset;
+       orig_length = ap->length;
+
+       stripe_align = xfs_bmap_compute_alignments(ap, &args);
+
+       /* Trim the allocation back to the maximum an AG can fit. */
+       args.maxlen = min(ap->length, mp->m_ag_max_usable);
+
+       if ((ap->datatype & XFS_ALLOC_USERDATA) &&
+           xfs_inode_is_filestream(ap->ip))
+               error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align);
+       else
+               error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align);
+       if (error)
+               return error;
+
        if (args.fsbno != NULLFSBLOCK) {
                xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
                        orig_length);
@@ -4256,7 +4222,7 @@ xfs_bmapi_convert_unwritten(
        return 0;
 }
 
-static inline xfs_extlen_t
+xfs_extlen_t
 xfs_bmapi_minleft(
        struct xfs_trans        *tp,
        struct xfs_inode        *ip,
@@ -4264,7 +4230,7 @@ xfs_bmapi_minleft(
 {
        struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, fork);
 
-       if (tp && tp->t_firstblock != NULLFSBLOCK)
+       if (tp && tp->t_highest_agno != NULLAGNUMBER)
                return 0;
        if (ifp->if_format != XFS_DINODE_FMT_BTREE)
                return 1;
@@ -6151,7 +6117,7 @@ xfs_bmap_finish_one(
        struct xfs_bmbt_irec            *bmap = &bi->bi_bmap;
        int                             error = 0;
 
-       ASSERT(tp->t_firstblock == NULLFSBLOCK);
+       ASSERT(tp->t_highest_agno == NULLAGNUMBER);
 
        trace_xfs_bmap_deferred(tp->t_mountp,
                        XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
index 01c2df35c3e35a5bfac25b164b39b45b317ea544..dd08361ca5a6998981991ec53d85926395bb598a 100644 (file)
@@ -12,6 +12,7 @@ struct xfs_ifork;
 struct xfs_inode;
 struct xfs_mount;
 struct xfs_trans;
+struct xfs_alloc_arg;
 
 /*
  * Argument structure for xfs_bmap_alloc.
@@ -168,6 +169,8 @@ static inline bool xfs_bmap_is_written_extent(struct xfs_bmbt_irec *irec)
 #define xfs_valid_startblock(ip, startblock) \
        ((startblock) != 0 || XFS_IS_REALTIME_INODE(ip))
 
+int    xfs_bmap_longest_free_extent(struct xfs_perag *pag,
+               struct xfs_trans *tp, xfs_extlen_t *blen);
 void   xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
                xfs_filblks_t len);
 unsigned int xfs_bmap_compute_attr_offset(struct xfs_mount *mp);
@@ -220,6 +223,10 @@ int        xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp,
                struct xfs_inode *ip, int whichfork,
                struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp,
                struct xfs_bmbt_irec *new, int *logflagsp);
+xfs_extlen_t xfs_bmapi_minleft(struct xfs_trans *tp, struct xfs_inode *ip,
+               int fork);
+int    xfs_bmap_btalloc_low_space(struct xfs_bmalloca *ap,
+               struct xfs_alloc_arg *args);
 
 enum xfs_bmap_intent_type {
        XFS_BMAP_MAP = 1,
index cfa052d40105dc1bd493469531dc75e3a9d58dff..b8ad95050c9bd1b6a5e566c30756fbd691039d27 100644 (file)
@@ -21,6 +21,7 @@
 #include "xfs_quota.h"
 #include "xfs_trace.h"
 #include "xfs_rmap.h"
+#include "xfs_ag.h"
 
 static struct kmem_cache       *xfs_bmbt_cur_cache;
 
@@ -184,11 +185,11 @@ xfs_bmbt_update_cursor(
        struct xfs_btree_cur    *src,
        struct xfs_btree_cur    *dst)
 {
-       ASSERT((dst->bc_tp->t_firstblock != NULLFSBLOCK) ||
+       ASSERT((dst->bc_tp->t_highest_agno != NULLAGNUMBER) ||
               (dst->bc_ino.ip->i_diflags & XFS_DIFLAG_REALTIME));
 
        dst->bc_ino.allocated += src->bc_ino.allocated;
-       dst->bc_tp->t_firstblock = src->bc_tp->t_firstblock;
+       dst->bc_tp->t_highest_agno = src->bc_tp->t_highest_agno;
 
        src->bc_ino.allocated = 0;
 }
@@ -200,46 +201,32 @@ xfs_bmbt_alloc_block(
        union xfs_btree_ptr             *new,
        int                             *stat)
 {
-       xfs_alloc_arg_t         args;           /* block allocation args */
-       int                     error;          /* error return value */
+       struct xfs_alloc_arg    args;
+       int                     error;
 
        memset(&args, 0, sizeof(args));
        args.tp = cur->bc_tp;
        args.mp = cur->bc_mp;
-       args.fsbno = cur->bc_tp->t_firstblock;
        xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_ino.ip->i_ino,
                        cur->bc_ino.whichfork);
-
-       if (args.fsbno == NULLFSBLOCK) {
-               args.fsbno = be64_to_cpu(start->l);
-               args.type = XFS_ALLOCTYPE_START_BNO;
-               /*
-                * Make sure there is sufficient room left in the AG to
-                * complete a full tree split for an extent insert.  If
-                * we are converting the middle part of an extent then
-                * we may need space for two tree splits.
-                *
-                * We are relying on the caller to make the correct block
-                * reservation for this operation to succeed.  If the
-                * reservation amount is insufficient then we may fail a
-                * block allocation here and corrupt the filesystem.
-                */
-               args.minleft = args.tp->t_blk_res;
-       } else if (cur->bc_tp->t_flags & XFS_TRANS_LOWMODE) {
-               args.type = XFS_ALLOCTYPE_START_BNO;
-       } else {
-               args.type = XFS_ALLOCTYPE_NEAR_BNO;
-       }
-
        args.minlen = args.maxlen = args.prod = 1;
        args.wasdel = cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL;
-       if (!args.wasdel && args.tp->t_blk_res == 0) {
-               error = -ENOSPC;
-               goto error0;
-       }
-       error = xfs_alloc_vextent(&args);
+       if (!args.wasdel && args.tp->t_blk_res == 0)
+               return -ENOSPC;
+
+       /*
+        * If we are coming here from something like unwritten extent
+        * conversion, no data extent allocation has been done yet, so
+        * we have to ensure that we attempt to locate the entire set of bmbt
+        * allocations in the same AG, as xfs_bmapi_write() would have reserved.
+        */
+       if (cur->bc_tp->t_highest_agno == NULLAGNUMBER)
+               args.minleft = xfs_bmapi_minleft(cur->bc_tp, cur->bc_ino.ip,
+                                       cur->bc_ino.whichfork);
+
+       error = xfs_alloc_vextent_start_ag(&args, be64_to_cpu(start->l));
        if (error)
-               goto error0;
+               return error;
 
        if (args.fsbno == NULLFSBLOCK && args.minleft) {
                /*
@@ -247,11 +234,10 @@ xfs_bmbt_alloc_block(
                 * a full btree split.  Try again and if
                 * successful activate the lowspace algorithm.
                 */
-               args.fsbno = 0;
-               args.type = XFS_ALLOCTYPE_FIRST_AG;
-               error = xfs_alloc_vextent(&args);
+               args.minleft = 0;
+               error = xfs_alloc_vextent_start_ag(&args, 0);
                if (error)
-                       goto error0;
+                       return error;
                cur->bc_tp->t_flags |= XFS_TRANS_LOWMODE;
        }
        if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
@@ -260,7 +246,6 @@ xfs_bmbt_alloc_block(
        }
 
        ASSERT(args.len == 1);
-       cur->bc_tp->t_firstblock = args.fsbno;
        cur->bc_ino.allocated++;
        cur->bc_ino.ip->i_nblocks++;
        xfs_trans_log_inode(args.tp, cur->bc_ino.ip, XFS_ILOG_CORE);
@@ -271,9 +256,6 @@ xfs_bmbt_alloc_block(
 
        *stat = 1;
        return 0;
-
- error0:
-       return error;
 }
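
The t_highest_agno test added above decides whether to reserve minleft room for a future tree split. A simplified stand-alone illustration of that sentinel pattern (struct tx and the reservation value 4 are invented stand-ins, not kernel types):

    #include <stdio.h>

    #define NULLAGNUMBER ((unsigned int)-1)

    struct tx { unsigned int highest_agno; };

    /*
     * Only reserve split headroom when no earlier allocation in the
     * transaction has committed us to an AG.
     */
    static unsigned int minleft_for(const struct tx *tp)
    {
            if (tp->highest_agno == NULLAGNUMBER)
                    return 4;       /* stand-in for xfs_bmapi_minleft() */
            return 0;
    }

    int main(void)
    {
            struct tx fresh = { NULLAGNUMBER };
            struct tx pinned = { 3 };

            printf("fresh tx minleft=%u\n", minleft_for(&fresh));   /* 4 */
            printf("pinned tx minleft=%u\n", minleft_for(&pinned)); /* 0 */
            return 0;
    }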
 
 STATIC int
index da8c769887fddccf4088493ab04e28084445bdf8..c4649cc624e1bc6b9a511a29c2eb5345a6b88fa0 100644 (file)
@@ -2943,7 +2943,7 @@ xfs_btree_split(
        DECLARE_COMPLETION_ONSTACK(done);
 
        if (cur->bc_btnum != XFS_BTNUM_BMAP ||
-           cur->bc_tp->t_firstblock == NULLFSBLOCK)
+           cur->bc_tp->t_highest_agno == NULLAGNUMBER)
                return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
 
        args.cur = cur;
index 5118dedf9267b2dbc71308eaeeae3c62d5cdeafc..7ee292aecbeb008216def0e30b8adb20bd4871c4 100644 (file)
@@ -169,10 +169,9 @@ xfs_inobt_insert_rec(
  */
 STATIC int
 xfs_inobt_insert(
-       struct xfs_mount        *mp,
+       struct xfs_perag        *pag,
        struct xfs_trans        *tp,
        struct xfs_buf          *agbp,
-       struct xfs_perag        *pag,
        xfs_agino_t             newino,
        xfs_agino_t             newlen,
        xfs_btnum_t             btnum)
@@ -182,7 +181,7 @@ xfs_inobt_insert(
        int                     i;
        int                     error;
 
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, btnum);
+       cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum);
 
        for (thisino = newino;
             thisino < newino + newlen;
@@ -514,20 +513,20 @@ __xfs_inobt_rec_merge(
  */
 STATIC int
 xfs_inobt_insert_sprec(
-       struct xfs_mount                *mp,
+       struct xfs_perag                *pag,
        struct xfs_trans                *tp,
        struct xfs_buf                  *agbp,
-       struct xfs_perag                *pag,
        int                             btnum,
        struct xfs_inobt_rec_incore     *nrec,  /* in/out: new/merged rec. */
        bool                            merge)  /* merge or replace */
 {
+       struct xfs_mount                *mp = pag->pag_mount;
        struct xfs_btree_cur            *cur;
        int                             error;
        int                             i;
        struct xfs_inobt_rec_incore     rec;
 
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, btnum);
+       cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum);
 
        /* the new record is pre-aligned so we know where to look */
        error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
@@ -609,9 +608,9 @@ error:
  */
 STATIC int
 xfs_ialloc_ag_alloc(
+       struct xfs_perag        *pag,
        struct xfs_trans        *tp,
-       struct xfs_buf          *agbp,
-       struct xfs_perag        *pag)
+       struct xfs_buf          *agbp)
 {
        struct xfs_agi          *agi;
        struct xfs_alloc_arg    args;
@@ -631,6 +630,7 @@ xfs_ialloc_ag_alloc(
        args.mp = tp->t_mountp;
        args.fsbno = NULLFSBLOCK;
        args.oinfo = XFS_RMAP_OINFO_INODES;
+       args.pag = pag;
 
 #ifdef DEBUG
        /* randomly do sparse inode allocations */
@@ -662,8 +662,6 @@ xfs_ialloc_ag_alloc(
                goto sparse_alloc;
        if (likely(newino != NULLAGINO &&
                  (args.agbno < be32_to_cpu(agi->agi_length)))) {
-               args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno);
-               args.type = XFS_ALLOCTYPE_THIS_BNO;
                args.prod = 1;
 
                /*
@@ -684,7 +682,10 @@ xfs_ialloc_ag_alloc(
 
                /* Allow space for the inode btree to split. */
                args.minleft = igeo->inobt_maxlevels;
-               if ((error = xfs_alloc_vextent(&args)))
+               error = xfs_alloc_vextent_exact_bno(&args,
+                               XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
+                                               args.agbno));
+               if (error)
                        return error;
 
                /*
@@ -716,23 +717,18 @@ xfs_ialloc_ag_alloc(
                        isaligned = 1;
                } else
                        args.alignment = igeo->cluster_align;
-               /*
-                * Need to figure out where to allocate the inode blocks.
-                * Ideally they should be spaced out through the a.g.
-                * For now, just allocate blocks up front.
-                */
-               args.agbno = be32_to_cpu(agi->agi_root);
-               args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno);
                /*
                 * Allocate a fixed-size extent of inodes.
                 */
-               args.type = XFS_ALLOCTYPE_NEAR_BNO;
                args.prod = 1;
                /*
                 * Allow space for the inode btree to split.
                 */
                args.minleft = igeo->inobt_maxlevels;
-               if ((error = xfs_alloc_vextent(&args)))
+               error = xfs_alloc_vextent_near_bno(&args,
+                               XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
+                                               be32_to_cpu(agi->agi_root)));
+               if (error)
                        return error;
        }
 
@@ -741,11 +737,11 @@ xfs_ialloc_ag_alloc(
         * alignment.
         */
        if (isaligned && args.fsbno == NULLFSBLOCK) {
-               args.type = XFS_ALLOCTYPE_NEAR_BNO;
-               args.agbno = be32_to_cpu(agi->agi_root);
-               args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno);
                args.alignment = igeo->cluster_align;
-               if ((error = xfs_alloc_vextent(&args)))
+               error = xfs_alloc_vextent_near_bno(&args,
+                               XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
+                                               be32_to_cpu(agi->agi_root)));
+               if (error)
                        return error;
        }
 
@@ -757,9 +753,6 @@ xfs_ialloc_ag_alloc(
            igeo->ialloc_min_blks < igeo->ialloc_blks &&
            args.fsbno == NULLFSBLOCK) {
 sparse_alloc:
-               args.type = XFS_ALLOCTYPE_NEAR_BNO;
-               args.agbno = be32_to_cpu(agi->agi_root);
-               args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno);
                args.alignment = args.mp->m_sb.sb_spino_align;
                args.prod = 1;
 
@@ -781,7 +774,9 @@ sparse_alloc:
                                            args.mp->m_sb.sb_inoalignmt) -
                                 igeo->ialloc_blks;
 
-               error = xfs_alloc_vextent(&args);
+               error = xfs_alloc_vextent_near_bno(&args,
+                               XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
+                                               be32_to_cpu(agi->agi_root)));
                if (error)
                        return error;
 
@@ -831,7 +826,7 @@ sparse_alloc:
                 * if necessary. If a merge does occur, rec is updated to the
                 * merged record.
                 */
-               error = xfs_inobt_insert_sprec(args.mp, tp, agbp, pag,
+               error = xfs_inobt_insert_sprec(pag, tp, agbp,
                                XFS_BTNUM_INO, &rec, true);
                if (error == -EFSCORRUPTED) {
                        xfs_alert(args.mp,
@@ -856,20 +851,20 @@ sparse_alloc:
                 * existing record with this one.
                 */
                if (xfs_has_finobt(args.mp)) {
-                       error = xfs_inobt_insert_sprec(args.mp, tp, agbp, pag,
+                       error = xfs_inobt_insert_sprec(pag, tp, agbp,
                                       XFS_BTNUM_FINO, &rec, false);
                        if (error)
                                return error;
                }
        } else {
                /* full chunk - insert new records to both btrees */
-               error = xfs_inobt_insert(args.mp, tp, agbp, pag, newino, newlen,
+               error = xfs_inobt_insert(pag, tp, agbp, newino, newlen,
                                         XFS_BTNUM_INO);
                if (error)
                        return error;
 
                if (xfs_has_finobt(args.mp)) {
-                       error = xfs_inobt_insert(args.mp, tp, agbp, pag, newino,
+                       error = xfs_inobt_insert(pag, tp, agbp, newino,
                                                 newlen, XFS_BTNUM_FINO);
                        if (error)
                                return error;
@@ -981,9 +976,9 @@ xfs_inobt_first_free_inode(
  */
 STATIC int
 xfs_dialloc_ag_inobt(
+       struct xfs_perag        *pag,
        struct xfs_trans        *tp,
        struct xfs_buf          *agbp,
-       struct xfs_perag        *pag,
        xfs_ino_t               parent,
        xfs_ino_t               *inop)
 {
@@ -999,12 +994,12 @@ xfs_dialloc_ag_inobt(
        int                     i, j;
        int                     searchdistance = 10;
 
-       ASSERT(pag->pagi_init);
-       ASSERT(pag->pagi_inodeok);
+       ASSERT(xfs_perag_initialised_agi(pag));
+       ASSERT(xfs_perag_allows_inodes(pag));
        ASSERT(pag->pagi_freecount > 0);
 
  restart_pagno:
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO);
+       cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
        /*
         * If pagino is 0 (this is the root inode allocation) use newino.
         * This must work because we've just allocated some.
@@ -1429,9 +1424,9 @@ xfs_dialloc_ag_update_inobt(
  */
 static int
 xfs_dialloc_ag(
+       struct xfs_perag        *pag,
        struct xfs_trans        *tp,
        struct xfs_buf          *agbp,
-       struct xfs_perag        *pag,
        xfs_ino_t               parent,
        xfs_ino_t               *inop)
 {
@@ -1448,7 +1443,7 @@ xfs_dialloc_ag(
        int                             i;
 
        if (!xfs_has_finobt(mp))
-               return xfs_dialloc_ag_inobt(tp, agbp, pag, parent, inop);
+               return xfs_dialloc_ag_inobt(pag, tp, agbp, parent, inop);
 
        /*
         * If pagino is 0 (this is the root inode allocation) use newino.
@@ -1457,7 +1452,7 @@ xfs_dialloc_ag(
        if (!pagino)
                pagino = be32_to_cpu(agi->agi_newino);
 
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_FINO);
+       cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO);
 
        error = xfs_check_agi_freecount(cur);
        if (error)
@@ -1500,7 +1495,7 @@ xfs_dialloc_ag(
         * the original freecount. If all is well, make the equivalent update to
         * the inobt using the finobt record and offset information.
         */
-       icur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO);
+       icur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
 
        error = xfs_check_agi_freecount(icur);
        if (error)
@@ -1577,25 +1572,10 @@ xfs_dialloc_roll(
        return error;
 }
 
-static xfs_agnumber_t
-xfs_ialloc_next_ag(
-       xfs_mount_t     *mp)
-{
-       xfs_agnumber_t  agno;
-
-       spin_lock(&mp->m_agirotor_lock);
-       agno = mp->m_agirotor;
-       if (++mp->m_agirotor >= mp->m_maxagi)
-               mp->m_agirotor = 0;
-       spin_unlock(&mp->m_agirotor_lock);
-
-       return agno;
-}
-
 static bool
 xfs_dialloc_good_ag(
-       struct xfs_trans        *tp,
        struct xfs_perag        *pag,
+       struct xfs_trans        *tp,
        umode_t                 mode,
        int                     flags,
        bool                    ok_alloc)
@@ -1606,10 +1586,12 @@ xfs_dialloc_good_ag(
        int                     needspace;
        int                     error;
 
-       if (!pag->pagi_inodeok)
+       if (!pag)
+               return false;
+       if (!xfs_perag_allows_inodes(pag))
                return false;
 
-       if (!pag->pagi_init) {
+       if (!xfs_perag_initialised_agi(pag)) {
                error = xfs_ialloc_read_agi(pag, tp, NULL);
                if (error)
                        return false;
@@ -1620,7 +1602,7 @@ xfs_dialloc_good_ag(
        if (!ok_alloc)
                return false;
 
-       if (!pag->pagf_init) {
+       if (!xfs_perag_initialised_agf(pag)) {
                error = xfs_alloc_read_agf(pag, tp, flags, NULL);
                if (error)
                        return false;
@@ -1665,8 +1647,8 @@ xfs_dialloc_good_ag(
 
 static int
 xfs_dialloc_try_ag(
-       struct xfs_trans        **tpp,
        struct xfs_perag        *pag,
+       struct xfs_trans        **tpp,
        xfs_ino_t               parent,
        xfs_ino_t               *new_ino,
        bool                    ok_alloc)
@@ -1689,7 +1671,7 @@ xfs_dialloc_try_ag(
                        goto out_release;
                }
 
-               error = xfs_ialloc_ag_alloc(*tpp, agbp, pag);
+               error = xfs_ialloc_ag_alloc(pag, *tpp, agbp);
                if (error < 0)
                        goto out_release;
 
@@ -1705,7 +1687,7 @@ xfs_dialloc_try_ag(
        }
 
        /* Allocate an inode in the found AG */
-       error = xfs_dialloc_ag(*tpp, agbp, pag, parent, &ino);
+       error = xfs_dialloc_ag(pag, *tpp, agbp, parent, &ino);
        if (!error)
                *new_ino = ino;
        return error;
@@ -1737,8 +1719,9 @@ xfs_dialloc(
        struct xfs_perag        *pag;
        struct xfs_ino_geometry *igeo = M_IGEO(mp);
        bool                    ok_alloc = true;
+       bool                    low_space = false;
        int                     flags;
-       xfs_ino_t               ino;
+       xfs_ino_t               ino = NULLFSINO;
 
        /*
         * Directories, symlinks, and regular files frequently allocate at least
@@ -1746,7 +1729,8 @@ xfs_dialloc(
         * an AG has enough space for file creation.
         */
        if (S_ISDIR(mode))
-               start_agno = xfs_ialloc_next_ag(mp);
+               start_agno = (atomic_inc_return(&mp->m_agirotor) - 1) %
+                               mp->m_maxagi;
        else {
                start_agno = XFS_INO_TO_AGNO(mp, parent);
                if (start_agno >= mp->m_maxagi)
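
The spinlock-protected xfs_ialloc_next_ag() rotor removed above becomes the single atomic increment in this hunk. A userspace equivalent using C11 atomics (<stdatomic.h> stands in for the kernel's atomic_t; MAXAGI is an assumed AG count):

    #include <stdatomic.h>
    #include <stdio.h>

    #define MAXAGI 4

    static atomic_uint agirotor;    /* shared rotor, bumped lock-free */

    static unsigned int next_start_agno(void)
    {
            /*
             * fetch_add returns the old value, matching
             * (atomic_inc_return(&rotor) - 1) % maxagi in the diff.
             */
            return atomic_fetch_add(&agirotor, 1) % MAXAGI;
    }

    int main(void)
    {
            for (int i = 0; i < 6; i++)
                    printf("start AG %u\n", next_start_agno()); /* 0 1 2 3 0 1 */
            return 0;
    }

The raw counter is simply allowed to wrap; the modulo keeps directory placement approximately round-robin without taking a lock.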
@@ -1767,42 +1751,56 @@ xfs_dialloc(
                ok_alloc = false;
        }
 
+       /*
+        * If we are near ENOSPC, we want to prefer allocation from AGs that
+        * have free inodes in them rather than use up free space allocating new
+        * inode chunks. Hence, if we are near ENOSPC, we turn off allocation
+        * for the first non-blocking pass through the AGs so that we consume
+        * free inodes we can allocate immediately, and only re-enable chunk
+        * allocation on the second pass if no AG with free inodes is found.
+        */
+       if (percpu_counter_read_positive(&mp->m_fdblocks) <
+                       mp->m_low_space[XFS_LOWSP_1_PCNT]) {
+               ok_alloc = false;
+               low_space = true;
+       }
+
        /*
         * Loop until we find an allocation group that either has free inodes
         * or in which we can allocate some inodes.  Iterate through the
         * allocation groups upward, wrapping at the end.
         */
-       agno = start_agno;
        flags = XFS_ALLOC_FLAG_TRYLOCK;
-       for (;;) {
-               pag = xfs_perag_get(mp, agno);
-               if (xfs_dialloc_good_ag(*tpp, pag, mode, flags, ok_alloc)) {
-                       error = xfs_dialloc_try_ag(tpp, pag, parent,
+retry:
+       for_each_perag_wrap_at(mp, start_agno, mp->m_maxagi, agno, pag) {
+               if (xfs_dialloc_good_ag(pag, *tpp, mode, flags, ok_alloc)) {
+                       error = xfs_dialloc_try_ag(pag, tpp, parent,
                                        &ino, ok_alloc);
                        if (error != -EAGAIN)
                                break;
+                       error = 0;
                }
 
                if (xfs_is_shutdown(mp)) {
                        error = -EFSCORRUPTED;
                        break;
                }
-               if (++agno == mp->m_maxagi)
-                       agno = 0;
-               if (agno == start_agno) {
-                       if (!flags) {
-                               error = -ENOSPC;
-                               break;
-                       }
+       }
+       if (pag)
+               xfs_perag_rele(pag);
+       if (error)
+               return error;
+       if (ino == NULLFSINO) {
+               if (flags) {
                        flags = 0;
+                       if (low_space)
+                               ok_alloc = true;
+                       goto retry;
                }
-               xfs_perag_put(pag);
+               return -ENOSPC;
        }
-
-       if (!error)
-               *new_ino = ino;
-       xfs_perag_put(pag);
-       return error;
+       *new_ino = ino;
+       return 0;
 }
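
A compact sketch of the two-pass structure of the rewritten loop, with find_inode() standing in for the per-AG walk and a made-up success condition (trylock mirrors XFS_ALLOC_FLAG_TRYLOCK, ok_alloc the chunk-allocation permission):

    #include <stdbool.h>
    #include <stdio.h>

    /* Pretend success requires permission to allocate new chunks. */
    static bool find_inode(bool trylock, bool ok_alloc)
    {
            (void)trylock;
            return ok_alloc;
    }

    int main(void)
    {
            bool low_space = true;      /* as if fdblocks were low */
            bool ok_alloc = !low_space; /* pass 1: free inodes only */
            bool trylock = true;

            if (!find_inode(trylock, ok_alloc)) {
                    /* pass 2: blocking, chunk allocation re-enabled */
                    trylock = false;
                    if (low_space)
                            ok_alloc = true;
                    if (!find_inode(trylock, ok_alloc)) {
                            printf("ENOSPC\n");
                            return 1;
                    }
            }
            printf("inode allocated\n");
            return 0;
    }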
 
 /*
@@ -1885,14 +1883,14 @@ next:
 
 STATIC int
 xfs_difree_inobt(
-       struct xfs_mount                *mp,
+       struct xfs_perag                *pag,
        struct xfs_trans                *tp,
        struct xfs_buf                  *agbp,
-       struct xfs_perag                *pag,
        xfs_agino_t                     agino,
        struct xfs_icluster             *xic,
        struct xfs_inobt_rec_incore     *orec)
 {
+       struct xfs_mount                *mp = pag->pag_mount;
        struct xfs_agi                  *agi = agbp->b_addr;
        struct xfs_btree_cur            *cur;
        struct xfs_inobt_rec_incore     rec;
@@ -1907,7 +1905,7 @@ xfs_difree_inobt(
        /*
         * Initialize the cursor.
         */
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO);
+       cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
 
        error = xfs_check_agi_freecount(cur);
        if (error)
@@ -2019,20 +2017,20 @@ error0:
  */
 STATIC int
 xfs_difree_finobt(
-       struct xfs_mount                *mp,
+       struct xfs_perag                *pag,
        struct xfs_trans                *tp,
        struct xfs_buf                  *agbp,
-       struct xfs_perag                *pag,
        xfs_agino_t                     agino,
        struct xfs_inobt_rec_incore     *ibtrec) /* inobt record */
 {
+       struct xfs_mount                *mp = pag->pag_mount;
        struct xfs_btree_cur            *cur;
        struct xfs_inobt_rec_incore     rec;
        int                             offset = agino - ibtrec->ir_startino;
        int                             error;
        int                             i;
 
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_FINO);
+       cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO);
 
        error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
        if (error)
@@ -2179,7 +2177,7 @@ xfs_difree(
        /*
         * Fix up the inode allocation btree.
         */
-       error = xfs_difree_inobt(mp, tp, agbp, pag, agino, xic, &rec);
+       error = xfs_difree_inobt(pag, tp, agbp, agino, xic, &rec);
        if (error)
                goto error0;
 
@@ -2187,7 +2185,7 @@ xfs_difree(
         * Fix up the free inode btree.
         */
        if (xfs_has_finobt(mp)) {
-               error = xfs_difree_finobt(mp, tp, agbp, pag, agino, &rec);
+               error = xfs_difree_finobt(pag, tp, agbp, agino, &rec);
                if (error)
                        goto error0;
        }
@@ -2200,15 +2198,15 @@ error0:
 
 STATIC int
 xfs_imap_lookup(
-       struct xfs_mount        *mp,
-       struct xfs_trans        *tp,
        struct xfs_perag        *pag,
+       struct xfs_trans        *tp,
        xfs_agino_t             agino,
        xfs_agblock_t           agbno,
        xfs_agblock_t           *chunk_agbno,
        xfs_agblock_t           *offset_agbno,
        int                     flags)
 {
+       struct xfs_mount        *mp = pag->pag_mount;
        struct xfs_inobt_rec_incore rec;
        struct xfs_btree_cur    *cur;
        struct xfs_buf          *agbp;
@@ -2229,7 +2227,7 @@ xfs_imap_lookup(
         * we have a record, we need to ensure it contains the inode number
         * we are looking up.
         */
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO);
+       cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
        error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
        if (!error) {
                if (i)
@@ -2263,12 +2261,13 @@ xfs_imap_lookup(
  */
 int
 xfs_imap(
-       struct xfs_mount         *mp,   /* file system mount structure */
-       struct xfs_trans         *tp,   /* transaction pointer */
+       struct xfs_perag        *pag,
+       struct xfs_trans        *tp,
        xfs_ino_t               ino,    /* inode to locate */
        struct xfs_imap         *imap,  /* location map structure */
        uint                    flags)  /* flags for inode btree lookup */
 {
+       struct xfs_mount        *mp = pag->pag_mount;
        xfs_agblock_t           agbno;  /* block number of inode in the alloc group */
        xfs_agino_t             agino;  /* inode number within alloc group */
        xfs_agblock_t           chunk_agbno;    /* first block in inode chunk */
@@ -2276,17 +2275,15 @@ xfs_imap(
        int                     error;  /* error code */
        int                     offset; /* index of inode in its buffer */
        xfs_agblock_t           offset_agbno;   /* blks from chunk start to inode */
-       struct xfs_perag        *pag;
 
        ASSERT(ino != NULLFSINO);
 
        /*
         * Split up the inode number into its parts.
         */
-       pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
        agino = XFS_INO_TO_AGINO(mp, ino);
        agbno = XFS_AGINO_TO_AGBNO(mp, agino);
-       if (!pag || agbno >= mp->m_sb.sb_agblocks ||
+       if (agbno >= mp->m_sb.sb_agblocks ||
            ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
                error = -EINVAL;
 #ifdef DEBUG
@@ -2295,20 +2292,14 @@ xfs_imap(
                 * as they can be invalid without implying corruption.
                 */
                if (flags & XFS_IGET_UNTRUSTED)
-                       goto out_drop;
-               if (!pag) {
-                       xfs_alert(mp,
-                               "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
-                               __func__, XFS_INO_TO_AGNO(mp, ino),
-                               mp->m_sb.sb_agcount);
-               }
+                       return error;
                if (agbno >= mp->m_sb.sb_agblocks) {
                        xfs_alert(mp,
                "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
                                __func__, (unsigned long long)agbno,
                                (unsigned long)mp->m_sb.sb_agblocks);
                }
-               if (pag && ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
+               if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
                        xfs_alert(mp,
                "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
                                __func__, ino,
@@ -2316,7 +2307,7 @@ xfs_imap(
                }
                xfs_stack_trace();
 #endif /* DEBUG */
-               goto out_drop;
+               return error;
        }
 
        /*
@@ -2327,10 +2318,10 @@ xfs_imap(
         * in all cases where an untrusted inode number is passed.
         */
        if (flags & XFS_IGET_UNTRUSTED) {
-               error = xfs_imap_lookup(mp, tp, pag, agino, agbno,
+               error = xfs_imap_lookup(pag, tp, agino, agbno,
                                        &chunk_agbno, &offset_agbno, flags);
                if (error)
-                       goto out_drop;
+                       return error;
                goto out_map;
        }
 
@@ -2346,8 +2337,7 @@ xfs_imap(
                imap->im_len = XFS_FSB_TO_BB(mp, 1);
                imap->im_boffset = (unsigned short)(offset <<
                                                        mp->m_sb.sb_inodelog);
-               error = 0;
-               goto out_drop;
+               return 0;
        }
 
        /*
@@ -2359,10 +2349,10 @@ xfs_imap(
                offset_agbno = agbno & M_IGEO(mp)->inoalign_mask;
                chunk_agbno = agbno - offset_agbno;
        } else {
-               error = xfs_imap_lookup(mp, tp, pag, agino, agbno,
+               error = xfs_imap_lookup(pag, tp, agino, agbno,
                                        &chunk_agbno, &offset_agbno, flags);
                if (error)
-                       goto out_drop;
+                       return error;
        }
 
 out_map:
@@ -2390,14 +2380,9 @@ out_map:
                        __func__, (unsigned long long) imap->im_blkno,
                        (unsigned long long) imap->im_len,
                        XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
-               error = -EINVAL;
-               goto out_drop;
+               return -EINVAL;
        }
-       error = 0;
-out_drop:
-       if (pag)
-               xfs_perag_put(pag);
-       return error;
+       return 0;
 }
 
 /*
@@ -2613,10 +2598,10 @@ xfs_ialloc_read_agi(
                return error;
 
        agi = agibp->b_addr;
-       if (!pag->pagi_init) {
+       if (!xfs_perag_initialised_agi(pag)) {
                pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
                pag->pagi_count = be32_to_cpu(agi->agi_count);
-               pag->pagi_init = 1;
+               set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
        }
 
        /*
@@ -2924,26 +2909,24 @@ xfs_ialloc_calc_rootino(
  */
 int
 xfs_ialloc_check_shrink(
+       struct xfs_perag        *pag,
        struct xfs_trans        *tp,
-       xfs_agnumber_t          agno,
        struct xfs_buf          *agibp,
        xfs_agblock_t           new_length)
 {
        struct xfs_inobt_rec_incore rec;
        struct xfs_btree_cur    *cur;
-       struct xfs_mount        *mp = tp->t_mountp;
-       struct xfs_perag        *pag;
-       xfs_agino_t             agino = XFS_AGB_TO_AGINO(mp, new_length);
+       xfs_agino_t             agino;
        int                     has;
        int                     error;
 
-       if (!xfs_has_sparseinodes(mp))
+       if (!xfs_has_sparseinodes(pag->pag_mount))
                return 0;
 
-       pag = xfs_perag_get(mp, agno);
-       cur = xfs_inobt_init_cursor(mp, tp, agibp, pag, XFS_BTNUM_INO);
+       cur = xfs_inobt_init_cursor(pag, tp, agibp, XFS_BTNUM_INO);
 
        /* Look up the inobt record that would correspond to the new EOFS. */
+       agino = XFS_AGB_TO_AGINO(pag->pag_mount, new_length);
        error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has);
        if (error || !has)
                goto out;
@@ -2964,6 +2947,5 @@ xfs_ialloc_check_shrink(
        }
 out:
        xfs_btree_del_cursor(cur, error);
-       xfs_perag_put(pag);
        return error;
 }
index 9bbbca6ac4edf9c00ae5fc0dd53da51c72be775e..ab8c30b4ec22cdb0b6637c98ff194c57673c09eb 100644 (file)
@@ -12,6 +12,7 @@ struct xfs_imap;
 struct xfs_mount;
 struct xfs_trans;
 struct xfs_btree_cur;
+struct xfs_perag;
 
 /* Move inodes in clusters of this size */
 #define        XFS_INODE_BIG_CLUSTER_SIZE      8192
@@ -47,7 +48,7 @@ int xfs_difree(struct xfs_trans *tp, struct xfs_perag *pag,
  */
 int
 xfs_imap(
-       struct xfs_mount *mp,           /* file system mount structure */
+       struct xfs_perag *pag,
        struct xfs_trans *tp,           /* transaction pointer */
        xfs_ino_t       ino,            /* inode to locate */
        struct xfs_imap *imap,          /* location map structure */
@@ -106,7 +107,7 @@ int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
 void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
 xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
 
-int xfs_ialloc_check_shrink(struct xfs_trans *tp, xfs_agnumber_t agno,
+int xfs_ialloc_check_shrink(struct xfs_perag *pag, struct xfs_trans *tp,
                struct xfs_buf *agibp, xfs_agblock_t new_length);
 
 #endif /* __XFS_IALLOC_H__ */
index 8c83e265770c107affb1b9d5b7ace07d4e979b7f..9b28211d5a4c56f5f10cdc0b9d3bc888474d9f03 100644 (file)
@@ -36,8 +36,8 @@ STATIC struct xfs_btree_cur *
 xfs_inobt_dup_cursor(
        struct xfs_btree_cur    *cur)
 {
-       return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
-                       cur->bc_ag.agbp, cur->bc_ag.pag, cur->bc_btnum);
+       return xfs_inobt_init_cursor(cur->bc_ag.pag, cur->bc_tp,
+                       cur->bc_ag.agbp, cur->bc_btnum);
 }
 
 STATIC void
@@ -103,15 +103,15 @@ __xfs_inobt_alloc_block(
        memset(&args, 0, sizeof(args));
        args.tp = cur->bc_tp;
        args.mp = cur->bc_mp;
+       args.pag = cur->bc_ag.pag;
        args.oinfo = XFS_RMAP_OINFO_INOBT;
-       args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_ag.pag->pag_agno, sbno);
        args.minlen = 1;
        args.maxlen = 1;
        args.prod = 1;
-       args.type = XFS_ALLOCTYPE_NEAR_BNO;
        args.resv = resv;
 
-       error = xfs_alloc_vextent(&args);
+       error = xfs_alloc_vextent_near_bno(&args,
+                       XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno, sbno));
        if (error)
                return error;
 
@@ -291,8 +291,8 @@ xfs_inobt_verify(
         * Similarly, during log recovery we will have a perag structure
         * attached, but the agi information will not yet have been initialised
         * from the on disk AGI. We don't currently use any of this information,
-        * but beware of the landmine (i.e. need to check pag->pagi_init) if we
-        * ever do.
+        * but beware of the landmine (i.e. need to check
+        * xfs_perag_initialised_agi(pag)) if we ever do.
         */
        if (xfs_has_crc(mp)) {
                fa = xfs_btree_sblock_v5hdr_verify(bp);
@@ -427,11 +427,11 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
  */
 static struct xfs_btree_cur *
 xfs_inobt_init_common(
-       struct xfs_mount        *mp,            /* file system mount point */
-       struct xfs_trans        *tp,            /* transaction pointer */
        struct xfs_perag        *pag,
+       struct xfs_trans        *tp,            /* transaction pointer */
        xfs_btnum_t             btnum)          /* ialloc or free ino btree */
 {
+       struct xfs_mount        *mp = pag->pag_mount;
        struct xfs_btree_cur    *cur;
 
        cur = xfs_btree_alloc_cursor(mp, tp, btnum,
@@ -456,16 +456,15 @@ xfs_inobt_init_common(
 /* Create an inode btree cursor. */
 struct xfs_btree_cur *
 xfs_inobt_init_cursor(
-       struct xfs_mount        *mp,
+       struct xfs_perag        *pag,
        struct xfs_trans        *tp,
        struct xfs_buf          *agbp,
-       struct xfs_perag        *pag,
        xfs_btnum_t             btnum)
 {
        struct xfs_btree_cur    *cur;
        struct xfs_agi          *agi = agbp->b_addr;
 
-       cur = xfs_inobt_init_common(mp, tp, pag, btnum);
+       cur = xfs_inobt_init_common(pag, tp, btnum);
        if (btnum == XFS_BTNUM_INO)
                cur->bc_nlevels = be32_to_cpu(agi->agi_level);
        else
@@ -477,14 +476,13 @@ xfs_inobt_init_cursor(
 /* Create an inode btree cursor with a fake root for staging. */
 struct xfs_btree_cur *
 xfs_inobt_stage_cursor(
-       struct xfs_mount        *mp,
-       struct xbtree_afakeroot *afake,
        struct xfs_perag        *pag,
+       struct xbtree_afakeroot *afake,
        xfs_btnum_t             btnum)
 {
        struct xfs_btree_cur    *cur;
 
-       cur = xfs_inobt_init_common(mp, NULL, pag, btnum);
+       cur = xfs_inobt_init_common(pag, NULL, btnum);
        xfs_btree_stage_afakeroot(cur, afake);
        return cur;
 }
@@ -708,9 +706,8 @@ xfs_inobt_max_size(
 /* Read AGI and create inobt cursor. */
 int
 xfs_inobt_cur(
-       struct xfs_mount        *mp,
-       struct xfs_trans        *tp,
        struct xfs_perag        *pag,
+       struct xfs_trans        *tp,
        xfs_btnum_t             which,
        struct xfs_btree_cur    **curpp,
        struct xfs_buf          **agi_bpp)
@@ -725,16 +722,15 @@ xfs_inobt_cur(
        if (error)
                return error;
 
-       cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, pag, which);
+       cur = xfs_inobt_init_cursor(pag, tp, *agi_bpp, which);
        *curpp = cur;
        return 0;
 }
 
 static int
 xfs_inobt_count_blocks(
-       struct xfs_mount        *mp,
-       struct xfs_trans        *tp,
        struct xfs_perag        *pag,
+       struct xfs_trans        *tp,
        xfs_btnum_t             btnum,
        xfs_extlen_t            *tree_blocks)
 {
@@ -742,7 +738,7 @@ xfs_inobt_count_blocks(
        struct xfs_btree_cur    *cur = NULL;
        int                     error;
 
-       error = xfs_inobt_cur(mp, tp, pag, btnum, &cur, &agbp);
+       error = xfs_inobt_cur(pag, tp, btnum, &cur, &agbp);
        if (error)
                return error;
 
@@ -779,22 +775,21 @@ xfs_finobt_read_blocks(
  */
 int
 xfs_finobt_calc_reserves(
-       struct xfs_mount        *mp,
-       struct xfs_trans        *tp,
        struct xfs_perag        *pag,
+       struct xfs_trans        *tp,
        xfs_extlen_t            *ask,
        xfs_extlen_t            *used)
 {
        xfs_extlen_t            tree_len = 0;
        int                     error;
 
-       if (!xfs_has_finobt(mp))
+       if (!xfs_has_finobt(pag->pag_mount))
                return 0;
 
-       if (xfs_has_inobtcounts(mp))
+       if (xfs_has_inobtcounts(pag->pag_mount))
                error = xfs_finobt_read_blocks(pag, tp, &tree_len);
        else
-               error = xfs_inobt_count_blocks(mp, tp, pag, XFS_BTNUM_FINO,
+               error = xfs_inobt_count_blocks(pag, tp, XFS_BTNUM_FINO,
                                &tree_len);
        if (error)
                return error;
index 26451cb76b98bb306d6d50d13b2f290ec1d58b52..e859a6e0523096d56b7b8cef0e6b6c743f40e6a3 100644 (file)
@@ -46,12 +46,10 @@ struct xfs_perag;
                 (maxrecs) * sizeof(xfs_inobt_key_t) + \
                 ((index) - 1) * sizeof(xfs_inobt_ptr_t)))
 
-extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *mp,
-               struct xfs_trans *tp, struct xfs_buf *agbp,
-               struct xfs_perag *pag, xfs_btnum_t btnum);
-struct xfs_btree_cur *xfs_inobt_stage_cursor(struct xfs_mount *mp,
-               struct xbtree_afakeroot *afake, struct xfs_perag *pag,
-               xfs_btnum_t btnum);
+extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_perag *pag,
+               struct xfs_trans *tp, struct xfs_buf *agbp, xfs_btnum_t btnum);
+struct xfs_btree_cur *xfs_inobt_stage_cursor(struct xfs_perag *pag,
+               struct xbtree_afakeroot *afake, xfs_btnum_t btnum);
 extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
 
 /* ir_holemask to inode allocation bitmap conversion */
@@ -64,13 +62,13 @@ int xfs_inobt_rec_check_count(struct xfs_mount *,
 #define xfs_inobt_rec_check_count(mp, rec)     0
 #endif /* DEBUG */
 
-int xfs_finobt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp,
-               struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used);
+int xfs_finobt_calc_reserves(struct xfs_perag *perag, struct xfs_trans *tp,
+               xfs_extlen_t *ask, xfs_extlen_t *used);
 extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp,
                unsigned long long len);
-int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp,
-               struct xfs_perag *pag, xfs_btnum_t btnum,
-               struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp);
+int xfs_inobt_cur(struct xfs_perag *pag, struct xfs_trans *tp,
+               xfs_btnum_t btnum, struct xfs_btree_cur **curpp,
+               struct xfs_buf **agi_bpp);
 
 void xfs_inobt_commit_staged_btree(struct xfs_btree_cur *cur,
                struct xfs_trans *tp, struct xfs_buf *agbp);
index e1f7898666831f2bc4bfe413b75553bc0f5927fb..f3b860970b260839365c875e1c7390f4ea7e8b1a 100644 (file)
@@ -67,14 +67,14 @@ xfs_refcountbt_alloc_block(
        memset(&args, 0, sizeof(args));
        args.tp = cur->bc_tp;
        args.mp = cur->bc_mp;
-       args.type = XFS_ALLOCTYPE_NEAR_BNO;
-       args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno,
-                       xfs_refc_block(args.mp));
+       args.pag = cur->bc_ag.pag;
        args.oinfo = XFS_RMAP_OINFO_REFC;
        args.minlen = args.maxlen = args.prod = 1;
        args.resv = XFS_AG_RESV_METADATA;
 
-       error = xfs_alloc_vextent(&args);
+       error = xfs_alloc_vextent_near_bno(&args,
+                       XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno,
+                                       xfs_refc_block(args.mp)));
        if (error)
                goto out_error;
        trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
@@ -227,7 +227,7 @@ xfs_refcountbt_verify(
                return fa;
 
        level = be16_to_cpu(block->bb_level);
-       if (pag && pag->pagf_init) {
+       if (pag && xfs_perag_initialised_agf(pag)) {
                if (level >= pag->pagf_refcount_level)
                        return __this_address;
        } else if (level >= mp->m_refc_maxlevels)
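
This file's allocation hunk, like the inobt one above and the repair
one below, shows the shape of the allocator conversion: instead of
encoding placement policy in args.type plus a pre-computed args.fsbno,
callers invoke an entry point named for the policy and pass the target
block as a parameter. A hedged sketch of that calling convention; the
types and function here are stand-ins, not the kernel API:

	#include <stdio.h>

	typedef unsigned long long xfs_fsblock_t;

	struct xfs_alloc_arg { int minlen; int maxlen; int prod; };

	/* policy lives in the function name, the target is explicit */
	static int alloc_vextent_near_bno(struct xfs_alloc_arg *args,
			xfs_fsblock_t target)
	{
		printf("allocate %d-%d blocks near fsbno %llu\n",
				args->minlen, args->maxlen, target);
		return 0;
	}

	int main(void)
	{
		struct xfs_alloc_arg args = {
			.minlen = 1, .maxlen = 1, .prod = 1,
		};

		return alloc_vextent_near_bno(&args, 8192ULL);
	}
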
index 7f83f62e51e0bf36c9f16316d2faa9c97930673f..d3285684bb5eb34e8371c1542b1cd0e07bf4e6d6 100644 (file)
@@ -313,7 +313,7 @@ xfs_rmapbt_verify(
                return fa;
 
        level = be16_to_cpu(block->bb_level);
-       if (pag && pag->pagf_init) {
+       if (pag && xfs_perag_initialised_agf(pag)) {
                if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
                        return __this_address;
        } else if (level >= mp->m_rmap_maxlevels)
index 1eeecf2eb2a772303adc6d0de01fbeb0c6f64602..99cc03a298e21c2ba79cd2c4d4ff88e46c0e2997 100644 (file)
@@ -909,7 +909,8 @@ xfs_sb_mount_common(
        struct xfs_mount        *mp,
        struct xfs_sb           *sbp)
 {
-       mp->m_agfrotor = mp->m_agirotor = 0;
+       mp->m_agfrotor = 0;
+       atomic_set(&mp->m_agirotor, 0);
        mp->m_maxagi = mp->m_sb.sb_agcount;
        mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
        mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
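
With the rotor made atomic, the separate m_agirotor_lock goes away (see
the xfs_mount.h and xfs_super.c hunks below): an atomic counter can
hand out rotor positions without serialising callers. A small C11 model
of the idiom, assuming inc-and-wrap semantics; this is a sketch, not
the kernel's rotor code:

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_uint m_agirotor;

	/* hand out a starting AG and advance the rotor locklessly;
	 * fetch_add returns the pre-increment value */
	static unsigned int next_start_agno(unsigned int agcount)
	{
		return atomic_fetch_add(&m_agirotor, 1) % agcount;
	}

	int main(void)
	{
		for (int i = 0; i < 6; i++)
			printf("start at ag %u\n", next_start_agno(4));
		return 0;
	}
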
index d75d82151eeba3e216b62f17f4f22e9a3a9190f6..c37e6d72760b9dc824b6bde345af273978842b97 100644 (file)
@@ -191,14 +191,15 @@ xrep_agf_init_header(
        struct xfs_agf          *old_agf)
 {
        struct xfs_mount        *mp = sc->mp;
+       struct xfs_perag        *pag = sc->sa.pag;
        struct xfs_agf          *agf = agf_bp->b_addr;
 
        memcpy(old_agf, agf, sizeof(*old_agf));
        memset(agf, 0, BBTOB(agf_bp->b_length));
        agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
        agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
-       agf->agf_seqno = cpu_to_be32(sc->sa.pag->pag_agno);
-       agf->agf_length = cpu_to_be32(sc->sa.pag->block_count);
+       agf->agf_seqno = cpu_to_be32(pag->pag_agno);
+       agf->agf_length = cpu_to_be32(pag->block_count);
        agf->agf_flfirst = old_agf->agf_flfirst;
        agf->agf_fllast = old_agf->agf_fllast;
        agf->agf_flcount = old_agf->agf_flcount;
@@ -206,8 +207,8 @@ xrep_agf_init_header(
                uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
 
        /* Mark the incore AGF data stale until we're done fixing things. */
-       ASSERT(sc->sa.pag->pagf_init);
-       sc->sa.pag->pagf_init = 0;
+       ASSERT(xfs_perag_initialised_agf(pag));
+       clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
 }
 
 /* Set btree root information in an AGF. */
@@ -333,7 +334,7 @@ xrep_agf_commit_new(
        pag->pagf_levels[XFS_BTNUM_RMAPi] =
                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
        pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
-       pag->pagf_init = 1;
+       set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
 
        return 0;
 }
@@ -434,7 +435,7 @@ xrep_agf(
 
 out_revert:
        /* Mark the incore AGF state stale and revert the AGF. */
-       sc->sa.pag->pagf_init = 0;
+       clear_bit(XFS_AGSTATE_AGF_INIT, &sc->sa.pag->pag_opstate);
        memcpy(agf, &old_agf, sizeof(old_agf));
        return error;
 }
@@ -618,7 +619,7 @@ xrep_agfl_update_agf(
        xfs_force_summary_recalc(sc->mp);
 
        /* Update the AGF counters. */
-       if (sc->sa.pag->pagf_init)
+       if (xfs_perag_initialised_agf(sc->sa.pag))
                sc->sa.pag->pagf_flcount = flcount;
        agf->agf_flfirst = cpu_to_be32(0);
        agf->agf_flcount = cpu_to_be32(flcount);
@@ -822,14 +823,15 @@ xrep_agi_init_header(
        struct xfs_agi          *old_agi)
 {
        struct xfs_agi          *agi = agi_bp->b_addr;
+       struct xfs_perag        *pag = sc->sa.pag;
        struct xfs_mount        *mp = sc->mp;
 
        memcpy(old_agi, agi, sizeof(*old_agi));
        memset(agi, 0, BBTOB(agi_bp->b_length));
        agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
        agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
-       agi->agi_seqno = cpu_to_be32(sc->sa.pag->pag_agno);
-       agi->agi_length = cpu_to_be32(sc->sa.pag->block_count);
+       agi->agi_seqno = cpu_to_be32(pag->pag_agno);
+       agi->agi_length = cpu_to_be32(pag->block_count);
        agi->agi_newino = cpu_to_be32(NULLAGINO);
        agi->agi_dirino = cpu_to_be32(NULLAGINO);
        if (xfs_has_crc(mp))
@@ -840,8 +842,8 @@ xrep_agi_init_header(
                        sizeof(agi->agi_unlinked));
 
        /* Mark the incore AGF data stale until we're done fixing things. */
-       ASSERT(sc->sa.pag->pagi_init);
-       sc->sa.pag->pagi_init = 0;
+       ASSERT(xfs_perag_initialised_agi(pag));
+       clear_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
 }
 
 /* Set btree root information in an AGI. */
@@ -873,8 +875,7 @@ xrep_agi_calc_from_btrees(
        xfs_agino_t             freecount;
        int                     error;
 
-       cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp,
-                       sc->sa.pag, XFS_BTNUM_INO);
+       cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp, agi_bp, XFS_BTNUM_INO);
        error = xfs_ialloc_count_inodes(cur, &count, &freecount);
        if (error)
                goto err;
@@ -894,8 +895,8 @@ xrep_agi_calc_from_btrees(
        if (xfs_has_finobt(mp) && xfs_has_inobtcounts(mp)) {
                xfs_agblock_t   blocks;
 
-               cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp,
-                               sc->sa.pag, XFS_BTNUM_FINO);
+               cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp, agi_bp,
+                               XFS_BTNUM_FINO);
                error = xfs_btree_count_blocks(cur, &blocks);
                if (error)
                        goto err;
@@ -929,7 +930,7 @@ xrep_agi_commit_new(
        pag = sc->sa.pag;
        pag->pagi_count = be32_to_cpu(agi->agi_count);
        pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
-       pag->pagi_init = 1;
+       set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
 
        return 0;
 }
@@ -994,7 +995,7 @@ xrep_agi(
 
 out_revert:
        /* Mark the incore AGI state stale and revert the AGI. */
-       sc->sa.pag->pagi_init = 0;
+       clear_bit(XFS_AGSTATE_AGI_INIT, &sc->sa.pag->pag_opstate);
        memcpy(agi, &old_agi, sizeof(old_agi));
        return error;
 }
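
Throughout the repair hunks, the pagf_init/pagi_init flag words become
bits in pag->pag_opstate, flipped with set_bit()/clear_bit() and read
through predicates such as xfs_perag_initialised_agf(). A userspace
model of that encoding; the XFS_AGSTATE_* bit positions are assumed
here for illustration:

	#include <stdbool.h>
	#include <stdio.h>

	/* assumed bit positions, for illustration only */
	#define XFS_AGSTATE_AGF_INIT	0
	#define XFS_AGSTATE_AGI_INIT	1

	struct xfs_perag { unsigned long pag_opstate; };

	static bool perag_initialised_agf(const struct xfs_perag *pag)
	{
		return pag->pag_opstate & (1UL << XFS_AGSTATE_AGF_INIT);
	}

	int main(void)
	{
		struct xfs_perag pag = { .pag_opstate = 0 };

		pag.pag_opstate |= 1UL << XFS_AGSTATE_AGF_INIT;  /* set_bit() */
		printf("agf init: %d\n", perag_initialised_agf(&pag));

		pag.pag_opstate &= ~(1UL << XFS_AGSTATE_AGF_INIT); /* clear_bit() */
		printf("agf init: %d\n", perag_initialised_agf(&pag));
		return 0;
	}
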
index d50d0eab196abfe0640bad8385d67e7abed92539..dbbc7037074c4ce7e3a7a1fead738a993cf34930 100644 (file)
@@ -662,7 +662,7 @@ xchk_bmap_check_rmaps(
                error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
                if (error ||
                    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
-                       xfs_perag_put(pag);
+                       xfs_perag_rele(pag);
                        return error;
                }
        }
index 613260b04a3dc6dfcf268d599d8c2904d6a2832f..848a8e32e56f0106166b0ac454d627d305f041ac 100644 (file)
@@ -478,15 +478,15 @@ xchk_ag_btcur_init(
        /* Set up a inobt cursor for cross-referencing. */
        if (sa->agi_bp &&
            xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) {
-               sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
-                               sa->pag, XFS_BTNUM_INO);
+               sa->ino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp,
+                               XFS_BTNUM_INO);
        }
 
        /* Set up a finobt cursor for cross-referencing. */
        if (sa->agi_bp && xfs_has_finobt(mp) &&
            xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
-               sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
-                               sa->pag, XFS_BTNUM_FINO);
+               sa->fino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp,
+                               XFS_BTNUM_FINO);
        }
 
        /* Set up a rmapbt cursor for cross-referencing. */
@@ -636,6 +636,7 @@ xchk_get_inode(
 {
        struct xfs_imap         imap;
        struct xfs_mount        *mp = sc->mp;
+       struct xfs_perag        *pag;
        struct xfs_inode        *ip_in = XFS_I(file_inode(sc->file));
        struct xfs_inode        *ip = NULL;
        int                     error;
@@ -671,10 +672,14 @@ xchk_get_inode(
                 * Otherwise, we really couldn't find it so tell userspace
                 * that it no longer exists.
                 */
-               error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap,
-                               XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
-               if (error)
-                       return -ENOENT;
+               pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
+               if (pag) {
+                       error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
+                                       XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
+                       xfs_perag_put(pag);
+                       if (error)
+                               return -ENOENT;
+               }
                error = -EFSCORRUPTED;
                fallthrough;
        default:
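
Because xfs_imap() now takes a perag, the scrub path has to look one up
by AG number and cope with a NULL result for an out-of-range inode
before calling it. A sketch of that guard-use-put shape, with a toy
table standing in for the kernel's perag radix tree:

	#include <stdio.h>

	struct xfs_perag { unsigned int pag_agno; int refs; };

	static struct xfs_perag ags[4];

	/* passive lookup: NULL when agno is out of range */
	static struct xfs_perag *perag_get(unsigned int agno)
	{
		if (agno >= 4)
			return NULL;
		ags[agno].pag_agno = agno;
		ags[agno].refs++;
		return &ags[agno];
	}

	static void perag_put(struct xfs_perag *pag)
	{
		pag->refs--;
	}

	int main(void)
	{
		struct xfs_perag *pag = perag_get(2);

		if (pag) {
			printf("mapping inode via ag %u\n", pag->pag_agno);
			perag_put(pag);	/* drop before acting on the result */
		}
		return 0;
	}
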
index 4777e7b89fdc69527fbb31d65801acf693e4af4f..f0c7f41897b9049fa31e1e576c4e6bd9b705f154 100644 (file)
@@ -86,7 +86,8 @@ xchk_fscount_warmup(
        for_each_perag(mp, agno, pag) {
                if (xchk_should_terminate(sc, &error))
                        break;
-               if (pag->pagi_init && pag->pagf_init)
+               if (xfs_perag_initialised_agi(pag) &&
+                   xfs_perag_initialised_agf(pag))
                        continue;
 
                /* Lock both AG headers. */
@@ -101,7 +102,8 @@ xchk_fscount_warmup(
                 * These are supposed to be initialized by the header read
                 * function.
                 */
-               if (!pag->pagi_init || !pag->pagf_init) {
+               if (!xfs_perag_initialised_agi(pag) ||
+                   !xfs_perag_initialised_agf(pag)) {
                        error = -EFSCORRUPTED;
                        break;
                }
@@ -117,7 +119,7 @@ xchk_fscount_warmup(
        if (agi_bp)
                xfs_buf_relse(agi_bp);
        if (pag)
-               xfs_perag_put(pag);
+               xfs_perag_rele(pag);
        return error;
 }
 
@@ -220,7 +222,8 @@ retry:
                        break;
 
                /* This somehow got unset since the warmup? */
-               if (!pag->pagi_init || !pag->pagf_init) {
+               if (!xfs_perag_initialised_agi(pag) ||
+                   !xfs_perag_initialised_agf(pag)) {
                        error = -EFSCORRUPTED;
                        break;
                }
@@ -249,7 +252,7 @@ retry:
 
        }
        if (pag)
-               xfs_perag_put(pag);
+               xfs_perag_rele(pag);
        if (error) {
                xchk_set_incomplete(sc);
                return error;
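
The xfs_perag_put() to xfs_perag_rele() conversions here and below
follow from splitting perag references in two: passive references
(pag_ref, get/put) only keep the structure alive, while active
references (pag_active_ref, grab/rele) keep the AG available for use,
and the perag iterators now hold the active kind. A two-counter
userspace model, not the kernel implementation:

	#include <stdatomic.h>
	#include <stdio.h>

	struct xfs_perag {
		atomic_int pag_ref;		/* passive: structure lifetime */
		atomic_int pag_active_ref;	/* active: AG usable */
	};

	static struct xfs_perag pag;

	static void perag_grab(struct xfs_perag *p)	/* cf. xfs_perag_grab() */
	{
		atomic_fetch_add(&p->pag_active_ref, 1);
	}

	static void perag_rele(struct xfs_perag *p)	/* cf. xfs_perag_rele() */
	{
		if (atomic_fetch_sub(&p->pag_active_ref, 1) == 1)
			printf("last active reference dropped\n");
	}

	int main(void)
	{
		perag_grab(&pag);	/* e.g. taken by a perag iterator */
		perag_rele(&pag);	/* e.g. released on an early break */
		return 0;
	}
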
index 4b92f9253ccd2c2eebda24230082f540fed0db7e..1b71174ec0d6d41e6e4d5a93e6fe31a6f3e5385c 100644 (file)
@@ -206,7 +206,7 @@ xrep_calc_ag_resblks(
                return 0;
 
        pag = xfs_perag_get(mp, sm->sm_agno);
-       if (pag->pagi_init) {
+       if (xfs_perag_initialised_agi(pag)) {
                /* Use in-core icount if possible. */
                icount = pag->pagi_count;
        } else {
@@ -326,15 +326,14 @@ xrep_alloc_ag_block(
 
        args.tp = sc->tp;
        args.mp = sc->mp;
+       args.pag = sc->sa.pag;
        args.oinfo = *oinfo;
-       args.fsbno = XFS_AGB_TO_FSB(args.mp, sc->sa.pag->pag_agno, 0);
        args.minlen = 1;
        args.maxlen = 1;
        args.prod = 1;
-       args.type = XFS_ALLOCTYPE_THIS_AG;
        args.resv = resv;
 
-       error = xfs_alloc_vextent(&args);
+       error = xfs_alloc_vextent_this_ag(&args, sc->sa.pag->pag_agno);
        if (error)
                return error;
        if (args.fsbno == NULLFSBLOCK)
index 867645b74d889ddc96f768f7e74d15690840c1e1..a09dd2606479270d107c63da7ab030447431e8ca 100644 (file)
@@ -1410,7 +1410,7 @@ xfs_swap_extent_rmap(
 
                /* Unmap the old blocks in the source file. */
                while (tirec.br_blockcount) {
-                       ASSERT(tp->t_firstblock == NULLFSBLOCK);
+                       ASSERT(tp->t_highest_agno == NULLAGNUMBER);
                        trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);
 
                        /* Read extent from the source file */
index bfc829c07f035f45e6d09c70b6a1b3ca481952eb..afc4c78b9eed648ccfcb0611a3657eaca083f886 100644 (file)
 
 STATIC int
 xfs_trim_extents(
-       struct xfs_mount        *mp,
-       xfs_agnumber_t          agno,
+       struct xfs_perag        *pag,
        xfs_daddr_t             start,
        xfs_daddr_t             end,
        xfs_daddr_t             minlen,
        uint64_t                *blocks_trimmed)
 {
+       struct xfs_mount        *mp = pag->pag_mount;
        struct block_device     *bdev = mp->m_ddev_targp->bt_bdev;
        struct xfs_btree_cur    *cur;
        struct xfs_buf          *agbp;
        struct xfs_agf          *agf;
-       struct xfs_perag        *pag;
        int                     error;
        int                     i;
 
-       pag = xfs_perag_get(mp, agno);
-
        /*
         * Force out the log.  This means any transactions that might have freed
         * space before we take the AGF buffer lock are now on disk, and the
@@ -47,7 +44,7 @@ xfs_trim_extents(
 
        error = xfs_alloc_read_agf(pag, NULL, 0, &agbp);
        if (error)
-               goto out_put_perag;
+               return error;
        agf = agbp->b_addr;
 
        cur = xfs_allocbt_init_cursor(mp, NULL, agbp, pag, XFS_BTNUM_CNT);
@@ -71,10 +68,10 @@ xfs_trim_extents(
 
                error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
                if (error)
-                       goto out_del_cursor;
+                       break;
                if (XFS_IS_CORRUPT(mp, i != 1)) {
                        error = -EFSCORRUPTED;
-                       goto out_del_cursor;
+                       break;
                }
                ASSERT(flen <= be32_to_cpu(agf->agf_longest));
 
@@ -83,15 +80,15 @@ xfs_trim_extents(
                 * the format the range/len variables are supplied in by
                 * userspace.
                 */
-               dbno = XFS_AGB_TO_DADDR(mp, agno, fbno);
+               dbno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, fbno);
                dlen = XFS_FSB_TO_BB(mp, flen);
 
                /*
                 * Too small?  Give up.
                 */
                if (dlen < minlen) {
-                       trace_xfs_discard_toosmall(mp, agno, fbno, flen);
-                       goto out_del_cursor;
+                       trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno, flen);
+                       break;
                }
 
                /*
@@ -100,7 +97,7 @@ xfs_trim_extents(
                 * down partially overlapping ranges for now.
                 */
                if (dbno + dlen < start || dbno > end) {
-                       trace_xfs_discard_exclude(mp, agno, fbno, flen);
+                       trace_xfs_discard_exclude(mp, pag->pag_agno, fbno, flen);
                        goto next_extent;
                }
 
@@ -109,32 +106,30 @@ xfs_trim_extents(
                 * discard and try again the next time.
                 */
                if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
-                       trace_xfs_discard_busy(mp, agno, fbno, flen);
+                       trace_xfs_discard_busy(mp, pag->pag_agno, fbno, flen);
                        goto next_extent;
                }
 
-               trace_xfs_discard_extent(mp, agno, fbno, flen);
+               trace_xfs_discard_extent(mp, pag->pag_agno, fbno, flen);
                error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
                if (error)
-                       goto out_del_cursor;
+                       break;
                *blocks_trimmed += flen;
 
 next_extent:
                error = xfs_btree_decrement(cur, 0, &i);
                if (error)
-                       goto out_del_cursor;
+                       break;
 
                if (fatal_signal_pending(current)) {
                        error = -ERESTARTSYS;
-                       goto out_del_cursor;
+                       break;
                }
        }
 
 out_del_cursor:
        xfs_btree_del_cursor(cur, error);
        xfs_buf_relse(agbp);
-out_put_perag:
-       xfs_perag_put(pag);
        return error;
 }
 
@@ -152,11 +147,12 @@ xfs_ioc_trim(
        struct xfs_mount                *mp,
        struct fstrim_range __user      *urange)
 {
+       struct xfs_perag        *pag;
        unsigned int            granularity =
                bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
        struct fstrim_range     range;
        xfs_daddr_t             start, end, minlen;
-       xfs_agnumber_t          start_agno, end_agno, agno;
+       xfs_agnumber_t          agno;
        uint64_t                blocks_trimmed = 0;
        int                     error, last_error = 0;
 
@@ -193,18 +189,18 @@ xfs_ioc_trim(
        end = start + BTOBBT(range.len) - 1;
 
        if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
-               end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1;
-
-       start_agno = xfs_daddr_to_agno(mp, start);
-       end_agno = xfs_daddr_to_agno(mp, end);
+               end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1;
 
-       for (agno = start_agno; agno <= end_agno; agno++) {
-               error = xfs_trim_extents(mp, agno, start, end, minlen,
+       agno = xfs_daddr_to_agno(mp, start);
+       for_each_perag_range(mp, agno, xfs_daddr_to_agno(mp, end), pag) {
+               error = xfs_trim_extents(pag, start, end, minlen,
                                          &blocks_trimmed);
                if (error) {
                        last_error = error;
-                       if (error == -ERESTARTSYS)
+                       if (error == -ERESTARTSYS) {
+                               xfs_perag_rele(pag);
                                break;
+                       }
                }
        }
 
index 34b21a29c39bc3608cb977373280c3b63ff7bd58..22c13933c8f80a002c70acd5b40d924caca84ce6 100644 (file)
@@ -12,6 +12,7 @@
 #include "xfs_mount.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "xfs_alloc.h"
 #include "xfs_mru_cache.h"
 #include "xfs_trace.h"
@@ -22,7 +23,7 @@
 
 struct xfs_fstrm_item {
        struct xfs_mru_cache_elem       mru;
-       xfs_agnumber_t                  ag; /* AG in use for this directory */
+       struct xfs_perag                *pag; /* AG in use for this directory */
 };
 
 enum xfs_fstrm_alloc {
@@ -30,117 +31,68 @@ enum xfs_fstrm_alloc {
        XFS_PICK_LOWSPACE = 2,
 };
 
-/*
- * Allocation group filestream associations are tracked with per-ag atomic
- * counters.  These counters allow xfs_filestream_pick_ag() to tell whether a
- * particular AG already has active filestreams associated with it.
- */
-int
-xfs_filestream_peek_ag(
-       xfs_mount_t     *mp,
-       xfs_agnumber_t  agno)
-{
-       struct xfs_perag *pag;
-       int             ret;
-
-       pag = xfs_perag_get(mp, agno);
-       ret = atomic_read(&pag->pagf_fstrms);
-       xfs_perag_put(pag);
-       return ret;
-}
-
-static int
-xfs_filestream_get_ag(
-       xfs_mount_t     *mp,
-       xfs_agnumber_t  agno)
-{
-       struct xfs_perag *pag;
-       int             ret;
-
-       pag = xfs_perag_get(mp, agno);
-       ret = atomic_inc_return(&pag->pagf_fstrms);
-       xfs_perag_put(pag);
-       return ret;
-}
-
-static void
-xfs_filestream_put_ag(
-       xfs_mount_t     *mp,
-       xfs_agnumber_t  agno)
-{
-       struct xfs_perag *pag;
-
-       pag = xfs_perag_get(mp, agno);
-       atomic_dec(&pag->pagf_fstrms);
-       xfs_perag_put(pag);
-}
-
 static void
 xfs_fstrm_free_func(
        void                    *data,
        struct xfs_mru_cache_elem *mru)
 {
-       struct xfs_mount        *mp = data;
        struct xfs_fstrm_item   *item =
                container_of(mru, struct xfs_fstrm_item, mru);
+       struct xfs_perag        *pag = item->pag;
 
-       xfs_filestream_put_ag(mp, item->ag);
-       trace_xfs_filestream_free(mp, mru->key, item->ag);
+       trace_xfs_filestream_free(pag, mru->key);
+       atomic_dec(&pag->pagf_fstrms);
+       xfs_perag_rele(pag);
 
        kmem_free(item);
 }
 
 /*
- * Scan the AGs starting at startag looking for an AG that isn't in use and has
- * at least minlen blocks free.
+ * Scan the AGs starting at start_agno looking for an AG that isn't in use and
+ * has at least minlen blocks free. If no AG is found to match the allocation
+ * requirements, pick the AG with the most free space in it.
  */
 static int
 xfs_filestream_pick_ag(
-       struct xfs_inode        *ip,
-       xfs_agnumber_t          startag,
-       xfs_agnumber_t          *agp,
+       struct xfs_alloc_arg    *args,
+       xfs_ino_t               pino,
+       xfs_agnumber_t          start_agno,
        int                     flags,
-       xfs_extlen_t            minlen)
+       xfs_extlen_t            *longest)
 {
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_fstrm_item   *item;
+       struct xfs_mount        *mp = args->mp;
        struct xfs_perag        *pag;
-       xfs_extlen_t            longest, free = 0, minfree, maxfree = 0;
-       xfs_agnumber_t          ag, max_ag = NULLAGNUMBER;
-       int                     err, trylock, nscan;
-
-       ASSERT(S_ISDIR(VFS_I(ip)->i_mode));
+       struct xfs_perag        *max_pag = NULL;
+       xfs_extlen_t            minlen = *longest;
+       xfs_extlen_t            free = 0, minfree, maxfree = 0;
+       xfs_agnumber_t          agno;
+       bool                    first_pass = true;
+       int                     err;
 
        /* 2% of an AG's blocks must be free for it to be chosen. */
        minfree = mp->m_sb.sb_agblocks / 50;
 
-       ag = startag;
-       *agp = NULLAGNUMBER;
-
-       /* For the first pass, don't sleep trying to init the per-AG. */
-       trylock = XFS_ALLOC_FLAG_TRYLOCK;
-
-       for (nscan = 0; 1; nscan++) {
-               trace_xfs_filestream_scan(mp, ip->i_ino, ag);
-
-               pag = xfs_perag_get(mp, ag);
-
-               if (!pag->pagf_init) {
-                       err = xfs_alloc_read_agf(pag, NULL, trylock, NULL);
-                       if (err) {
-                               if (err != -EAGAIN) {
-                                       xfs_perag_put(pag);
-                                       return err;
-                               }
-                               /* Couldn't lock the AGF, skip this AG. */
-                               goto next_ag;
-                       }
+restart:
+       for_each_perag_wrap(mp, start_agno, agno, pag) {
+               trace_xfs_filestream_scan(pag, pino);
+               *longest = 0;
+               err = xfs_bmap_longest_free_extent(pag, NULL, longest);
+               if (err) {
+                       xfs_perag_rele(pag);
+                       if (err != -EAGAIN)
+                               break;
+                       /* Couldn't lock the AGF, skip this AG. */
+                       err = 0;
+                       continue;
                }
 
                /* Keep track of the AG with the most free blocks. */
                if (pag->pagf_freeblks > maxfree) {
                        maxfree = pag->pagf_freeblks;
-                       max_ag = ag;
+                       if (max_pag)
+                               xfs_perag_rele(max_pag);
+                       atomic_inc(&pag->pag_active_ref);
+                       max_pag = pag;
                }
 
                /*
@@ -149,93 +101,73 @@ xfs_filestream_pick_ag(
                 * loop, and it guards against two filestreams being established
                 * in the same AG as each other.
                 */
-               if (xfs_filestream_get_ag(mp, ag) > 1) {
-                       xfs_filestream_put_ag(mp, ag);
-                       goto next_ag;
-               }
-
-               longest = xfs_alloc_longest_free_extent(pag,
-                               xfs_alloc_min_freelist(mp, pag),
-                               xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
-               if (((minlen && longest >= minlen) ||
-                    (!minlen && pag->pagf_freeblks >= minfree)) &&
-                   (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
-                    (flags & XFS_PICK_LOWSPACE))) {
-
-                       /* Break out, retaining the reference on the AG. */
-                       free = pag->pagf_freeblks;
-                       xfs_perag_put(pag);
-                       *agp = ag;
-                       break;
+               if (atomic_inc_return(&pag->pagf_fstrms) <= 1) {
+                       if (((minlen && *longest >= minlen) ||
+                            (!minlen && pag->pagf_freeblks >= minfree)) &&
+                           (!xfs_perag_prefers_metadata(pag) ||
+                            !(flags & XFS_PICK_USERDATA) ||
+                            (flags & XFS_PICK_LOWSPACE))) {
+                               /* Break out, retaining the reference on the AG. */
+                               free = pag->pagf_freeblks;
+                               break;
+                       }
                }
 
                /* Drop the reference on this AG, it's not usable. */
-               xfs_filestream_put_ag(mp, ag);
-next_ag:
-               xfs_perag_put(pag);
-               /* Move to the next AG, wrapping to AG 0 if necessary. */
-               if (++ag >= mp->m_sb.sb_agcount)
-                       ag = 0;
-
-               /* If a full pass of the AGs hasn't been done yet, continue. */
-               if (ag != startag)
-                       continue;
+               atomic_dec(&pag->pagf_fstrms);
+       }
 
-               /* Allow sleeping in xfs_alloc_read_agf() on the 2nd pass. */
-               if (trylock != 0) {
-                       trylock = 0;
-                       continue;
+       if (err) {
+               xfs_perag_rele(pag);
+               if (max_pag)
+                       xfs_perag_rele(max_pag);
+               return err;
+       }
+
+       if (!pag) {
+               /*
+                * Allow a second pass to give xfs_bmap_longest_free_extent()
+                * another attempt at locking AGFs that it might have skipped
+                * over before we fail.
+                */
+               if (first_pass) {
+                       first_pass = false;
+                       goto restart;
                }
 
-               /* Finally, if lowspace wasn't set, set it for the 3rd pass. */
+               /*
+                * We must be low on data space, so run a final lowspace
+                * optimised selection pass if we haven't already.
+                */
                if (!(flags & XFS_PICK_LOWSPACE)) {
                        flags |= XFS_PICK_LOWSPACE;
-                       continue;
+                       goto restart;
                }
 
                /*
-                * Take the AG with the most free space, regardless of whether
-                * it's already in use by another filestream.
+                * No unassociated AGs are available, so select the AG with the
+                * most free space, regardless of whether it's already in use by
+                * another filestream. If none suit, just use whatever AG we can
+                * grab.
                 */
-               if (max_ag != NULLAGNUMBER) {
-                       xfs_filestream_get_ag(mp, max_ag);
+               if (!max_pag) {
+                       for_each_perag_wrap(args->mp, 0, start_agno, args->pag)
+                               break;
+                       atomic_inc(&args->pag->pagf_fstrms);
+                       *longest = 0;
+               } else {
+                       pag = max_pag;
                        free = maxfree;
-                       *agp = max_ag;
-                       break;
+                       atomic_inc(&pag->pagf_fstrms);
                }
-
-               /* take AG 0 if none matched */
-               trace_xfs_filestream_pick(ip, *agp, free, nscan);
-               *agp = 0;
-               return 0;
-       }
-
-       trace_xfs_filestream_pick(ip, *agp, free, nscan);
-
-       if (*agp == NULLAGNUMBER)
-               return 0;
-
-       err = -ENOMEM;
-       item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
-       if (!item)
-               goto out_put_ag;
-
-       item->ag = *agp;
-
-       err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
-       if (err) {
-               if (err == -EEXIST)
-                       err = 0;
-               goto out_free_item;
+       } else if (max_pag) {
+               xfs_perag_rele(max_pag);
        }
 
+       trace_xfs_filestream_pick(pag, pino, free);
+       args->pag = pag;
        return 0;
 
-out_free_item:
-       kmem_free(item);
-out_put_ag:
-       xfs_filestream_put_ag(mp, *agp);
-       return err;
 }
 
 static struct xfs_inode *
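
The ownership test above leans on atomic_inc_return(): bumping
pagf_fstrms both claims the AG for this filestream and reports whether
another stream already held it, so the failure path is a bare
atomic_dec(). A C11 model of that claim/back-out idiom; the harness is
illustrative:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static atomic_int pagf_fstrms;

	static bool filestream_claim_ag(void)
	{
		/* fetch_add returns the old count; +1 mirrors inc_return */
		if (atomic_fetch_add(&pagf_fstrms, 1) + 1 <= 1)
			return true;			/* sole stream here */
		atomic_fetch_sub(&pagf_fstrms, 1);	/* contended: back out */
		return false;
	}

	int main(void)
	{
		printf("first stream claims AG: %d\n", filestream_claim_ag());
		printf("second stream claims AG: %d\n", filestream_claim_ag());
		return 0;
	}
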
@@ -263,104 +195,187 @@ out:
 }
 
 /*
- * Find the right allocation group for a file, either by finding an
- * existing file stream or creating a new one.
+ * Look up the mru cache for an existing association. If one exists and we can
+ * use it, return with an active perag reference indicating that the allocation
+ * will proceed with that association.
  *
- * Returns NULLAGNUMBER in case of an error.
+ * If we have no association, or we cannot use the current one and have to
+ * destroy it, return with longest = 0 to tell the caller to create a new
+ * association.
  */
-xfs_agnumber_t
-xfs_filestream_lookup_ag(
-       struct xfs_inode        *ip)
+static int
+xfs_filestream_lookup_association(
+       struct xfs_bmalloca     *ap,
+       struct xfs_alloc_arg    *args,
+       xfs_ino_t               pino,
+       xfs_extlen_t            *longest)
 {
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_inode        *pip = NULL;
-       xfs_agnumber_t          startag, ag = NULLAGNUMBER;
+       struct xfs_mount        *mp = args->mp;
+       struct xfs_perag        *pag;
        struct xfs_mru_cache_elem *mru;
+       int                     error = 0;
 
-       ASSERT(S_ISREG(VFS_I(ip)->i_mode));
-
-       pip = xfs_filestream_get_parent(ip);
-       if (!pip)
-               return NULLAGNUMBER;
+       *longest = 0;
+       mru = xfs_mru_cache_lookup(mp->m_filestream, pino);
+       if (!mru)
+               return 0;
+       /*
+        * Grab the pag and take an extra active reference for the caller
+        * while the mru lookup pins the item and it cannot go away. This
+        * means we'll pin the perag with the reference we get here even if
+        * the filestreams association is torn down immediately after we
+        * mark the lookup as done.
+        */
+       pag = container_of(mru, struct xfs_fstrm_item, mru)->pag;
+       atomic_inc(&pag->pag_active_ref);
+       xfs_mru_cache_done(mp->m_filestream);
 
-       mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
-       if (mru) {
-               ag = container_of(mru, struct xfs_fstrm_item, mru)->ag;
-               xfs_mru_cache_done(mp->m_filestream);
+       trace_xfs_filestream_lookup(pag, ap->ip->i_ino);
 
-               trace_xfs_filestream_lookup(mp, ip->i_ino, ag);
-               goto out;
-       }
+       ap->blkno = XFS_AGB_TO_FSB(args->mp, pag->pag_agno, 0);
+       xfs_bmap_adjacent(ap);
 
        /*
-        * Set the starting AG using the rotor for inode32, otherwise
-        * use the directory inode's AG.
+        * If there is very little free space before we start a filestreams
+        * allocation, we're almost guaranteed to fail to find a large enough
+        * contiguous free extent, so just use the cached AG.
         */
-       if (xfs_is_inode32(mp)) {
-               xfs_agnumber_t   rotorstep = xfs_rotorstep;
-               startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
-               mp->m_agfrotor = (mp->m_agfrotor + 1) %
-                                (mp->m_sb.sb_agcount * rotorstep);
-       } else
-               startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
+       if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
+               *longest = 1;
+               goto out_done;
+       }
 
-       if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0))
-               ag = NULLAGNUMBER;
-out:
-       xfs_irele(pip);
-       return ag;
+       error = xfs_bmap_longest_free_extent(pag, args->tp, longest);
+       if (error == -EAGAIN)
+               error = 0;
+       if (error || *longest < args->maxlen) {
+               /* We aren't going to use this perag */
+               *longest = 0;
+               xfs_perag_rele(pag);
+               return error;
+       }
+
+out_done:
+       args->pag = pag;
+       return 0;
 }
 
-/*
- * Pick a new allocation group for the current file and its file stream.
- *
- * This is called when the allocator can't find a suitable extent in the
- * current AG, and we have to move the stream into a new AG with more space.
- */
-int
-xfs_filestream_new_ag(
+static int
+xfs_filestream_create_association(
        struct xfs_bmalloca     *ap,
-       xfs_agnumber_t          *agp)
+       struct xfs_alloc_arg    *args,
+       xfs_ino_t               pino,
+       xfs_extlen_t            *longest)
 {
-       struct xfs_inode        *ip = ap->ip, *pip;
-       struct xfs_mount        *mp = ip->i_mount;
-       xfs_extlen_t            minlen = ap->length;
-       xfs_agnumber_t          startag = 0;
-       int                     flags = 0;
-       int                     err = 0;
+       struct xfs_mount        *mp = args->mp;
        struct xfs_mru_cache_elem *mru;
+       struct xfs_fstrm_item   *item;
+       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, pino);
+       int                     flags = 0;
+       int                     error;
 
-       *agp = NULLAGNUMBER;
-
-       pip = xfs_filestream_get_parent(ip);
-       if (!pip)
-               goto exit;
-
-       mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
+       /* Changing parent AG association now, so remove the existing one. */
+       mru = xfs_mru_cache_remove(mp->m_filestream, pino);
        if (mru) {
                struct xfs_fstrm_item *item =
                        container_of(mru, struct xfs_fstrm_item, mru);
-               startag = (item->ag + 1) % mp->m_sb.sb_agcount;
+
+               agno = (item->pag->pag_agno + 1) % mp->m_sb.sb_agcount;
+               xfs_fstrm_free_func(mp, mru);
+       } else if (xfs_is_inode32(mp)) {
+               xfs_agnumber_t   rotorstep = xfs_rotorstep;
+
+               agno = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
+               mp->m_agfrotor = (mp->m_agfrotor + 1) %
+                                (mp->m_sb.sb_agcount * rotorstep);
        }
 
+       ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0);
+       xfs_bmap_adjacent(ap);
+
        if (ap->datatype & XFS_ALLOC_USERDATA)
                flags |= XFS_PICK_USERDATA;
        if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
                flags |= XFS_PICK_LOWSPACE;
 
-       err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);
+       *longest = ap->length;
+       error = xfs_filestream_pick_ag(args, pino, agno, flags, longest);
+       if (error)
+               return error;
 
        /*
-        * Only free the item here so we skip over the old AG earlier.
+        * We are going to use this perag now, so create an association for it.
+        * xfs_filestream_pick_ag() has already bumped the perag fstrms counter
+        * for us, so all we need to do here is take another active reference to
+        * the perag for the cached association.
+        *
+        * If we fail to store the association, we need to drop the fstrms
+        * counter as well as drop the perag reference we take here for the
+        * item. We do not need to return an error for this failure - as long as
+        * we return a referenced AG, the allocation can still go ahead just
+        * fine.
         */
-       if (mru)
-               xfs_fstrm_free_func(mp, mru);
+       item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
+       if (!item)
+               goto out_put_fstrms;
+
+       atomic_inc(&args->pag->pag_active_ref);
+       item->pag = args->pag;
+       error = xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru);
+       if (error)
+               goto out_free_item;
+       return 0;
+
+out_free_item:
+       xfs_perag_rele(item->pag);
+       kmem_free(item);
+out_put_fstrms:
+       atomic_dec(&args->pag->pagf_fstrms);
+       return 0;
+}
+
+/*
+ * Search for an allocation group with a single extent large enough for
+ * the request. First we look for an existing association and use that if it
+ * is found. Otherwise, we create a new association by selecting an AG that fits
+ * the allocation criteria.
+ *
+ * We return with a referenced perag in args->pag to indicate which AG we are
+ * allocating into or an error with no references held.
+ */
+int
+xfs_filestream_select_ag(
+       struct xfs_bmalloca     *ap,
+       struct xfs_alloc_arg    *args,
+       xfs_extlen_t            *longest)
+{
+       struct xfs_mount        *mp = args->mp;
+       struct xfs_inode        *pip;
+       xfs_ino_t               ino = 0;
+       int                     error = 0;
+
+       *longest = 0;
+       args->total = ap->total;
+       pip = xfs_filestream_get_parent(ap->ip);
+       if (pip) {
+               ino = pip->i_ino;
+               error = xfs_filestream_lookup_association(ap, args, ino,
+                               longest);
+               xfs_irele(pip);
+               if (error)
+                       return error;
+               if (*longest >= args->maxlen)
+                       goto out_select;
+               if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
+                       goto out_select;
+       }
+
+       error = xfs_filestream_create_association(ap, args, ino, longest);
+       if (error)
+               return error;
 
-       xfs_irele(pip);
-exit:
-       if (*agp == NULLAGNUMBER)
-               *agp = 0;
-       return err;
+out_select:
+       ap->blkno = XFS_AGB_TO_FSB(mp, args->pag->pag_agno, 0);
+       return 0;
 }
 
 void
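
After this rework, xfs_filestream_select_ag() has a simple two-phase
shape: try the cached association first, and only build a new one when
the cached AG cannot satisfy args->maxlen. A compressed control-flow
sketch with stub helpers standing in for the two functions above:

	#include <stdio.h>

	/* stand-ins: each reports the longest extent the chosen AG can
	 * offer, with 0 meaning "no usable cached association" */
	static unsigned int lookup_association(void) { return 0; }
	static unsigned int create_association(void) { return 16; }

	int main(void)
	{
		unsigned int maxlen = 8;
		unsigned int longest = lookup_association();

		if (longest < maxlen)	/* cached AG can't satisfy maxlen */
			longest = create_association();
		printf("selected an AG offering %u blocks\n", longest);
		return 0;
	}
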
index 403226ebb80bb39642839f43709aac4f9d320110..84149ed0e3402a9a2821bdd112f67a54a9885d05 100644 (file)
@@ -9,13 +9,13 @@
 struct xfs_mount;
 struct xfs_inode;
 struct xfs_bmalloca;
+struct xfs_alloc_arg;
 
 int xfs_filestream_mount(struct xfs_mount *mp);
 void xfs_filestream_unmount(struct xfs_mount *mp);
 void xfs_filestream_deassociate(struct xfs_inode *ip);
-xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
-int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
-int xfs_filestream_peek_ag(struct xfs_mount *mp, xfs_agnumber_t agno);
+int xfs_filestream_select_ag(struct xfs_bmalloca *ap,
+               struct xfs_alloc_arg *args, xfs_extlen_t *blen);
 
 static inline int
 xfs_inode_is_filestream(
index 88a88506ffffe4aaeaba676e2b734bd0bce6e7cf..59e7d1a14b67243d118dce25e51e7adc6d58294c 100644 (file)
@@ -688,11 +688,11 @@ __xfs_getfsmap_datadev(
                info->agf_bp = NULL;
        }
        if (info->pag) {
-               xfs_perag_put(info->pag);
+               xfs_perag_rele(info->pag);
                info->pag = NULL;
        } else if (pag) {
                /* loop termination case */
-               xfs_perag_put(pag);
+               xfs_perag_rele(pag);
        }
 
        return error;
@@ -761,6 +761,7 @@ xfs_getfsmap_datadev_bnobt(
 {
        struct xfs_alloc_rec_incore     akeys[2];
 
+       memset(akeys, 0, sizeof(akeys));
        info->missing_owner = XFS_FMR_OWN_UNKNOWN;
        return __xfs_getfsmap_datadev(tp, keys, info,
                        xfs_getfsmap_datadev_bnobt_query, &akeys[0]);
index ddeaccc04aec94a3362efffb617121ca6157e0ba..c9a7e270a4286b14471cfb679af950ca55c53622 100644 (file)
@@ -255,7 +255,7 @@ xfs_perag_set_inode_tag(
                break;
        }
 
-       trace_xfs_perag_set_inode_tag(mp, pag->pag_agno, tag, _RET_IP_);
+       trace_xfs_perag_set_inode_tag(pag, _RET_IP_);
 }
 
 /* Clear a tag on both the AG incore inode tree and the AG radix tree. */
@@ -289,7 +289,7 @@ xfs_perag_clear_inode_tag(
        radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, tag);
        spin_unlock(&mp->m_perag_lock);
 
-       trace_xfs_perag_clear_inode_tag(mp, pag->pag_agno, tag, _RET_IP_);
+       trace_xfs_perag_clear_inode_tag(pag, _RET_IP_);
 }
 
 /*
@@ -586,7 +586,7 @@ xfs_iget_cache_miss(
        if (!ip)
                return -ENOMEM;
 
-       error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, flags);
+       error = xfs_imap(pag, tp, ip->i_ino, &ip->i_imap, flags);
        if (error)
                goto out_destroy;
 
@@ -1767,7 +1767,7 @@ xfs_icwalk(
                if (error) {
                        last_error = error;
                        if (error == -EFSCORRUPTED) {
-                               xfs_perag_put(pag);
+                               xfs_perag_rele(pag);
                                break;
                        }
                }
index 7f1d715faab527b888100408e7c513c897c9df1c..5808abab786c790d204cf502a6dce1ec6bc309f3 100644 (file)
@@ -1367,7 +1367,7 @@ xfs_itruncate_extents_flags(
 
        unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1;
        while (unmap_len > 0) {
-               ASSERT(tp->t_firstblock == NULLFSBLOCK);
+               ASSERT(tp->t_highest_agno == NULLAGNUMBER);
                error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len,
                                flags, XFS_ITRUNC_MAX_EXTENTS);
                if (error)
index 7558486f49371c861e3d38a0d803ef2537fa4e06..21be93bf006dd60438abb181003759f09f6aff53 100644 (file)
@@ -275,7 +275,7 @@ xfs_iwalk_ag_start(
 
        /* Set up a fresh cursor and empty the inobt cache. */
        iwag->nr_recs = 0;
-       error = xfs_inobt_cur(mp, tp, pag, XFS_BTNUM_INO, curpp, agi_bpp);
+       error = xfs_inobt_cur(pag, tp, XFS_BTNUM_INO, curpp, agi_bpp);
        if (error)
                return error;
 
@@ -390,7 +390,7 @@ xfs_iwalk_run_callbacks(
        }
 
        /* ...and recreate the cursor just past where we left off. */
-       error = xfs_inobt_cur(mp, iwag->tp, iwag->pag, XFS_BTNUM_INO, curpp,
+       error = xfs_inobt_cur(iwag->pag, iwag->tp, XFS_BTNUM_INO, curpp,
                        agi_bpp);
        if (error)
                return error;
@@ -591,7 +591,7 @@ xfs_iwalk(
        }
 
        if (iwag.pag)
-               xfs_perag_put(pag);
+               xfs_perag_rele(pag);
        xfs_iwalk_free(&iwag);
        return error;
 }
@@ -683,7 +683,7 @@ xfs_iwalk_threaded(
                        break;
        }
        if (pag)
-               xfs_perag_put(pag);
+               xfs_perag_rele(pag);
        if (polled)
                xfs_pwork_poll(&pctl);
        return xfs_pwork_destroy(&pctl);
@@ -776,7 +776,7 @@ xfs_inobt_walk(
        }
 
        if (iwag.pag)
-               xfs_perag_put(pag);
+               xfs_perag_rele(pag);
        xfs_iwalk_free(&iwag);
        return error;
 }
index 8aca2cc173ac1d363eb6007673688801c0502c88..f3269c0626f0576b669d8881572f1e45ba5cfb80 100644 (file)
@@ -210,8 +210,7 @@ typedef struct xfs_mount {
        struct xfs_error_cfg    m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
        struct xstats           m_stats;        /* per-fs stats */
        xfs_agnumber_t          m_agfrotor;     /* last ag where space found */
-       xfs_agnumber_t          m_agirotor;     /* last ag dir inode alloced */
-       spinlock_t              m_agirotor_lock;/* .. and lock protecting it */
+       atomic_t                m_agirotor;     /* last ag dir inode alloced */
 
        /* Memory shrinker to throttle and reprioritize inodegc */
        struct shrinker         m_inodegc_shrinker;
index 5535778a98f925d0ce158cc64f09c3981fe3670e..f5dc46ce980311676910dd622c329add19ce8ad4 100644 (file)
@@ -610,7 +610,7 @@ xfs_reflink_cancel_cow_blocks(
                        if (error)
                                break;
                } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
-                       ASSERT((*tpp)->t_firstblock == NULLFSBLOCK);
+                       ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER);
 
                        /* Free the CoW orphan record. */
                        xfs_refcount_free_cow_extent(*tpp, del.br_startblock,
@@ -927,7 +927,7 @@ xfs_reflink_recover_cow(
        for_each_perag(mp, agno, pag) {
                error = xfs_refcount_recover_cow_leftovers(mp, pag);
                if (error) {
-                       xfs_perag_put(pag);
+                       xfs_perag_rele(pag);
                        break;
                }
        }
index 0c4b73e9b29d2925063017399fb40bc317e89e0f..2479b5cbd75ecf51850aa0a1c6dfb79b1def8c2b 100644 (file)
@@ -247,6 +247,32 @@ xfs_fs_show_options(
        return 0;
 }
 
+static bool
+xfs_set_inode_alloc_perag(
+       struct xfs_perag        *pag,
+       xfs_ino_t               ino,
+       xfs_agnumber_t          max_metadata)
+{
+       if (!xfs_is_inode32(pag->pag_mount)) {
+               set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
+               clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
+               return false;
+       }
+
+       if (ino > XFS_MAXINUMBER_32) {
+               clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
+               clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
+               return false;
+       }
+
+       set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
+       if (pag->pag_agno < max_metadata)
+               set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
+       else
+               clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
+       return true;
+}
+
 /*
  * Set parameters for inode allocation heuristics, taking into account
  * filesystem size and inode32/inode64 mount options; i.e. specifically
@@ -310,24 +336,8 @@ xfs_set_inode_alloc(
                ino = XFS_AGINO_TO_INO(mp, index, agino);
 
                pag = xfs_perag_get(mp, index);
-
-               if (xfs_is_inode32(mp)) {
-                       if (ino > XFS_MAXINUMBER_32) {
-                               pag->pagi_inodeok = 0;
-                               pag->pagf_metadata = 0;
-                       } else {
-                               pag->pagi_inodeok = 1;
-                               maxagi++;
-                               if (index < max_metadata)
-                                       pag->pagf_metadata = 1;
-                               else
-                                       pag->pagf_metadata = 0;
-                       }
-               } else {
-                       pag->pagi_inodeok = 1;
-                       pag->pagf_metadata = 0;
-               }
-
+               if (xfs_set_inode_alloc_perag(pag, ino, max_metadata))
+                       maxagi++;
                xfs_perag_put(pag);
        }
 
@@ -1922,7 +1932,6 @@ static int xfs_init_fs_context(
                return -ENOMEM;
 
        spin_lock_init(&mp->m_sb_lock);
-       spin_lock_init(&mp->m_agirotor_lock);
        INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
        spin_lock_init(&mp->m_perag_lock);
        mutex_init(&mp->m_growlock);
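
The xfs_set_inode_alloc_perag() helper added above makes the inode32
policy a pure function of the mount mode, the highest inode number the
AG can hold, and the AG's position relative to max_metadata; its return
value is what feeds the maxagi count. A standalone model of that
decision table (XFS_MAXINUMBER_32 is the 32-bit inode limit; the rest
of the harness is a stand-in):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define XFS_MAXINUMBER_32	0xffffffffULL

	struct ag_policy { bool allows_inodes; bool prefers_metadata; };

	/* returns true when the AG counts towards maxagi */
	static bool set_inode_alloc_policy(struct ag_policy *p, bool inode32,
			uint64_t max_ino, unsigned int agno,
			unsigned int max_metadata)
	{
		if (!inode32) {
			p->allows_inodes = true;
			p->prefers_metadata = false;
			return false;
		}
		if (max_ino > XFS_MAXINUMBER_32) {
			p->allows_inodes = false;
			p->prefers_metadata = false;
			return false;
		}
		p->allows_inodes = true;
		p->prefers_metadata = agno < max_metadata;
		return true;
	}

	int main(void)
	{
		struct ag_policy p;
		bool counted = set_inode_alloc_policy(&p, true, 1000, 0, 2);

		printf("counted=%d inodes=%d metadata=%d\n",
				counted, p.allows_inodes, p.prefers_metadata);
		return 0;
	}
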
index 6b0e9ae7c513c523f131357330e9e85e24429192..7dc0fd6a6504743389d67c571d97c12086e7b497 100644 (file)
@@ -74,6 +74,7 @@ struct xfs_inobt_rec_incore;
 union xfs_btree_ptr;
 struct xfs_dqtrx;
 struct xfs_icwalk;
+struct xfs_perag;
 
 #define XFS_ATTR_FILTER_FLAGS \
        { XFS_ATTR_ROOT,        "ROOT" }, \
@@ -159,36 +160,40 @@ TRACE_EVENT(xlog_intent_recovery_failed,
 );
 
 DECLARE_EVENT_CLASS(xfs_perag_class,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
-                unsigned long caller_ip),
-       TP_ARGS(mp, agno, refcount, caller_ip),
+       TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip),
+       TP_ARGS(pag, caller_ip),
        TP_STRUCT__entry(
                __field(dev_t, dev)
                __field(xfs_agnumber_t, agno)
                __field(int, refcount)
+               __field(int, active_refcount)
                __field(unsigned long, caller_ip)
        ),
        TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->refcount = refcount;
+               __entry->dev = pag->pag_mount->m_super->s_dev;
+               __entry->agno = pag->pag_agno;
+               __entry->refcount = atomic_read(&pag->pag_ref);
+               __entry->active_refcount = atomic_read(&pag->pag_active_ref);
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d agno 0x%x refcount %d caller %pS",
+       TP_printk("dev %d:%d agno 0x%x passive refs %d active refs %d caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->agno,
                  __entry->refcount,
+                 __entry->active_refcount,
                  (char *)__entry->caller_ip)
 );
 
 #define DEFINE_PERAG_REF_EVENT(name)   \
 DEFINE_EVENT(xfs_perag_class, name,    \
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,       \
-                unsigned long caller_ip),                                      \
-       TP_ARGS(mp, agno, refcount, caller_ip))
+       TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip), \
+       TP_ARGS(pag, caller_ip))
 DEFINE_PERAG_REF_EVENT(xfs_perag_get);
 DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
 DEFINE_PERAG_REF_EVENT(xfs_perag_put);
+DEFINE_PERAG_REF_EVENT(xfs_perag_grab);
+DEFINE_PERAG_REF_EVENT(xfs_perag_grab_tag);
+DEFINE_PERAG_REF_EVENT(xfs_perag_rele);
 DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag);
 DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag);
 
@@ -634,8 +639,8 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
 
 DECLARE_EVENT_CLASS(xfs_filestream_class,
-       TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno),
-       TP_ARGS(mp, ino, agno),
+       TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino),
+       TP_ARGS(pag, ino),
        TP_STRUCT__entry(
                __field(dev_t, dev)
                __field(xfs_ino_t, ino)
@@ -643,10 +648,10 @@ DECLARE_EVENT_CLASS(xfs_filestream_class,
                __field(int, streams)
        ),
        TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
+               __entry->dev = pag->pag_mount->m_super->s_dev;
                __entry->ino = ino;
-               __entry->agno = agno;
-               __entry->streams = xfs_filestream_peek_ag(mp, agno);
+               __entry->agno = pag->pag_agno;
+               __entry->streams = atomic_read(&pag->pagf_fstrms);
        ),
        TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -656,39 +661,40 @@ DECLARE_EVENT_CLASS(xfs_filestream_class,
 )
 #define DEFINE_FILESTREAM_EVENT(name) \
 DEFINE_EVENT(xfs_filestream_class, name, \
-       TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno), \
-       TP_ARGS(mp, ino, agno))
+       TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino), \
+       TP_ARGS(pag, ino))
 DEFINE_FILESTREAM_EVENT(xfs_filestream_free);
 DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
 DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
 
 TRACE_EVENT(xfs_filestream_pick,
-       TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno,
-                xfs_extlen_t free, int nscan),
-       TP_ARGS(ip, agno, free, nscan),
+       TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino, xfs_extlen_t free),
+       TP_ARGS(pag, ino, free),
        TP_STRUCT__entry(
                __field(dev_t, dev)
                __field(xfs_ino_t, ino)
                __field(xfs_agnumber_t, agno)
                __field(int, streams)
                __field(xfs_extlen_t, free)
-               __field(int, nscan)
        ),
        TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->agno = agno;
-               __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno);
+               __entry->ino = ino;
+               if (pag) {
+                       __entry->dev = pag->pag_mount->m_super->s_dev;
+                       __entry->agno = pag->pag_agno;
+                       __entry->streams = atomic_read(&pag->pagf_fstrms);
+               } else {
+                       __entry->dev = 0;
+                       __entry->agno = NULLAGNUMBER;
+                       __entry->streams = 0;
+               }
                __entry->free = free;
-               __entry->nscan = nscan;
        ),
-       TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d nscan %d",
+       TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->agno,
                  __entry->streams,
-                 __entry->free,
-                 __entry->nscan)
+                 __entry->free)
 );
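Because the reworked picker can fail to find a suitable AG, the event now accepts a NULL pag, which is why every pag dereference above sits behind the NULL check. A hedged sketch of the two call shapes this implies (the call sites themselves are not part of this hunk):

/* Illustrative only: emitting the event on both picker outcomes. */
if (pag)
	trace_xfs_filestream_pick(pag, ino, free_space);
else
	trace_xfs_filestream_pick(NULL, ino, 0);	/* nothing suitable */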
 
 DECLARE_EVENT_CLASS(xfs_lock_class,
@@ -1795,13 +1801,11 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
                __field(xfs_extlen_t, alignment)
                __field(xfs_extlen_t, minalignslop)
                __field(xfs_extlen_t, len)
-               __field(short, type)
-               __field(short, otype)
                __field(char, wasdel)
                __field(char, wasfromfl)
                __field(int, resv)
                __field(int, datatype)
-               __field(xfs_fsblock_t, firstblock)
+               __field(xfs_agnumber_t, highest_agno)
        ),
        TP_fast_assign(
                __entry->dev = args->mp->m_super->s_dev;
@@ -1816,18 +1820,16 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
                __entry->alignment = args->alignment;
                __entry->minalignslop = args->minalignslop;
                __entry->len = args->len;
-               __entry->type = args->type;
-               __entry->otype = args->otype;
                __entry->wasdel = args->wasdel;
                __entry->wasfromfl = args->wasfromfl;
                __entry->resv = args->resv;
                __entry->datatype = args->datatype;
-               __entry->firstblock = args->tp->t_firstblock;
+               __entry->highest_agno = args->tp->t_highest_agno;
        ),
        TP_printk("dev %d:%d agno 0x%x agbno 0x%x minlen %u maxlen %u mod %u "
                  "prod %u minleft %u total %u alignment %u minalignslop %u "
-                 "len %u type %s otype %s wasdel %d wasfromfl %d resv %d "
-                 "datatype 0x%x firstblock 0x%llx",
+                 "len %u wasdel %d wasfromfl %d resv %d "
+                 "datatype 0x%x highest_agno 0x%x",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->agno,
                  __entry->agbno,
@@ -1840,13 +1842,11 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
                  __entry->alignment,
                  __entry->minalignslop,
                  __entry->len,
-                 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
-                 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
                  __entry->wasdel,
                  __entry->wasfromfl,
                  __entry->resv,
                  __entry->datatype,
-                 (unsigned long long)__entry->firstblock)
+                 __entry->highest_agno)
 )
 
 #define DEFINE_ALLOC_EVENT(name) \
@@ -1877,6 +1877,7 @@ DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
 DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
 DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
 DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_skip_deadlock);
 DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
 DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
 DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
index 7bd16fbff534103752574dfaa9687178200a543c..8afc0c080861d53e379a53d23bd66c1e1b9f3009 100644 (file)
@@ -102,7 +102,7 @@ xfs_trans_dup(
        INIT_LIST_HEAD(&ntp->t_items);
        INIT_LIST_HEAD(&ntp->t_busy);
        INIT_LIST_HEAD(&ntp->t_dfops);
-       ntp->t_firstblock = NULLFSBLOCK;
+       ntp->t_highest_agno = NULLAGNUMBER;
 
        ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
        ASSERT(tp->t_ticket != NULL);
@@ -278,7 +278,7 @@ retry:
        INIT_LIST_HEAD(&tp->t_items);
        INIT_LIST_HEAD(&tp->t_busy);
        INIT_LIST_HEAD(&tp->t_dfops);
-       tp->t_firstblock = NULLFSBLOCK;
+       tp->t_highest_agno = NULLAGNUMBER;
 
        error = xfs_trans_reserve(tp, resp, blocks, rtextents);
        if (error == -ENOSPC && want_retry) {
@@ -1078,10 +1078,10 @@ xfs_trans_cancel(
        /*
         * It's never valid to cancel a transaction with deferred ops attached,
         * because the transaction is effectively dirty.  Complain about this
-        * loudly before freeing the in-memory defer items.
+        * loudly before freeing the in-memory defer items and shutting down the
+        * filesystem.
         */
        if (!list_empty(&tp->t_dfops)) {
-               ASSERT(xfs_is_shutdown(mp) || list_empty(&tp->t_dfops));
                ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
                dirty = true;
                xfs_defer_cancel(tp);
index 55819785941cc426d1be9a1422da6083694f8d54..6e3646d524ceb6060bbf7219d367918a03a700f2 100644 (file)
@@ -132,7 +132,7 @@ typedef struct xfs_trans {
        unsigned int            t_rtx_res;      /* # of rt extents resvd */
        unsigned int            t_rtx_res_used; /* # of resvd rt extents used */
        unsigned int            t_flags;        /* misc flags */
-       xfs_fsblock_t           t_firstblock;   /* first block allocated */
+       xfs_agnumber_t          t_highest_agno; /* highest AGF locked */
        struct xlog_ticket      *t_ticket;      /* log mgr ticket */
        struct xfs_mount        *t_mountp;      /* ptr to fs mount struct */
        struct xfs_dquot_acct   *t_dqinfo;      /* acctg info for dquots */
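Swapping t_firstblock for t_highest_agno moves the transaction's deadlock-avoidance state from "first block allocated" to "highest AG whose AGF we hold": later allocations in the same transaction must stay at or above that AG so AGF locks are always taken in ascending order. A minimal sketch of the check this field enables (helper name and exact predicate are assumptions, not from this hunk):

/* Sketch: refuse allocations that would take AGF locks out of order. */
static bool ag_lock_order_ok(const struct xfs_trans *tp, xfs_agnumber_t agno)
{
	if (tp->t_highest_agno == NULLAGNUMBER)
		return true;			/* no AGF held yet */
	return agno >= tp->t_highest_agno;	/* only walk upwards */
}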
index 03eb3d9770759cc1826e47d8c55621c0f046fbfd..9e4f7564201a01f7e062e2959ff4c5cb9000d999 100644 (file)
  *
  *****************************************************************************/
 
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+#if defined(__GNUC__)
 #include <acpi/platform/acgcc.h>
 
 #elif defined(_MSC_VER)
 #include "acmsvc.h"
 
-#elif defined(__INTEL_COMPILER)
-#include <acpi/platform/acintel.h>
-
 #endif
 
 #if defined(_LINUX) || defined(__linux__)
index 3a6b1db9a984d5f38173d6fd7b52703195d56c88..72cc7bab469e9f96701d2b3fd3c5cdf9c8e4f822 100644 (file)
@@ -35,7 +35,7 @@
 
 #endif
 
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+#if defined(__GNUC__)
 #include "acgccex.h"
 
 #elif defined(_MSC_VER)
diff --git a/include/acpi/platform/acintel.h b/include/acpi/platform/acintel.h
deleted file mode 100644 (file)
index 85b1ae8..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
-/******************************************************************************
- *
- * Name: acintel.h - VC specific defines, etc.
- *
- * Copyright (C) 2000 - 2022, Intel Corp.
- *
- *****************************************************************************/
-
-#ifndef __ACINTEL_H__
-#define __ACINTEL_H__
-
-/*
- * Use compiler specific <stdarg.h> is a good practice for even when
- * -nostdinc is specified (i.e., ACPI_USE_STANDARD_HEADERS undefined.
- */
-#ifndef va_arg
-#include <stdarg.h>
-#endif
-
-/* Configuration specific to Intel 64-bit C compiler */
-
-#define COMPILER_DEPENDENT_INT64    __int64
-#define COMPILER_DEPENDENT_UINT64   unsigned __int64
-#define ACPI_INLINE                 __inline
-
-/*
- * Calling conventions:
- *
- * ACPI_SYSTEM_XFACE        - Interfaces to host OS (handlers, threads)
- * ACPI_EXTERNAL_XFACE      - External ACPI interfaces
- * ACPI_INTERNAL_XFACE      - Internal ACPI interfaces
- * ACPI_INTERNAL_VAR_XFACE  - Internal variable-parameter list interfaces
- */
-#define ACPI_SYSTEM_XFACE
-#define ACPI_EXTERNAL_XFACE
-#define ACPI_INTERNAL_XFACE
-#define ACPI_INTERNAL_VAR_XFACE
-
-/* remark 981 - operands evaluated in no particular order */
-#pragma warning(disable:981)
-
-/* warn C4100: unreferenced formal parameter */
-#pragma warning(disable:4100)
-
-/* warn C4127: conditional expression is constant */
-#pragma warning(disable:4127)
-
-/* warn C4706: assignment within conditional expression */
-#pragma warning(disable:4706)
-
-/* warn C4214: bit field types other than int */
-#pragma warning(disable:4214)
-
-#endif                         /* __ACINTEL_H__ */
index 118bea36d7d49082d5725a9b9d2ecadb40898846..abbc8149178e6b2bf29448f1a5a9dced4f88d286 100644 (file)
                const_bin2bcd(x) :                      \
                _bin2bcd(x))
 
+#define bcd_is_valid(x)                                        \
+               const_bcd_is_valid(x)
+
 #define const_bcd2bin(x)       (((x) & 0x0f) + ((x) >> 4) * 10)
 #define const_bin2bcd(x)       ((((x) / 10) << 4) + (x) % 10)
+#define const_bcd_is_valid(x)  (((x) & 0x0f) < 10 && ((x) >> 4) < 10)
 
 unsigned _bcd2bin(unsigned char val) __attribute_const__;
 unsigned char _bin2bcd(unsigned val) __attribute_const__;
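bcd_is_valid() simply requires both nibbles to be decimal digits before bcd2bin() is trusted, which lets RTC drivers reject corrupt time registers instead of silently decoding garbage. A standalone restatement of the two macros above (userspace demo, not kernel code):

#include <stdio.h>

#define const_bcd2bin(x)	(((x) & 0x0f) + ((x) >> 4) * 10)
#define const_bcd_is_valid(x)	(((x) & 0x0f) < 10 && ((x) >> 4) < 10)

int main(void)
{
	unsigned char regs[] = { 0x59, 0x7a };	/* 0x7a: low nibble is 10 */

	for (unsigned int i = 0; i < sizeof(regs); i++) {
		if (const_bcd_is_valid(regs[i]))
			printf("0x%02x -> %u\n", regs[i], const_bcd2bin(regs[i]));
		else
			printf("0x%02x: not valid BCD, reject\n", regs[i]);
	}
	return 0;
}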
index 779fba613bd0923c89876735e7ba1ddec75fb849..dd5ce1137f04aeed0ce86b732b14d34c36710716 100644 (file)
@@ -473,6 +473,7 @@ enum hctx_type {
 
 /**
  * struct blk_mq_tag_set - tag set that can be shared between request queues
+ * @ops:          Pointers to functions that implement block driver behavior.
  * @map:          One or more ctx -> hctx mappings. One map exists for each
  *                hardware queue type (enum hctx_type) that the driver wishes
  *                to support. There are no restrictions on maps being of the
@@ -480,7 +481,6 @@ enum hctx_type {
  *                types.
  * @nr_maps:      Number of elements in the @map array. A number in the range
  *                [1, HCTX_MAX_TYPES].
- * @ops:          Pointers to functions that implement block driver behavior.
  * @nr_hw_queues:  Number of hardware queues supported by the block driver that
  *                owns this data structure.
  * @queue_depth:   Number of tags per hardware queue, reserved tags included.
@@ -505,9 +505,9 @@ enum hctx_type {
  *                (BLK_MQ_F_BLOCKING).
  */
 struct blk_mq_tag_set {
+       const struct blk_mq_ops *ops;
        struct blk_mq_queue_map map[HCTX_MAX_TYPES];
        unsigned int            nr_maps;
-       const struct blk_mq_ops *ops;
        unsigned int            nr_hw_queues;
        unsigned int            queue_depth;
        unsigned int            reserved_tags;
index 41a41561b77325a5ae5469267d9a305a5df7b58f..d1aee08f8c1811d645856ce4b92e24cb4cd236ab 100644 (file)
@@ -1283,12 +1283,7 @@ static inline bool bdev_nowait(struct block_device *bdev)
 
 static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
 {
-       struct request_queue *q = bdev_get_queue(bdev);
-
-       if (q)
-               return blk_queue_zoned_model(q);
-
-       return BLK_ZONED_NONE;
+       return blk_queue_zoned_model(bdev_get_queue(bdev));
 }
 
 static inline bool bdev_is_zoned(struct block_device *bdev)
index 03c2a613ad404a954c75bf59cb032d3a2202bf69..0c356a5179917e9ff1d9015c1398df8e4243e5a3 100644 (file)
 
 #include <uapi/linux/capability.h>
 #include <linux/uidgid.h>
+#include <linux/bits.h>
 
 #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3
-#define _KERNEL_CAPABILITY_U32S    _LINUX_CAPABILITY_U32S_3
 
 extern int file_caps_enabled;
 
-typedef struct kernel_cap_struct {
-       __u32 cap[_KERNEL_CAPABILITY_U32S];
-} kernel_cap_t;
+typedef struct { u64 val; } kernel_cap_t;
 
 /* same as vfs_ns_cap_data but in cpu endian and always filled completely */
 struct cpu_vfs_cap_data {
        __u32 magic_etc;
+       kuid_t rootid;
        kernel_cap_t permitted;
        kernel_cap_t inheritable;
-       kuid_t rootid;
 };
 
 #define _USER_CAP_HEADER_SIZE  (sizeof(struct __user_cap_header_struct))
 #define _KERNEL_CAP_T_SIZE     (sizeof(kernel_cap_t))
 
-
 struct file;
 struct inode;
 struct dentry;
@@ -44,16 +41,6 @@ struct task_struct;
 struct user_namespace;
 struct mnt_idmap;
 
-extern const kernel_cap_t __cap_empty_set;
-extern const kernel_cap_t __cap_init_eff_set;
-
-/*
- * Internal kernel functions only
- */
-
-#define CAP_FOR_EACH_U32(__capi)  \
-       for (__capi = 0; __capi < _KERNEL_CAPABILITY_U32S; ++__capi)
-
 /*
  * CAP_FS_MASK and CAP_NFSD_MASKS:
  *
@@ -67,94 +54,52 @@ extern const kernel_cap_t __cap_init_eff_set;
  *   2. The security.* and trusted.* xattrs are fs-related MAC permissions
  */
 
-# define CAP_FS_MASK_B0     (CAP_TO_MASK(CAP_CHOWN)            \
-                           | CAP_TO_MASK(CAP_MKNOD)            \
-                           | CAP_TO_MASK(CAP_DAC_OVERRIDE)     \
-                           | CAP_TO_MASK(CAP_DAC_READ_SEARCH)  \
-                           | CAP_TO_MASK(CAP_FOWNER)           \
-                           | CAP_TO_MASK(CAP_FSETID))
-
-# define CAP_FS_MASK_B1     (CAP_TO_MASK(CAP_MAC_OVERRIDE))
+# define CAP_FS_MASK     (BIT_ULL(CAP_CHOWN)           \
+                       | BIT_ULL(CAP_MKNOD)            \
+                       | BIT_ULL(CAP_DAC_OVERRIDE)     \
+                       | BIT_ULL(CAP_DAC_READ_SEARCH)  \
+                       | BIT_ULL(CAP_FOWNER)           \
+                       | BIT_ULL(CAP_FSETID)           \
+                       | BIT_ULL(CAP_MAC_OVERRIDE))
+#define CAP_VALID_MASK  (BIT_ULL(CAP_LAST_CAP+1)-1)
 
-#if _KERNEL_CAPABILITY_U32S != 2
-# error Fix up hand-coded capability macro initializers
-#else /* HAND-CODED capability initializers */
+# define CAP_EMPTY_SET    ((kernel_cap_t) { 0 })
+# define CAP_FULL_SET     ((kernel_cap_t) { CAP_VALID_MASK })
+# define CAP_FS_SET       ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_LINUX_IMMUTABLE) })
+# define CAP_NFSD_SET     ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_SYS_RESOURCE) })
 
-#define CAP_LAST_U32                   ((_KERNEL_CAPABILITY_U32S) - 1)
-#define CAP_LAST_U32_VALID_MASK                (CAP_TO_MASK(CAP_LAST_CAP + 1) -1)
+# define cap_clear(c)         do { (c).val = 0; } while (0)
 
-# define CAP_EMPTY_SET    ((kernel_cap_t){{ 0, 0 }})
-# define CAP_FULL_SET     ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }})
-# define CAP_FS_SET       ((kernel_cap_t){{ CAP_FS_MASK_B0 \
-                                   | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \
-                                   CAP_FS_MASK_B1 } })
-# define CAP_NFSD_SET     ((kernel_cap_t){{ CAP_FS_MASK_B0 \
-                                   | CAP_TO_MASK(CAP_SYS_RESOURCE), \
-                                   CAP_FS_MASK_B1 } })
-
-#endif /* _KERNEL_CAPABILITY_U32S != 2 */
-
-# define cap_clear(c)         do { (c) = __cap_empty_set; } while (0)
-
-#define cap_raise(c, flag)  ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag))
-#define cap_lower(c, flag)  ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag))
-#define cap_raised(c, flag) ((c).cap[CAP_TO_INDEX(flag)] & CAP_TO_MASK(flag))
-
-#define CAP_BOP_ALL(c, a, b, OP)                                    \
-do {                                                                \
-       unsigned __capi;                                            \
-       CAP_FOR_EACH_U32(__capi) {                                  \
-               c.cap[__capi] = a.cap[__capi] OP b.cap[__capi];     \
-       }                                                           \
-} while (0)
-
-#define CAP_UOP_ALL(c, a, OP)                                       \
-do {                                                                \
-       unsigned __capi;                                            \
-       CAP_FOR_EACH_U32(__capi) {                                  \
-               c.cap[__capi] = OP a.cap[__capi];                   \
-       }                                                           \
-} while (0)
+#define cap_raise(c, flag)  ((c).val |= BIT_ULL(flag))
+#define cap_lower(c, flag)  ((c).val &= ~BIT_ULL(flag))
+#define cap_raised(c, flag) (((c).val & BIT_ULL(flag)) != 0)
 
 static inline kernel_cap_t cap_combine(const kernel_cap_t a,
                                       const kernel_cap_t b)
 {
-       kernel_cap_t dest;
-       CAP_BOP_ALL(dest, a, b, |);
-       return dest;
+       return (kernel_cap_t) { a.val | b.val };
 }
 
 static inline kernel_cap_t cap_intersect(const kernel_cap_t a,
                                         const kernel_cap_t b)
 {
-       kernel_cap_t dest;
-       CAP_BOP_ALL(dest, a, b, &);
-       return dest;
+       return (kernel_cap_t) { a.val & b.val };
 }
 
 static inline kernel_cap_t cap_drop(const kernel_cap_t a,
                                    const kernel_cap_t drop)
 {
-       kernel_cap_t dest;
-       CAP_BOP_ALL(dest, a, drop, &~);
-       return dest;
+       return (kernel_cap_t) { a.val & ~drop.val };
 }
 
-static inline kernel_cap_t cap_invert(const kernel_cap_t c)
+static inline bool cap_isclear(const kernel_cap_t a)
 {
-       kernel_cap_t dest;
-       CAP_UOP_ALL(dest, c, ~);
-       return dest;
+       return !a.val;
 }
 
-static inline bool cap_isclear(const kernel_cap_t a)
+static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b)
 {
-       unsigned __capi;
-       CAP_FOR_EACH_U32(__capi) {
-               if (a.cap[__capi] != 0)
-                       return false;
-       }
-       return true;
+       return a.val == b.val;
 }
 
 /*
@@ -166,39 +111,31 @@ static inline bool cap_isclear(const kernel_cap_t a)
  */
 static inline bool cap_issubset(const kernel_cap_t a, const kernel_cap_t set)
 {
-       kernel_cap_t dest;
-       dest = cap_drop(a, set);
-       return cap_isclear(dest);
+       return !(a.val & ~set.val);
 }
 
 /* Used to decide between falling back on the old suser() or fsuser(). */
 
 static inline kernel_cap_t cap_drop_fs_set(const kernel_cap_t a)
 {
-       const kernel_cap_t __cap_fs_set = CAP_FS_SET;
-       return cap_drop(a, __cap_fs_set);
+       return cap_drop(a, CAP_FS_SET);
 }
 
 static inline kernel_cap_t cap_raise_fs_set(const kernel_cap_t a,
                                            const kernel_cap_t permitted)
 {
-       const kernel_cap_t __cap_fs_set = CAP_FS_SET;
-       return cap_combine(a,
-                          cap_intersect(permitted, __cap_fs_set));
+       return cap_combine(a, cap_intersect(permitted, CAP_FS_SET));
 }
 
 static inline kernel_cap_t cap_drop_nfsd_set(const kernel_cap_t a)
 {
-       const kernel_cap_t __cap_fs_set = CAP_NFSD_SET;
-       return cap_drop(a, __cap_fs_set);
+       return cap_drop(a, CAP_NFSD_SET);
 }
 
 static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a,
                                              const kernel_cap_t permitted)
 {
-       const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET;
-       return cap_combine(a,
-                          cap_intersect(permitted, __cap_nfsd_set));
+       return cap_combine(a, cap_intersect(permitted, CAP_NFSD_SET));
 }
 
 #ifdef CONFIG_MULTIUSER
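With kernel_cap_t collapsed into one u64, every set operation above is a single integer op and the old per-u32 CAP_BOP_ALL/CAP_UOP_ALL loops disappear. A standalone restatement of the new representation (userspace demo; the CAP_* values match include/uapi/linux/capability.h at this point in time, but treat them as illustrative):

#include <stdio.h>
#include <stdint.h>

#define BIT_ULL(n)	(1ULL << (n))
#define CAP_NET_ADMIN	12
#define CAP_SYS_ADMIN	21
#define CAP_LAST_CAP	40	/* CAP_CHECKPOINT_RESTORE */

typedef struct { uint64_t val; } kernel_cap_t;

#define CAP_VALID_MASK	(BIT_ULL(CAP_LAST_CAP + 1) - 1)
#define cap_raise(c, f)	((c).val |= BIT_ULL(f))

/* One AND/NOT instead of a loop over capability words. */
static int cap_issubset(kernel_cap_t a, kernel_cap_t set)
{
	return !(a.val & ~set.val);
}

int main(void)
{
	kernel_cap_t caps = { 0 };
	kernel_cap_t full = { CAP_VALID_MASK };

	cap_raise(caps, CAP_NET_ADMIN);
	cap_raise(caps, CAP_SYS_ADMIN);
	printf("caps subset of full: %d\n", cap_issubset(caps, full));	/* 1 */
	printf("full subset of caps: %d\n", cap_issubset(full, caps));	/* 0 */
	return 0;
}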
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
deleted file mode 100644 (file)
index b17f3cd..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LINUX_COMPILER_TYPES_H
-#error "Please don't include <linux/compiler-intel.h> directly, include <linux/compiler.h> instead."
-#endif
-
-#ifdef __ECC
-
-/* Compiler specific definitions for Intel ECC compiler */
-
-#include <asm/intrinsics.h>
-
-/* Intel ECC compiler doesn't support gcc specific asm stmts.
- * It uses intrinsics to do the equivalent things.
- */
-
-#define barrier() __memory_barrier()
-#define barrier_data(ptr) barrier()
-
-#define RELOC_HIDE(ptr, off)                                   \
-  ({ unsigned long __ptr;                                      \
-     __ptr = (unsigned long) (ptr);                            \
-    (typeof(ptr)) (__ptr + (off)); })
-
-/* This should act as an optimization barrier on var.
- * Given that this compiler does not have inline assembly, a compiler barrier
- * is the best we can do.
- */
-#define OPTIMIZER_HIDE_VAR(var) barrier()
-
-#endif
-
-/* icc has this, but it's called _bswap16 */
-#define __HAVE_BUILTIN_BSWAP16__
-#define __builtin_bswap16 _bswap16
index 4a3bd114a24facea243d3c7cf1db815f55890ab7..e659cb6fded393e191535c8e8176b92f0120c424 100644 (file)
  * compiler should see some alignment anyway, when the return value is
  * massaged by 'flags = ptr & 3; ptr &= ~3;').
  *
- * Optional: not supported by icc
- *
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute
  * clang: https://clang.llvm.org/docs/AttributeReference.html#assume-aligned
  */
-#if __has_attribute(__assume_aligned__)
-# define __assume_aligned(a, ...)       __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
-#else
-# define __assume_aligned(a, ...)
-#endif
+#define __assume_aligned(a, ...)        __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
 
 /*
  * Note the long name.
@@ -85,7 +79,6 @@
 /*
  * Optional: only supported since gcc >= 9
  * Optional: not supported by clang
- * Optional: not supported by icc
  *
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-copy-function-attribute
  */
@@ -98,7 +91,6 @@
 /*
  * Optional: not supported by gcc
  * Optional: only supported since clang >= 14.0
- * Optional: not supported by icc
  *
  * clang: https://clang.llvm.org/docs/AttributeReference.html#diagnose_as_builtin
  */
 
 /*
  * Optional: not supported by clang
- * Optional: not supported by icc
  *
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-designated_005finit-type-attribute
  */
 /*
  * Optional: only supported since gcc >= 8
  * Optional: not supported by clang
- * Optional: not supported by icc
  *
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-nonstring-variable-attribute
  */
 
 /*
  * Optional: not supported by gcc.
- * Optional: not supported by icc.
  *
  * clang: https://clang.llvm.org/docs/AttributeReference.html#overloadable
  */
  * Note: the "type" argument should match any __builtin_object_size(p, type) usage.
  *
  * Optional: not supported by gcc.
- * Optional: not supported by icc.
  *
  * clang: https://clang.llvm.org/docs/AttributeReference.html#pass-object-size-pass-dynamic-object-size
  */
index 690c7c826fbfd738298f7596d59952c66167f61b..547ea1ff806eb0ca880ad40b25bc5ae480a52c23 100644 (file)
@@ -120,8 +120,6 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
 /* Compiler specific macros. */
 #ifdef __clang__
 #include <linux/compiler-clang.h>
-#elif defined(__INTEL_COMPILER)
-#include <linux/compiler-intel.h>
 #elif defined(__GNUC__)
 /* The above compilers also define __GNUC__, so order is important here. */
 #include <linux/compiler-gcc.h>
index 10c92bd9b8070a5e3c6e006502c4405b89c150af..63d637d18e7990961948b23d13a4ad11d7406e3f 100644 (file)
@@ -50,8 +50,41 @@ static inline void set_nr_cpu_ids(unsigned int nr)
 #endif
 }
 
-/* Deprecated. Always use nr_cpu_ids. */
-#define nr_cpumask_bits        nr_cpu_ids
+/*
+ * We have several different "preferred sizes" for the cpumask
+ * operations, depending on operation.
+ *
+ * For example, the bitmap scanning and search operations have
+ * optimized routines that work for the single-word case, but only when
+ * the size is constant. So if NR_CPUS fits in one single word, we are
+ * better off using that small constant, in order to trigger the
+ * optimized bit finding. That is 'small_cpumask_size'.
+ *
+ * The clearing and copying operations will similarly perform better
+ * with a constant size, but we limit that size arbitrarily to four
+ * words. We call this 'large_cpumask_size'.
+ *
+ * Finally, some operations just want the exact limit, either because
+ * they set bits or just don't have any faster fixed-sized versions. We
+ * call this just 'nr_cpumask_bits'.
+ *
+ * Note that these optional constants are always guaranteed to be at
+ * least as big as 'nr_cpu_ids' itself is, and all our cpumask
+ * allocations are at least that size (see cpumask_size()). The
+ * optimization comes from being able to potentially use a compile-time
+ * constant instead of a run-time generated exact number of CPUs.
+ */
+#if NR_CPUS <= BITS_PER_LONG
+  #define small_cpumask_bits ((unsigned int)NR_CPUS)
+  #define large_cpumask_bits ((unsigned int)NR_CPUS)
+#elif NR_CPUS <= 4*BITS_PER_LONG
+  #define small_cpumask_bits nr_cpu_ids
+  #define large_cpumask_bits ((unsigned int)NR_CPUS)
+#else
+  #define small_cpumask_bits nr_cpu_ids
+  #define large_cpumask_bits nr_cpu_ids
+#endif
+#define nr_cpumask_bits nr_cpu_ids
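The payoff is that when NR_CPUS fits in one word, helpers such as cpumask_first() hand find_first_bit() a compile-time constant, which the bitmap layer reduces to a single-word scan. A hedged sketch of the effect (the expansion shown is approximate, not the literal generated code):

/* With NR_CPUS <= BITS_PER_LONG, small_cpumask_bits is the constant
 * NR_CPUS, so conceptually:
 *
 *	find_first_bit(bits, small_cpumask_bits)
 *		-> val = bits[0] & GENMASK(NR_CPUS - 1, 0);
 *		   return val ? __ffs(val) : NR_CPUS;
 *
 * while nr_cpu_ids is a runtime value and forces the generic word loop. */
static inline unsigned int first_cpu_fast(const struct cpumask *srcp)
{
	return find_first_bit(cpumask_bits(srcp), small_cpumask_bits);
}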
 
 /*
  * The following particular system cpumasks and operations manage
@@ -126,7 +159,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
  */
 static inline unsigned int cpumask_first(const struct cpumask *srcp)
 {
-       return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits);
+       return find_first_bit(cpumask_bits(srcp), small_cpumask_bits);
 }
 
 /**
@@ -137,7 +170,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
  */
 static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
 {
-       return find_first_zero_bit(cpumask_bits(srcp), nr_cpumask_bits);
+       return find_first_zero_bit(cpumask_bits(srcp), small_cpumask_bits);
 }
 
 /**
@@ -150,7 +183,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
 static inline
 unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2)
 {
-       return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits);
+       return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
 }
 
 /**
@@ -161,7 +194,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask
  */
 static inline unsigned int cpumask_last(const struct cpumask *srcp)
 {
-       return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits);
+       return find_last_bit(cpumask_bits(srcp), small_cpumask_bits);
 }
 
 /**
@@ -177,7 +210,7 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp)
        /* -1 is a legal arg here. */
        if (n != -1)
                cpumask_check(n);
-       return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1);
+       return find_next_bit(cpumask_bits(srcp), small_cpumask_bits, n + 1);
 }
 
 /**
@@ -192,7 +225,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
        /* -1 is a legal arg here. */
        if (n != -1)
                cpumask_check(n);
-       return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1);
+       return find_next_zero_bit(cpumask_bits(srcp), small_cpumask_bits, n+1);
 }
 
 #if NR_CPUS == 1
@@ -235,7 +268,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
        if (n != -1)
                cpumask_check(n);
        return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p),
-               nr_cpumask_bits, n + 1);
+               small_cpumask_bits, n + 1);
 }
 
 /**
@@ -246,17 +279,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
  * After the loop, cpu is >= nr_cpu_ids.
  */
 #define for_each_cpu(cpu, mask)                                \
-       for_each_set_bit(cpu, cpumask_bits(mask), nr_cpumask_bits)
-
-/**
- * for_each_cpu_not - iterate over every cpu in a complemented mask
- * @cpu: the (optionally unsigned) integer iterator
- * @mask: the cpumask pointer
- *
- * After the loop, cpu is >= nr_cpu_ids.
- */
-#define for_each_cpu_not(cpu, mask)                            \
-       for_each_clear_bit(cpu, cpumask_bits(mask), nr_cpumask_bits)
+       for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
 
 #if NR_CPUS == 1
 static inline
@@ -290,7 +313,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
  * After the loop, cpu is >= nr_cpu_ids.
  */
 #define for_each_cpu_wrap(cpu, mask, start)                            \
-       for_each_set_bit_wrap(cpu, cpumask_bits(mask), nr_cpumask_bits, start)
+       for_each_set_bit_wrap(cpu, cpumask_bits(mask), small_cpumask_bits, start)
 
 /**
  * for_each_cpu_and - iterate over every cpu in both masks
@@ -307,7 +330,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
  * After the loop, cpu is >= nr_cpu_ids.
  */
 #define for_each_cpu_and(cpu, mask1, mask2)                            \
-       for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits)
+       for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
 
 /**
  * for_each_cpu_andnot - iterate over every cpu present in one mask, excluding
@@ -325,7 +348,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
  * After the loop, cpu is >= nr_cpu_ids.
  */
 #define for_each_cpu_andnot(cpu, mask1, mask2)                         \
-       for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits)
+       for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
 
 /**
  * cpumask_any_but - return a "random" in a cpumask, but not this one.
@@ -356,7 +379,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
  */
 static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp)
 {
-       return find_nth_bit(cpumask_bits(srcp), nr_cpumask_bits, cpumask_check(cpu));
+       return find_nth_bit(cpumask_bits(srcp), small_cpumask_bits, cpumask_check(cpu));
 }
 
 /**
@@ -372,7 +395,7 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
                                                        const struct cpumask *srcp2)
 {
        return find_nth_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2),
-                               nr_cpumask_bits, cpumask_check(cpu));
+                               small_cpumask_bits, cpumask_check(cpu));
 }
 
 /**
@@ -388,7 +411,7 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1,
                                                        const struct cpumask *srcp2)
 {
        return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2),
-                               nr_cpumask_bits, cpumask_check(cpu));
+                               small_cpumask_bits, cpumask_check(cpu));
 }
 
 /**
@@ -408,7 +431,7 @@ unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp
        return find_nth_and_andnot_bit(cpumask_bits(srcp1),
                                        cpumask_bits(srcp2),
                                        cpumask_bits(srcp3),
-                                       nr_cpumask_bits, cpumask_check(cpu));
+                                       small_cpumask_bits, cpumask_check(cpu));
 }
 
 #define CPU_BITS_NONE                                          \
@@ -498,6 +521,10 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *
  */
 static inline void cpumask_setall(struct cpumask *dstp)
 {
+       if (small_const_nbits(small_cpumask_bits)) {
+               cpumask_bits(dstp)[0] = BITMAP_LAST_WORD_MASK(nr_cpumask_bits);
+               return;
+       }
        bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits);
 }
 
@@ -507,7 +534,7 @@ static inline void cpumask_setall(struct cpumask *dstp)
  */
 static inline void cpumask_clear(struct cpumask *dstp)
 {
-       bitmap_zero(cpumask_bits(dstp), nr_cpumask_bits);
+       bitmap_zero(cpumask_bits(dstp), large_cpumask_bits);
 }
 
 /**
@@ -523,7 +550,7 @@ static inline bool cpumask_and(struct cpumask *dstp,
                               const struct cpumask *src2p)
 {
        return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p),
-                                      cpumask_bits(src2p), nr_cpumask_bits);
+                                      cpumask_bits(src2p), small_cpumask_bits);
 }
 
 /**
@@ -536,7 +563,7 @@ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p,
                              const struct cpumask *src2p)
 {
        bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p),
-                                     cpumask_bits(src2p), nr_cpumask_bits);
+                                     cpumask_bits(src2p), small_cpumask_bits);
 }
 
 /**
@@ -550,7 +577,7 @@ static inline void cpumask_xor(struct cpumask *dstp,
                               const struct cpumask *src2p)
 {
        bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p),
-                                      cpumask_bits(src2p), nr_cpumask_bits);
+                                      cpumask_bits(src2p), small_cpumask_bits);
 }
 
 /**
@@ -566,19 +593,7 @@ static inline bool cpumask_andnot(struct cpumask *dstp,
                                  const struct cpumask *src2p)
 {
        return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p),
-                                         cpumask_bits(src2p), nr_cpumask_bits);
-}
-
-/**
- * cpumask_complement - *dstp = ~*srcp
- * @dstp: the cpumask result
- * @srcp: the input to invert
- */
-static inline void cpumask_complement(struct cpumask *dstp,
-                                     const struct cpumask *srcp)
-{
-       bitmap_complement(cpumask_bits(dstp), cpumask_bits(srcp),
-                                             nr_cpumask_bits);
+                                         cpumask_bits(src2p), small_cpumask_bits);
 }
 
 /**
@@ -590,7 +605,7 @@ static inline bool cpumask_equal(const struct cpumask *src1p,
                                const struct cpumask *src2p)
 {
        return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p),
-                                                nr_cpumask_bits);
+                                                small_cpumask_bits);
 }
 
 /**
@@ -604,7 +619,7 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p,
                                    const struct cpumask *src3p)
 {
        return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p),
-                              cpumask_bits(src3p), nr_cpumask_bits);
+                              cpumask_bits(src3p), small_cpumask_bits);
 }
 
 /**
@@ -616,7 +631,7 @@ static inline bool cpumask_intersects(const struct cpumask *src1p,
                                     const struct cpumask *src2p)
 {
        return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p),
-                                                     nr_cpumask_bits);
+                                                     small_cpumask_bits);
 }
 
 /**
@@ -630,7 +645,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p,
                                 const struct cpumask *src2p)
 {
        return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p),
-                                                 nr_cpumask_bits);
+                                                 small_cpumask_bits);
 }
 
 /**
@@ -639,7 +654,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p,
  */
 static inline bool cpumask_empty(const struct cpumask *srcp)
 {
-       return bitmap_empty(cpumask_bits(srcp), nr_cpumask_bits);
+       return bitmap_empty(cpumask_bits(srcp), small_cpumask_bits);
 }
 
 /**
@@ -657,7 +672,7 @@ static inline bool cpumask_full(const struct cpumask *srcp)
  */
 static inline unsigned int cpumask_weight(const struct cpumask *srcp)
 {
-       return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits);
+       return bitmap_weight(cpumask_bits(srcp), small_cpumask_bits);
 }
 
 /**
@@ -668,7 +683,7 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp)
 static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1,
                                                const struct cpumask *srcp2)
 {
-       return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits);
+       return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
 }
 
 /**
@@ -681,7 +696,7 @@ static inline void cpumask_shift_right(struct cpumask *dstp,
                                       const struct cpumask *srcp, int n)
 {
        bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n,
-                                              nr_cpumask_bits);
+                                              small_cpumask_bits);
 }
 
 /**
@@ -705,7 +720,7 @@ static inline void cpumask_shift_left(struct cpumask *dstp,
 static inline void cpumask_copy(struct cpumask *dstp,
                                const struct cpumask *srcp)
 {
-       bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits);
+       bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits);
 }
 
 /**
@@ -789,7 +804,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
  */
 static inline unsigned int cpumask_size(void)
 {
-       return BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long);
+       return BITS_TO_LONGS(large_cpumask_bits) * sizeof(long);
 }
 
 /*
index ee0d75d9a302d47649c79c7dcb4c24abd0bbd547..1701f25117ea4af5df4cf0202eeb2bc5175db138 100644 (file)
@@ -315,7 +315,7 @@ struct f2fs_inode {
                        __u8 i_log_cluster_size;        /* log of cluster size */
                        __le16 i_compress_flag;         /* compress flag */
                                                /* 0 bit: chksum flag
-                                                * [10,15] bits: compress level
+                                                * [8,15] bits: compress level
                                                 */
                        __le32 i_extra_end[0];  /* for attribute size calculation */
                } __packed;
index eaf8ab1773033214f59d556e4ab686300a51e8d1..1ea8c7a3570b2880aeb353d9707ad4f9a7df1ca4 100644 (file)
@@ -834,6 +834,7 @@ struct hid_driver {
  * @output_report: send output report to device
  * @idle: send idle request to device
  * @may_wakeup: return if device may act as a wakeup source during system-suspend
+ * @max_buffer_size: override the maximum data buffer size (default: HID_MAX_BUFFER_SIZE)
  */
 struct hid_ll_driver {
        int (*start)(struct hid_device *hdev);
@@ -859,6 +860,8 @@ struct hid_ll_driver {
 
        int (*idle)(struct hid_device *hdev, int report, int idle, int reqtype);
        bool (*may_wakeup)(struct hid_device *hdev);
+
+       unsigned int max_buffer_size;
 };
 
 extern bool hid_is_usb(const struct hid_device *hdev);
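A hedged sketch of how a transport might use the new field; the hook names on the left are real hid_ll_driver members, while the my_* implementations and the 8 KiB figure are invented for illustration:

static const struct hid_ll_driver my_ll_driver = {
	.start		= my_start,
	.stop		= my_stop,
	.raw_request	= my_raw_request,
	/* Cap HID core buffer allocations below HID_MAX_BUFFER_SIZE to
	 * match this transport's FIFO; leaving it 0 keeps the default. */
	.max_buffer_size = 8192,
};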
index 0efe4d784358284cf31e165e37f06f70c06e49f2..00689c12f6abb3f4b9c39f7d95905f843d7570dd 100644 (file)
@@ -58,7 +58,7 @@ struct io_uring_task {
 
        struct xarray                   xa;
        struct wait_queue_head          wait;
-       atomic_t                        in_idle;
+       atomic_t                        in_cancel;
        atomic_t                        inflight_tracked;
        struct percpu_counter           inflight;
 
index 13c9b74a4575aa61589b16635a796aad094baa3e..cdb14a1ef26810c26a54b94efb3b281996bc7b5e 100644 (file)
@@ -635,6 +635,8 @@ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
                            int nvec, msi_alloc_info_t *args);
 int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
                             int virq, int nvec, msi_alloc_info_t *args);
+void msi_domain_depopulate_descs(struct device *dev, int virq, int nvec);
+
 struct irq_domain *
 __platform_msi_create_device_domain(struct device *dev,
                                    unsigned int nvec,
index 7d48ea368c5e57c5361da337d01896c20fe5e06e..a529347fd75b2a065f618d45771b27a7bd163f80 100644 (file)
@@ -110,6 +110,7 @@ struct ubi_volume_info {
        int name_len;
        const char *name;
        dev_t cdev;
+       struct device *dev;
 };
 
 /**
index 62c54ffbeeaacc74aa9e52697ab11d7deec92d82..9ac3df3fccf0118412f1139521e5c954f0d890d9 100644 (file)
@@ -15,6 +15,7 @@ struct unwind_hint {
        s16             sp_offset;
        u8              sp_reg;
        u8              type;
+       u8              signal;
        u8              end;
 };
 #endif
@@ -49,7 +50,7 @@ struct unwind_hint {
 
 #ifndef __ASSEMBLY__
 
-#define UNWIND_HINT(sp_reg, sp_offset, type, end)              \
+#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end)      \
        "987: \n\t"                                             \
        ".pushsection .discard.unwind_hints\n\t"                \
        /* struct unwind_hint */                                \
@@ -57,6 +58,7 @@ struct unwind_hint {
        ".short " __stringify(sp_offset) "\n\t"                 \
        ".byte " __stringify(sp_reg) "\n\t"                     \
        ".byte " __stringify(type) "\n\t"                       \
+       ".byte " __stringify(signal) "\n\t"                     \
        ".byte " __stringify(end) "\n\t"                        \
        ".balign 4 \n\t"                                        \
        ".popsection\n\t"
@@ -129,7 +131,7 @@ struct unwind_hint {
  * the debuginfo as necessary.  It will also warn if it sees any
  * inconsistencies.
  */
-.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0
 .Lunwind_hint_ip_\@:
        .pushsection .discard.unwind_hints
                /* struct unwind_hint */
@@ -137,6 +139,7 @@ struct unwind_hint {
                .short \sp_offset
                .byte \sp_reg
                .byte \type
+               .byte \signal
                .byte \end
                .balign 4
        .popsection
@@ -174,7 +177,7 @@ struct unwind_hint {
 
 #ifndef __ASSEMBLY__
 
-#define UNWIND_HINT(sp_reg, sp_offset, type, end)      \
+#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end) \
        "\n\t"
 #define STACK_FRAME_NON_STANDARD(func)
 #define STACK_FRAME_NON_STANDARD_FP(func)
@@ -182,7 +185,7 @@ struct unwind_hint {
 #define ASM_REACHABLE
 #else
 #define ANNOTATE_INTRA_FUNCTION_CALL
-.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0
 .endm
 .macro STACK_FRAME_NON_STANDARD func:req
 .endm
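The new signal byte lets a hint mark locations reached asynchronously (interrupt or exception entry), telling the unwinder the recorded IP is a precise fault address rather than a return address. A hedged sketch of threading the extra argument through the C-side macro (the ORC register and hint-type constants are the usual objtool ones, assumed here rather than taken from this hunk):

/* Illustrative: annotate an entry stub's pt_regs frame; note the new
 * fourth argument (signal=1) ahead of end=0. */
static __always_inline void mark_entry_frame(void)
{
	asm volatile(UNWIND_HINT(ORC_REG_SP, 0, UNWIND_HINT_TYPE_REGS, 1, 0));
}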
index 161e91167b9c01de182731803d1d5e898ccd5e68..7b7b93b6fb81ad7138fa43750783822832da6035 100644 (file)
@@ -440,13 +440,6 @@ static inline int pwm_config(struct pwm_device *pwm, int duty_ns,
        return -EINVAL;
 }
 
-static inline int pwm_capture(struct pwm_device *pwm,
-                             struct pwm_capture *result,
-                             unsigned long timeout)
-{
-       return -EINVAL;
-}
-
 static inline int pwm_enable(struct pwm_device *pwm)
 {
        might_sleep();
@@ -458,6 +451,13 @@ static inline void pwm_disable(struct pwm_device *pwm)
        might_sleep();
 }
 
+static inline int pwm_capture(struct pwm_device *pwm,
+                             struct pwm_capture *result,
+                             unsigned long timeout)
+{
+       return -EINVAL;
+}
+
 static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data)
 {
        return -EINVAL;
index a4570da03e58aa0c00d28de57bb006f3e8c2ef97..b87d0166041273d0e3dbc466b1b296f2cb49073f 100644 (file)
@@ -94,7 +94,7 @@ enum ttu_flags {
        TTU_SPLIT_HUGE_PMD      = 0x4,  /* split huge PMD if any */
        TTU_IGNORE_MLOCK        = 0x8,  /* ignore mlock */
        TTU_SYNC                = 0x10, /* avoid racy checks with PVMW_SYNC */
-       TTU_IGNORE_HWPOISON     = 0x20, /* corrupted page is recoverable */
+       TTU_HWPOISON            = 0x20, /* do convert pte to hwpoison entry */
        TTU_BATCH_FLUSH         = 0x40, /* Batch TLB flushes where possible
                                         * and caller guarantees they will
                                         * do a final flush if necessary */
index c255273b02810ebc1b9fcd9e517f87478bda2636..37ad81058d6aedfd86667e2f54e8e8b401fbff8f 100644 (file)
@@ -97,7 +97,10 @@ struct intc_hw_desc {
        unsigned int nr_subgroups;
 };
 
-#define _INTC_ARRAY(a) a, __same_type(a, NULL) ? 0 : sizeof(a)/sizeof(*a)
+#define _INTC_SIZEOF_OR_ZERO(a) (_Generic(a,                 \
+                                 typeof(NULL):  0,           \
+                                 default:       sizeof(a)))
+#define _INTC_ARRAY(a) a, _INTC_SIZEOF_OR_ZERO(a)/sizeof(*a)
 
 #define INTC_HW_DESC(vectors, groups, mask_regs,       \
                     prio_regs, sense_regs, ack_regs)   \
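_Generic selects its branch at translation time, so when the argument is NULL the sizeof arm is never instantiated and the numerator is a plain 0; the old __same_type(a, NULL) ternary kept a sizeof(pointer)/sizeof(element) expression in the dead arm for the compiler to warn about. A standalone restatement (assumes a GNU-flavoured compiler where NULL is (void *)0 and typeof is available, the same extensions the kernel macro leans on):

#include <stdio.h>
#include <stddef.h>

#define SIZEOF_OR_ZERO(a)	(_Generic((a),			\
				  typeof(NULL): 0,		\
				  default:      sizeof(a)))

int main(void)
{
	int vectors[4];

	/* default branch: 16 / 4 = 4 entries */
	printf("%zu\n", SIZEOF_OR_ZERO(vectors) / sizeof(*vectors));
	/* typeof(NULL) branch: constant 0, no sizeof ever applied */
	printf("%zu\n", (size_t)SIZEOF_OR_ZERO(NULL));
	return 0;
}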
index d517bfac937b0a2bda248f594750fbb0c9a082ab..41c57b8b167147bb4544a9bdcb92e7a61459fc40 100644 (file)
@@ -428,12 +428,18 @@ MAX_XDP_METADATA_KFUNC,
 #ifdef CONFIG_NET
 u32 bpf_xdp_metadata_kfunc_id(int id);
 bool bpf_dev_bound_kfunc_id(u32 btf_id);
+void xdp_set_features_flag(struct net_device *dev, xdp_features_t val);
 void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg);
 void xdp_features_clear_redirect_target(struct net_device *dev);
 #else
 static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; }
 static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; }
 
+static inline void
+xdp_set_features_flag(struct net_device *dev, xdp_features_t val)
+{
+}
+
 static inline void
 xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
 {
@@ -445,4 +451,9 @@ xdp_features_clear_redirect_target(struct net_device *dev)
 }
 #endif
 
+static inline void xdp_clear_features_flag(struct net_device *dev)
+{
+       xdp_set_features_flag(dev, 0);
+}
+
 #endif /* __LINUX_NET_XDP_H__ */
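The new setter gives drivers one place to publish their XDP feature bits, and xdp_clear_features_flag() is sugar for publishing an empty mask. A hedged sketch of driver usage (the NETDEV_XDP_ACT_* bits are the real flag values; the driver context is invented):

/* Sketch: advertise native XDP with redirect support while the
 * datapath is up, withdraw it on teardown. */
static void mydrv_publish_xdp_caps(struct net_device *dev, bool up)
{
	if (up)
		xdp_set_features_flag(dev, NETDEV_XDP_ACT_BASIC |
					   NETDEV_XDP_ACT_REDIRECT |
					   NETDEV_XDP_ACT_NDO_XMIT);
	else
		xdp_clear_features_flag(dev);	/* same as passing 0 */
}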
index 7e95ec45138fe53e1d5bbbc382ac39aae2cdc53d..de310f21406c546f7d8d0819e56ef4a9a4ec7ea4 100644 (file)
@@ -462,6 +462,7 @@ struct scsi_exec_args {
        unsigned int sense_len;         /* sense buffer len */
        struct scsi_sense_hdr *sshdr;   /* decoded sense header */
        blk_mq_req_flags_t req_flags;   /* BLK_MQ_REQ flags */
+       int scmd_flags;                 /* SCMD flags */
        int *resid;                     /* residual length */
 };
 
index 3dcda19d35202538e63fe028bead4597bb2e8ddb..483513c575976c48e9b89a4f6eac67c9c8e10827 100644 (file)
@@ -496,6 +496,7 @@ enum fc_host_event_code  {
        FCH_EVT_PORT_FABRIC             = 0x204,
        FCH_EVT_LINK_UNKNOWN            = 0x500,
        FCH_EVT_LINK_FPIN               = 0x501,
+       FCH_EVT_LINK_FPIN_ACK           = 0x502,
        FCH_EVT_VENDOR_UNIQUE           = 0xffff,
 };
 
@@ -856,7 +857,8 @@ void fc_host_post_fc_event(struct Scsi_Host *shost, u32 event_number,
         * Note: when calling fc_host_post_fc_event(), vendor_id may be
         *   specified as 0.
         */
-void fc_host_fpin_rcv(struct Scsi_Host *shost, u32 fpin_len, char *fpin_buf);
+void fc_host_fpin_rcv(struct Scsi_Host *shost, u32 fpin_len, char *fpin_buf,
+               u8 event_acknowledge);
 struct fc_vport *fc_vport_create(struct Scsi_Host *shost, int channel,
                struct fc_vport_identifiers *);
 int fc_vport_terminate(struct fc_vport *vport);
index 3203d35bc8c157326c4b38d04adc7be666964cb2..0814ed1438640988848f8146957973cceabe8aed 100644 (file)
@@ -190,6 +190,8 @@ struct snd_soc_component_driver {
        bool use_dai_pcm_id;    /* use DAI link PCM ID as PCM device number */
        int be_pcm_base;        /* base device ID for all BE PCMs */
 
+       unsigned int start_dma_last;
+
 #ifdef CONFIG_DEBUG_FS
        const char *debugfs_prefix;
 #endif
index 31d994e6b4ca9fcc811675f1c0f04f7c00d314ac..1322d34a5dfc17cdcdc26eee30b0c497a4a4fe91 100644 (file)
@@ -569,10 +569,10 @@ TRACE_EVENT(f2fs_file_write_iter,
 );
 
 TRACE_EVENT(f2fs_map_blocks,
-       TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map,
-                               int create, int flag, int ret),
+       TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int flag,
+                int ret),
 
-       TP_ARGS(inode, map, create, flag, ret),
+       TP_ARGS(inode, map, flag, ret),
 
        TP_STRUCT__entry(
                __field(dev_t,  dev)
@@ -584,7 +584,6 @@ TRACE_EVENT(f2fs_map_blocks,
                __field(int,    m_seg_type)
                __field(bool,   m_may_create)
                __field(bool,   m_multidev_dio)
-               __field(int,    create)
                __field(int,    flag)
                __field(int,    ret)
        ),
@@ -599,7 +598,6 @@ TRACE_EVENT(f2fs_map_blocks,
                __entry->m_seg_type     = map->m_seg_type;
                __entry->m_may_create   = map->m_may_create;
                __entry->m_multidev_dio = map->m_multidev_dio;
-               __entry->create         = create;
                __entry->flag           = flag;
                __entry->ret            = ret;
        ),
@@ -607,7 +605,7 @@ TRACE_EVENT(f2fs_map_blocks,
        TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, "
                "start blkaddr = 0x%llx, len = 0x%llx, flags = %u, "
                "seg_type = %d, may_create = %d, multidevice = %d, "
-               "create = %d, flag = %d, err = %d",
+               "flag = %d, err = %d",
                show_dev_ino(__entry),
                (unsigned long long)__entry->m_lblk,
                (unsigned long long)__entry->m_pblk,
@@ -616,7 +614,6 @@ TRACE_EVENT(f2fs_map_blocks,
                __entry->m_seg_type,
                __entry->m_may_create,
                __entry->m_multidev_dio,
-               __entry->create,
                __entry->flag,
                __entry->ret)
 );
@@ -1293,6 +1290,43 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite,
        TP_ARGS(page, type)
 );
 
+TRACE_EVENT(f2fs_replace_atomic_write_block,
+
+       TP_PROTO(struct inode *inode, struct inode *cow_inode, pgoff_t index,
+                       block_t old_addr, block_t new_addr, bool recovery),
+
+       TP_ARGS(inode, cow_inode, index, old_addr, new_addr, recovery),
+
+       TP_STRUCT__entry(
+               __field(dev_t,  dev)
+               __field(ino_t,  ino)
+               __field(ino_t,  cow_ino)
+               __field(pgoff_t, index)
+               __field(block_t, old_addr)
+               __field(block_t, new_addr)
+               __field(bool, recovery)
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->ino            = inode->i_ino;
+               __entry->cow_ino        = cow_inode->i_ino;
+               __entry->index          = index;
+               __entry->old_addr       = old_addr;
+               __entry->new_addr       = new_addr;
+               __entry->recovery       = recovery;
+       ),
+
+       TP_printk("dev = (%d,%d), ino = %lu, cow_ino = %lu, index = %lu, "
+                       "old_addr = 0x%llx, new_addr = 0x%llx, recovery = %d",
+               show_dev_ino(__entry),
+               __entry->cow_ino,
+               (unsigned long)__entry->index,
+               (unsigned long long)__entry->old_addr,
+               (unsigned long long)__entry->new_addr,
+               __entry->recovery)
+);
+
 TRACE_EVENT(f2fs_filemap_fault,
 
        TP_PROTO(struct inode *inode, pgoff_t index, unsigned long ret),
@@ -1975,7 +2009,7 @@ TRACE_EVENT(f2fs_iostat,
                __entry->fs_cdrio       = iostat[FS_CDATA_READ_IO];
                __entry->fs_nrio        = iostat[FS_NODE_READ_IO];
                __entry->fs_mrio        = iostat[FS_META_READ_IO];
-               __entry->fs_discard     = iostat[FS_DISCARD];
+               __entry->fs_discard     = iostat[FS_DISCARD_IO];
        ),
 
        TP_printk("dev = (%d,%d), "
@@ -2048,33 +2082,33 @@ TRACE_EVENT(f2fs_iostat_latency,
 
        TP_fast_assign(
                __entry->dev            = sbi->sb->s_dev;
-               __entry->d_rd_peak      = iostat_lat[0][DATA].peak_lat;
-               __entry->d_rd_avg       = iostat_lat[0][DATA].avg_lat;
-               __entry->d_rd_cnt       = iostat_lat[0][DATA].cnt;
-               __entry->n_rd_peak      = iostat_lat[0][NODE].peak_lat;
-               __entry->n_rd_avg       = iostat_lat[0][NODE].avg_lat;
-               __entry->n_rd_cnt       = iostat_lat[0][NODE].cnt;
-               __entry->m_rd_peak      = iostat_lat[0][META].peak_lat;
-               __entry->m_rd_avg       = iostat_lat[0][META].avg_lat;
-               __entry->m_rd_cnt       = iostat_lat[0][META].cnt;
-               __entry->d_wr_s_peak    = iostat_lat[1][DATA].peak_lat;
-               __entry->d_wr_s_avg     = iostat_lat[1][DATA].avg_lat;
-               __entry->d_wr_s_cnt     = iostat_lat[1][DATA].cnt;
-               __entry->n_wr_s_peak    = iostat_lat[1][NODE].peak_lat;
-               __entry->n_wr_s_avg     = iostat_lat[1][NODE].avg_lat;
-               __entry->n_wr_s_cnt     = iostat_lat[1][NODE].cnt;
-               __entry->m_wr_s_peak    = iostat_lat[1][META].peak_lat;
-               __entry->m_wr_s_avg     = iostat_lat[1][META].avg_lat;
-               __entry->m_wr_s_cnt     = iostat_lat[1][META].cnt;
-               __entry->d_wr_as_peak   = iostat_lat[2][DATA].peak_lat;
-               __entry->d_wr_as_avg    = iostat_lat[2][DATA].avg_lat;
-               __entry->d_wr_as_cnt    = iostat_lat[2][DATA].cnt;
-               __entry->n_wr_as_peak   = iostat_lat[2][NODE].peak_lat;
-               __entry->n_wr_as_avg    = iostat_lat[2][NODE].avg_lat;
-               __entry->n_wr_as_cnt    = iostat_lat[2][NODE].cnt;
-               __entry->m_wr_as_peak   = iostat_lat[2][META].peak_lat;
-               __entry->m_wr_as_avg    = iostat_lat[2][META].avg_lat;
-               __entry->m_wr_as_cnt    = iostat_lat[2][META].cnt;
+               __entry->d_rd_peak      = iostat_lat[READ_IO][DATA].peak_lat;
+               __entry->d_rd_avg       = iostat_lat[READ_IO][DATA].avg_lat;
+               __entry->d_rd_cnt       = iostat_lat[READ_IO][DATA].cnt;
+               __entry->n_rd_peak      = iostat_lat[READ_IO][NODE].peak_lat;
+               __entry->n_rd_avg       = iostat_lat[READ_IO][NODE].avg_lat;
+               __entry->n_rd_cnt       = iostat_lat[READ_IO][NODE].cnt;
+               __entry->m_rd_peak      = iostat_lat[READ_IO][META].peak_lat;
+               __entry->m_rd_avg       = iostat_lat[READ_IO][META].avg_lat;
+               __entry->m_rd_cnt       = iostat_lat[READ_IO][META].cnt;
+               __entry->d_wr_s_peak    = iostat_lat[WRITE_SYNC_IO][DATA].peak_lat;
+               __entry->d_wr_s_avg     = iostat_lat[WRITE_SYNC_IO][DATA].avg_lat;
+               __entry->d_wr_s_cnt     = iostat_lat[WRITE_SYNC_IO][DATA].cnt;
+               __entry->n_wr_s_peak    = iostat_lat[WRITE_SYNC_IO][NODE].peak_lat;
+               __entry->n_wr_s_avg     = iostat_lat[WRITE_SYNC_IO][NODE].avg_lat;
+               __entry->n_wr_s_cnt     = iostat_lat[WRITE_SYNC_IO][NODE].cnt;
+               __entry->m_wr_s_peak    = iostat_lat[WRITE_SYNC_IO][META].peak_lat;
+               __entry->m_wr_s_avg     = iostat_lat[WRITE_SYNC_IO][META].avg_lat;
+               __entry->m_wr_s_cnt     = iostat_lat[WRITE_SYNC_IO][META].cnt;
+               __entry->d_wr_as_peak   = iostat_lat[WRITE_ASYNC_IO][DATA].peak_lat;
+               __entry->d_wr_as_avg    = iostat_lat[WRITE_ASYNC_IO][DATA].avg_lat;
+               __entry->d_wr_as_cnt    = iostat_lat[WRITE_ASYNC_IO][DATA].cnt;
+               __entry->n_wr_as_peak   = iostat_lat[WRITE_ASYNC_IO][NODE].peak_lat;
+               __entry->n_wr_as_avg    = iostat_lat[WRITE_ASYNC_IO][NODE].avg_lat;
+               __entry->n_wr_as_cnt    = iostat_lat[WRITE_ASYNC_IO][NODE].cnt;
+               __entry->m_wr_as_peak   = iostat_lat[WRITE_ASYNC_IO][META].peak_lat;
+               __entry->m_wr_as_avg    = iostat_lat[WRITE_ASYNC_IO][META].avg_lat;
+               __entry->m_wr_as_cnt    = iostat_lat[WRITE_ASYNC_IO][META].cnt;
        ),
 
        TP_printk("dev = (%d,%d), "
index 973af6d066260a1dbf8252746494b90236903bb4..b6eb90df5d052d997f09c55b276e62b198ee36a8 100644 (file)
@@ -715,6 +715,7 @@ struct drm_amdgpu_cs_chunk_data {
 #define AMDGPU_IDS_FLAGS_FUSION         0x1
 #define AMDGPU_IDS_FLAGS_PREEMPTION     0x2
 #define AMDGPU_IDS_FLAGS_TMZ            0x4
+#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8
 
 /* indicate if acceleration can be working */
 #define AMDGPU_INFO_ACCEL_WORKING              0x00
@@ -1115,6 +1116,16 @@ struct drm_amdgpu_info_device {
        __u64 tcc_disabled_mask;
        __u64 min_engine_clock;
        __u64 min_memory_clock;
+       /* The following fields are only set on gfx11+; older chips report 0. */
+       __u32 tcp_cache_size;       /* AKA GL0, VMEM cache */
+       __u32 num_sqc_per_wgp;
+       __u32 sqc_data_cache_size;  /* AKA SMEM cache */
+       __u32 sqc_inst_cache_size;
+       __u32 gl1c_cache_size;
+       __u32 gl2c_cache_size;
+       __u64 mall_size;            /* AKA infinity cache */
+       /* high 32 bits of the rb pipes mask */
+       __u32 enabled_rb_pipes_mask_hi;
 };
 
 struct drm_amdgpu_info_hw_ip {
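The new cache-size fields above are read back through the existing AMDGPU_INFO
ioctl. Below is a hedged userspace sketch of such a query; the request layout
(return_pointer/return_size/query) follows the long-standing uapi, but the
helper name is hypothetical, error handling is elided, and this hunk does not
specify the units of the reported sizes:

#include <stdio.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>

/* Hypothetical helper: dump the new gfx11+ cache fields for an open DRM fd. */
static void dump_cache_sizes(int drm_fd)
{
	struct drm_amdgpu_info_device dev_info = { 0 };
	struct drm_amdgpu_info request = { 0 };

	request.return_pointer = (uintptr_t)&dev_info;
	request.return_size = sizeof(dev_info);
	request.query = AMDGPU_INFO_DEV_INFO;

	if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_INFO, &request) == 0)
		/* Pre-gfx11 chips report 0 for all of these. */
		printf("tcp=%u sqc_data=%u gl1c=%u gl2c=%u mall=%llu\n",
		       dev_info.tcp_cache_size, dev_info.sqc_data_cache_size,
		       dev_info.gl1c_cache_size, dev_info.gl2c_cache_size,
		       (unsigned long long)dev_info.mall_size);
}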
index 68de6f4c4eee35661ae45775238cfcdc1232c322..ac3da855fb197ef32e41b6f2611d5908939d0171 100644 (file)
@@ -445,6 +445,8 @@ typedef struct elf64_shdr {
 #define NT_LOONGARCH_LSX       0xa02   /* LoongArch Loongson SIMD Extension registers */
 #define NT_LOONGARCH_LASX      0xa03   /* LoongArch Loongson Advanced SIMD Extension registers */
 #define NT_LOONGARCH_LBT       0xa04   /* LoongArch Loongson Binary Translation registers */
+#define NT_LOONGARCH_HW_BREAK  0xa05   /* LoongArch hardware breakpoint registers */
+#define NT_LOONGARCH_HW_WATCH  0xa06   /* LoongArch hardware watchpoint registers */
 
 /* Note types with note name "GNU" */
 #define NT_GNU_PROPERTY_TYPE_0 5
index 8c4e3e536c04285e155617cde126f2c4fce10e08..ed134fbdfd32d3351226300f0a725414f04ade2a 100644 (file)
@@ -33,6 +33,8 @@ enum netdev_xdp_act {
        NETDEV_XDP_ACT_HW_OFFLOAD = 16,
        NETDEV_XDP_ACT_RX_SG = 32,
        NETDEV_XDP_ACT_NDO_XMIT_SG = 64,
+
+       NETDEV_XDP_ACT_MASK = 127,
 };
 
 enum {
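For reference, the new mask is just the OR of every feature bit defined in the
enum: assuming the bits below the visible context are 1, 2, 4 and 8 (the hunk
only shows 16, 32 and 64), the value is 1|2|4|8|16|32|64 = 2*64 - 1 = 127. A
compile-time sanity check, as an illustrative sketch rather than part of the
patch:

#include <linux/netdev.h>

/* Illustrative only: the mask must cover exactly the defined feature bits. */
_Static_assert(NETDEV_XDP_ACT_MASK == (NETDEV_XDP_ACT_NDO_XMIT_SG << 1) - 1,
	       "NETDEV_XDP_ACT_MASK must cover all defined XDP feature bits");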
index 1fed3c9294fc5ada17c4cd641ae62bf38845ab15..d7a1524023dba6d9a3b516c6d351018e7f3e4e20 100644 (file)
@@ -144,6 +144,7 @@ struct opal_read_write_table {
 #define OPAL_FL_LOCKED                 0x00000008
 #define OPAL_FL_MBR_ENABLED            0x00000010
 #define OPAL_FL_MBR_DONE               0x00000020
+#define OPAL_FL_SUM_SUPPORTED          0x00000040
 
 struct opal_status {
        __u32 flags;
index fdc3517f9e1993ad891350e60662b939a6853d00..907d345f04f93b559624f2f280dd48ad6364c0e5 100644 (file)
@@ -455,12 +455,6 @@ struct mpi3mr_bsg_packet {
        } cmd;
 };
 
-
-/* MPI3: NVMe Encasulation related definitions */
-#ifndef MPI3_NVME_ENCAP_CMD_MAX
-#define MPI3_NVME_ENCAP_CMD_MAX               (1)
-#endif
-
 struct mpi3_nvme_encapsulated_request {
        __le16  host_tag;
        __u8    ioc_use_only02;
@@ -474,7 +468,7 @@ struct mpi3_nvme_encapsulated_request {
        __le16  flags;
        __le32  data_length;
        __le32  reserved14[3];
-       __le32  command[MPI3_NVME_ENCAP_CMD_MAX];
+       __le32  command[];
 };
 
 struct mpi3_nvme_encapsulated_error_reply {
index 431c3afb2ce0f2048d6a50e77d90121ad8557239..25aab8ec4f86be11780176b250c915ca73865b98 100644 (file)
@@ -979,6 +979,7 @@ struct ufs_hba {
        struct completion *uic_async_done;
 
        enum ufshcd_state ufshcd_state;
+       bool logical_unit_scan_finished;
        u32 eh_flags;
        u32 intr_mask;
        u16 ee_ctrl_mask;
index 882bd56b01ed0f5bd6e61668b1b33b711687a0c5..76c279b13aee4754cb458dac0ac3351988dd1207 100644 (file)
@@ -22,7 +22,6 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
        struct user_namespace *uns = seq_user_ns(m);
        struct group_info *gi;
        kernel_cap_t cap;
-       unsigned __capi;
        int g;
 
        seq_printf(m, "%5d\n", id);
@@ -42,8 +41,7 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
        }
        seq_puts(m, "\n\tCapEff:\t");
        cap = cred->cap_effective;
-       CAP_FOR_EACH_U32(__capi)
-               seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8);
+       seq_put_hex_ll(m, NULL, cap.val, 16);
        seq_putc(m, '\n');
        return 0;
 }
index 1df68da89f998660d54bc4999430f6219000b8a2..fd1cc35a1c00b67cc02fb8cf1f4421f933c43419 100644 (file)
@@ -719,7 +719,7 @@ static void io_put_task_remote(struct task_struct *task, int nr)
        struct io_uring_task *tctx = task->io_uring;
 
        percpu_counter_sub(&tctx->inflight, nr);
-       if (unlikely(atomic_read(&tctx->in_idle)))
+       if (unlikely(atomic_read(&tctx->in_cancel)))
                wake_up(&tctx->wait);
        put_task_struct_many(task, nr);
 }
@@ -1258,8 +1258,8 @@ void tctx_task_work(struct callback_head *cb)
 
        ctx_flush_and_put(ctx, &uring_locked);
 
-       /* relaxed read is enough as only the task itself sets ->in_idle */
-       if (unlikely(atomic_read(&tctx->in_idle)))
+       /* relaxed read is enough as only the task itself sets ->in_cancel */
+       if (unlikely(atomic_read(&tctx->in_cancel)))
                io_uring_drop_tctx_refs(current);
 
        trace_io_uring_task_work_run(tctx, count, loops);
@@ -1285,17 +1285,15 @@ static void io_req_local_work_add(struct io_kiocb *req)
 
        percpu_ref_get(&ctx->refs);
 
-       if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) {
-               percpu_ref_put(&ctx->refs);
-               return;
-       }
+       if (!llist_add(&req->io_task_work.node, &ctx->work_llist))
+               goto put_ref;
+
        /* needed for the following wake up */
        smp_mb__after_atomic();
 
-       if (unlikely(atomic_read(&req->task->io_uring->in_idle))) {
+       if (unlikely(atomic_read(&req->task->io_uring->in_cancel))) {
                io_move_task_work_from_local(ctx);
-               percpu_ref_put(&ctx->refs);
-               return;
+               goto put_ref;
        }
 
        if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
@@ -1305,6 +1303,8 @@ static void io_req_local_work_add(struct io_kiocb *req)
 
        if (READ_ONCE(ctx->cq_waiting))
                wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
+
+put_ref:
        percpu_ref_put(&ctx->refs);
 }
 
@@ -1777,7 +1777,7 @@ int io_req_prep_async(struct io_kiocb *req)
        const struct io_issue_def *def = &io_issue_defs[req->opcode];
 
        /* assign early for deferred execution for non-fixed file */
-       if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE))
+       if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE) && !req->file)
                req->file = io_file_get_normal(req, req->cqe.fd);
        if (!cdef->prep_async)
                return 0;
@@ -2937,12 +2937,12 @@ static __cold void io_tctx_exit_cb(struct callback_head *cb)
 
        work = container_of(cb, struct io_tctx_exit, task_work);
        /*
-        * When @in_idle, we're in cancellation and it's racy to remove the
+        * When @in_cancel, we're in cancellation and it's racy to remove the
         * node. It'll be removed by the end of cancellation, just ignore it.
         * tctx can be NULL if the queueing of this task_work raced with
         * work cancelation off the exec path.
         */
-       if (tctx && !atomic_read(&tctx->in_idle))
+       if (tctx && !atomic_read(&tctx->in_cancel))
                io_uring_del_tctx_node((unsigned long)work->ctx);
        complete(&work->completion);
 }
@@ -3210,7 +3210,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
        if (tctx->io_wq)
                io_wq_exit_start(tctx->io_wq);
 
-       atomic_inc(&tctx->in_idle);
+       atomic_inc(&tctx->in_cancel);
        do {
                bool loop = false;
 
@@ -3261,9 +3261,9 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
        if (cancel_all) {
                /*
                 * We shouldn't run task_works after cancel, so just leave
-                * ->in_idle set for normal exit.
+                * ->in_cancel set for normal exit.
                 */
-               atomic_dec(&tctx->in_idle);
+               atomic_dec(&tctx->in_cancel);
                /* for exec all current's requests should be gone, kill tctx */
                __io_uring_free(current);
        }
index 4a6401080c1f88a74d76c4ce88bd15fee40f2528..3002dc827195916b160fce8376de009feb86f3c5 100644 (file)
@@ -505,7 +505,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
        }
 
        pages = io_pin_pages(reg.ring_addr,
-                            struct_size(br, bufs, reg.ring_entries),
+                            flex_array_size(br, bufs, reg.ring_entries),
                             &nr_pages);
        if (IS_ERR(pages)) {
                kfree(free_bl);
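Both helpers live in <linux/overflow.h>: struct_size(p, member, n) evaluates to
sizeof(*p) plus n trailing array elements, while flex_array_size(p, member, n)
counts only the n elements. The switch matters here because - an assumption
about code outside this hunk - the provided buffer ring header overlays the
first ring entry, so including sizeof(*br) would ask io_pin_pages() to pin one
entry's worth of memory beyond what userspace actually mapped. Schematically:

/* Semantics per <linux/overflow.h> (illustrative):
 *
 *   struct_size(br, bufs, n)     == sizeof(*br) + n * sizeof(br->bufs[0])
 *   flex_array_size(br, bufs, n) ==               n * sizeof(br->bufs[0])
 *
 * The ring mapped at reg.ring_addr is exactly n entries long, hence the
 * element-only size.
 */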
index cbd4b725f58c98e5bc5bf88d5707db5c8302e071..b7f190ca528e6e259eb2b072d7a16aaba98848cb 100644 (file)
@@ -567,7 +567,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        sr->flags = READ_ONCE(sqe->ioprio);
        if (sr->flags & ~(RECVMSG_FLAGS))
                return -EINVAL;
-       sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
+       sr->msg_flags = READ_ONCE(sqe->msg_flags);
        if (sr->msg_flags & MSG_DONTWAIT)
                req->flags |= REQ_F_NOWAIT;
        if (sr->msg_flags & MSG_ERRQUEUE)
index 8339a92b451079b993359fee37aef449fba52d21..795facbd0e9f174ab4af7bbd1444b4eaedeb29da 100644 (file)
@@ -51,6 +51,9 @@ struct io_poll_table {
 
 #define IO_WQE_F_DOUBLE                1
 
+static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+                       void *key);
+
 static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe)
 {
        unsigned long priv = (unsigned long)wqe->private;
@@ -164,15 +167,14 @@ static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked)
        }
 }
 
-static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
-                             wait_queue_func_t wake_func)
+static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
 {
        poll->head = NULL;
 #define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
        /* mask in events that we always want/need */
        poll->events = events | IO_POLL_UNMASK;
        INIT_LIST_HEAD(&poll->wait.entry);
-       init_waitqueue_func_entry(&poll->wait, wake_func);
+       init_waitqueue_func_entry(&poll->wait, io_poll_wake);
 }
 
 static inline void io_poll_remove_entry(struct io_poll *poll)
@@ -508,7 +510,7 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
 
                /* mark as double wq entry */
                wqe_private |= IO_WQE_F_DOUBLE;
-               io_init_poll_iocb(poll, first->events, first->wait.func);
+               io_init_poll_iocb(poll, first->events);
                if (!io_poll_double_prepare(req)) {
                        /* the request is completing, just back off */
                        kfree(poll);
@@ -569,7 +571,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
 
        INIT_HLIST_NODE(&req->hash_node);
        req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
-       io_init_poll_iocb(poll, mask, io_poll_wake);
+       io_init_poll_iocb(poll, mask);
        poll->file = req->file;
        req->apoll_events = poll->events;
 
@@ -650,6 +652,14 @@ static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
        __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
 }
 
+/*
+ * We can't reliably detect loops where a poll trigger fires repeatedly
+ * and the subsequent issue attempt keeps failing. Rather than failing
+ * such requests immediately, allow a certain number of retries before
+ * giving up. Given that this condition should _rarely_ trigger even
+ * once, a larger value is fine.
+ */
+#define APOLL_MAX_RETRY                128
+
 static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
                                             unsigned issue_flags)
 {
@@ -665,14 +675,18 @@ static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
                if (entry == NULL)
                        goto alloc_apoll;
                apoll = container_of(entry, struct async_poll, cache);
+               apoll->poll.retries = APOLL_MAX_RETRY;
        } else {
 alloc_apoll:
                apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
                if (unlikely(!apoll))
                        return NULL;
+               apoll->poll.retries = APOLL_MAX_RETRY;
        }
        apoll->double_poll = NULL;
        req->apoll = apoll;
+       if (unlikely(!--apoll->poll.retries))
+               return NULL;
        return apoll;
 }
 
@@ -694,8 +708,6 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
                return IO_APOLL_ABORTED;
        if (!file_can_poll(req->file))
                return IO_APOLL_ABORTED;
-       if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
-               return IO_APOLL_ABORTED;
        if (!(req->flags & REQ_F_APOLL_MULTISHOT))
                mask |= EPOLLONESHOT;
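A standalone sketch of the bounded-retry scheme the poll hunks above introduce:
the budget is set to APOLL_MAX_RETRY when an async_poll is newly allocated or
taken from the cache, each arming attempt then consumes one unit, and a request
stuck in a spurious trigger/fail loop eventually errors out instead of spinning
forever (the reuse path for already-polled requests sits outside these hunks;
the helper names below are hypothetical):

#include <stdbool.h>

#define APOLL_MAX_RETRY	128

struct apoll_budget {
	int retries;
};

/* Called when the poll entry is freshly allocated or pulled from the cache. */
static void apoll_budget_reset(struct apoll_budget *b)
{
	b->retries = APOLL_MAX_RETRY;
}

/* Returns false once the budget is spent; the caller then fails the request. */
static bool apoll_budget_consume(struct apoll_budget *b)
{
	return --b->retries > 0;
}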
 
index 5f3bae50fc81a03e2c79c3d2a15652a67c39a98f..b2393b403a2c21014d5c648cc244a707748cb02a 100644 (file)
@@ -12,6 +12,7 @@ struct io_poll {
        struct file                     *file;
        struct wait_queue_head          *head;
        __poll_t                        events;
+       int                             retries;
        struct wait_queue_entry         wait;
 };
 
index a59fc02de5983c4f789e9cdfea3a1376f578ebe6..056f40946ff68ff8d2d72781c5bdc9e3e303cea4 100644 (file)
@@ -1162,14 +1162,17 @@ struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages)
        pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
                              pages, vmas);
        if (pret == nr_pages) {
+               struct file *file = vmas[0]->vm_file;
+
                /* don't support file backed memory */
                for (i = 0; i < nr_pages; i++) {
-                       struct vm_area_struct *vma = vmas[i];
-
-                       if (vma_is_shmem(vma))
+                       if (vmas[i]->vm_file != file) {
+                               ret = -EINVAL;
+                               break;
+                       }
+                       if (!file)
                                continue;
-                       if (vma->vm_file &&
-                           !is_file_hugepages(vma->vm_file)) {
+                       if (!vma_is_shmem(vmas[i]) && !is_file_hugepages(file)) {
                                ret = -EOPNOTSUPP;
                                break;
                        }
@@ -1207,6 +1210,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
        unsigned long off;
        size_t size;
        int ret, nr_pages, i;
+       struct folio *folio = NULL;
 
        *pimu = ctx->dummy_ubuf;
        if (!iov->iov_base)
@@ -1221,6 +1225,21 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
                goto done;
        }
 
+       /* If it's a huge page, try to coalesce them into a single bvec entry */
+       if (nr_pages > 1) {
+               folio = page_folio(pages[0]);
+               for (i = 1; i < nr_pages; i++) {
+                       if (page_folio(pages[i]) != folio) {
+                               folio = NULL;
+                               break;
+                       }
+               }
+               if (folio) {
+                       folio_put_refs(folio, nr_pages - 1);
+                       nr_pages = 1;
+               }
+       }
+
        imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
        if (!imu)
                goto done;
@@ -1233,6 +1252,17 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 
        off = (unsigned long) iov->iov_base & ~PAGE_MASK;
        size = iov->iov_len;
+       /* store original address for later verification */
+       imu->ubuf = (unsigned long) iov->iov_base;
+       imu->ubuf_end = imu->ubuf + iov->iov_len;
+       imu->nr_bvecs = nr_pages;
+       *pimu = imu;
+       ret = 0;
+
+       if (folio) {
+               bvec_set_page(&imu->bvec[0], pages[0], size, off);
+               goto done;
+       }
        for (i = 0; i < nr_pages; i++) {
                size_t vec_len;
 
@@ -1241,12 +1271,6 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
                off = 0;
                size -= vec_len;
        }
-       /* store original address for later verification */
-       imu->ubuf = (unsigned long) iov->iov_base;
-       imu->ubuf_end = imu->ubuf + iov->iov_len;
-       imu->nr_bvecs = nr_pages;
-       *pimu = imu;
-       ret = 0;
 done:
        if (ret)
                kvfree(imu);
@@ -1335,7 +1359,7 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
                return -EFAULT;
 
        /*
-        * May not be a start of buffer, set size appropriately
+        * Might not be a start of buffer, set size appropriately
         * and advance us to the beginning.
         */
        offset = buf_addr - imu->ubuf;
@@ -1361,7 +1385,15 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
                const struct bio_vec *bvec = imu->bvec;
 
                if (offset <= bvec->bv_len) {
-                       iov_iter_advance(iter, offset);
+                       /*
+                        * Note, huge page buffers consist of one large
+                        * bvec entry and should always go this way. The other
+                        * branch doesn't expect non-PAGE_SIZE'd chunks.
+                        */
+                       iter->bvec = bvec;
+                       iter->nr_segs = bvec->bv_len;
+                       iter->count -= offset;
+                       iter->iov_offset = offset;
                } else {
                        unsigned long seg_skip;
 
index f27601fa46607ba9ad431f6bc5ca57df24342e18..7c198a40d5f11d799e49441aefc9034cc71169f5 100644 (file)
@@ -27,28 +27,6 @@ static inline void wq_list_add_after(struct io_wq_work_node *node,
                list->last = node;
 }
 
-/**
- * wq_list_merge - merge the second list to the first one.
- * @list0: the first list
- * @list1: the second list
- * Return the first node after mergence.
- */
-static inline struct io_wq_work_node *wq_list_merge(struct io_wq_work_list *list0,
-                                                   struct io_wq_work_list *list1)
-{
-       struct io_wq_work_node *ret;
-
-       if (!list0->first) {
-               ret = list1->first;
-       } else {
-               ret = list0->first;
-               list0->last->next = list1->first;
-       }
-       INIT_WQ_LIST(list0);
-       INIT_WQ_LIST(list1);
-       return ret;
-}
-
 static inline void wq_list_add_tail(struct io_wq_work_node *node,
                                    struct io_wq_work_list *list)
 {
index 4324b1cf1f6afaf5e6afc740f8e1ee03e2afa719..3a8d1dd97e1b4bfd4f5476d809756b64ae1d5518 100644 (file)
@@ -83,7 +83,7 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
 
        xa_init(&tctx->xa);
        init_waitqueue_head(&tctx->wait);
-       atomic_set(&tctx->in_idle, 0);
+       atomic_set(&tctx->in_cancel, 0);
        atomic_set(&tctx->inflight_tracked, 0);
        task->io_uring = tctx;
        init_llist_head(&tctx->task_list);
index 93d0b87f32838ea13330e27ea55ad71f2ce218c6..addeed3df15d3dfa9e090109bfb074a0bbb26153 100644 (file)
@@ -1295,15 +1295,11 @@ out:
 static void audit_log_cap(struct audit_buffer *ab, char *prefix,
                          kernel_cap_t *cap)
 {
-       int i;
-
        if (cap_isclear(*cap)) {
                audit_log_format(ab, " %s=0", prefix);
                return;
        }
-       audit_log_format(ab, " %s=", prefix);
-       CAP_FOR_EACH_U32(i)
-               audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]);
+       audit_log_format(ab, " %s=%016llx", prefix, cap->val);
 }
 
 static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
index 339a44dfe2f464d499a88f05485b4865ed5fa694..3e058f41df32d76507d04ac229c53d4e198687c7 100644 (file)
 #include <linux/user_namespace.h>
 #include <linux/uaccess.h>
 
-/*
- * Leveraged for setting/resetting capabilities
- */
-
-const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET;
-EXPORT_SYMBOL(__cap_empty_set);
-
 int file_caps_enabled = 1;
 
 static int __init file_caps_disable(char *str)
@@ -151,6 +144,7 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
        pid_t pid;
        unsigned tocopy;
        kernel_cap_t pE, pI, pP;
+       struct __user_cap_data_struct kdata[2];
 
        ret = cap_validate_magic(header, &tocopy);
        if ((dataptr == NULL) || (ret != 0))
@@ -163,42 +157,46 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
                return -EINVAL;
 
        ret = cap_get_target_pid(pid, &pE, &pI, &pP);
-       if (!ret) {
-               struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
-               unsigned i;
-
-               for (i = 0; i < tocopy; i++) {
-                       kdata[i].effective = pE.cap[i];
-                       kdata[i].permitted = pP.cap[i];
-                       kdata[i].inheritable = pI.cap[i];
-               }
-
-               /*
-                * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S,
-                * we silently drop the upper capabilities here. This
-                * has the effect of making older libcap
-                * implementations implicitly drop upper capability
-                * bits when they perform a: capget/modify/capset
-                * sequence.
-                *
-                * This behavior is considered fail-safe
-                * behavior. Upgrading the application to a newer
-                * version of libcap will enable access to the newer
-                * capabilities.
-                *
-                * An alternative would be to return an error here
-                * (-ERANGE), but that causes legacy applications to
-                * unexpectedly fail; the capget/modify/capset aborts
-                * before modification is attempted and the application
-                * fails.
-                */
-               if (copy_to_user(dataptr, kdata, tocopy
-                                * sizeof(struct __user_cap_data_struct))) {
-                       return -EFAULT;
-               }
-       }
+       if (ret)
+               return ret;
 
-       return ret;
+       /*
+        * Annoying legacy format with 64-bit capabilities exposed
+        * as two sets of 32-bit fields, so we need to split the
+        * capability values up.
+        */
+       kdata[0].effective   = pE.val; kdata[1].effective   = pE.val >> 32;
+       kdata[0].permitted   = pP.val; kdata[1].permitted   = pP.val >> 32;
+       kdata[0].inheritable = pI.val; kdata[1].inheritable = pI.val >> 32;
+
+       /*
+        * Note, in the case tocopy < _KERNEL_CAPABILITY_U32S,
+        * we silently drop the upper capabilities here. This
+        * has the effect of making older libcap
+        * implementations implicitly drop upper capability
+        * bits when they perform a: capget/modify/capset
+        * sequence.
+        *
+        * This behavior is considered fail-safe
+        * behavior. Upgrading the application to a newer
+        * version of libcap will enable access to the newer
+        * capabilities.
+        *
+        * An alternative would be to return an error here
+        * (-ERANGE), but that causes legacy applications to
+        * unexpectedly fail; the capget/modify/capset aborts
+        * before modification is attempted and the application
+        * fails.
+        */
+       if (copy_to_user(dataptr, kdata, tocopy * sizeof(kdata[0])))
+               return -EFAULT;
+
+       return 0;
+}
+
+static kernel_cap_t mk_kernel_cap(u32 low, u32 high)
+{
+       return (kernel_cap_t) { (low | ((u64)high << 32)) & CAP_VALID_MASK };
 }
 
 /**
@@ -221,8 +219,8 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
  */
 SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
 {
-       struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
-       unsigned i, tocopy, copybytes;
+       struct __user_cap_data_struct kdata[2] = { { 0, }, };
+       unsigned tocopy, copybytes;
        kernel_cap_t inheritable, permitted, effective;
        struct cred *new;
        int ret;
@@ -246,21 +244,9 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
        if (copy_from_user(&kdata, data, copybytes))
                return -EFAULT;
 
-       for (i = 0; i < tocopy; i++) {
-               effective.cap[i] = kdata[i].effective;
-               permitted.cap[i] = kdata[i].permitted;
-               inheritable.cap[i] = kdata[i].inheritable;
-       }
-       while (i < _KERNEL_CAPABILITY_U32S) {
-               effective.cap[i] = 0;
-               permitted.cap[i] = 0;
-               inheritable.cap[i] = 0;
-               i++;
-       }
-
-       effective.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
-       permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
-       inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+       effective   = mk_kernel_cap(kdata[0].effective,   kdata[1].effective);
+       permitted   = mk_kernel_cap(kdata[0].permitted,   kdata[1].permitted);
+       inheritable = mk_kernel_cap(kdata[0].inheritable, kdata[1].inheritable);
 
        new = prepare_creds();
        if (!new)
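A worked example of the 64-bit to legacy two-by-32-bit round trip above,
assuming CAP_LAST_CAP == 40 at this point in the tree, so that CAP_VALID_MASK
== (1ULL << 41) - 1 == 0x1ffffffffff (treat both as assumptions):

/* capget() split, with pE.val == 0x1ffffffffff (all 41 capabilities set):
 *
 *   kdata[0].effective = (u32)pE.val         == 0xffffffff  (caps 0..31)
 *   kdata[1].effective = (u32)(pE.val >> 32) == 0x1ff       (caps 32..40)
 *
 * and the reverse direction in capset():
 *
 *   mk_kernel_cap(0xffffffff, 0x1ff).val
 *       == (0xffffffff | (0x1ffULL << 32)) & CAP_VALID_MASK
 *       == 0x1ffffffffff
 */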
index bbd945bacef08dc8536781696542eee0be3c9b02..961d4af76af37708ee842bfb7a040c9bd31f504b 100644 (file)
@@ -188,9 +188,9 @@ EXPORT_SYMBOL_GPL(ipi_get_hwirq);
 static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data,
                           const struct cpumask *dest, unsigned int cpu)
 {
-       const struct cpumask *ipimask = irq_data_get_affinity_mask(data);
+       const struct cpumask *ipimask;
 
-       if (!chip || !ipimask)
+       if (!chip || !data)
                return -EINVAL;
 
        if (!chip->ipi_send_single && !chip->ipi_send_mask)
@@ -199,6 +199,10 @@ static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data,
        if (cpu >= nr_cpu_ids)
                return -EINVAL;
 
+       ipimask = irq_data_get_affinity_mask(data);
+       if (!ipimask)
+               return -EINVAL;
+
        if (dest) {
                if (!cpumask_subset(dest, ipimask))
                        return -EINVAL;
index fd099627440145789e4fb2c66e7a566e03bf9b5d..240e145e969fccd5185a89f33bd34a81a4c24ce0 100644 (file)
@@ -277,7 +277,7 @@ static struct attribute *irq_attrs[] = {
 };
 ATTRIBUTE_GROUPS(irq);
 
-static struct kobj_type irq_kobj_type = {
+static const struct kobj_type irq_kobj_type = {
        .release        = irq_kobj_release,
        .sysfs_ops      = &kobj_sysfs_ops,
        .default_groups = irq_groups,
@@ -335,7 +335,7 @@ postcore_initcall(irq_sysfs_init);
 
 #else /* !CONFIG_SYSFS */
 
-static struct kobj_type irq_kobj_type = {
+static const struct kobj_type irq_kobj_type = {
        .release        = irq_kobj_release,
 };
 
index 7d4fc6479062452e212736f122869841611d5f97..f34760a1e222623a95a613e7623c81f59b37f4a3 100644 (file)
@@ -1147,7 +1147,8 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
                domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
 
        if (domain) {
-               domain->root = parent->root;
+               if (parent)
+                       domain->root = parent->root;
                domain->parent = parent;
                domain->flags |= flags;
 
index d0f0389920c110c4aa990feb6d33f86fe5e52860..7a97bcb086bf39e92f9ac0cf6fb29c946631d2aa 100644 (file)
@@ -830,11 +830,8 @@ static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode,
        domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0,
                                             fwnode, &msi_domain_ops, info);
 
-       if (domain) {
-               if (!domain->name && info->chip)
-                       domain->name = info->chip->name;
+       if (domain)
                irq_domain_update_bus_token(domain, info->bus_token);
-       }
 
        return domain;
 }
@@ -1084,10 +1081,13 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
        struct xarray *xa;
        int ret, virq;
 
-       if (!msi_ctrl_valid(dev, &ctrl))
-               return -EINVAL;
-
        msi_lock_descs(dev);
+
+       if (!msi_ctrl_valid(dev, &ctrl)) {
+               ret = -EINVAL;
+               goto unlock;
+       }
+
        ret = msi_domain_add_simple_msi_descs(dev, &ctrl);
        if (ret)
                goto unlock;
@@ -1109,14 +1109,35 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
        return 0;
 
 fail:
-       for (--virq; virq >= virq_base; virq--)
+       for (--virq; virq >= virq_base; virq--) {
+               msi_domain_depopulate_descs(dev, virq, 1);
                irq_domain_free_irqs_common(domain, virq, 1);
+       }
        msi_domain_free_descs(dev, &ctrl);
 unlock:
        msi_unlock_descs(dev);
        return ret;
 }
 
+void msi_domain_depopulate_descs(struct device *dev, int virq_base, int nvec)
+{
+       struct msi_ctrl ctrl = {
+               .domid  = MSI_DEFAULT_DOMAIN,
+               .first  = virq_base,
+               .last   = virq_base + nvec - 1,
+       };
+       struct msi_desc *desc;
+       struct xarray *xa;
+       unsigned long idx;
+
+       if (!msi_ctrl_valid(dev, &ctrl))
+               return;
+
+       xa = &dev->msi.data->__domains[ctrl.domid].store;
+       xa_for_each_range(xa, idx, desc, ctrl.first, ctrl.last)
+               desc->irq = 0;
+}
+
 /*
  * Carefully check whether the device can use reservation mode. If
  * reservation mode is enabled then the early activation will assign a
index 487f5b03bf8352ff192629a2f1b1bdde1b67c335..5cfea8302d23a622673a606aab08e49228b37528 100644 (file)
@@ -212,9 +212,6 @@ static void panic_print_sys_info(bool console_flush)
                return;
        }
 
-       if (panic_print & PANIC_PRINT_ALL_CPU_BT)
-               trigger_all_cpu_backtrace();
-
        if (panic_print & PANIC_PRINT_TASK_INFO)
                show_state();
 
@@ -244,6 +241,30 @@ void check_panic_on_warn(const char *origin)
                      origin, limit);
 }
 
+/*
+ * Helper that triggers the NMI backtrace (if set in panic_print)
+ * and then shuts down the secondary CPUs - we cannot take the
+ * NMI backtrace after the CPUs are off!
+ */
+static void panic_other_cpus_shutdown(bool crash_kexec)
+{
+       if (panic_print & PANIC_PRINT_ALL_CPU_BT)
+               trigger_all_cpu_backtrace();
+
+       /*
+        * Note that smp_send_stop() is the usual SMP shutdown function,
+        * which unfortunately may not be hardened to work in a panic
+        * situation. If we want to do a crash dump after notifier calls
+        * and kmsg_dump, we will need architecture-dependent extra
+        * bits in addition to stopping other CPUs, hence we rely on
+        * crash_smp_send_stop() for that.
+        */
+       if (!crash_kexec)
+               smp_send_stop();
+       else
+               crash_smp_send_stop();
+}
+
 /**
  *     panic - halt the system
  *     @fmt: The text string to print
@@ -334,23 +355,10 @@ void panic(const char *fmt, ...)
         *
         * Bypass the panic_cpu check and call __crash_kexec directly.
         */
-       if (!_crash_kexec_post_notifiers) {
+       if (!_crash_kexec_post_notifiers)
                __crash_kexec(NULL);
 
-               /*
-                * Note smp_send_stop is the usual smp shutdown function, which
-                * unfortunately means it may not be hardened to work in a
-                * panic situation.
-                */
-               smp_send_stop();
-       } else {
-               /*
-                * If we want to do crash dump after notifier calls and
-                * kmsg_dump, we will need architecture dependent extra
-                * works in addition to stopping other CPUs.
-                */
-               crash_smp_send_stop();
-       }
+       panic_other_cpus_shutdown(_crash_kexec_post_notifiers);
 
        /*
         * Run any panic handlers, including those that might need to
index 5c840151f3bb2f879c530cbef6b918c1312f549d..e3211455b2032b165af1cdcf4e03f620e3cadcaa 100644 (file)
@@ -546,7 +546,7 @@ static void sugov_tunables_free(struct kobject *kobj)
        kfree(to_sugov_tunables(attr_set));
 }
 
-static struct kobj_type sugov_tunables_ktype = {
+static const struct kobj_type sugov_tunables_ktype = {
        .default_groups = sugov_groups,
        .sysfs_ops = &governor_sysfs_ops,
        .release = &sugov_tunables_free,
index 5743be55941532ad2d46b55d87a09a419c260b03..d5d94510afd3f81327c8cb37cfb55310cc5658a6 100644 (file)
@@ -729,14 +729,10 @@ EXPORT_SYMBOL_GPL(blk_trace_startstop);
  **/
 int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 {
-       struct request_queue *q;
+       struct request_queue *q = bdev_get_queue(bdev);
        int ret, start = 0;
        char b[BDEVNAME_SIZE];
 
-       q = bdev_get_queue(bdev);
-       if (!q)
-               return -ENXIO;
-
        mutex_lock(&q->debugfs_mutex);
 
        switch (cmd) {
index fbf872c624cbc44e2ab0981fc85fc2ba13d35ab9..60aa9e764a38f154b0adfeca24d6569f31a4ffa3 100644 (file)
@@ -32,9 +32,6 @@
 
 #include <trace/events/module.h>
 
-#define CAP_BSET       (void *)1
-#define CAP_PI         (void *)2
-
 static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
 static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
 static DEFINE_SPINLOCK(umh_sysctl_lock);
@@ -501,9 +498,9 @@ static int proc_cap_handler(struct ctl_table *table, int write,
                         void *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table t;
-       unsigned long cap_array[_KERNEL_CAPABILITY_U32S];
-       kernel_cap_t new_cap;
-       int err, i;
+       unsigned long cap_array[2];
+       kernel_cap_t new_cap, *cap;
+       int err;
 
        if (write && (!capable(CAP_SETPCAP) ||
                      !capable(CAP_SYS_MODULE)))
@@ -512,16 +509,13 @@ static int proc_cap_handler(struct ctl_table *table, int write,
        /*
         * convert from the global kernel_cap_t to the ulong array to print to
         * userspace if this is a read.
+        *
+        * Legacy format: capabilities are exposed as two 32-bit values
         */
+       cap = table->data;
        spin_lock(&umh_sysctl_lock);
-       for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++)  {
-               if (table->data == CAP_BSET)
-                       cap_array[i] = usermodehelper_bset.cap[i];
-               else if (table->data == CAP_PI)
-                       cap_array[i] = usermodehelper_inheritable.cap[i];
-               else
-                       BUG();
-       }
+       cap_array[0] = (u32) cap->val;
+       cap_array[1] = cap->val >> 32;
        spin_unlock(&umh_sysctl_lock);
 
        t = *table;
@@ -535,22 +529,15 @@ static int proc_cap_handler(struct ctl_table *table, int write,
        if (err < 0)
                return err;
 
-       /*
-        * convert from the sysctl array of ulongs to the kernel_cap_t
-        * internal representation
-        */
-       for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++)
-               new_cap.cap[i] = cap_array[i];
+       new_cap.val = (u32)cap_array[0];
+       new_cap.val += (u64)cap_array[1] << 32;
 
        /*
         * Drop everything not in the new_cap (but don't add things)
         */
        if (write) {
                spin_lock(&umh_sysctl_lock);
-               if (table->data == CAP_BSET)
-                       usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap);
-               if (table->data == CAP_PI)
-                       usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap);
+               *cap = cap_intersect(*cap, new_cap);
                spin_unlock(&umh_sysctl_lock);
        }
 
@@ -560,15 +547,15 @@ static int proc_cap_handler(struct ctl_table *table, int write,
 struct ctl_table usermodehelper_table[] = {
        {
                .procname       = "bset",
-               .data           = CAP_BSET,
-               .maxlen         = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
+               .data           = &usermodehelper_bset,
+               .maxlen         = 2 * sizeof(unsigned long),
                .mode           = 0600,
                .proc_handler   = proc_cap_handler,
        },
        {
                .procname       = "inheritable",
-               .data           = CAP_PI,
-               .maxlen         = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
+               .data           = &usermodehelper_inheritable,
+               .maxlen         = 2 * sizeof(unsigned long),
                .mode           = 0600,
                .proc_handler   = proc_cap_handler,
        },
index be6ee60202908394540f68bf7ff4b25f296cf78d..fdca89c057452e7715a21316a1d3b763a525d785 100644 (file)
@@ -49,6 +49,15 @@ menuconfig KASAN
 
 if KASAN
 
+config CC_HAS_KASAN_MEMINTRINSIC_PREFIX
+       def_bool (CC_IS_CLANG && $(cc-option,-fsanitize=kernel-address -mllvm -asan-kernel-mem-intrinsic-prefix=1)) || \
+                (CC_IS_GCC && $(cc-option,-fsanitize=kernel-address --param asan-kernel-mem-intrinsic-prefix=1))
+       # Don't define it if we don't need it: compilation of the test uses
+       # this variable to decide how the compiler should treat builtins.
+       depends on !KASAN_HW_TAGS
+       help
+         The compiler is able to prefix memintrinsics with __asan or __hwasan.
+
 choice
        prompt "KASAN mode"
        default KASAN_GENERIC
index 469be6240523f6478e195d85eaf46767b3919c6a..baf2821f7a00fdbfcc75b96b0ab83a1986cda66b 100644 (file)
@@ -127,14 +127,10 @@ CFLAGS_test_fpu.o += $(FPU_CFLAGS)
 
 obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/
 
-obj-$(CONFIG_KUNIT) += kunit/
-# Include the KUnit hooks unconditionally. They'll compile to nothing if
-# CONFIG_KUNIT=n, otherwise will be a small table of static data (static key,
-# function pointers) which need to be built-in even when KUnit is a module.
-ifeq ($(CONFIG_KUNIT), m)
-obj-y += kunit/hooks.o
-else
-obj-$(CONFIG_KUNIT) += kunit/hooks.o
+# Some KUnit files (hooks.o) need to be built-in even when KUnit is a module,
+# so we can't just use obj-$(CONFIG_KUNIT).
+ifdef CONFIG_KUNIT
+obj-y += kunit/
 endif
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
index d1fc6ece21f3704566f9296b47bf47d3f04c8296..a105e6369efc912cc910aeb932ae0335688ac485 100644 (file)
                KUNIT_EXPECT_EQ_MSG((test), mask_weight, iter, MASK_MSG(mask)); \
        } while (0)
 
-#define EXPECT_FOR_EACH_CPU_NOT_EQ(test, mask)                                 \
-       do {                                                                    \
-               const cpumask_t *m = (mask);                                    \
-               int mask_weight = cpumask_weight(m);                            \
-               int cpu, iter = 0;                                              \
-               for_each_cpu_not(cpu, m)                                        \
-                       iter++;                                                 \
-               KUNIT_EXPECT_EQ_MSG((test), nr_cpu_ids - mask_weight, iter, MASK_MSG(mask));    \
-       } while (0)
-
 #define EXPECT_FOR_EACH_CPU_OP_EQ(test, op, mask1, mask2)                      \
        do {                                                                    \
                const cpumask_t *m1 = (mask1);                                  \
@@ -77,7 +67,7 @@ static void test_cpumask_weight(struct kunit *test)
        KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_weight(&mask_empty), MASK_MSG(&mask_empty));
        KUNIT_EXPECT_EQ_MSG(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask),
                            MASK_MSG(cpu_possible_mask));
-       KUNIT_EXPECT_EQ_MSG(test, nr_cpumask_bits, cpumask_weight(&mask_all), MASK_MSG(&mask_all));
+       KUNIT_EXPECT_EQ_MSG(test, nr_cpu_ids, cpumask_weight(&mask_all), MASK_MSG(&mask_all));
 }
 
 static void test_cpumask_first(struct kunit *test)
@@ -113,14 +103,12 @@ static void test_cpumask_next(struct kunit *test)
 static void test_cpumask_iterators(struct kunit *test)
 {
        EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty);
-       EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty);
        EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty);
        EXPECT_FOR_EACH_CPU_OP_EQ(test, and, &mask_empty, &mask_empty);
        EXPECT_FOR_EACH_CPU_OP_EQ(test, and, cpu_possible_mask, &mask_empty);
        EXPECT_FOR_EACH_CPU_OP_EQ(test, andnot, &mask_empty, &mask_empty);
 
        EXPECT_FOR_EACH_CPU_EQ(test, cpu_possible_mask);
-       EXPECT_FOR_EACH_CPU_NOT_EQ(test, cpu_possible_mask);
        EXPECT_FOR_EACH_CPU_WRAP_EQ(test, cpu_possible_mask);
        EXPECT_FOR_EACH_CPU_OP_EQ(test, and, cpu_possible_mask, cpu_possible_mask);
        EXPECT_FOR_EACH_CPU_OP_EQ(test, andnot, cpu_possible_mask, &mask_empty);
index da665cd4ea12f897898023a15df83658fae30bcf..cb417f504996254f79bc020e66688fba37d6a990 100644 (file)
@@ -13,7 +13,7 @@ kunit-objs +=                         debugfs.o
 endif
 
 # KUnit 'hooks' are built-in even when KUnit is built as a module.
-lib-y +=                               hooks.o
+obj-y +=                               hooks.o
 
 obj-$(CONFIG_KUNIT_TEST) +=            kunit-test.o
 
index 2b5e2b480253f18021964956b216bf463394844d..f4eafb9d74e6f79ff25ed66b4fd47455d7b586f1 100644 (file)
@@ -133,7 +133,7 @@ EXPORT_SYMBOL(match_token);
  * as a number in that base.
  *
  * Return: On success, sets @result to the integer represented by the
- * string and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ * string and returns 0. Returns -EINVAL or -ERANGE on failure.
  */
 static int match_number(substring_t *s, int *result, int base)
 {
@@ -165,7 +165,7 @@ static int match_number(substring_t *s, int *result, int base)
  * as a number in that base.
  *
  * Return: On success, sets @result to the integer represented by the
- * string and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ * string and returns 0. Returns -EINVAL or -ERANGE on failure.
  */
 static int match_u64int(substring_t *s, u64 *result, int base)
 {
@@ -189,7 +189,7 @@ static int match_u64int(substring_t *s, u64 *result, int base)
  * Description: Attempts to parse the &substring_t @s as a decimal integer.
  *
  * Return: On success, sets @result to the integer represented by the string
- * and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ * and returns 0. Returns -EINVAL or -ERANGE on failure.
  */
 int match_int(substring_t *s, int *result)
 {
@@ -205,7 +205,7 @@ EXPORT_SYMBOL(match_int);
  * Description: Attempts to parse the &substring_t @s as a decimal integer.
  *
  * Return: On success, sets @result to the integer represented by the string
- * and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ * and returns 0. Returns -EINVAL or -ERANGE on failure.
  */
 int match_uint(substring_t *s, unsigned int *result)
 {
@@ -228,7 +228,7 @@ EXPORT_SYMBOL(match_uint);
  * integer.
  *
  * Return: On success, sets @result to the integer represented by the string
- * and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ * and returns 0. Returns -EINVAL or -ERANGE on failure.
  */
 int match_u64(substring_t *s, u64 *result)
 {
@@ -244,7 +244,7 @@ EXPORT_SYMBOL(match_u64);
  * Description: Attempts to parse the &substring_t @s as an octal integer.
  *
  * Return: On success, sets @result to the integer represented by the string
- * and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ * and returns 0. Returns -EINVAL or -ERANGE on failure.
  */
 int match_octal(substring_t *s, int *result)
 {
@@ -260,7 +260,7 @@ EXPORT_SYMBOL(match_octal);
  * Description: Attempts to parse the &substring_t @s as a hexadecimal integer.
  *
  * Return: On success, sets @result to the integer represented by the string
- * and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ * and returns 0. Returns -EINVAL or -ERANGE on failure.
  */
 int match_hex(substring_t *s, int *result)
 {
index 385333b22ec68e354ff9f8302b34ca2ae99bada3..4ea40f5a279fa6ae08fe28a85d50813bc4825348 100644 (file)
@@ -420,9 +420,11 @@ static inline void flush_pending(
        z_streamp strm
 )
 {
+    unsigned len;
     deflate_state *s = (deflate_state *) strm->state;
-    unsigned len = s->pending;
 
+    bi_flush(s);
+    len = s->pending;
     if (len > strm->avail_out) len = strm->avail_out;
     if (len == 0) return;
 
index 607bb69e526cf9d659274b333c921ad0f602348b..6c655d9b5639106722baed45b3f27fae1ca20664 100644 (file)
@@ -250,12 +250,11 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s)
                        folio_put(folio);
                        continue;
                }
-               if (folio_test_unevictable(folio)) {
+               if (folio_test_unevictable(folio))
                        folio_putback_lru(folio);
-               } else {
+               else
                        list_add(&folio->lru, &folio_list);
-                       folio_put(folio);
-               }
+               folio_put(folio);
        }
        applied = reclaim_pages(&folio_list);
        cond_resched();
index d4837bff3b60f2b254a0a2b67e904628df1524ba..7634dd2a61285646373aa55b6751c8d535f5b2d4 100644 (file)
@@ -35,7 +35,14 @@ CFLAGS_shadow.o := $(CC_FLAGS_KASAN_RUNTIME)
 CFLAGS_hw_tags.o := $(CC_FLAGS_KASAN_RUNTIME)
 CFLAGS_sw_tags.o := $(CC_FLAGS_KASAN_RUNTIME)
 
-CFLAGS_KASAN_TEST := $(CFLAGS_KASAN) -fno-builtin $(call cc-disable-warning, vla)
+CFLAGS_KASAN_TEST := $(CFLAGS_KASAN) $(call cc-disable-warning, vla)
+ifndef CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX
+# If the compiler instruments memintrinsics by prefixing them with __asan/__hwasan,
+# we need to treat them normally (as builtins), otherwise the compiler won't
+# recognize them as instrumentable. If it doesn't instrument them, we need to
+# pass -fno-builtin, so the compiler doesn't inline them.
+CFLAGS_KASAN_TEST += -fno-builtin
+endif
 
 CFLAGS_kasan_test.o := $(CFLAGS_KASAN_TEST)
 CFLAGS_kasan_test_module.o := $(CFLAGS_KASAN_TEST)
index 9377b0789edc2c9390872f7eaf0c83502ab80cf5..a61eeee3095a972948ec9b98ad9cf6f696c70b18 100644 (file)
@@ -666,4 +666,8 @@ void __hwasan_storeN_noabort(unsigned long addr, size_t size);
 
 void __hwasan_tag_memory(unsigned long addr, u8 tag, unsigned long size);
 
+void *__hwasan_memset(void *addr, int c, size_t len);
+void *__hwasan_memmove(void *dest, const void *src, size_t len);
+void *__hwasan_memcpy(void *dest, const void *src, size_t len);
+
 #endif /* __MM_KASAN_KASAN_H */
index 74cd80c12b251ac980e77e27eda9d20052769d08..627eaf1ee1db124cea7111a74bde7a1f494bc4f6 100644 (file)
@@ -165,6 +165,15 @@ static void kasan_test_exit(struct kunit *test)
                kunit_skip((test), "Test requires " #config "=n");      \
 } while (0)
 
+#define KASAN_TEST_NEEDS_CHECKED_MEMINTRINSICS(test) do {              \
+       if (IS_ENABLED(CONFIG_KASAN_HW_TAGS))                           \
+               break;  /* No compiler instrumentation. */              \
+       if (IS_ENABLED(CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX))        \
+               break;  /* Should always be instrumented! */            \
+       if (IS_ENABLED(CONFIG_GENERIC_ENTRY))                           \
+               kunit_skip((test), "Test requires checked mem*()");     \
+} while (0)
+
 static void kmalloc_oob_right(struct kunit *test)
 {
        char *ptr;
@@ -454,6 +463,8 @@ static void kmalloc_oob_16(struct kunit *test)
                u64 words[2];
        } *ptr1, *ptr2;
 
+       KASAN_TEST_NEEDS_CHECKED_MEMINTRINSICS(test);
+
        /* This test is specifically crafted for the generic mode. */
        KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
 
@@ -476,6 +487,8 @@ static void kmalloc_uaf_16(struct kunit *test)
                u64 words[2];
        } *ptr1, *ptr2;
 
+       KASAN_TEST_NEEDS_CHECKED_MEMINTRINSICS(test);
+
        ptr1 = kmalloc(sizeof(*ptr1), GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
 
@@ -498,6 +511,8 @@ static void kmalloc_oob_memset_2(struct kunit *test)
        char *ptr;
        size_t size = 128 - KASAN_GRANULE_SIZE;
 
+       KASAN_TEST_NEEDS_CHECKED_MEMINTRINSICS(test);
+
        ptr = kmalloc(size, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
@@ -511,6 +526,8 @@ static void kmalloc_oob_memset_4(struct kunit *test)
        char *ptr;
        size_t size = 128 - KASAN_GRANULE_SIZE;
 
+       KASAN_TEST_NEEDS_CHECKED_MEMINTRINSICS(test);
+
        ptr = kmalloc(size, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
@@ -524,6 +541,8 @@ static void kmalloc_oob_memset_8(struct kunit *test)
        char *ptr;
        size_t size = 128 - KASAN_GRANULE_SIZE;
 
+       KASAN_TEST_NEEDS_CHECKED_MEMINTRINSICS(test);
+
        ptr = kmalloc(size, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
@@ -537,6 +556,8 @@ static void kmalloc_oob_memset_16(struct kunit *test)
        char *ptr;
        size_t size = 128 - KASAN_GRANULE_SIZE;
 
+       KASAN_TEST_NEEDS_CHECKED_MEMINTRINSICS(test);
+
        ptr = kmalloc(size, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
@@ -550,6 +571,8 @@ static void kmalloc_oob_in_memset(struct kunit *test)
        char *ptr;
        size_t size = 128 - KASAN_GRANULE_SIZE;
 
+       KASAN_TEST_NEEDS_CHECKED_MEMINTRINSICS(test);
+
        ptr = kmalloc(size, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
@@ -566,6 +589,8 @@ static void kmalloc_memmove_negative_size(struct kunit *test)
        size_t size = 64;
        size_t invalid_size = -2;
 
+       KASAN_TEST_NEEDS_CHECKED_MEMINTRINSICS(test);
+
        /*
         * Hardware tag-based mode doesn't check memmove for negative size.
         * As a result, this test introduces a side-effect memory corruption,
@@ -590,6 +615,8 @@ static void kmalloc_memmove_invalid_size(struct kunit *test)
        size_t size = 64;
        size_t invalid_size = size;
 
+       KASAN_TEST_NEEDS_CHECKED_MEMINTRINSICS(test);
+
        ptr = kmalloc(size, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
@@ -618,6 +645,8 @@ static void kmalloc_uaf_memset(struct kunit *test)
        char *ptr;
        size_t size = 33;
 
+       KASAN_TEST_NEEDS_CHECKED_MEMINTRINSICS(test);
+
        /*
         * Only generic KASAN uses quarantine, which is required to avoid a
         * kernel memory corruption this test causes.
index 3703983a8e5569f3b4ff63698f3fee0e1b1b8109..c8b86f3273b509bca9bfbd29ecf9bd8150438635 100644 (file)
@@ -38,11 +38,14 @@ bool __kasan_check_write(const volatile void *p, unsigned int size)
 }
 EXPORT_SYMBOL(__kasan_check_write);
 
-#ifndef CONFIG_GENERIC_ENTRY
+#if !defined(CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX) && !defined(CONFIG_GENERIC_ENTRY)
 /*
  * CONFIG_GENERIC_ENTRY relies on compiler emitted mem*() calls to not be
  * instrumented. KASAN enabled toolchains should emit __asan_mem*() functions
  * for the sites they want to instrument.
+ *
+ * If we have a compiler that can instrument memintrinsics, never override
+ * these, so that non-instrumented files can safely consider them as builtins.
  */
 #undef memset
 void *memset(void *addr, int c, size_t len)
@@ -107,6 +110,17 @@ void *__asan_memcpy(void *dest, const void *src, size_t len)
 }
 EXPORT_SYMBOL(__asan_memcpy);
 
+#ifdef CONFIG_KASAN_SW_TAGS
+void *__hwasan_memset(void *addr, int c, size_t len) __alias(__asan_memset);
+EXPORT_SYMBOL(__hwasan_memset);
+#ifdef __HAVE_ARCH_MEMMOVE
+void *__hwasan_memmove(void *dest, const void *src, size_t len) __alias(__asan_memmove);
+EXPORT_SYMBOL(__hwasan_memmove);
+#endif
+void *__hwasan_memcpy(void *dest, const void *src, size_t len) __alias(__asan_memcpy);
+EXPORT_SYMBOL(__hwasan_memcpy);
+#endif
+
 void kasan_poison(const void *addr, size_t size, u8 value, bool init)
 {
        void *shadow_start, *shadow_end;
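A standalone sketch of the aliasing trick used above: in the kernel,
__alias(sym) expands to __attribute__((__alias__(#sym))), so each __hwasan_*
entry point shares the corresponding __asan_* body instead of duplicating it.
The function names below are hypothetical:

#define __alias(symbol)	__attribute__((__alias__(#symbol)))

/* One real implementation (stand-in body for the sketch)... */
void *checked_memset(void *addr, int c, unsigned long len)
{
	return __builtin_memset(addr, c, len);
}

/* ...exposed under a second linker symbol, with no duplicated code. */
void *other_memset(void *addr, int c, unsigned long len)
	__alias(checked_memset);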
index a1ede7bdce95e89adae2bb9664a54dce708fc419..fae9baf3be1622991bc13842c797255fa46462b7 100644 (file)
@@ -1069,7 +1069,7 @@ static int me_pagecache_dirty(struct page_state *ps, struct page *p)
  * cache and swap cache(ie. page is freshly swapped in). So it could be
  * referenced concurrently by 2 types of PTEs:
  * normal PTEs and swap PTEs. We try to handle them consistently by calling
- * try_to_unmap(TTU_IGNORE_HWPOISON) to convert the normal PTEs to swap PTEs,
+ * try_to_unmap(!TTU_HWPOISON) to convert the normal PTEs to swap PTEs,
  * and then
  *      - clear dirty bit to prevent IO
  *      - remove from LRU
@@ -1486,7 +1486,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
                                  int flags, struct page *hpage)
 {
        struct folio *folio = page_folio(hpage);
-       enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC;
+       enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
        struct address_space *mapping;
        LIST_HEAD(tokill);
        bool unmap_success;
@@ -1516,7 +1516,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 
        if (PageSwapCache(p)) {
                pr_err("%#lx: keeping poisoned page in swap cache\n", pfn);
-               ttu |= TTU_IGNORE_HWPOISON;
+               ttu &= ~TTU_HWPOISON;
        }
 
        /*
@@ -1531,7 +1531,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
                if (page_mkclean(hpage)) {
                        SetPageDirty(hpage);
                } else {
-                       ttu |= TTU_IGNORE_HWPOISON;
+                       ttu &= ~TTU_HWPOISON;
                        pr_info("%#lx: corrupted page was clean: dropped without side effects\n",
                                pfn);
                }
index 37865f85df6d4132f5343aac33f16a580bf2f001..98f1c11197a8c5f057ed23f5aea9cdc62536bed4 100644 (file)
@@ -1035,11 +1035,16 @@ out:
  * destination folio.  This is safe because nobody is using them
  * except us.
  */
+union migration_ptr {
+       struct anon_vma *anon_vma;
+       struct address_space *mapping;
+};
 static void __migrate_folio_record(struct folio *dst,
                                   unsigned long page_was_mapped,
                                   struct anon_vma *anon_vma)
 {
-       dst->mapping = (void *)anon_vma;
+       union migration_ptr ptr = { .anon_vma = anon_vma };
+       dst->mapping = ptr.mapping;
        dst->private = (void *)page_was_mapped;
 }
 
@@ -1047,7 +1052,8 @@ static void __migrate_folio_extract(struct folio *dst,
                                   int *page_was_mappedp,
                                   struct anon_vma **anon_vmap)
 {
-       *anon_vmap = (void *)dst->mapping;
+       union migration_ptr ptr = { .mapping = dst->mapping };
+       *anon_vmap = ptr.anon_vma;
        *page_was_mappedp = (unsigned long)dst->private;
        dst->mapping = NULL;
        dst->private = NULL;
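The union replaces the old (void *) casts with an explicitly declared pun: both
members are pointer-sized, only one is live at a time, and the two types that
temporarily share dst->mapping during migration are now visible in one place.
A minimal generic sketch of the idiom (the struct types here are placeholders):

struct anon_vma;
struct address_space;

union migration_ptr {
	struct anon_vma *anon_vma;
	struct address_space *mapping;
};

/* Store through one member, load back through the other - no casts needed. */
static struct anon_vma *stash_roundtrip(struct anon_vma *av)
{
	union migration_ptr ptr = { .anon_vma = av };
	struct address_space *stored = ptr.mapping;	/* what lands in ->mapping */
	union migration_ptr back = { .mapping = stored };

	return back.anon_vma;
}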
index 20f21f0949ddb1eb0324dc75f9149009df238a69..740b54be3ed4140f16a6731a275ab49f3a8b256f 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -973,7 +973,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
                        vma_end = addr;
                        adjust = mid;
                        adj_next = -(vma->vm_end - addr);
-                       err = dup_anon_vma(res, adjust);
+                       err = dup_anon_vma(adjust, prev);
                } else {
                        vma = next;                     /* case 3 */
                        vma_start = addr;
index 15ae24585fc49df4e977a8940f858890f4b0dcf2..8632e02661ac7fa01d4e19931028b5f1f10da717 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1602,7 +1602,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
                /* Update high watermark before we lower rss */
                update_hiwater_rss(mm);
 
-               if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {
+               if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) {
                        pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
                        if (folio_test_hugetlb(folio)) {
                                hugetlb_count_sub(folio_nr_pages(folio), mm);
index 622ec6a586eea993b0adcdd91655f37c3937998f..2adcb5e7b0e29e54fc104e0a58a7a11781f1f9a6 100644 (file)
 #define CREATE_TRACE_POINTS
 #include <trace/events/9p.h>
 
-#define DEFAULT_MSIZE (128 * 1024)
+/* DEFAULT_MSIZE = 32 pages worth of payload + P9_HDRSZ +
+ * room for write (16 extra) or read (11 extra) operands.
+ */
+
+#define DEFAULT_MSIZE ((128 * 1024) + P9_IOHDRSZ)
 
 /* Client Option Parsing (code inspired by NFS code)
  *  - a little lazy - parse all client options
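
The new DEFAULT_MSIZE keeps the full 128 KiB payload intact and adds the I/O header room on top, instead of letting the header eat into page-aligned payload. A tiny standalone sketch of that arithmetic; the P9_IOHDRSZ value here is illustrative:

#include <stdio.h>

#define P9_IOHDRSZ 24	/* illustrative: header + read/write operand room */
#define DEFAULT_MSIZE ((128 * 1024) + P9_IOHDRSZ)

int main(void)
{
	/* all 32 pages of payload survive after the header is taken out */
	printf("payload per message: %d bytes\n", DEFAULT_MSIZE - P9_IOHDRSZ);
	return 0;
}
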
@@ -1289,7 +1293,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags,
                 qid->type, qid->path, qid->version, iounit);
 
        memmove(&ofid->qid, qid, sizeof(struct p9_qid));
-       ofid->mode = mode;
+       ofid->mode = flags;
        ofid->iounit = iounit;
 
 free_and_error:
index 83f9100d46bff763a2c2d4af6d706a42a6fa5e83..b84748baf9cbe3dc38a816daeeeefb3d7157ae13 100644 (file)
@@ -385,6 +385,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
        struct p9_trans_rdma *rdma = client->trans;
        struct ib_recv_wr wr;
        struct ib_sge sge;
+       int ret;
 
        c->busa = ib_dma_map_single(rdma->cm_id->device,
                                    c->rc.sdata, client->msize,
@@ -402,7 +403,12 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
        wr.wr_cqe = &c->cqe;
        wr.sg_list = &sge;
        wr.num_sge = 1;
-       return ib_post_recv(rdma->qp, &wr, NULL);
+
+       ret = ib_post_recv(rdma->qp, &wr, NULL);
+       if (ret)
+               ib_dma_unmap_single(rdma->cm_id->device, c->busa,
+                                   client->msize, DMA_FROM_DEVICE);
+       return ret;
 
  error:
        p9_debug(P9_DEBUG_ERROR, "EIO\n");
@@ -499,7 +505,7 @@ dont_need_post_recv:
 
        if (down_interruptible(&rdma->sq_sem)) {
                err = -EINTR;
-               goto send_error;
+               goto dma_unmap;
        }
 
        /* Mark request as `sent' *before* we actually send it,
@@ -509,11 +515,14 @@ dont_need_post_recv:
        WRITE_ONCE(req->status, REQ_STATUS_SENT);
        err = ib_post_send(rdma->qp, &wr, NULL);
        if (err)
-               goto send_error;
+               goto dma_unmap;
 
        /* Success */
        return 0;
 
+dma_unmap:
+       ib_dma_unmap_single(rdma->cm_id->device, c->busa,
+                           c->req->tc.size, DMA_TO_DEVICE);
  /* Handle errors that happened during or while preparing the send: */
  send_error:
        WRITE_ONCE(req->status, REQ_STATUS_ERROR);
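
Both hunks above add the same unwind rule: a DMA mapping created earlier in the function must be torn down on every failure path that follows it, or it leaks. A standalone sketch of the pattern with stand-in functions:

#include <stdio.h>

static int dma_map(void)	{ printf("map\n"); return 0; }
static void dma_unmap(void)	{ printf("unmap\n"); }
static int post_to_hw(void)	{ return -1; /* simulate a post failure */ }

static int post_recv(void)
{
	int ret = dma_map();

	if (ret)
		return ret;

	ret = post_to_hw();
	if (ret)
		dma_unmap();	/* undo the mapping the moment posting fails */
	return ret;
}

int main(void)
{
	post_recv();
	return 0;
}
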
index 82c7005ede65679bd528637ff029573a03e7776e..c64050e839ac6faccd7f9741654cfa74bf661836 100644 (file)
@@ -372,19 +372,24 @@ out:
        return ret;
 }
 
-static int xen_9pfs_front_probe(struct xenbus_device *dev,
-                               const struct xenbus_device_id *id)
+static int xen_9pfs_front_init(struct xenbus_device *dev)
 {
        int ret, i;
        struct xenbus_transaction xbt;
-       struct xen_9pfs_front_priv *priv = NULL;
-       char *versions;
+       struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev);
+       char *versions, *v;
        unsigned int max_rings, max_ring_order, len = 0;
 
        versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
        if (IS_ERR(versions))
                return PTR_ERR(versions);
-       if (strcmp(versions, "1")) {
+       for (v = versions; *v; v++) {
+               if (simple_strtoul(v, &v, 10) == 1) {
+                       v = NULL;
+                       break;
+               }
+       }
+       if (v) {
                kfree(versions);
                return -EINVAL;
        }
@@ -399,11 +404,6 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
        if (p9_xen_trans.maxsize > XEN_FLEX_RING_SIZE(max_ring_order))
                p9_xen_trans.maxsize = XEN_FLEX_RING_SIZE(max_ring_order) / 2;
 
-       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-       if (!priv)
-               return -ENOMEM;
-
-       priv->dev = dev;
        priv->num_rings = XEN_9PFS_NUM_RINGS;
        priv->rings = kcalloc(priv->num_rings, sizeof(*priv->rings),
                              GFP_KERNEL);
@@ -462,23 +462,35 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
                goto error;
        }
 
-       write_lock(&xen_9pfs_lock);
-       list_add_tail(&priv->list, &xen_9pfs_devs);
-       write_unlock(&xen_9pfs_lock);
-       dev_set_drvdata(&dev->dev, priv);
-       xenbus_switch_state(dev, XenbusStateInitialised);
-
        return 0;
 
  error_xenbus:
        xenbus_transaction_end(xbt, 1);
        xenbus_dev_fatal(dev, ret, "writing xenstore");
  error:
-       dev_set_drvdata(&dev->dev, NULL);
        xen_9pfs_front_free(priv);
        return ret;
 }
 
+static int xen_9pfs_front_probe(struct xenbus_device *dev,
+                               const struct xenbus_device_id *id)
+{
+       struct xen_9pfs_front_priv *priv = NULL;
+
+       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->dev = dev;
+       dev_set_drvdata(&dev->dev, priv);
+
+       write_lock(&xen_9pfs_lock);
+       list_add_tail(&priv->list, &xen_9pfs_devs);
+       write_unlock(&xen_9pfs_lock);
+
+       return 0;
+}
+
 static int xen_9pfs_front_resume(struct xenbus_device *dev)
 {
        dev_warn(&dev->dev, "suspend/resume unsupported\n");
@@ -497,6 +509,8 @@ static void xen_9pfs_front_changed(struct xenbus_device *dev,
                break;
 
        case XenbusStateInitWait:
+               if (!xen_9pfs_front_init(dev))
+                       xenbus_switch_state(dev, XenbusStateInitialised);
                break;
 
        case XenbusStateConnected:
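
Two things change above: probe now only allocates and registers the private data, deferring ring setup to the XenbusStateInitWait transition, and the backend's "versions" value is scanned as a list instead of being strcmp()ed verbatim against "1". A standalone sketch of that list scan, using strtoul in place of the kernel's simple_strtoul:

#include <stdio.h>
#include <stdlib.h>

static int supports_v1(const char *versions)
{
	char *v = (char *)versions;

	/* accept "1" anywhere in a comma-separated list, e.g. "2,1" */
	while (*v) {
		if (strtoul(v, &v, 10) == 1)
			return 1;
		if (*v)
			v++;	/* skip the separator */
	}
	return 0;
}

int main(void)
{
	printf("%d %d %d\n", supports_v1("1"), supports_v1("2,1"),
	       supports_v1("2"));
	return 0;
}
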
index 8c92fc55331771495b7c11fe5b4e660d8c11a11c..87e654b7d06c14efe4e4567060e6fe73e8306249 100644 (file)
@@ -774,20 +774,32 @@ static int __init xdp_metadata_init(void)
 }
 late_initcall(xdp_metadata_init);
 
-void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
+void xdp_set_features_flag(struct net_device *dev, xdp_features_t val)
 {
-       dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
-       if (support_sg)
-               dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT_SG;
+       val &= NETDEV_XDP_ACT_MASK;
+       if (dev->xdp_features == val)
+               return;
 
+       dev->xdp_features = val;
        call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
 }
+EXPORT_SYMBOL_GPL(xdp_set_features_flag);
+
+void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
+{
+       xdp_features_t val = (dev->xdp_features | NETDEV_XDP_ACT_NDO_XMIT);
+
+       if (support_sg)
+               val |= NETDEV_XDP_ACT_NDO_XMIT_SG;
+       xdp_set_features_flag(dev, val);
+}
 EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target);
 
 void xdp_features_clear_redirect_target(struct net_device *dev)
 {
-       dev->xdp_features &= ~(NETDEV_XDP_ACT_NDO_XMIT |
-                              NETDEV_XDP_ACT_NDO_XMIT_SG);
-       call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
+       xdp_features_t val = dev->xdp_features;
+
+       val &= ~(NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_NDO_XMIT_SG);
+       xdp_set_features_flag(dev, val);
 }
 EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target);
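
The refactor above routes every XDP feature update through one exported setter that masks invalid bits, skips no-op writes, and fires the netdev notifier only on a real change. A standalone sketch of that centralization, with stand-in names for the netdev API:

#include <stdio.h>

#define ACT_NDO_XMIT    0x1
#define ACT_NDO_XMIT_SG 0x2
#define ACT_MASK        0x3

struct dev_stub { unsigned int features; };

static void notify(void) { printf("feature change notified\n"); }

static void set_features(struct dev_stub *dev, unsigned int val)
{
	val &= ACT_MASK;
	if (dev->features == val)
		return;		/* no-op: don't spam the notifier */
	dev->features = val;
	notify();
}

static void set_redirect_target(struct dev_stub *dev, int support_sg)
{
	unsigned int val = dev->features | ACT_NDO_XMIT;

	if (support_sg)
		val |= ACT_NDO_XMIT_SG;
	set_features(dev, val);
}

int main(void)
{
	struct dev_stub dev = { 0 };

	set_redirect_target(&dev, 1);
	set_redirect_target(&dev, 1);	/* second call notifies nothing */
	return 0;
}
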
index 71d01cf3c13eb4bd3d314ef140568d2ffd6a499e..ba839e441450f195012a8d77cb9e5ed956962d2f 100644 (file)
@@ -3605,7 +3605,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
        th->window = htons(min(req->rsk_rcv_wnd, 65535U));
        tcp_options_write(th, NULL, &opts);
        th->doff = (tcp_header_size >> 2);
-       __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
+       TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
 
 #ifdef CONFIG_TCP_MD5SIG
        /* Okay, we have all we need - do the md5 hash if needed */
index 8eb3423008687a3f09c3512d8adbb00b19579695..d3d861911ed65084bd617736c5c32c3ed899de50 100644 (file)
@@ -2611,6 +2611,17 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
        if (!sband)
                return -EINVAL;
 
+       if (params->basic_rates) {
+               if (!ieee80211_parse_bitrates(link->conf->chandef.width,
+                                             wiphy->bands[sband->band],
+                                             params->basic_rates,
+                                             params->basic_rates_len,
+                                             &link->conf->basic_rates))
+                       return -EINVAL;
+               changed |= BSS_CHANGED_BASIC_RATES;
+               ieee80211_check_rate_mask(link);
+       }
+
        if (params->use_cts_prot >= 0) {
                link->conf->use_cts_prot = params->use_cts_prot;
                changed |= BSS_CHANGED_ERP_CTS_PROT;
@@ -2632,16 +2643,6 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
                changed |= BSS_CHANGED_ERP_SLOT;
        }
 
-       if (params->basic_rates) {
-               ieee80211_parse_bitrates(link->conf->chandef.width,
-                                        wiphy->bands[sband->band],
-                                        params->basic_rates,
-                                        params->basic_rates_len,
-                                        &link->conf->basic_rates);
-               changed |= BSS_CHANGED_BASIC_RATES;
-               ieee80211_check_rate_mask(link);
-       }
-
        if (params->ap_isolate >= 0) {
                if (params->ap_isolate)
                        sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS;
index 56628b52d1001a967eb2e504bdbeac0c4cd17acc..5c8dea49626c31a008f9243498564fbcd3cebb1c 100644 (file)
@@ -997,9 +997,13 @@ out:
        return ret;
 }
 
+static struct lock_class_key mptcp_slock_keys[2];
+static struct lock_class_key mptcp_keys[2];
+
 static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
                                            struct mptcp_pm_addr_entry *entry)
 {
+       bool is_ipv6 = sk->sk_family == AF_INET6;
        int addrlen = sizeof(struct sockaddr_in);
        struct sockaddr_storage addr;
        struct socket *ssock;
@@ -1016,6 +1020,18 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
        if (!newsk)
                return -EINVAL;
 
+       /* The subflow socket lock is acquired nested inside the msk one
+        * in several places, even by the TCP stack, and this msk is a kernel
+        * socket: lockdep complains. Instead of propagating the _nested
+        * modifiers in several places, re-init the lock class of the msk
+        * socket to an mptcp-specific one.
+        */
+       sock_lock_init_class_and_name(newsk,
+                                     is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET",
+                                     &mptcp_slock_keys[is_ipv6],
+                                     is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET",
+                                     &mptcp_keys[is_ipv6]);
+
        lock_sock(newsk);
        ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
        release_sock(newsk);
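
The per-family key arrays deserve a note: static storage gives lockdep one class per address family for the module's lifetime, and indexing by is_ipv6 selects the matching key. A standalone sketch of the indexing idea, with lock_class_key reduced to a stand-in:

#include <stdbool.h>
#include <stdio.h>

struct lock_class_key { const char *name; };	/* stand-in, not lockdep's */

static struct lock_class_key mptcp_keys[2] = {
	{ "msk_lock-AF_INET" },
	{ "msk_lock-AF_INET6" },
};

static void reinit_lock_class(bool is_ipv6)
{
	/* bool indexes as 0 or 1: one static class per family */
	printf("using class %s\n", mptcp_keys[is_ipv6].name);
}

int main(void)
{
	reinit_lock_class(false);
	reinit_lock_class(true);
	return 0;
}
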
index 3ad9c46202fc63a5b3a870bf2ba994a8d9148264..60b23b2716c4083349f3f68655d243398bc31776 100644 (file)
@@ -825,7 +825,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
        if (sk->sk_socket && !ssk->sk_socket)
                mptcp_sock_graft(ssk, sk->sk_socket);
 
-       mptcp_propagate_sndbuf((struct sock *)msk, ssk);
        mptcp_sockopt_sync_locked(msk, ssk);
        return true;
 }
@@ -2343,7 +2342,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
                goto out;
        }
 
-       sock_orphan(ssk);
        subflow->disposable = 1;
 
        /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2351,15 +2349,25 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
         * reference owned by msk;
         */
        if (!inet_csk(ssk)->icsk_ulp_ops) {
+               WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
                kfree_rcu(subflow, rcu);
+       } else if (msk->in_accept_queue && msk->first == ssk) {
+               /* if the first subflow moved to a close state, e.g. due to
+               /* if the first subflow moved to a close state, e.g. due to an
+                * incoming reset, and we reach here before inet_child_forget(),
+                * the TCP stack could later try to close it via
+                * inet_csk_listen_stop(), or deliver it to user space via
+                * accept().
+                * We can't delete the subflow - or risk a double free - nor let
+                * the msk survive - or it will be leaked in the non-accept
+                * scenario: fall back and let TCP cope with the subflow cleanup.
+                */
+               mptcp_subflow_drop_ctx(ssk);
        } else {
                /* otherwise tcp will dispose of the ssk and subflow ctx */
-               if (ssk->sk_state == TCP_LISTEN) {
-                       tcp_set_state(ssk, TCP_CLOSE);
-                       mptcp_subflow_queue_clean(sk, ssk);
-                       inet_csk_listen_stop(ssk);
+               if (ssk->sk_state == TCP_LISTEN)
                        mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
-               }
+
                __tcp_close(ssk, 0);
 
                /* close acquired an extra ref */
@@ -2399,9 +2407,10 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
        return 0;
 }
 
-static void __mptcp_close_subflow(struct mptcp_sock *msk)
+static void __mptcp_close_subflow(struct sock *sk)
 {
        struct mptcp_subflow_context *subflow, *tmp;
+       struct mptcp_sock *msk = mptcp_sk(sk);
 
        might_sleep();
 
@@ -2415,7 +2424,15 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk)
                if (!skb_queue_empty_lockless(&ssk->sk_receive_queue))
                        continue;
 
-               mptcp_close_ssk((struct sock *)msk, ssk, subflow);
+               mptcp_close_ssk(sk, ssk, subflow);
+       }
+
+       /* if the MPC subflow has been closed before the msk is accepted,
+        * the msk will never be accepted: close it now
+        */
+       if (!msk->first && msk->in_accept_queue) {
+               sock_set_flag(sk, SOCK_DEAD);
+               inet_sk_state_store(sk, TCP_CLOSE);
        }
 }
 
@@ -2624,6 +2641,9 @@ static void mptcp_worker(struct work_struct *work)
        __mptcp_check_send_data_fin(sk);
        mptcp_check_data_fin(sk);
 
+       if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+               __mptcp_close_subflow(sk);
+
        /* There is no point in keeping around an orphaned sk timedout or
         * closed, but we need the msk around to reply to incoming DATA_FIN,
         * even if it is orphaned and in FIN_WAIT2 state
@@ -2639,9 +2659,6 @@ static void mptcp_worker(struct work_struct *work)
                }
        }
 
-       if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
-               __mptcp_close_subflow(msk);
-
        if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
                __mptcp_retrans(sk);
 
@@ -3079,6 +3096,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
        msk->local_key = subflow_req->local_key;
        msk->token = subflow_req->token;
        msk->subflow = NULL;
+       msk->in_accept_queue = 1;
        WRITE_ONCE(msk->fully_established, false);
        if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
                WRITE_ONCE(msk->csum_enabled, true);
@@ -3096,8 +3114,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
        security_inet_csk_clone(nsk, req);
        bh_unlock_sock(nsk);
 
-       /* keep a single reference */
-       __sock_put(nsk);
+       /* note: the newly allocated socket refcount is 2 now */
        return nsk;
 }
 
@@ -3153,8 +3170,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
                        goto out;
                }
 
-               /* acquire the 2nd reference for the owning socket */
-               sock_hold(new_mptcp_sock);
                newsk = new_mptcp_sock;
                MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
        } else {
@@ -3705,25 +3720,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
                struct sock *newsk = newsock->sk;
 
                set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+               msk->in_accept_queue = 0;
 
                lock_sock(newsk);
 
-               /* PM/worker can now acquire the first subflow socket
-                * lock without racing with listener queue cleanup,
-                * we can notify it, if needed.
-                *
-                * Even if remote has reset the initial subflow by now
-                * the refcnt is still at least one.
-                */
-               subflow = mptcp_subflow_ctx(msk->first);
-               list_add(&subflow->node, &msk->conn_list);
-               sock_hold(msk->first);
-               if (mptcp_is_fully_established(newsk))
-                       mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL);
-
-               mptcp_rcv_space_init(msk, msk->first);
-               mptcp_propagate_sndbuf(newsk, msk->first);
-
                /* set ssk->sk_socket of accept()ed flows to mptcp socket.
                 * This is needed so NOSPACE flag can be set from tcp stack.
                 */
index 61fd8eabfca2028680e04558b4baca9f48bbaaaa..339a6f0729898422cfd7e7ee8c014fd09fecbeeb 100644 (file)
@@ -295,7 +295,8 @@ struct mptcp_sock {
        u8              recvmsg_inq:1,
                        cork:1,
                        nodelay:1,
-                       fastopening:1;
+                       fastopening:1,
+                       in_accept_queue:1;
        int             connect_flags;
        struct work_struct work;
        struct sk_buff  *ooo_last_skb;
@@ -628,7 +629,6 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
                     struct mptcp_subflow_context *subflow);
 void __mptcp_subflow_send_ack(struct sock *ssk);
 void mptcp_subflow_reset(struct sock *ssk);
-void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
 void mptcp_sock_graft(struct sock *sk, struct socket *parent);
 struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
 bool __mptcp_close(struct sock *sk, long timeout);
@@ -666,6 +666,8 @@ void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);
 
 bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);
 
+void mptcp_subflow_drop_ctx(struct sock *ssk);
+
 static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
                                              struct mptcp_subflow_context *ctx)
 {
index 4ae1a7304cf0da1840a1d236969549d18cf8ff97..a0041360ee9d95b0cf85845e98c0f157a578e59d 100644 (file)
@@ -397,10 +397,15 @@ void mptcp_subflow_reset(struct sock *ssk)
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        struct sock *sk = subflow->conn;
 
+       /* mptcp_mp_fail_no_response() can reach here on an already closed
+        * socket
+        */
+       if (ssk->sk_state == TCP_CLOSE)
+               return;
+
        /* must hold: tcp_done() could drop last reference on parent */
        sock_hold(sk);
 
-       tcp_set_state(ssk, TCP_CLOSE);
        tcp_send_active_reset(ssk, GFP_ATOMIC);
        tcp_done(ssk);
        if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
@@ -622,7 +627,7 @@ static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init
 static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init;
 static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init;
 static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init;
-static struct proto tcpv6_prot_override;
+static struct proto tcpv6_prot_override __ro_after_init;
 
 static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
@@ -693,9 +698,10 @@ static bool subflow_hmac_valid(const struct request_sock *req,
 
 static void mptcp_force_close(struct sock *sk)
 {
-       /* the msk is not yet exposed to user-space */
+       /* the msk is not yet exposed to user-space, and refcount is 2 */
        inet_sk_state_store(sk, TCP_CLOSE);
        sk_common_release(sk);
+       sock_put(sk);
 }
 
 static void subflow_ulp_fallback(struct sock *sk,
@@ -711,7 +717,7 @@ static void subflow_ulp_fallback(struct sock *sk,
        mptcp_subflow_ops_undo_override(sk);
 }
 
-static void subflow_drop_ctx(struct sock *ssk)
+void mptcp_subflow_drop_ctx(struct sock *ssk)
 {
        struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
 
@@ -750,6 +756,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
        struct mptcp_options_received mp_opt;
        bool fallback, fallback_is_fatal;
        struct sock *new_msk = NULL;
+       struct mptcp_sock *owner;
        struct sock *child;
 
        pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
@@ -816,7 +823,7 @@ create_child:
 
                        if (new_msk)
                                mptcp_copy_inaddrs(new_msk, child);
-                       subflow_drop_ctx(child);
+                       mptcp_subflow_drop_ctx(child);
                        goto out;
                }
 
@@ -824,6 +831,8 @@ create_child:
                ctx->setsockopt_seq = listener->setsockopt_seq;
 
                if (ctx->mp_capable) {
+                       owner = mptcp_sk(new_msk);
+
                        /* this can't race with mptcp_close(), as the msk is
                         * not yet exposed to user-space
                         */
@@ -832,14 +841,14 @@ create_child:
                        /* record the newly created socket as the first msk
                         * subflow, but don't link it yet into conn_list
                         */
-                       WRITE_ONCE(mptcp_sk(new_msk)->first, child);
+                       WRITE_ONCE(owner->first, child);
 
                        /* new mpc subflow takes ownership of the newly
                         * created mptcp socket
                         */
                        mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq;
-                       mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
-                       mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
+                       mptcp_pm_new_connection(owner, child, 1);
+                       mptcp_token_accept(subflow_req, owner);
                        ctx->conn = new_msk;
                        new_msk = NULL;
 
@@ -847,15 +856,21 @@ create_child:
                         * uses the correct data
                         */
                        mptcp_copy_inaddrs(ctx->conn, child);
+                       mptcp_propagate_sndbuf(ctx->conn, child);
+
+                       mptcp_rcv_space_init(owner, child);
+                       list_add(&ctx->node, &owner->conn_list);
+                       sock_hold(child);
 
                        /* with OoO packets we can reach here without ingress
                         * mpc option
                         */
-                       if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK)
+                       if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
                                mptcp_subflow_fully_established(ctx, &mp_opt);
+                               mptcp_pm_fully_established(owner, child, GFP_ATOMIC);
+                               ctx->pm_notified = 1;
+                       }
                } else if (ctx->mp_join) {
-                       struct mptcp_sock *owner;
-
                        owner = subflow_req->msk;
                        if (!owner) {
                                subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
@@ -899,7 +914,7 @@ out:
        return child;
 
 dispose_child:
-       subflow_drop_ctx(child);
+       mptcp_subflow_drop_ctx(child);
        tcp_rsk(req)->drop_req = true;
        inet_csk_prepare_for_destroy_sock(child);
        tcp_done(child);
@@ -910,7 +925,7 @@ dispose_child:
 }
 
 static struct inet_connection_sock_af_ops subflow_specific __ro_after_init;
-static struct proto tcp_prot_override;
+static struct proto tcp_prot_override __ro_after_init;
 
 enum mapping_status {
        MAPPING_OK,
@@ -1432,6 +1447,13 @@ static void subflow_error_report(struct sock *ssk)
 {
        struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
 
+       /* bail early if this is a no-op, so that we avoid introducing a
+        * problematic lockdep dependency between TCP accept queue lock
+        * and msk socket spinlock
+        */
+       if (!sk->sk_socket)
+               return;
+
        mptcp_data_lock(sk);
        if (!sock_owned_by_user(sk))
                __mptcp_error_report(sk);
@@ -1803,79 +1825,6 @@ static void subflow_state_change(struct sock *sk)
        }
 }
 
-void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
-{
-       struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
-       struct mptcp_sock *msk, *next, *head = NULL;
-       struct request_sock *req;
-
-       /* build a list of all unaccepted mptcp sockets */
-       spin_lock_bh(&queue->rskq_lock);
-       for (req = queue->rskq_accept_head; req; req = req->dl_next) {
-               struct mptcp_subflow_context *subflow;
-               struct sock *ssk = req->sk;
-               struct mptcp_sock *msk;
-
-               if (!sk_is_mptcp(ssk))
-                       continue;
-
-               subflow = mptcp_subflow_ctx(ssk);
-               if (!subflow || !subflow->conn)
-                       continue;
-
-               /* skip if already in list */
-               msk = mptcp_sk(subflow->conn);
-               if (msk->dl_next || msk == head)
-                       continue;
-
-               msk->dl_next = head;
-               head = msk;
-       }
-       spin_unlock_bh(&queue->rskq_lock);
-       if (!head)
-               return;
-
-       /* can't acquire the msk socket lock under the subflow one,
-        * or will cause ABBA deadlock
-        */
-       release_sock(listener_ssk);
-
-       for (msk = head; msk; msk = next) {
-               struct sock *sk = (struct sock *)msk;
-               bool do_cancel_work;
-
-               sock_hold(sk);
-               lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
-               next = msk->dl_next;
-               msk->first = NULL;
-               msk->dl_next = NULL;
-
-               do_cancel_work = __mptcp_close(sk, 0);
-               release_sock(sk);
-               if (do_cancel_work) {
-                       /* lockdep will report a false positive ABBA deadlock
-                        * between cancel_work_sync and the listener socket.
-                        * The involved locks belong to different sockets WRT
-                        * the existing AB chain.
-                        * Using a per socket key is problematic as key
-                        * deregistration requires process context and must be
-                        * performed at socket disposal time, in atomic
-                        * context.
-                        * Just tell lockdep to consider the listener socket
-                        * released here.
-                        */
-                       mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
-                       mptcp_cancel_work(sk);
-                       mutex_acquire(&listener_sk->sk_lock.dep_map,
-                                     SINGLE_DEPTH_NESTING, 0, _RET_IP_);
-               }
-               sock_put(sk);
-       }
-
-       /* we are still under the listener msk socket lock */
-       lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
-}
-
 static int subflow_ulp_init(struct sock *sk)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1932,6 +1881,13 @@ static void subflow_ulp_release(struct sock *ssk)
                 * when the subflow is still unaccepted
                 */
                release = ctx->disposable || list_empty(&ctx->node);
+
+               /* inet_child_forget() does not call sk_state_change();
+                * explicitly trigger the socket close machinery
+                */
+               if (!release && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW,
+                                                 &mptcp_sk(sk)->flags))
+                       mptcp_schedule_work(sk);
                sock_put(sk);
        }
 
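
The added trigger uses the usual flag-then-schedule idiom: test_and_set_bit() makes the "close subflow" request idempotent, so however many subflows are released, at most one worker run is queued per pending request. A standalone, single-threaded sketch of the idiom (the kernel's version is atomic):

#include <stdio.h>

#define WORK_CLOSE_SUBFLOW 0x1UL

static unsigned long flags;

static int test_and_set(unsigned long bit)
{
	int was_set = !!(flags & bit);

	flags |= bit;
	return was_set;
}

static void schedule_worker(void) { printf("worker scheduled\n"); }

static void request_close(void)
{
	if (!test_and_set(WORK_CLOSE_SUBFLOW))
		schedule_worker();	/* only the first request schedules */
}

int main(void)
{
	request_close();
	request_close();	/* no-op: bit already set */
	return 0;
}
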
index a4cccdfdc00a242ba2334f06404e6cd663bd91de..ff6dd86bdc9f3f504426f0d3f91424f14d77b689 100644 (file)
@@ -2657,16 +2657,14 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
        struct sock *sk = sock->sk;
        struct smc_sock *smc;
-       int rc = -EPIPE;
+       int rc;
 
        smc = smc_sk(sk);
        lock_sock(sk);
-       if ((sk->sk_state != SMC_ACTIVE) &&
-           (sk->sk_state != SMC_APPCLOSEWAIT1) &&
-           (sk->sk_state != SMC_INIT))
-               goto out;
 
+       /* SMC does not support connect with fastopen */
        if (msg->msg_flags & MSG_FASTOPEN) {
+               /* not connected yet, fallback */
                if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
                        rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
                        if (rc)
@@ -2675,6 +2673,11 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
                        rc = -EINVAL;
                        goto out;
                }
+       } else if ((sk->sk_state != SMC_ACTIVE) &&
+                  (sk->sk_state != SMC_APPCLOSEWAIT1) &&
+                  (sk->sk_state != SMC_INIT)) {
+               rc = -EPIPE;
+               goto out;
        }
 
        if (smc->use_fallback) {
index 6bae8ce7059ee97a8e8cee3fa5726bf6ae0b6e86..9c92c0e6c4da8c54f4878172f17f1d1be757f706 100644 (file)
@@ -450,7 +450,9 @@ static struct file_system_type sock_fs_type = {
  *
  *     Returns the &file bound with @sock, implicitly storing it
  *     in sock->file. If dname is %NULL, sets to "".
- *     On failure the return is a ERR pointer (see linux/err.h).
+ *
+ *     On failure @sock is released, and an ERR pointer is returned.
+ *
  *     This function uses GFP_KERNEL internally.
  */
 
@@ -1638,7 +1640,6 @@ static struct socket *__sys_socket_create(int family, int type, int protocol)
 struct file *__sys_socket_file(int family, int type, int protocol)
 {
        struct socket *sock;
-       struct file *file;
        int flags;
 
        sock = __sys_socket_create(family, type, protocol);
@@ -1649,11 +1650,7 @@ struct file *__sys_socket_file(int family, int type, int protocol)
        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
 
-       file = sock_alloc_file(sock, flags, NULL);
-       if (IS_ERR(file))
-               sock_release(sock);
-
-       return file;
+       return sock_alloc_file(sock, flags, NULL);
 }
 
 int __sys_socket(int family, int type, int protocol)
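
With sock_alloc_file() now releasing the socket itself on failure, a caller-side sock_release() would be a double free: the constructor consumes its argument on error. A standalone sketch of that ownership rule with stand-in types:

#include <stdio.h>
#include <stdlib.h>

struct sock_stub { int fd; };

/* consumes @s on failure, like sock_alloc_file() after this change */
static struct sock_stub *wrap_in_file(struct sock_stub *s, int fail)
{
	if (fail) {
		free(s);	/* constructor releases its argument ... */
		return NULL;
	}
	return s;
}

int main(void)
{
	struct sock_stub *s = malloc(sizeof(*s));
	struct sock_stub *f = wrap_in_file(s, 1);

	if (!f)
		return 1;	/* ... so the caller must not free(s) again */
	free(f);
	return 0;
}
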
index c287ce15c419f432d97609158d3f2ec49a7709a2..ce0541e32fc9827bbc2eb355c5b71b82734da48e 100644 (file)
@@ -49,7 +49,8 @@ static void kdf_case(struct kunit *test)
 
        /* Arrange */
        gk5e = gss_krb5_lookup_enctype(param->enctype);
-       KUNIT_ASSERT_NOT_NULL(test, gk5e);
+       if (!gk5e)
+               kunit_skip(test, "Encryption type is not available");
 
        derivedkey.data = kunit_kzalloc(test, param->expected_result->len,
                                        GFP_KERNEL);
@@ -83,7 +84,8 @@ static void checksum_case(struct kunit *test)
 
        /* Arrange */
        gk5e = gss_krb5_lookup_enctype(param->enctype);
-       KUNIT_ASSERT_NOT_NULL(test, gk5e);
+       if (!gk5e)
+               kunit_skip(test, "Encryption type is not available");
 
        Kc.len = gk5e->Kc_length;
        Kc.data = kunit_kzalloc(test, Kc.len, GFP_KERNEL);
@@ -517,6 +519,7 @@ static struct kunit_case rfc3961_test_cases[] = {
                .run_case               = kdf_case,
                .generate_params        = rfc3961_kdf_gen_params,
        },
+       {}
 };
 
 static struct kunit_suite rfc3961_suite = {
@@ -725,7 +728,8 @@ static void rfc3962_encrypt_case(struct kunit *test)
 
        /* Arrange */
        gk5e = gss_krb5_lookup_enctype(param->enctype);
-       KUNIT_ASSERT_NOT_NULL(test, gk5e);
+       if (!gk5e)
+               kunit_skip(test, "Encryption type is not available");
 
        cbc_tfm = crypto_alloc_sync_skcipher(gk5e->aux_cipher, 0, 0);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cbc_tfm);
@@ -777,6 +781,7 @@ static struct kunit_case rfc3962_test_cases[] = {
                .run_case               = rfc3962_encrypt_case,
                .generate_params        = rfc3962_encrypt_gen_params,
        },
+       {}
 };
 
 static struct kunit_suite rfc3962_suite = {
@@ -1319,7 +1324,8 @@ static void rfc6803_encrypt_case(struct kunit *test)
 
        /* Arrange */
        gk5e = gss_krb5_lookup_enctype(param->enctype);
-       KUNIT_ASSERT_NOT_NULL(test, gk5e);
+       if (!gk5e)
+               kunit_skip(test, "Encryption type is not available");
 
        usage.data[3] = param->constant;
 
@@ -1411,6 +1417,7 @@ static struct kunit_case rfc6803_test_cases[] = {
                .run_case               = rfc6803_encrypt_case,
                .generate_params        = rfc6803_encrypt_gen_params,
        },
+       {}
 };
 
 static struct kunit_suite rfc6803_suite = {
@@ -1810,7 +1817,8 @@ static void rfc8009_encrypt_case(struct kunit *test)
 
        /* Arrange */
        gk5e = gss_krb5_lookup_enctype(param->enctype);
-       KUNIT_ASSERT_NOT_NULL(test, gk5e);
+       if (!gk5e)
+               kunit_skip(test, "Encryption type is not available");
 
        *(__be32 *)usage.data = cpu_to_be32(2);
 
@@ -1902,6 +1910,7 @@ static struct kunit_case rfc8009_test_cases[] = {
                .run_case               = rfc8009_encrypt_case,
                .generate_params        = rfc8009_encrypt_gen_params,
        },
+       {}
 };
 
 static struct kunit_suite rfc8009_suite = {
@@ -1975,7 +1984,8 @@ static void encrypt_selftest_case(struct kunit *test)
 
        /* Arrange */
        gk5e = gss_krb5_lookup_enctype(param->enctype);
-       KUNIT_ASSERT_NOT_NULL(test, gk5e);
+       if (!gk5e)
+               kunit_skip(test, "Encryption type is not available");
 
        cbc_tfm = crypto_alloc_sync_skcipher(gk5e->aux_cipher, 0, 0);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cbc_tfm);
@@ -2023,6 +2033,7 @@ static struct kunit_case encryption_test_cases[] = {
                .run_case               = encrypt_selftest_case,
                .generate_params        = encrypt_selftest_gen_params,
        },
+       {}
 };
 
 static struct kunit_suite encryption_test_suite = {
index 347122c3575eaae597405369e2e9d8324d6ad240..0b0f18ecce4470d6fd21c084a3ea49e04dcbb9bd 100644 (file)
@@ -2105,7 +2105,8 @@ out:
 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
 
 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
-static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
+static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
+                    struct scm_cookie *scm, bool fds_sent)
 {
        struct unix_sock *ousk = unix_sk(other);
        struct sk_buff *skb;
@@ -2116,6 +2117,11 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
        if (!skb)
                return err;
 
+       err = unix_scm_to_skb(scm, skb, !fds_sent);
+       if (err < 0) {
+               kfree_skb(skb);
+               return err;
+       }
        skb_put(skb, 1);
        err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
 
@@ -2243,7 +2249,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 
 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
        if (msg->msg_flags & MSG_OOB) {
-               err = queue_oob(sock, msg, other);
+               err = queue_oob(sock, msg, other, &scm, fds_sent);
                if (err)
                        goto out_err;
                sent++;
index 112b4bb009c80f648748256b322d9a5e7a7e8514..4f63059efd813ba6dd257a7c7ebebacb2f102cd1 100644 (file)
@@ -462,6 +462,11 @@ nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] = {
        [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 },
 };
 
+static struct netlink_range_validation nl80211_punct_bitmap_range = {
+       .min = 0,
+       .max = 0xffff,
+};
+
 static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
        [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
        [NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
@@ -805,7 +810,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
        [NL80211_ATTR_MLD_ADDR] = NLA_POLICY_EXACT_LEN(ETH_ALEN),
        [NL80211_ATTR_MLO_SUPPORT] = { .type = NLA_FLAG },
        [NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT },
-       [NL80211_ATTR_PUNCT_BITMAP] = NLA_POLICY_RANGE(NLA_U8, 0, 0xffff),
+       [NL80211_ATTR_PUNCT_BITMAP] =
+               NLA_POLICY_FULL_RANGE(NLA_U32, &nl80211_punct_bitmap_range),
 };
 
 /* policy for the key attributes */
@@ -8901,7 +8907,7 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev,
                struct cfg80211_chan_def *chandef;
 
                chandef = wdev_chandef(wdev, link_id);
-               if (!chandef)
+               if (!chandef || !chandef->chan)
                        continue;
 
                /*
@@ -10793,8 +10799,7 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
 
 static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device *rdev,
                                              const u8 *ssid, int ssid_len,
-                                             struct nlattr **attrs,
-                                             const u8 **bssid_out)
+                                             struct nlattr **attrs)
 {
        struct ieee80211_channel *chan;
        struct cfg80211_bss *bss;
@@ -10821,7 +10826,6 @@ static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device
        if (!bss)
                return ERR_PTR(-ENOENT);
 
-       *bssid_out = bssid;
        return bss;
 }
 
@@ -10831,7 +10835,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
        struct net_device *dev = info->user_ptr[1];
        struct cfg80211_assoc_request req = {};
        struct nlattr **attrs = NULL;
-       const u8 *bssid, *ssid;
+       const u8 *ap_addr, *ssid;
        unsigned int link_id;
        int err, ssid_len;
 
@@ -10968,6 +10972,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
                        return -EINVAL;
 
                req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
+               ap_addr = req.ap_mld_addr;
 
                attrs = kzalloc(attrsize, GFP_KERNEL);
                if (!attrs)
@@ -10993,8 +10998,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
                                goto free;
                        }
                        req.links[link_id].bss =
-                               nl80211_assoc_bss(rdev, ssid, ssid_len, attrs,
-                                                 &bssid);
+                               nl80211_assoc_bss(rdev, ssid, ssid_len, attrs);
                        if (IS_ERR(req.links[link_id].bss)) {
                                err = PTR_ERR(req.links[link_id].bss);
                                req.links[link_id].bss = NULL;
@@ -11045,10 +11049,10 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
                if (req.link_id >= 0)
                        return -EINVAL;
 
-               req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs,
-                                           &bssid);
+               req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs);
                if (IS_ERR(req.bss))
                        return PTR_ERR(req.bss);
+               ap_addr = req.bss->bssid;
        }
 
        err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
@@ -11061,7 +11065,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
                        dev->ieee80211_ptr->conn_owner_nlportid =
                                info->snd_portid;
                        memcpy(dev->ieee80211_ptr->disconnect_bssid,
-                              bssid, ETH_ALEN);
+                              ap_addr, ETH_ALEN);
                }
 
                wdev_unlock(dev->ieee80211_ptr);
index be4963bf720304da3d9d72128983555ed44c4c62..552d9a85925b9945f6a9712b236e564292f58f50 100644 (file)
@@ -6,6 +6,7 @@
 --opaque-type local_apic
 
 # Packed type cannot transitively contain a `#[repr(align)]` type.
+--opaque-type alt_instr
 --opaque-type x86_msi_data
 --opaque-type x86_msi_addr_lo
 
index fd346f58ddbaf10889d3de29fd7399cd90a94258..ef44c614c6d900a5c89f9bf51c0684341f640c95 100644 (file)
@@ -55,6 +55,10 @@ static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs)
        pr_info("<%s> p->addr, 0x%p, ip = 0x%lx, flags = 0x%lx\n",
                p->symbol_name, p->addr, regs->psw.addr, regs->flags);
 #endif
+#ifdef CONFIG_LOONGARCH
+       pr_info("<%s> p->addr = 0x%p, era = 0x%lx, estat = 0x%lx\n",
+               p->symbol_name, p->addr, regs->csr_era, regs->csr_estat);
+#endif
 
        /* A dump_stack() here will give a stack backtrace */
        return 0;
@@ -92,6 +96,10 @@ static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs,
        pr_info("<%s> p->addr, 0x%p, flags = 0x%lx\n",
                p->symbol_name, p->addr, regs->flags);
 #endif
+#ifdef CONFIG_LOONGARCH
+       pr_info("<%s> p->addr = 0x%p, estat = 0x%lx\n",
+               p->symbol_name, p->addr, regs->csr_estat);
+#endif
 }
 
 static int __init kprobe_init(void)
index b9e94c5e7097090317275d2a543d06debbce36bc..c186110ffa209dd8235c3b666929566f8c1f81e3 100644 (file)
@@ -1,5 +1,14 @@
 # SPDX-License-Identifier: GPL-2.0
+
+ifdef CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX
+# Safe for compiler to generate memintrinsic calls in uninstrumented files.
+CFLAGS_KASAN_NOSANITIZE :=
+else
+# Don't let the compiler generate memintrinsic calls in uninstrumented files,
+# because the mem*() functions they would call are instrumented.
 CFLAGS_KASAN_NOSANITIZE := -fno-builtin
+endif
+
 KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET)
 
 cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1)))
@@ -38,6 +47,11 @@ endif
 
 CFLAGS_KASAN += $(call cc-param,asan-stack=$(stack_enable))
 
+# Instrument memcpy/memset/memmove calls by using instrumented __asan_mem*()
+# instead. With compilers that don't support this option, compiler-inserted
+# memintrinsics won't be checked by KASAN on GENERIC_ENTRY architectures.
+CFLAGS_KASAN += $(call cc-param,asan-kernel-mem-intrinsic-prefix=1)
+
 endif # CONFIG_KASAN_GENERIC
 
 ifdef CONFIG_KASAN_SW_TAGS
@@ -54,6 +68,9 @@ CFLAGS_KASAN := -fsanitize=kernel-hwaddress \
                $(call cc-param,hwasan-inline-all-checks=0) \
                $(instrumentation_flags)
 
+# Instrument memcpy/memset/memmove calls by using instrumented __hwasan_mem*().
+CFLAGS_KASAN += $(call cc-param,hwasan-kernel-mem-intrinsic-prefix=1)
+
 endif # CONFIG_KASAN_SW_TAGS
 
 export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE
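
The new cc-params ask the compiler to emit prefixed memintrinsics (__asan_mem*() / __hwasan_mem*()) in instrumented files, so the runtime can check the access before performing it. A standalone sketch of what such a prefixed call does conceptually; the shadow check here is a stand-in, not the real KASAN runtime:

#include <stdio.h>
#include <string.h>

static void check_shadow(const void *p, size_t n)
{
	(void)p;
	printf("checking %zu bytes before the access\n", n);
}

/* what the compiler emits in place of a plain memset() call */
static void *__asan_memset(void *s, int c, size_t n)
{
	check_shadow(s, n);	/* validate the range ... */
	return memset(s, c, n);	/* ... then do the real work */
}

int main(void)
{
	char buf[16];

	__asan_memset(buf, 0, sizeof(buf));
	return 0;
}
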
index 2401c86fcf53311e8d7d92de0e815b5cb12f6d0b..0573c92e841d3209b9034a0673e46a2e750510d5 100755 (executable)
@@ -12,8 +12,6 @@ get_c_compiler_info()
        cat <<- EOF | "$@" -E -P -x c - 2>/dev/null
        #if defined(__clang__)
        Clang   __clang_major__  __clang_minor__  __clang_patchlevel__
-       #elif defined(__INTEL_COMPILER)
-       ICC     __INTEL_COMPILER  __INTEL_COMPILER_UPDATE
        #elif defined(__GNUC__)
        GCC     __GNUC__  __GNUC_MINOR__  __GNUC_PATCHLEVEL__
        #else
index 2956fce8fa4fc82fd941e7785e742d526ad142c2..e52cb43fede60fe1be7eea170b63a6c0d54b6e0b 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 # SPDX-License-Identifier: GPL-2.0
 # Linux kernel coccicheck
 #
@@ -18,7 +18,7 @@ fi
 SPATCH_VERSION=$($SPATCH --version | head -1 | awk '{print $3}')
 
 USE_JOBS="no"
-$SPATCH --help | grep "\-\-jobs" > /dev/null && USE_JOBS="yes"
+$SPATCH --help | grep -e "--jobs" > /dev/null && USE_JOBS="yes"
 
 # The verbosity may be set by the environmental parameter V=
 # as for example with 'make V=1 coccicheck'
index e63d52408b86fe38d24c07e0402776c0086b0b7e..bbe5b293293352fc0e47c159a4a34fa05b6edc61 100644 (file)
@@ -55,7 +55,7 @@ identifier fname6 =~ ".*call_rcu.*";
 p1 << r1.p1;
 p2 << r1.p2;
 @@
-msg = "atomic_dec_and_test variation before object free at line %s."
+msg = "WARNING: atomic_dec_and_test variation before object free at line %s."
 coccilib.report.print_report(p1[0], msg % (p2[0].line))
 
 @r4 exists@
@@ -88,7 +88,7 @@ fname@p2(y, ...);
 p1 << r4.p1;
 p2 << r4.p2;
 @@
-msg = "atomic_dec_and_test variation before object free at line %s."
+msg = "WARNING: atomic_dec_and_test variation before object free at line %s."
 coccilib.report.print_report(p1[0], msg % (p2[0].line))
 
 @r2 exists@
@@ -107,7 +107,7 @@ atomic64_add_unless(&(a)->x,-1,1)@p1
 @script:python depends on report@
 p1 << r2.p1;
 @@
-msg = "atomic_add_unless"
+msg = "WARNING: atomic_add_unless"
 coccilib.report.print_report(p1[0], msg)
 
 @r3 exists@
@@ -126,5 +126,5 @@ x = atomic64_add_return@p1(-1, ...);
 @script:python depends on report@
 p1 << r3.p1;
 @@
-msg = "x = atomic_add_return(-1, ...)"
+msg = "WARNING: x = atomic_add_return(-1, ...)"
 coccilib.report.print_report(p1[0], msg)
index a814f1efb39d5f15e999436d132c8692a58b7bc3..20d483ec6f5fca1793e26082af4fc3836a317de4 100755 (executable)
@@ -19,10 +19,6 @@ binutils)
 gcc)
        echo 5.1.0
        ;;
-icc)
-       # temporary
-       echo 16.0.3
-       ;;
 llvm)
        if [ "$SRCARCH" = s390 ]; then
                echo 15.0.0
index 5e9949832af62951990e37b80f667d25d12c0faa..cf2ceec40b28ae17c5f573c36ed5aa4ffb8c2855 100644 (file)
@@ -304,6 +304,26 @@ fail:
 }
 EXPORT_SYMBOL_IF_KUNIT(aa_unpack_u64);
 
+static bool aa_unpack_cap_low(struct aa_ext *e, kernel_cap_t *data, const char *name)
+{
+       u32 val;
+
+       if (!aa_unpack_u32(e, &val, name))
+               return false;
+       data->val = val;
+       return true;
+}
+
+static bool aa_unpack_cap_high(struct aa_ext *e, kernel_cap_t *data, const char *name)
+{
+       u32 val;
+
+       if (!aa_unpack_u32(e, &val, name))
+               return false;
+       data->val = (u32)data->val | ((u64)val << 32);
+       return true;
+}
+
 VISIBLE_IF_KUNIT bool aa_unpack_array(struct aa_ext *e, const char *name, u16 *size)
 {
        void *pos = e->pos;
@@ -897,25 +917,25 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
                profile->path_flags = PATH_MEDIATE_DELETED;
 
        info = "failed to unpack profile capabilities";
-       if (!aa_unpack_u32(e, &(rules->caps.allow.cap[0]), NULL))
+       if (!aa_unpack_cap_low(e, &rules->caps.allow, NULL))
                goto fail;
-       if (!aa_unpack_u32(e, &(rules->caps.audit.cap[0]), NULL))
+       if (!aa_unpack_cap_low(e, &rules->caps.audit, NULL))
                goto fail;
-       if (!aa_unpack_u32(e, &(rules->caps.quiet.cap[0]), NULL))
+       if (!aa_unpack_cap_low(e, &rules->caps.quiet, NULL))
                goto fail;
-       if (!aa_unpack_u32(e, &tmpcap.cap[0], NULL))
+       if (!aa_unpack_cap_low(e, &tmpcap, NULL))
                goto fail;
 
        info = "failed to unpack upper profile capabilities";
        if (aa_unpack_nameX(e, AA_STRUCT, "caps64")) {
                /* optional upper half of 64 bit caps */
-               if (!aa_unpack_u32(e, &(rules->caps.allow.cap[1]), NULL))
+               if (!aa_unpack_cap_high(e, &rules->caps.allow, NULL))
                        goto fail;
-               if (!aa_unpack_u32(e, &(rules->caps.audit.cap[1]), NULL))
+               if (!aa_unpack_cap_high(e, &rules->caps.audit, NULL))
                        goto fail;
-               if (!aa_unpack_u32(e, &(rules->caps.quiet.cap[1]), NULL))
+               if (!aa_unpack_cap_high(e, &rules->caps.quiet, NULL))
                        goto fail;
-               if (!aa_unpack_u32(e, &(tmpcap.cap[1]), NULL))
+               if (!aa_unpack_cap_high(e, &tmpcap, NULL))
                        goto fail;
                if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL))
                        goto fail;
@@ -924,9 +944,9 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
        info = "failed to unpack extended profile capabilities";
        if (aa_unpack_nameX(e, AA_STRUCT, "capsx")) {
                /* optional extended caps mediation mask */
-               if (!aa_unpack_u32(e, &(rules->caps.extended.cap[0]), NULL))
+               if (!aa_unpack_cap_low(e, &rules->caps.extended, NULL))
                        goto fail;
-               if (!aa_unpack_u32(e, &(rules->caps.extended.cap[1]), NULL))
+               if (!aa_unpack_cap_high(e, &rules->caps.extended, NULL))
                        goto fail;
                if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL))
                        goto fail;
index aec62db5527105b83033370392ad7ce14772931c..5bb7d1e962772a3929baa2ef39dfa9dbf3b7475f 100644 (file)
@@ -589,7 +589,6 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
                                          bool *has_fcap)
 {
        struct cred *new = bprm->cred;
-       unsigned i;
        int ret = 0;
 
        if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
@@ -598,22 +597,17 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
        if (caps->magic_etc & VFS_CAP_REVISION_MASK)
                *has_fcap = true;
 
-       CAP_FOR_EACH_U32(i) {
-               __u32 permitted = caps->permitted.cap[i];
-               __u32 inheritable = caps->inheritable.cap[i];
-
-               /*
-                * pP' = (X & fP) | (pI & fI)
-                * The addition of pA' is handled later.
-                */
-               new->cap_permitted.cap[i] =
-                       (new->cap_bset.cap[i] & permitted) |
-                       (new->cap_inheritable.cap[i] & inheritable);
+       /*
+        * pP' = (X & fP) | (pI & fI)
+        * The addition of pA' is handled later.
+        */
+       new->cap_permitted.val =
+               (new->cap_bset.val & caps->permitted.val) |
+               (new->cap_inheritable.val & caps->inheritable.val);
 
-               if (permitted & ~new->cap_permitted.cap[i])
-                       /* insufficient to execute correctly */
-                       ret = -EPERM;
-       }
+       if (caps->permitted.val & ~new->cap_permitted.val)
+               /* insufficient to execute correctly */
+               ret = -EPERM;
 
        /*
         * For legacy apps, with no internal support for recognizing they
@@ -644,7 +638,6 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
 {
        struct inode *inode = d_backing_inode(dentry);
        __u32 magic_etc;
-       unsigned tocopy, i;
        int size;
        struct vfs_ns_cap_data data, *nscaps = &data;
        struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
@@ -677,17 +670,14 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
        case VFS_CAP_REVISION_1:
                if (size != XATTR_CAPS_SZ_1)
                        return -EINVAL;
-               tocopy = VFS_CAP_U32_1;
                break;
        case VFS_CAP_REVISION_2:
                if (size != XATTR_CAPS_SZ_2)
                        return -EINVAL;
-               tocopy = VFS_CAP_U32_2;
                break;
        case VFS_CAP_REVISION_3:
                if (size != XATTR_CAPS_SZ_3)
                        return -EINVAL;
-               tocopy = VFS_CAP_U32_3;
                rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
                break;
 
@@ -705,15 +695,20 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
        if (!rootid_owns_currentns(rootvfsuid))
                return -ENODATA;
 
-       CAP_FOR_EACH_U32(i) {
-               if (i >= tocopy)
-                       break;
-               cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted);
-               cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable);
+       cpu_caps->permitted.val = le32_to_cpu(caps->data[0].permitted);
+       cpu_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable);
+
+       /*
+        * Rev1 had just a single 32-bit word, later expanded
+        * to a second one for the high bits
+        */
+       if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) {
+               cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32;
+               cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32;
        }
 
-       cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
-       cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+       cpu_caps->permitted.val &= CAP_VALID_MASK;
+       cpu_caps->inheritable.val &= CAP_VALID_MASK;
 
        cpu_caps->rootid = vfsuid_into_kuid(rootvfsuid);
 
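
The xattr still stores capabilities as little-endian 32-bit words; the conversion above folds them into the new single-u64 representation, taking only the low word for v1 xattrs and masking to the valid capability bits. A standalone sketch of the combine, with an illustrative mask:

#include <stdint.h>
#include <stdio.h>

/* illustrative: CAP_LAST_CAP + 1 = 41 capability bits */
#define CAP_VALID_MASK ((UINT64_C(1) << 41) - 1)

static uint64_t combine_caps(uint32_t lo, uint32_t hi, int rev1)
{
	uint64_t val = lo;

	if (!rev1)		/* v1 xattrs carry only the low word */
		val |= (uint64_t)hi << 32;
	return val & CAP_VALID_MASK;
}

int main(void)
{
	printf("%#llx\n",
	       (unsigned long long)combine_caps(0xffffffffu, 0x1ffu, 0));
	return 0;
}
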
index e103bb3693c068ed22cd2d02d8db3309ff98cfaf..3c629f4ae08076c966f45e952a4b4ede60368570 100644 (file)
@@ -9260,6 +9260,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x0a62, "Dell Precision 5560", ALC289_FIXUP_DUAL_SPK),
        SND_PCI_QUIRK(0x1028, 0x0a9d, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x0a9e, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1028, 0x0ac9, "Dell Precision 3260", ALC295_FIXUP_CHROME_BOOK),
        SND_PCI_QUIRK(0x1028, 0x0b19, "Dell XPS 15 9520", ALC289_FIXUP_DUAL_SPK),
        SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK),
        SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS),
@@ -11617,6 +11618,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
+       SND_PCI_QUIRK(0x103c, 0x870c, "HP", ALC897_FIXUP_HP_HSMIC_VERB),
        SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB),
        SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2),
        SND_PCI_QUIRK(0x103c, 0x877e, "HP 288 Pro G6", ALC671_FIXUP_HP_HEADSET_MIC2),
index 9a30f6d35d1358e31282002bc07d2d276b3c65e1..24b9782340001810f3ac051eab3129d86f98a9e2 100644 (file)
@@ -1892,13 +1892,10 @@ static int aureon_add_controls(struct snd_ice1712 *ice)
                unsigned char id;
                snd_ice1712_save_gpio_status(ice);
                id = aureon_cs8415_get(ice, CS8415_ID);
+               snd_ice1712_restore_gpio_status(ice);
                if (id != 0x41)
                        dev_info(ice->card->dev,
                                 "No CS8415 chip. Skipping CS8415 controls.\n");
-               else if ((id & 0x0F) != 0x01)
-                       dev_info(ice->card->dev,
-                                "Detected unsupported CS8415 rev. (%c)\n",
-                                (char)((id & 0x0F) + 'A' - 1));
                else {
                        for (i = 0; i < ARRAY_SIZE(cs8415_controls); i++) {
                                struct snd_kcontrol *kctl;
@@ -1909,7 +1906,6 @@ static int aureon_add_controls(struct snd_ice1712 *ice)
                                        kctl->id.device = ice->pcm->device;
                        }
                }
-               snd_ice1712_restore_gpio_status(ice);
        }
 
        return 0;
index 36314753923b8c6adf458d5d7af6ca15681ac800..4a69ce702360c547e3fc904b1a15d6823227d0c2 100644 (file)
@@ -255,6 +255,20 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "15NBC1011"),
                }
        },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "OMEN by HP Gaming Laptop 16z-n000"),
+               }
+       },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+                       DMI_MATCH(DMI_BOARD_NAME, "8A43"),
+               }
+       },
        {}
 };
 
index 24381c42eb54c15e50fad471c382f33b078e0e12..64750db9b96396e2b8d2a78cf185a3cb12c128f6 100644 (file)
 #define SERDES_CONF_UNK3       BIT(14)
 #define SERDES_CONF_NO_DATA_FEEDBACK   BIT(15)
 #define SERDES_CONF_SYNC_SEL   GENMASK(18, 16)
-#define SERDES_CONF_SOME_RST   BIT(19)
 #define REG_TX_SERDES_BITSTART 0x08
 #define REG_RX_SERDES_BITSTART 0x0c
 #define REG_TX_SERDES_SLOTMASK 0x0c
@@ -203,15 +202,24 @@ static void mca_fe_early_trigger(struct snd_pcm_substream *substream, int cmd,
        case SNDRV_PCM_TRIGGER_START:
        case SNDRV_PCM_TRIGGER_RESUME:
        case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+               mca_modify(cl, serdes_conf, SERDES_CONF_SYNC_SEL,
+                          FIELD_PREP(SERDES_CONF_SYNC_SEL, 0));
+               mca_modify(cl, serdes_conf, SERDES_CONF_SYNC_SEL,
+                          FIELD_PREP(SERDES_CONF_SYNC_SEL, 7));
                mca_modify(cl, serdes_unit + REG_SERDES_STATUS,
                           SERDES_STATUS_EN | SERDES_STATUS_RST,
                           SERDES_STATUS_RST);
-               mca_modify(cl, serdes_conf, SERDES_CONF_SOME_RST,
-                          SERDES_CONF_SOME_RST);
-               readl_relaxed(cl->base + serdes_conf);
-               mca_modify(cl, serdes_conf, SERDES_STATUS_RST, 0);
-               WARN_ON(readl_relaxed(cl->base + REG_SERDES_STATUS) &
+               /*
+                * Experiments suggest that it takes at most ~1 us
+                * for the bit to clear, so wait 2 us for good measure.
+                */
+               udelay(2);
+               WARN_ON(readl_relaxed(cl->base + serdes_unit + REG_SERDES_STATUS) &
                        SERDES_STATUS_RST);
+               mca_modify(cl, serdes_conf, SERDES_CONF_SYNC_SEL,
+                          FIELD_PREP(SERDES_CONF_SYNC_SEL, 0));
+               mca_modify(cl, serdes_conf, SERDES_CONF_SYNC_SEL,
+                          FIELD_PREP(SERDES_CONF_SYNC_SEL, cl->no + 1));
                break;
        default:
                break;
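A bounded poll would be a possible alternative to the fixed udelay(); shown purely as a sketch, using the atomic poll helper from <linux/iopoll.h> since the trigger path cannot sleep (the 1 us step and 10 us timeout are assumptions, not values from this patch):

	u32 st;

	/* Poll up to 10 us, in 1 us steps, for RST to self-clear. */
	WARN_ON(readl_relaxed_poll_timeout_atomic(
			cl->base + serdes_unit + REG_SERDES_STATUS,
			st, !(st & SERDES_STATUS_RST), 1, 10));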
@@ -942,10 +950,17 @@ static int mca_pcm_new(struct snd_soc_component *component,
                chan = mca_request_dma_channel(cl, i);
 
                if (IS_ERR_OR_NULL(chan)) {
+                       mca_pcm_free(component, rtd->pcm);
+
+                       if (chan && PTR_ERR(chan) == -EPROBE_DEFER)
+                               return PTR_ERR(chan);
+
                        dev_err(component->dev, "unable to obtain DMA channel (stream %d cluster %d): %pe\n",
                                i, cl->no, chan);
-                       mca_pcm_free(component, rtd->pcm);
-                       return -EINVAL;
+
+                       if (!chan)
+                               return -EINVAL;
+                       return PTR_ERR(chan);
                }
 
                cl->dma_chans[i] = chan;
index cf4084dcbd5eefd2a4a00079ca6c8c7d40882618..1aed3baa9369708336378c760f6a2a3e6bec5373 100644 (file)
@@ -114,6 +114,7 @@ struct mchp_pdmc {
        struct clk *gclk;
        u32 pdmcen;
        u32 suspend_irq;
+       u32 startup_delay_us;
        int mic_no;
        int sinc_order;
        bool audio_filter_en;
@@ -425,6 +426,7 @@ static const struct snd_soc_component_driver mchp_pdmc_dai_component = {
        .open = &mchp_pdmc_open,
        .close = &mchp_pdmc_close,
        .legacy_dai_naming = 1,
+       .start_dma_last = 1,
 };
 
 static const unsigned int mchp_pdmc_1mic[] = {1};
@@ -632,6 +634,29 @@ static int mchp_pdmc_hw_params(struct snd_pcm_substream *substream,
        return 0;
 }
 
+static void mchp_pdmc_noise_filter_workaround(struct mchp_pdmc *dd)
+{
+       u32 tmp, steps = 16;
+
+       /*
+        * The PDMC doesn't wait for the microphones' startup time, so the
+        * acquisition may start before the microphones are ready, leading to
+        * pop noises at the beginning of capture. To avoid this, we need to
+        * wait 50 ms (normal startup procedure) or 150 ms (worst case, after
+        * resume from sleep states) after the microphones are enabled, then
+        * clear the FIFOs (by reading the RHR 16 times) and any pending
+        * interrupts before continuing. For this to work, the DMA also needs
+        * to be started after the interrupts are enabled.
+        */
+       usleep_range(dd->startup_delay_us, dd->startup_delay_us + 5);
+
+       while (steps--)
+               regmap_read(dd->regmap, MCHP_PDMC_RHR, &tmp);
+
+       /* Clear interrupts. */
+       regmap_read(dd->regmap, MCHP_PDMC_ISR, &tmp);
+}
+
 static int mchp_pdmc_trigger(struct snd_pcm_substream *substream,
                             int cmd, struct snd_soc_dai *dai)
 {
@@ -644,15 +669,17 @@ static int mchp_pdmc_trigger(struct snd_pcm_substream *substream,
        switch (cmd) {
        case SNDRV_PCM_TRIGGER_RESUME:
        case SNDRV_PCM_TRIGGER_START:
-               /* Enable overrun and underrun error interrupts */
-               regmap_write(dd->regmap, MCHP_PDMC_IER, dd->suspend_irq |
-                            MCHP_PDMC_IR_RXOVR | MCHP_PDMC_IR_RXUDR);
-               dd->suspend_irq = 0;
-               fallthrough;
        case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
                snd_soc_component_update_bits(cpu, MCHP_PDMC_MR,
                                              MCHP_PDMC_MR_PDMCEN_MASK,
                                              dd->pdmcen);
+
+               mchp_pdmc_noise_filter_workaround(dd);
+
+               /* Enable interrupts. */
+               regmap_write(dd->regmap, MCHP_PDMC_IER, dd->suspend_irq |
+                            MCHP_PDMC_IR_RXOVR | MCHP_PDMC_IR_RXUDR);
+               dd->suspend_irq = 0;
                break;
        case SNDRV_PCM_TRIGGER_SUSPEND:
                regmap_read(dd->regmap, MCHP_PDMC_IMR, &dd->suspend_irq);
@@ -796,6 +823,7 @@ static bool mchp_pdmc_readable_reg(struct device *dev, unsigned int reg)
        case MCHP_PDMC_CFGR:
        case MCHP_PDMC_IMR:
        case MCHP_PDMC_ISR:
+       case MCHP_PDMC_RHR:
        case MCHP_PDMC_VER:
                return true;
        default:
@@ -817,6 +845,17 @@ static bool mchp_pdmc_writeable_reg(struct device *dev, unsigned int reg)
        }
 }
 
+static bool mchp_pdmc_volatile_reg(struct device *dev, unsigned int reg)
+{
+       switch (reg) {
+       case MCHP_PDMC_ISR:
+       case MCHP_PDMC_RHR:
+               return true;
+       default:
+               return false;
+       }
+}
+
 static bool mchp_pdmc_precious_reg(struct device *dev, unsigned int reg)
 {
        switch (reg) {
@@ -836,6 +875,7 @@ static const struct regmap_config mchp_pdmc_regmap_config = {
        .readable_reg   = mchp_pdmc_readable_reg,
        .writeable_reg  = mchp_pdmc_writeable_reg,
        .precious_reg   = mchp_pdmc_precious_reg,
+       .volatile_reg   = mchp_pdmc_volatile_reg,
        .cache_type     = REGCACHE_FLAT,
 };
 
@@ -918,6 +958,9 @@ static int mchp_pdmc_dt_init(struct mchp_pdmc *dd)
                dd->channel_mic_map[i].clk_edge = edge;
        }
 
+       dd->startup_delay_us = 150000;
+       of_property_read_u32(np, "microchip,startup-delay-us", &dd->startup_delay_us);
+
        return 0;
 }
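The default seeded just before the property read is what makes the DT property optional: of_property_read_u32() leaves its output untouched when the property is absent or malformed. A minimal sketch of the pattern:

	u32 delay_us = 150000;	/* worst-case default */

	/* Overwrites delay_us only if the property exists and parses. */
	of_property_read_u32(np, "microchip,startup-delay-us", &delay_us);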
 
index 1430642c8433ab11d35aa953bd08c879cff7a21a..785b9d01d8afeb9b34dad6fb7a320a82b5080461 100644 (file)
@@ -98,6 +98,9 @@ static struct snd_soc_dai_link at91sam9g20ek_dai = {
        .init = at91sam9g20ek_wm8731_init,
        .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF |
                   SND_SOC_DAIFMT_CBP_CFP,
+#ifndef ENABLE_MIC_INPUT
+       .playback_only = true,
+#endif
        SND_SOC_DAILINK_REG(pcm),
 };
 
index bd72c426a93d17d154862c848eebea24d37dde82..07747565c3b51c795e6472c560d44dcb333d4739 100644 (file)
@@ -2103,6 +2103,7 @@ config SND_SOC_WSA883X
 config SND_SOC_ZL38060
        tristate "Microsemi ZL38060 Connected Home Audio Processor"
        depends on SPI_MASTER
+       depends on GPIOLIB
        select REGMAP
        help
          Support for ZL38060 Connected Home Audio Processor from Microsemi,
index bbb09724988766f46b5fb2ae1d13669a5414e8d7..a663d37e57760ff19fe798f949c1e94aff20a281 100644 (file)
@@ -444,22 +444,6 @@ static const struct snd_soc_component_driver adau7118_component_driver = {
        .endianness             = 1,
 };
 
-static void adau7118_regulator_disable(void *data)
-{
-       struct adau7118_data *st = data;
-       int ret;
-       /*
-        * If we fail to disable DVDD, don't bother in trying IOVDD. We
-        * actually don't want to be left in the situation where DVDD
-        * is enabled and IOVDD is disabled.
-        */
-       ret = regulator_disable(st->dvdd);
-       if (ret)
-               return;
-
-       regulator_disable(st->iovdd);
-}
-
 static int adau7118_regulator_setup(struct adau7118_data *st)
 {
        st->iovdd = devm_regulator_get(st->dev, "iovdd");
@@ -481,8 +465,7 @@ static int adau7118_regulator_setup(struct adau7118_data *st)
                regcache_cache_only(st->map, true);
        }
 
-       return devm_add_action_or_reset(st->dev, adau7118_regulator_disable,
-                                       st);
+       return 0;
 }
 
 static int adau7118_parset_dt(const struct adau7118_data *st)
index c55b033d89da235884f4ff633caa90dcbc22312a..4a4f09f924bc510ff54fff1cf02a008bca82d982 100644 (file)
@@ -339,11 +339,39 @@ static void da7219_aad_hptest_work(struct work_struct *work)
                                    SND_JACK_HEADSET | SND_JACK_LINEOUT);
 }
 
+static void da7219_aad_jack_det_work(struct work_struct *work)
+{
+       struct da7219_aad_priv *da7219_aad =
+               container_of(work, struct da7219_aad_priv, jack_det_work);
+       struct snd_soc_component *component = da7219_aad->component;
+       u8 srm_st;
+
+       mutex_lock(&da7219_aad->jack_det_mutex);
+
+       srm_st = snd_soc_component_read(component, DA7219_PLL_SRM_STS) & DA7219_PLL_SRM_STS_MCLK;
+       msleep(da7219_aad->gnd_switch_delay * ((srm_st == 0x0) ? 2 : 1) - 4);
+       /* Enable ground switch */
+       snd_soc_component_update_bits(component, 0xFB, 0x01, 0x01);
+
+       mutex_unlock(&da7219_aad->jack_det_mutex);
+}
+
 
 /*
  * IRQ
  */
 
+static irqreturn_t da7219_aad_pre_irq_thread(int irq, void *data)
+{
+
+       struct da7219_aad_priv *da7219_aad = data;
+
+       if (!da7219_aad->jack_inserted)
+               schedule_work(&da7219_aad->jack_det_work);
+
+       return IRQ_WAKE_THREAD;
+}
+
 static irqreturn_t da7219_aad_irq_thread(int irq, void *data)
 {
        struct da7219_aad_priv *da7219_aad = data;
@@ -351,14 +379,9 @@ static irqreturn_t da7219_aad_irq_thread(int irq, void *data)
        struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
        struct da7219_priv *da7219 = snd_soc_component_get_drvdata(component);
        u8 events[DA7219_AAD_IRQ_REG_MAX];
-       u8 statusa, srm_st;
+       u8 statusa;
        int i, report = 0, mask = 0;
 
-       srm_st = snd_soc_component_read(component, DA7219_PLL_SRM_STS) & DA7219_PLL_SRM_STS_MCLK;
-       msleep(da7219_aad->gnd_switch_delay * ((srm_st == 0x0) ? 2 : 1) - 4);
-       /* Enable ground switch */
-       snd_soc_component_update_bits(component, 0xFB, 0x01, 0x01);
-
        /* Read current IRQ events */
        regmap_bulk_read(da7219->regmap, DA7219_ACCDET_IRQ_EVENT_A,
                         events, DA7219_AAD_IRQ_REG_MAX);
@@ -377,6 +400,9 @@ static irqreturn_t da7219_aad_irq_thread(int irq, void *data)
                events[DA7219_AAD_IRQ_REG_A], events[DA7219_AAD_IRQ_REG_B],
                statusa);
 
+       if (!da7219_aad->jack_inserted)
+               cancel_work_sync(&da7219_aad->jack_det_work);
+
        if (statusa & DA7219_JACK_INSERTION_STS_MASK) {
                /* Jack Insertion */
                if (events[DA7219_AAD_IRQ_REG_A] &
@@ -940,8 +966,9 @@ int da7219_aad_init(struct snd_soc_component *component)
 
        INIT_WORK(&da7219_aad->btn_det_work, da7219_aad_btn_det_work);
        INIT_WORK(&da7219_aad->hptest_work, da7219_aad_hptest_work);
+       INIT_WORK(&da7219_aad->jack_det_work, da7219_aad_jack_det_work);
 
-       ret = request_threaded_irq(da7219_aad->irq, NULL,
+       ret = request_threaded_irq(da7219_aad->irq, da7219_aad_pre_irq_thread,
                                   da7219_aad_irq_thread,
                                   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
                                   "da7219-aad", da7219_aad);
index 21fdf53095cc223ecb564a1c02330463405b4db7..be87ee47edde13953250c8cc4d7d22c474a425e2 100644 (file)
@@ -11,6 +11,7 @@
 #define __DA7219_AAD_H
 
 #include <linux/timer.h>
+#include <linux/mutex.h>
 #include <sound/soc.h>
 #include <sound/jack.h>
 #include <sound/da7219-aad.h>
@@ -196,6 +197,9 @@ struct da7219_aad_priv {
 
        struct work_struct btn_det_work;
        struct work_struct hptest_work;
+       struct work_struct jack_det_work;
+
+       struct mutex  jack_det_mutex;
 
        struct snd_soc_jack *jack;
        bool micbias_resume_enable;
index 93f35e8d26fce33620b8c3b6a88ef6e1a8f6c834..b54610b27906437b0ede084710fb6fc1d5007600 100644 (file)
@@ -560,6 +560,9 @@ static int mt6358_put_wov(struct snd_kcontrol *kcontrol,
        struct mt6358_priv *priv = snd_soc_component_get_drvdata(c);
        int enabled = ucontrol->value.integer.value[0];
 
+       if (enabled < 0 || enabled > 1)
+               return -EINVAL;
+
        if (priv->wov_enabled != enabled) {
                if (enabled)
                        mt6358_enable_wov_phase2(priv);
@@ -567,6 +570,8 @@ static int mt6358_put_wov(struct snd_kcontrol *kcontrol,
                        mt6358_disable_wov_phase2(priv);
 
                priv->wov_enabled = enabled;
+
+               return 1;
        }
 
        return 0;
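The return values above follow the ALSA kcontrol convention: a .put() handler returns 1 when it changed the value (which triggers a control-change notification to userspace), 0 when nothing changed, and a negative errno for invalid input. A condensed sketch, with current_state standing in for driver state:

	static int example_put(struct snd_kcontrol *kcontrol,
			       struct snd_ctl_elem_value *ucontrol)
	{
		long val = ucontrol->value.integer.value[0];

		if (val < 0 || val > 1)
			return -EINVAL;	/* reject out-of-range input */
		if (val == current_state)
			return 0;	/* no change, no notification */
		current_state = val;
		return 1;		/* changed: notify userspace */
	}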
@@ -632,9 +637,6 @@ static const char * const hp_in_mux_map[] = {
        "Audio Playback",
        "Test Mode",
        "HP Impedance",
-       "undefined1",
-       "undefined2",
-       "undefined3",
 };
 
 static int hp_in_mux_map_value[] = {
@@ -643,9 +645,6 @@ static int hp_in_mux_map_value[] = {
        HP_MUX_HP,
        HP_MUX_TEST_MODE,
        HP_MUX_HP_IMPEDANCE,
-       HP_MUX_OPEN,
-       HP_MUX_OPEN,
-       HP_MUX_OPEN,
 };
 
 static SOC_VALUE_ENUM_SINGLE_DECL(hpl_in_mux_map_enum,
index 727c01facf52db7aa7c94640996a29342fe4c1fc..fa4b0a60f8a91dac9cd4b309a277c69c65d2cf75 100644 (file)
@@ -569,7 +569,7 @@ static int sma1303_aif_in_event(struct snd_soc_dapm_widget *w,
                        ret += sma1303_regmap_update_bits(sma1303,
                                        SMA1303_11_SYSTEM_CTRL2,
                                        SMA1303_LR_DATA_SW_MASK,
-                                       SMA1303_LR_DATA_SW_NORMAL,
+                                       SMA1303_LR_DATA_SW_SWAP,
                                        &temp);
                        if (temp == true)
                                change = true;
index 71a11d747622a61659cecb62f90cb298ffe6f63a..4fe448295a902604900d31d7814583363ff6d2c8 100644 (file)
@@ -223,6 +223,20 @@ static const struct dmi_system_id sof_rt5682_quirk_table[] = {
                                        SOF_RT5682_SSP_AMP(2) |
                                        SOF_RT5682_NUM_HDMIDEV(4)),
        },
+       {
+               .callback = sof_rt5682_quirk_cb,
+               .matches = {
+                       DMI_MATCH(DMI_PRODUCT_FAMILY, "Google_Rex"),
+                       DMI_MATCH(DMI_OEM_STRING, "AUDIO-MAX98360_ALC5682I_I2S"),
+               },
+               .driver_data = (void *)(SOF_RT5682_MCLK_EN |
+                                       SOF_RT5682_SSP_CODEC(2) |
+                                       SOF_SPEAKER_AMP_PRESENT |
+                                       SOF_MAX98360A_SPEAKER_AMP_PRESENT |
+                                       SOF_RT5682_SSP_AMP(0) |
+                                       SOF_RT5682_NUM_HDMIDEV(4)
+                                       ),
+       },
        {
                .callback = sof_rt5682_quirk_cb,
                .matches = {
@@ -1105,6 +1119,15 @@ static const struct platform_device_id board_ids[] = {
                                        SOF_RT5682_SSP_AMP(1) |
                                        SOF_RT5682_NUM_HDMIDEV(4)),
        },
+       {
+               .name = "mtl_mx98360_rt5682",
+               .driver_data = (kernel_ulong_t)(SOF_RT5682_MCLK_EN |
+                                       SOF_RT5682_SSP_CODEC(0) |
+                                       SOF_SPEAKER_AMP_PRESENT |
+                                       SOF_MAX98360A_SPEAKER_AMP_PRESENT |
+                                       SOF_RT5682_SSP_AMP(1) |
+                                       SOF_RT5682_NUM_HDMIDEV(4)),
+       },
        {
                .name = "jsl_rt5682",
                .driver_data = (kernel_ulong_t)(SOF_RT5682_MCLK_EN |
index b1a66a0f68181f9e12582c111334168445ebdede..7911c3af8071f26e8b3838daecf87b3ca4c141b7 100644 (file)
@@ -15,6 +15,11 @@ static const struct snd_soc_acpi_codecs mtl_max98357a_amp = {
        .codecs = {"MX98357A"}
 };
 
+static const struct snd_soc_acpi_codecs mtl_max98360a_amp = {
+       .num_codecs = 1,
+       .codecs = {"MX98360A"}
+};
+
 static const struct snd_soc_acpi_codecs mtl_rt5682_rt5682s_hp = {
        .num_codecs = 2,
        .codecs = {"10EC5682", "RTL5682"},
@@ -28,6 +33,13 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_mtl_machines[] = {
                .quirk_data = &mtl_max98357a_amp,
                .sof_tplg_filename = "sof-mtl-max98357a-rt5682.tplg",
        },
+       {
+               .comp_ids = &mtl_rt5682_rt5682s_hp,
+               .drv_name = "mtl_mx98360_rt5682",
+               .machine_quirk = snd_soc_acpi_codec_list,
+               .quirk_data = &mtl_max98360a_amp,
+               .sof_tplg_filename = "sof-mtl-max98360a-rt5682.tplg",
+       },
        {},
 };
 EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_mtl_machines);
index 6a9ace4180d3475bc6308a5a4df129afe7945308..8645ab686970f601e645624e87615a07093c3246 100644 (file)
@@ -141,16 +141,13 @@ static int mt8183_i2s_hd_set(struct snd_kcontrol *kcontrol,
        struct mtk_base_afe *afe = snd_soc_component_get_drvdata(cmpnt);
        struct mtk_afe_i2s_priv *i2s_priv;
        struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
-       int hd_en;
+       int hd_en, change;
 
        if (ucontrol->value.enumerated.item[0] >= e->items)
                return -EINVAL;
 
        hd_en = ucontrol->value.integer.value[0];
 
-       dev_info(afe->dev, "%s(), kcontrol name %s, hd_en %d\n",
-                __func__, kcontrol->id.name, hd_en);
-
        i2s_priv = get_i2s_priv_by_name(afe, kcontrol->id.name);
 
        if (!i2s_priv) {
@@ -158,9 +155,10 @@ static int mt8183_i2s_hd_set(struct snd_kcontrol *kcontrol,
                return -EINVAL;
        }
 
+       change = i2s_priv->low_jitter_en != hd_en;
        i2s_priv->low_jitter_en = hd_en;
 
-       return 0;
+       return change;
 }
 
 static const struct snd_kcontrol_new mtk_dai_i2s_controls[] = {
@@ -276,9 +274,6 @@ static int mtk_apll_event(struct snd_soc_dapm_widget *w,
        struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
        struct mtk_base_afe *afe = snd_soc_component_get_drvdata(cmpnt);
 
-       dev_info(cmpnt->dev, "%s(), name %s, event 0x%x\n",
-                __func__, w->name, event);
-
        switch (event) {
        case SND_SOC_DAPM_PRE_PMU:
                if (strcmp(w->name, APLL1_W_NAME) == 0)
@@ -307,9 +302,6 @@ static int mtk_mclk_en_event(struct snd_soc_dapm_widget *w,
        struct mtk_base_afe *afe = snd_soc_component_get_drvdata(cmpnt);
        struct mtk_afe_i2s_priv *i2s_priv;
 
-       dev_info(cmpnt->dev, "%s(), name %s, event 0x%x\n",
-                __func__, w->name, event);
-
        i2s_priv = get_i2s_priv_by_name(afe, w->name);
 
        if (!i2s_priv) {
@@ -715,11 +707,6 @@ static int mtk_dai_i2s_config(struct mtk_base_afe *afe,
        unsigned int i2s_con = 0, fmt_con = I2S_FMT_I2S << I2S_FMT_SFT;
        int ret = 0;
 
-       dev_info(afe->dev, "%s(), id %d, rate %d, format %d\n",
-                __func__,
-                i2s_id,
-                rate, format);
-
        if (i2s_priv) {
                i2s_priv->rate = rate;
 
@@ -810,8 +797,6 @@ static int mtk_dai_i2s_set_sysclk(struct snd_soc_dai *dai,
                return -EINVAL;
        }
 
-       dev_info(afe->dev, "%s(), freq %d\n", __func__, freq);
-
        apll = mt8183_get_apll_by_rate(afe, freq);
        apll_rate = mt8183_get_apll_rate(afe, apll);
 
index 071841903c62000f5dae86ba466849040d79a3bc..7a37752d42444b2890fbbfdf45937b93ea2c69f3 100644 (file)
@@ -679,7 +679,6 @@ static int mt8188_etdm_clk_src_sel_put(struct snd_kcontrol *kcontrol,
        unsigned int old_val;
        unsigned int mask;
        unsigned int reg;
-       unsigned int shift;
 
        if (source >= e->items)
                return -EINVAL;
@@ -687,27 +686,22 @@ static int mt8188_etdm_clk_src_sel_put(struct snd_kcontrol *kcontrol,
        if (!strcmp(kcontrol->id.name, "ETDM_OUT1_Clock_Source")) {
                reg = ETDM_OUT1_CON4;
                mask = ETDM_OUT_CON4_CLOCK_MASK;
-               shift = ETDM_OUT_CON4_CLOCK_SHIFT;
                val = FIELD_PREP(ETDM_OUT_CON4_CLOCK_MASK, source);
        } else if (!strcmp(kcontrol->id.name, "ETDM_OUT2_Clock_Source")) {
                reg = ETDM_OUT2_CON4;
                mask = ETDM_OUT_CON4_CLOCK_MASK;
-               shift = ETDM_OUT_CON4_CLOCK_SHIFT;
                val = FIELD_PREP(ETDM_OUT_CON4_CLOCK_MASK, source);
        } else if (!strcmp(kcontrol->id.name, "ETDM_OUT3_Clock_Source")) {
                reg = ETDM_OUT3_CON4;
                mask = ETDM_OUT_CON4_CLOCK_MASK;
-               shift = ETDM_OUT_CON4_CLOCK_SHIFT;
                val = FIELD_PREP(ETDM_OUT_CON4_CLOCK_MASK, source);
        } else if (!strcmp(kcontrol->id.name, "ETDM_IN1_Clock_Source")) {
                reg = ETDM_IN1_CON2;
                mask = ETDM_IN_CON2_CLOCK_MASK;
-               shift = ETDM_IN_CON2_CLOCK_SHIFT;
                val = FIELD_PREP(ETDM_IN_CON2_CLOCK_MASK, source);
        } else if (!strcmp(kcontrol->id.name, "ETDM_IN2_Clock_Source")) {
                reg = ETDM_IN2_CON2;
                mask = ETDM_IN_CON2_CLOCK_MASK;
-               shift = ETDM_IN_CON2_CLOCK_SHIFT;
                val = FIELD_PREP(ETDM_IN_CON2_CLOCK_MASK, source);
        } else {
                return -EINVAL;
@@ -715,8 +709,6 @@ static int mt8188_etdm_clk_src_sel_put(struct snd_kcontrol *kcontrol,
 
        regmap_read(afe->regmap, reg, &old_val);
        old_val &= mask;
-       old_val >>= shift;
-
        if (old_val == val)
                return 0;
 
@@ -2506,6 +2498,9 @@ static void mt8188_dai_etdm_parse_of(struct mtk_base_afe *afe)
 
        /* etdm in only */
        for (i = 0; i < 2; i++) {
+               dai_id = ETDM_TO_DAI_ID(i);
+               etdm_data = afe_priv->dai_priv[dai_id];
+
                snprintf(prop, sizeof(prop), "mediatek,%s-chn-disabled",
                         of_afe_etdms[i].name);
 
index f8c73e8624df20b0a58e03b699dd470882f4d81b..4919535e2759d97253868bdacf20c30e2ba271df 100644 (file)
@@ -303,9 +303,6 @@ static int mtk_adda_ul_event(struct snd_soc_dapm_widget *w,
        struct mt8192_afe_private *afe_priv = afe->platform_priv;
        int mtkaif_dmic = afe_priv->mtkaif_dmic;
 
-       dev_info(afe->dev, "%s(), name %s, event 0x%x, mtkaif_dmic %d\n",
-                __func__, w->name, event, mtkaif_dmic);
-
        switch (event) {
        case SND_SOC_DAPM_PRE_PMU:
                mt8192_afe_gpio_request(afe->dev, true, MT8192_DAI_ADDA, 1);
@@ -345,10 +342,6 @@ static int mtk_adda_ch34_ul_event(struct snd_soc_dapm_widget *w,
        int mtkaif_dmic = afe_priv->mtkaif_dmic_ch34;
        int mtkaif_adda6_only = afe_priv->mtkaif_adda6_only;
 
-       dev_info(afe->dev,
-                "%s(), name %s, event 0x%x, mtkaif_dmic %d, mtkaif_adda6_only %d\n",
-                __func__, w->name, event, mtkaif_dmic, mtkaif_adda6_only);
-
        switch (event) {
        case SND_SOC_DAPM_PRE_PMU:
                mt8192_afe_gpio_request(afe->dev, true, MT8192_DAI_ADDA_CH34,
@@ -538,9 +531,6 @@ static int mtk_adda_dl_event(struct snd_soc_dapm_widget *w,
        struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
        struct mtk_base_afe *afe = snd_soc_component_get_drvdata(cmpnt);
 
-       dev_info(afe->dev, "%s(), name %s, event 0x%x\n",
-                __func__, w->name, event);
-
        switch (event) {
        case SND_SOC_DAPM_PRE_PMU:
                mt8192_afe_gpio_request(afe->dev, true, MT8192_DAI_ADDA, 0);
@@ -564,9 +554,6 @@ static int mtk_adda_ch34_dl_event(struct snd_soc_dapm_widget *w,
        struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
        struct mtk_base_afe *afe = snd_soc_component_get_drvdata(cmpnt);
 
-       dev_info(afe->dev, "%s(), name %s, event 0x%x\n",
-                __func__, w->name, event);
-
        switch (event) {
        case SND_SOC_DAPM_PRE_PMU:
                mt8192_afe_gpio_request(afe->dev, true, MT8192_DAI_ADDA_CH34,
@@ -604,19 +591,21 @@ static int stf_positive_gain_set(struct snd_kcontrol *kcontrol,
        struct mtk_base_afe *afe = snd_soc_component_get_drvdata(cmpnt);
        struct mt8192_afe_private *afe_priv = afe->platform_priv;
        int gain_db = ucontrol->value.integer.value[0];
+       bool change = false;
 
        afe_priv->stf_positive_gain_db = gain_db;
 
        if (gain_db >= 0 && gain_db <= 24) {
-               regmap_update_bits(afe->regmap,
-                                  AFE_SIDETONE_GAIN,
-                                  POSITIVE_GAIN_MASK_SFT,
-                                  (gain_db / 6) << POSITIVE_GAIN_SFT);
+               regmap_update_bits_check(afe->regmap,
+                                        AFE_SIDETONE_GAIN,
+                                        POSITIVE_GAIN_MASK_SFT,
+                                        (gain_db / 6) << POSITIVE_GAIN_SFT,
+                                        &change);
        } else {
-               dev_warn(afe->dev, "%s(), gain_db %d invalid\n",
-                        __func__, gain_db);
+               return -EINVAL;
        }
-       return 0;
+
+       return change;
 }
 
 static int mt8192_adda_dmic_get(struct snd_kcontrol *kcontrol,
@@ -637,15 +626,17 @@ static int mt8192_adda_dmic_set(struct snd_kcontrol *kcontrol,
        struct mtk_base_afe *afe = snd_soc_component_get_drvdata(cmpnt);
        struct mt8192_afe_private *afe_priv = afe->platform_priv;
        int dmic_on;
+       bool change;
 
        dmic_on = ucontrol->value.integer.value[0];
 
-       dev_info(afe->dev, "%s(), kcontrol name %s, dmic_on %d\n",
-                __func__, kcontrol->id.name, dmic_on);
+       change = (afe_priv->mtkaif_dmic != dmic_on) ||
+               (afe_priv->mtkaif_dmic_ch34 != dmic_on);
 
        afe_priv->mtkaif_dmic = dmic_on;
        afe_priv->mtkaif_dmic_ch34 = dmic_on;
-       return 0;
+
+       return change;
 }
 
 static int mt8192_adda6_only_get(struct snd_kcontrol *kcontrol,
@@ -666,20 +657,20 @@ static int mt8192_adda6_only_set(struct snd_kcontrol *kcontrol,
        struct mtk_base_afe *afe = snd_soc_component_get_drvdata(cmpnt);
        struct mt8192_afe_private *afe_priv = afe->platform_priv;
        int mtkaif_adda6_only;
+       bool change;
 
        mtkaif_adda6_only = ucontrol->value.integer.value[0];
 
-       dev_info(afe->dev, "%s(), kcontrol name %s, mtkaif_adda6_only %d\n",
-                __func__, kcontrol->id.name, mtkaif_adda6_only);
-
+       change = afe_priv->mtkaif_adda6_only != mtkaif_adda6_only;
        afe_priv->mtkaif_adda6_only = mtkaif_adda6_only;
-       return 0;
+
+       return change;
 }
 
 static const struct snd_kcontrol_new mtk_adda_controls[] = {
        SOC_SINGLE("Sidetone_Gain", AFE_SIDETONE_GAIN,
                   SIDE_TONE_GAIN_SFT, SIDE_TONE_GAIN_MASK, 0),
-       SOC_SINGLE_EXT("Sidetone_Positive_Gain_dB", SND_SOC_NOPM, 0, 100, 0,
+       SOC_SINGLE_EXT("Sidetone_Positive_Gain_dB", SND_SOC_NOPM, 0, 24, 0,
                       stf_positive_gain_get, stf_positive_gain_set),
        SOC_SINGLE("ADDA_DL_GAIN", AFE_ADDA_DL_SRC2_CON1,
                   DL_2_GAIN_CTL_PRE_SFT, DL_2_GAIN_CTL_PRE_MASK, 0),
@@ -750,9 +741,6 @@ static int mtk_stf_event(struct snd_soc_dapm_widget *w,
 
        regmap_read(afe->regmap, AFE_SIDETONE_CON1, &reg_value);
 
-       dev_info(afe->dev, "%s(), name %s, event 0x%x, ul_rate 0x%x, AFE_SIDETONE_CON1 0x%x\n",
-                __func__, w->name, event, ul_rate, reg_value);
-
        switch (event) {
        case SND_SOC_DAPM_PRE_PMU:
                /* set side tone gain = 0 */
@@ -1163,12 +1151,6 @@ static int mtk_dai_adda_hw_params(struct snd_pcm_substream *substream,
        unsigned int rate = params_rate(params);
        int id = dai->id;
 
-       dev_info(afe->dev, "%s(), id %d, stream %d, rate %d\n",
-                __func__,
-                id,
-                substream->stream,
-                rate);
-
        if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
                unsigned int dl_src2_con0 = 0;
                unsigned int dl_src2_con1 = 0;
@@ -1441,8 +1423,6 @@ int mt8192_dai_adda_register(struct mtk_base_afe *afe)
        struct mtk_base_afe_dai *dai;
        struct mt8192_afe_private *afe_priv = afe->platform_priv;
 
-       dev_info(afe->dev, "%s()\n", __func__);
-
        dai = devm_kzalloc(afe->dev, sizeof(*dai), GFP_KERNEL);
        if (!dai)
                return -ENOMEM;
index c2e268054773df30f5a78e29a93d5a7c186579c9..f2c9a1fdbe0d04c2b11f7f9f4ebcc3ba3680fb9c 100644 (file)
@@ -2567,6 +2567,9 @@ static void mt8195_dai_etdm_parse_of(struct mtk_base_afe *afe)
 
        /* etdm in only */
        for (i = 0; i < 2; i++) {
+               dai_id = ETDM_TO_DAI_ID(i);
+               etdm_data = afe_priv->dai_priv[dai_id];
+
                ret = snprintf(prop, sizeof(prop),
                               "mediatek,%s-chn-disabled",
                               of_afe_etdms[i].name);
index 7bc4421835d7261fbbffa72b6aecc3712bfb6bd4..0b8926600d900a688cae2f4942cf82e3ff8ad46d 100644 (file)
@@ -39,10 +39,10 @@ struct rsnd_adg {
        int clkin_size;
        int clkout_size;
        u32 ckr;
-       u32 rbga;
-       u32 rbgb;
+       u32 brga;
+       u32 brgb;
 
-       int rbg_rate[ADG_HZ_SIZE]; /* RBGA / RBGB */
+       int brg_rate[ADG_HZ_SIZE]; /* BRGA / BRGB */
 };
 
 #define for_each_rsnd_clkin(pos, adg, i)       \
@@ -75,7 +75,7 @@ static const char * const clkout_name_gen2[] = {
        [CLKOUT3] = "audio_clkout3",
 };
 
-static u32 rsnd_adg_calculate_rbgx(unsigned long div)
+static u32 rsnd_adg_calculate_brgx(unsigned long div)
 {
        int i;
 
@@ -131,8 +131,8 @@ static void __rsnd_adg_get_timesel_ratio(struct rsnd_priv *priv,
                adg->clkin_rate[CLKA],  /* 0000: CLKA */
                adg->clkin_rate[CLKB],  /* 0001: CLKB */
                adg->clkin_rate[CLKC],  /* 0010: CLKC */
-               adg->rbg_rate[ADG_HZ_441],      /* 0011: RBGA */
-               adg->rbg_rate[ADG_HZ_48],       /* 0100: RBGB */
+               adg->brg_rate[ADG_HZ_441],      /* 0011: BRGA */
+               adg->brg_rate[ADG_HZ_48],       /* 0100: BRGB */
        };
 
        min = ~0;
@@ -323,10 +323,10 @@ int rsnd_adg_clk_query(struct rsnd_priv *priv, unsigned int rate)
        /*
         * find divided clock from BRGA/BRGB
         */
-       if (rate == adg->rbg_rate[ADG_HZ_441])
+       if (rate == adg->brg_rate[ADG_HZ_441])
                return 0x10;
 
-       if (rate == adg->rbg_rate[ADG_HZ_48])
+       if (rate == adg->brg_rate[ADG_HZ_48])
                return 0x20;
 
        return -EIO;
@@ -358,13 +358,13 @@ int rsnd_adg_ssi_clk_try_start(struct rsnd_mod *ssi_mod, unsigned int rate)
                ckr = 0x80000000; /* BRGB output = 48kHz */
 
        rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr | ckr);
-       rsnd_mod_write(adg_mod, BRRA,  adg->rbga);
-       rsnd_mod_write(adg_mod, BRRB,  adg->rbgb);
+       rsnd_mod_write(adg_mod, BRRA,  adg->brga);
+       rsnd_mod_write(adg_mod, BRRB,  adg->brgb);
 
        dev_dbg(dev, "CLKOUT is based on BRG%c (= %dHz)\n",
                (ckr) ? 'B' : 'A',
-               (ckr) ? adg->rbg_rate[ADG_HZ_48] :
-                       adg->rbg_rate[ADG_HZ_441]);
+               (ckr) ? adg->brg_rate[ADG_HZ_48] :
+                       adg->brg_rate[ADG_HZ_441]);
 
        return 0;
 }
@@ -484,7 +484,7 @@ static int rsnd_adg_get_clkout(struct rsnd_priv *priv)
        struct device *dev = rsnd_priv_to_dev(priv);
        struct device_node *np = dev->of_node;
        struct property *prop;
-       u32 ckr, rbgx, rbga, rbgb;
+       u32 ckr, brgx, brga, brgb;
        u32 rate, div;
        u32 req_rate[ADG_HZ_SIZE] = {};
        uint32_t count = 0;
@@ -501,8 +501,8 @@ static int rsnd_adg_get_clkout(struct rsnd_priv *priv)
        };
 
        ckr = 0;
-       rbga = 2; /* default 1/6 */
-       rbgb = 2; /* default 1/6 */
+       brga = 2; /* default 1/6 */
+       brgb = 2; /* default 1/6 */
 
        /*
         * ADG supports BRRA/BRRB output only
@@ -543,30 +543,30 @@ static int rsnd_adg_get_clkout(struct rsnd_priv *priv)
                if (0 == rate) /* not used */
                        continue;
 
-               /* RBGA */
-               if (!adg->rbg_rate[ADG_HZ_441] && (0 == rate % 44100)) {
+               /* BRGA */
+               if (!adg->brg_rate[ADG_HZ_441] && (0 == rate % 44100)) {
                        div = 6;
                        if (req_Hz[ADG_HZ_441])
                                div = rate / req_Hz[ADG_HZ_441];
-                       rbgx = rsnd_adg_calculate_rbgx(div);
-                       if (BRRx_MASK(rbgx) == rbgx) {
-                               rbga = rbgx;
-                               adg->rbg_rate[ADG_HZ_441] = rate / div;
+                       brgx = rsnd_adg_calculate_brgx(div);
+                       if (BRRx_MASK(brgx) == brgx) {
+                               brga = brgx;
+                               adg->brg_rate[ADG_HZ_441] = rate / div;
                                ckr |= brg_table[i] << 20;
                                if (req_Hz[ADG_HZ_441])
                                        parent_clk_name = __clk_get_name(clk);
                        }
                }
 
-               /* RBGB */
-               if (!adg->rbg_rate[ADG_HZ_48] && (0 == rate % 48000)) {
+               /* BRGB */
+               if (!adg->brg_rate[ADG_HZ_48] && (0 == rate % 48000)) {
                        div = 6;
                        if (req_Hz[ADG_HZ_48])
                                div = rate / req_Hz[ADG_HZ_48];
-                       rbgx = rsnd_adg_calculate_rbgx(div);
-                       if (BRRx_MASK(rbgx) == rbgx) {
-                               rbgb = rbgx;
-                               adg->rbg_rate[ADG_HZ_48] = rate / div;
+                       brgx = rsnd_adg_calculate_brgx(div);
+                       if (BRRx_MASK(brgx) == brgx) {
+                               brgb = brgx;
+                               adg->brg_rate[ADG_HZ_48] = rate / div;
                                ckr |= brg_table[i] << 16;
                                if (req_Hz[ADG_HZ_48])
                                        parent_clk_name = __clk_get_name(clk);
@@ -620,8 +620,8 @@ static int rsnd_adg_get_clkout(struct rsnd_priv *priv)
 
 rsnd_adg_get_clkout_end:
        adg->ckr = ckr;
-       adg->rbga = rbga;
-       adg->rbgb = rbgb;
+       adg->brga = brga;
+       adg->brgb = brgb;
 
        return 0;
 
@@ -663,9 +663,9 @@ void rsnd_adg_clk_dbg_info(struct rsnd_priv *priv, struct seq_file *m)
                        __clk_get_name(clk), clk, clk_get_rate(clk));
 
        dbg_msg(dev, m, "BRGCKR = 0x%08x, BRRA/BRRB = 0x%x/0x%x\n",
-               adg->ckr, adg->rbga, adg->rbgb);
-       dbg_msg(dev, m, "BRGA (for 44100 base) = %d\n", adg->rbg_rate[ADG_HZ_441]);
-       dbg_msg(dev, m, "BRGB (for 48000 base) = %d\n", adg->rbg_rate[ADG_HZ_48]);
+               adg->ckr, adg->brga, adg->brgb);
+       dbg_msg(dev, m, "BRGA (for 44100 base) = %d\n", adg->brg_rate[ADG_HZ_441]);
+       dbg_msg(dev, m, "BRGB (for 48000 base) = %d\n", adg->brg_rate[ADG_HZ_48]);
 
        /*
         * Actual CLKOUT will be exchanged in rsnd_adg_ssi_clk_try_start()
index 005b179a770a05e2b8251fa7b25ce298c040931a..5eb056b942ce8d6c1d95a9ee49c27be5d8bae8d0 100644 (file)
@@ -1088,22 +1088,39 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
 static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 {
        struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
-       int ret = -EINVAL, _ret = 0;
+       struct snd_soc_component *component;
+       int ret = -EINVAL, _ret = 0, start_dma_last = 0, i;
        int rollback = 0;
 
        switch (cmd) {
        case SNDRV_PCM_TRIGGER_START:
        case SNDRV_PCM_TRIGGER_RESUME:
        case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+               /* Do we need to start DMA last? */
+               for_each_rtd_components(rtd, i, component) {
+                       if (component->driver->start_dma_last) {
+                               start_dma_last = 1;
+                               break;
+                       }
+               }
+
                ret = snd_soc_link_trigger(substream, cmd, 0);
                if (ret < 0)
                        goto start_err;
 
-               ret = snd_soc_pcm_component_trigger(substream, cmd, 0);
-               if (ret < 0)
-                       goto start_err;
+               if (start_dma_last) {
+                       ret = snd_soc_pcm_dai_trigger(substream, cmd, 0);
+                       if (ret < 0)
+                               goto start_err;
+
+                       ret = snd_soc_pcm_component_trigger(substream, cmd, 0);
+               } else {
+                       ret = snd_soc_pcm_component_trigger(substream, cmd, 0);
+                       if (ret < 0)
+                               goto start_err;
 
-               ret = snd_soc_pcm_dai_trigger(substream, cmd, 0);
+                       ret = snd_soc_pcm_dai_trigger(substream, cmd, 0);
+               }
 start_err:
                if (ret < 0)
                        rollback = 1;
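The opt-in side of this ordering change appears earlier in this merge: a component driver sets a flag in its driver struct and the core then triggers the DAI before the component (DMA). A sketch modeled on the mchp-pdmc hunk above:

	static const struct snd_soc_component_driver example_component = {
		/* Ask the core to trigger the DMA component last on START. */
		.start_dma_last = 1,
	};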
diff --git a/tools/arch/loongarch/include/uapi/asm/bitsperlong.h b/tools/arch/loongarch/include/uapi/asm/bitsperlong.h
new file mode 100644 (file)
index 0000000..d4e32b3
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_LOONGARCH_BITSPERLONG_H
+#define __ASM_LOONGARCH_BITSPERLONG_H
+
+#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8)
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_LOONGARCH_BITSPERLONG_H */
index 5a2baf28a1dcdaf546cd213e4d0eef389a43ad50..1343a62106de9ec3cca2f86f62b1bfe3d4cbb99b 100644 (file)
@@ -57,12 +57,14 @@ struct orc_entry {
        unsigned        sp_reg:4;
        unsigned        bp_reg:4;
        unsigned        type:2;
+       unsigned        signal:1;
        unsigned        end:1;
 #elif defined(__BIG_ENDIAN_BITFIELD)
        unsigned        bp_reg:4;
        unsigned        sp_reg:4;
-       unsigned        unused:5;
+       unsigned        unused:4;
        unsigned        end:1;
+       unsigned        signal:1;
        unsigned        type:2;
 #endif
 } __packed;
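A hedged illustration of how a consumer might use the new bit (the state handling is assumed, not taken from this patch): a frame pushed by hardware on an exception or signal entry records the faulting instruction itself, so the unwinder must not subtract one from the saved IP as it would for a call return address:

	/* Sketch: propagate the ORC hint into the unwind state. */
	if (orc->signal)
		state->signal = true;	/* saved IP is exact, not a return address */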
index 62c54ffbeeaacc74aa9e52697ab11d7deec92d82..9ac3df3fccf0118412f1139521e5c954f0d890d9 100644 (file)
@@ -15,6 +15,7 @@ struct unwind_hint {
        s16             sp_offset;
        u8              sp_reg;
        u8              type;
+       u8              signal;
        u8              end;
 };
 #endif
@@ -49,7 +50,7 @@ struct unwind_hint {
 
 #ifndef __ASSEMBLY__
 
-#define UNWIND_HINT(sp_reg, sp_offset, type, end)              \
+#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end)      \
        "987: \n\t"                                             \
        ".pushsection .discard.unwind_hints\n\t"                \
        /* struct unwind_hint */                                \
@@ -57,6 +58,7 @@ struct unwind_hint {
        ".short " __stringify(sp_offset) "\n\t"                 \
        ".byte " __stringify(sp_reg) "\n\t"                     \
        ".byte " __stringify(type) "\n\t"                       \
+       ".byte " __stringify(signal) "\n\t"                     \
        ".byte " __stringify(end) "\n\t"                        \
        ".balign 4 \n\t"                                        \
        ".popsection\n\t"
@@ -129,7 +131,7 @@ struct unwind_hint {
  * the debuginfo as necessary.  It will also warn if it sees any
  * inconsistencies.
  */
-.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0
 .Lunwind_hint_ip_\@:
        .pushsection .discard.unwind_hints
                /* struct unwind_hint */
@@ -137,6 +139,7 @@ struct unwind_hint {
                .short \sp_offset
                .byte \sp_reg
                .byte \type
+               .byte \signal
                .byte \end
                .balign 4
        .popsection
@@ -174,7 +177,7 @@ struct unwind_hint {
 
 #ifndef __ASSEMBLY__
 
-#define UNWIND_HINT(sp_reg, sp_offset, type, end)      \
+#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end) \
        "\n\t"
 #define STACK_FRAME_NON_STANDARD(func)
 #define STACK_FRAME_NON_STANDARD_FP(func)
@@ -182,7 +185,7 @@ struct unwind_hint {
 #define ASM_REACHABLE
 #else
 #define ANNOTATE_INTRA_FUNCTION_CALL
-.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0
 .endm
 .macro STACK_FRAME_NON_STANDARD func:req
 .endm
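With the extra parameter in place, a hypothetical assembly-side use might look as follows (the register, offset, and type values are illustrative only):

	/* Mark a hardware-pushed exception frame; signal=1 tells the
	 * unwinder the saved IP is exact. */
	UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_REGS signal=1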
index 8c4e3e536c04285e155617cde126f2c4fce10e08..ed134fbdfd32d3351226300f0a725414f04ade2a 100644 (file)
@@ -33,6 +33,8 @@ enum netdev_xdp_act {
        NETDEV_XDP_ACT_HW_OFFLOAD = 16,
        NETDEV_XDP_ACT_RX_SG = 32,
        NETDEV_XDP_ACT_NDO_XMIT_SG = 64,
+
+       NETDEV_XDP_ACT_MASK = 127,
 };
 
 enum {
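The new mask is the OR of all currently defined feature bits, so validation code can reject unknown bits in a single comparison; a sketch of the intended check (surrounding function context assumed):

	/* Sketch: reject any feature flag outside the defined set. */
	if (xdp_features & ~NETDEV_XDP_ACT_MASK)
		return -EINVAL;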
index a2cb8b16d6f10363b6a9cda15bf6cf744d8a1fb8..4b3797fe784bad06c42d21cfedcb5428ac2e61ae 100644 (file)
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 
-from .nlspec import SpecAttr, SpecAttrSet, SpecFamily, SpecOperation
+from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \
+    SpecFamily, SpecOperation
 from .ynl import YnlFamily
 
-__all__ = ["SpecAttr", "SpecAttrSet", "SpecFamily", "SpecOperation",
-           "YnlFamily"]
+__all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet",
+           "SpecFamily", "SpecOperation", "YnlFamily"]
index 0a2cfb5862aa74d914fe25aba86528bedd768813..960a356e8225e63360a820b6e9f7230e2986e3e0 100644 (file)
@@ -57,6 +57,92 @@ class SpecElement:
         pass
 
 
+class SpecEnumEntry(SpecElement):
+    """ Entry within an enum declared in the Netlink spec.
+
+    Attributes:
+        doc         documentation string
+        enum_set    back reference to the enum
+        value       numerical value of this entry (use accessors in most situations!)
+
+    Methods:
+        raw_value    raw value, i.e. the id in the enum, unlike user value which is a mask for flags
+        user_value   user value, same as raw value for enums; for flags it's the mask
+    """
+    def __init__(self, enum_set, yaml, prev, value_start):
+        if isinstance(yaml, str):
+            yaml = {'name': yaml}
+        super().__init__(enum_set.family, yaml)
+
+        self.doc = yaml.get('doc', '')
+        self.enum_set = enum_set
+
+        if 'value' in yaml:
+            self.value = yaml['value']
+        elif prev:
+            self.value = prev.value + 1
+        else:
+            self.value = value_start
+
+    def has_doc(self):
+        return bool(self.doc)
+
+    def raw_value(self):
+        return self.value
+
+    def user_value(self):
+        if self.enum_set['type'] == 'flags':
+            return 1 << self.value
+        else:
+            return self.value
+
+
+class SpecEnumSet(SpecElement):
+    """ Enum type
+
+    Represents an enumeration (list of numerical constants)
+    as declared in the "definitions" section of the spec.
+
+    Attributes:
+        type            enum or flags
+        entries         entries by name
+        entries_by_val  entries by value
+    Methods:
+        get_mask      for flags, compute the mask of all defined values
+    """
+    def __init__(self, family, yaml):
+        super().__init__(family, yaml)
+
+        self.type = yaml['type']
+
+        prev_entry = None
+        value_start = self.yaml.get('value-start', 0)
+        self.entries = dict()
+        self.entries_by_val = dict()
+        for entry in self.yaml['entries']:
+            e = self.new_entry(entry, prev_entry, value_start)
+            self.entries[e.name] = e
+            self.entries_by_val[e.raw_value()] = e
+            prev_entry = e
+
+    def new_entry(self, entry, prev_entry, value_start):
+        return SpecEnumEntry(self, entry, prev_entry, value_start)
+
+    def has_doc(self):
+        if 'doc' in self.yaml:
+            return True
+        for entry in self.entries.values():
+            if entry.has_doc():
+                return True
+        return False
+
+    def get_mask(self):
+        mask = 0
+        for e in self.entries.values():
+            mask += e.user_value()
+        return mask
+
+
 class SpecAttr(SpecElement):
     """ Single Netlink atttribute type
 
@@ -193,6 +279,7 @@ class SpecFamily(SpecElement):
         msgs       dict of all messages (indexed by name)
         msgs_by_value  dict of all messages (indexed by value)
         ops        dict of all valid requests / responses
+        consts     dict of all constants/enums
     """
     def __init__(self, spec_path, schema_path=None):
         with open(spec_path, "r") as stream:
@@ -222,6 +309,7 @@ class SpecFamily(SpecElement):
         self.req_by_value = collections.OrderedDict()
         self.rsp_by_value = collections.OrderedDict()
         self.ops = collections.OrderedDict()
+        self.consts = collections.OrderedDict()
 
         last_exception = None
         while len(self._resolution_list) > 0:
@@ -242,6 +330,9 @@ class SpecFamily(SpecElement):
             if len(resolved) == 0:
                 raise last_exception
 
+    def new_enum(self, elem):
+        return SpecEnumSet(self, elem)
+
     def new_attr_set(self, elem):
         return SpecAttrSet(self, elem)
 
@@ -296,6 +387,12 @@ class SpecFamily(SpecElement):
     def resolve(self):
         self.resolve_up(super())
 
+        for elem in self.yaml['definitions']:
+            if elem['type'] == 'enum' or elem['type'] == 'flags':
+                self.consts[elem['name']] = self.new_enum(elem)
+            else:
+                self.consts[elem['name']] = elem
+
         for elem in self.yaml['attribute-sets']:
             attr_set = self.new_attr_set(elem)
             self.attr_sets[elem['name']] = attr_set
index a842adc8e87e3b35259db0fb4dae9d77927f3423..90764a83c6461118d6dd9aeff19d16224bf87f9a 100644 (file)
@@ -303,11 +303,6 @@ class YnlFamily(SpecFamily):
         self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_CAP_ACK, 1)
         self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_EXT_ACK, 1)
 
-        self._types = dict()
-
-        for elem in self.yaml.get('definitions', []):
-            self._types[elem['name']] = elem
-
         self.async_msg_ids = set()
         self.async_msg_queue = []
 
@@ -353,13 +348,13 @@ class YnlFamily(SpecFamily):
 
     def _decode_enum(self, rsp, attr_spec):
         raw = rsp[attr_spec['name']]
-        enum = self._types[attr_spec['enum']]
+        enum = self.consts[attr_spec['enum']]
         i = attr_spec.get('value-start', 0)
         if 'enum-as-flags' in attr_spec and attr_spec['enum-as-flags']:
             value = set()
             while raw:
                 if raw & 1:
-                    value.add(enum['entries'][i])
+                    value.add(enum.entries_by_val[i].name)
                 raw >>= 1
                 i += 1
         else:
index c940ca834d3f3822acfaf0174c1de96ef16a6b03..d47376f19de72a19e7c3d54315d74afa9f7c668f 100755 (executable)
@@ -6,7 +6,7 @@ import collections
 import os
 import yaml
 
-from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation
+from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation, SpecEnumSet, SpecEnumEntry
 
 
 def c_upper(name):
@@ -567,97 +567,37 @@ class Struct:
         self.inherited = [c_lower(x) for x in sorted(self._inherited)]
 
 
-class EnumEntry:
+class EnumEntry(SpecEnumEntry):
     def __init__(self, enum_set, yaml, prev, value_start):
-        if isinstance(yaml, str):
-            self.name = yaml
-            yaml = {}
-            self.doc = ''
-        else:
-            self.name = yaml['name']
-            self.doc = yaml.get('doc', '')
-
-        self.yaml = yaml
-        self.enum_set = enum_set
-        self.c_name = c_upper(enum_set.value_pfx + self.name)
-
-        if 'value' in yaml:
-            self.value = yaml['value']
-            if prev:
-                self.value_change = (self.value != prev.value + 1)
-        elif prev:
-            self.value_change = False
-            self.value = prev.value + 1
+        super().__init__(enum_set, yaml, prev, value_start)
+
+        if prev:
+            self.value_change = (self.value != prev.value + 1)
         else:
-            self.value = value_start
             self.value_change = (self.value != 0)
-
         self.value_change = self.value_change or self.enum_set['type'] == 'flags'
 
-    def __getitem__(self, key):
-        return self.yaml[key]
-
-    def __contains__(self, key):
-        return key in self.yaml
-
-    def has_doc(self):
-        return bool(self.doc)
+        # Added by resolve:
+        self.c_name = None
+        delattr(self, "c_name")
 
-    # raw value, i.e. the id in the enum, unlike user value which is a mask for flags
-    def raw_value(self):
-        return self.value
+    def resolve(self):
+        self.resolve_up(super())
 
-    # user value, same as raw value for enums, for flags it's the mask
-    def user_value(self):
-        if self.enum_set['type'] == 'flags':
-            return 1 << self.value
-        else:
-            return self.value
+        self.c_name = c_upper(self.enum_set.value_pfx + self.name)
 
 
-class EnumSet:
+class EnumSet(SpecEnumSet):
     def __init__(self, family, yaml):
-        self.yaml = yaml
-        self.family = family
-
         self.render_name = c_lower(family.name + '-' + yaml['name'])
         self.enum_name = 'enum ' + self.render_name
 
         self.value_pfx = yaml.get('name-prefix', f"{family.name}-{yaml['name']}-")
 
-        self.type = yaml['type']
-
-        prev_entry = None
-        value_start = self.yaml.get('value-start', 0)
-        self.entries = {}
-        self.entry_list = []
-        for entry in self.yaml['entries']:
-            e = EnumEntry(self, entry, prev_entry, value_start)
-            self.entries[e.name] = e
-            self.entry_list.append(e)
-            prev_entry = e
-
-    def __getitem__(self, key):
-        return self.yaml[key]
-
-    def __contains__(self, key):
-        return key in self.yaml
-
-    def has_doc(self):
-        if 'doc' in self.yaml:
-            return True
-        for entry in self.entry_list:
-            if entry.has_doc():
-                return True
-        return False
+        super().__init__(family, yaml)
 
-    def get_mask(self):
-        mask = 0
-        idx = self.yaml.get('value-start', 0)
-        for _ in self.entry_list:
-            mask |= 1 << idx
-            idx += 1
-        return mask
+    def new_entry(self, entry, prev_entry, value_start):
+        return EnumEntry(self, entry, prev_entry, value_start)
 
 
 class AttrSet(SpecAttrSet):
@@ -792,8 +732,6 @@ class Family(SpecFamily):
 
         self.mcgrps = self.yaml.get('mcast-groups', {'list': []})
 
-        self.consts = dict()
-
         self.hooks = dict()
         for when in ['pre', 'post']:
             self.hooks[when] = dict()
@@ -820,6 +758,9 @@ class Family(SpecFamily):
         if self.kernel_policy == 'global':
             self._load_global_policy()
 
+    def new_enum(self, elem):
+        return EnumSet(self, elem)
+
     def new_attr_set(self, elem):
         return AttrSet(self, elem)
 
@@ -837,12 +778,6 @@ class Family(SpecFamily):
                 }
 
     def _dictify(self):
-        for elem in self.yaml['definitions']:
-            if elem['type'] == 'enum' or elem['type'] == 'flags':
-                self.consts[elem['name']] = EnumSet(self, elem)
-            else:
-                self.consts[elem['name']] = elem
-
         ntf = []
         for msg in self.msgs.values():
             if 'notify' in msg:
@@ -1980,7 +1915,7 @@ def render_uapi(family, cw):
                 if 'doc' in enum:
                     doc = ' - ' + enum['doc']
                 cw.write_doc_line(enum.enum_name + doc)
-                for entry in enum.entry_list:
+                for entry in enum.entries.values():
                     if entry.has_doc():
                         doc = '@' + entry.c_name + ': ' + entry['doc']
                         cw.write_doc_line(doc)
@@ -1988,7 +1923,7 @@ def render_uapi(family, cw):
 
             uapi_enum_start(family, cw, const, 'name')
             name_pfx = const.get('name-prefix', f"{family.name}-{const['name']}-")
-            for entry in enum.entry_list:
+            for entry in enum.entries.values():
                 suffix = ','
                 if entry.value_change:
                     suffix = f" = {entry.user_value()}" + suffix
@@ -1996,9 +1931,14 @@ def render_uapi(family, cw):
 
             if const.get('render-max', False):
                 cw.nl()
-                max_name = c_upper(name_pfx + 'max')
-                cw.p('__' + max_name + ',')
-                cw.p(max_name + ' = (__' + max_name + ' - 1)')
+                if const['type'] == 'flags':
+                    max_name = c_upper(name_pfx + 'mask')
+                    max_val = f' = {enum.get_mask()},'
+                    cw.p(max_name + max_val)
+                else:
+                    max_name = c_upper(name_pfx + 'max')
+                    cw.p('__' + max_name + ',')
+                    cw.p(max_name + ' = (__' + max_name + ' - 1)')
             cw.block_end(line=';')
             cw.nl()
         elif const['type'] == 'const':
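To make the render-max change concrete, here is a sketch of what the flags branch emits, matching the NETDEV_XDP_ACT_MASK entry visible in the netdev.h hunk earlier in this merge (entry names taken from the existing uapi header, middle entries elided):

	enum netdev_xdp_act {
		NETDEV_XDP_ACT_BASIC = 1,
		NETDEV_XDP_ACT_REDIRECT = 2,
		/* ... */
		NETDEV_XDP_ACT_NDO_XMIT_SG = 64,

		NETDEV_XDP_ACT_MASK = 127,	/* OR of all entries above */
	};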
index 14236db3677f6355eef18e2662bfd19d6b9c5ec0..4faa4dd72f350010a497b09020fb03338a99297d 100644 (file)
@@ -2,3 +2,4 @@
 arch/x86/lib/inat-tables.c
 /objtool
 fixdep
+libsubcmd/
index 33f2ee5a46d3bc981be557feb053cae2fcae954a..a3cdf8af6635a81d6e7c48037394ccefd351602c 100644 (file)
@@ -16,8 +16,6 @@ objtool-y += libctype.o
 objtool-y += str_error_r.o
 objtool-y += librbtree.o
 
-CFLAGS += -I$(srctree)/tools/lib
-
 $(OUTPUT)libstring.o: ../lib/string.c FORCE
        $(call rule_mkdir)
        $(call if_changed_dep,cc_o_c)
index 8a671902a18757b63f493b5611f5ef49addffb3a..8e53fc6735ef2668dcc51f571ec4b1eeb505b72f 100644 (file)
@@ -410,6 +410,14 @@ the objtool maintainers.
    can remove this warning by putting the ANNOTATE_INTRA_FUNCTION_CALL
    directive right before the call.
 
+12. file.o: warning: func(): not an indirect call target
+
+   This means that objtool is running with --ibt and a function expected
+   to be an indirect call target is not. In particular, this happens for
+   init_module() or cleanup_module() if a module relies on these special
+   names and does not use the module_init() / module_exit() macros to
+   create them.
+
 
 If the error doesn't seem to make sense, it could be a bug in objtool.
 Feel free to ask the objtool maintainer for help.
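As a sketch of the fix suggested in item 12, a module should declare its entry points through the macros instead of defining init_module()/cleanup_module() by hand:

	static int __init example_init(void)
	{
		return 0;
	}

	static void __exit example_exit(void)
	{
	}

	module_init(example_init);	/* generates init_module() */
	module_exit(example_exit);	/* generates cleanup_module() */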
index a3a9cc24e0e374937dfd96fc0f9b9b46fc16b3ee..83b100c1e7f6840c7c33caae8c804e5b5d9f2d46 100644 (file)
@@ -2,19 +2,18 @@
 include ../scripts/Makefile.include
 include ../scripts/Makefile.arch
 
-# always use the host compiler
-AR      = $(HOSTAR)
-CC      = $(HOSTCC)
-LD      = $(HOSTLD)
-
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(CURDIR)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
 endif
 
-SUBCMD_SRCDIR          = $(srctree)/tools/lib/subcmd/
-LIBSUBCMD_OUTPUT       = $(or $(OUTPUT),$(CURDIR)/)
-LIBSUBCMD              = $(LIBSUBCMD_OUTPUT)libsubcmd.a
+LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/
+ifneq ($(OUTPUT),)
+  LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd
+else
+  LIBSUBCMD_OUTPUT = $(CURDIR)/libsubcmd
+endif
+LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a
 
 OBJTOOL    := $(OUTPUT)objtool
 OBJTOOL_IN := $(OBJTOOL)-in.o
@@ -28,16 +27,29 @@ INCLUDES := -I$(srctree)/tools/include \
            -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
            -I$(srctree)/tools/arch/$(SRCARCH)/include  \
            -I$(srctree)/tools/objtool/include \
-           -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include
+           -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include \
+           -I$(LIBSUBCMD_OUTPUT)/include
+# Note, EXTRA_WARNINGS here was determined for CC and not HOSTCC, it
+# is passed here to match a legacy behavior.
 WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
-CFLAGS   := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
-LDFLAGS  += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
+OBJTOOL_CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
+OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
 
 # Allow old libelf to be used:
-elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr)
-CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
+elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(HOSTCC) $(OBJTOOL_CFLAGS) -x c -E - | grep elf_getshdr)
+OBJTOOL_CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
+
+# Always want host compilation.
+HOST_OVERRIDES := CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)"
 
 AWK = awk
+MKDIR = mkdir
+
+ifeq ($(V),1)
+  Q =
+else
+  Q = @
+endif
 
 BUILD_ORC := n
 
@@ -49,21 +61,33 @@ export BUILD_ORC
 export srctree OUTPUT CFLAGS SRCARCH AWK
 include $(srctree)/tools/build/Makefile.include
 
-$(OBJTOOL_IN): fixdep FORCE
-       @$(CONFIG_SHELL) ./sync-check.sh
-       @$(MAKE) $(build)=objtool
+$(OBJTOOL_IN): fixdep $(LIBSUBCMD) FORCE
+       $(Q)$(CONFIG_SHELL) ./sync-check.sh
+       $(Q)$(MAKE) $(build)=objtool $(HOST_OVERRIDES) CFLAGS="$(OBJTOOL_CFLAGS)" \
+               LDFLAGS="$(OBJTOOL_LDFLAGS)"
+
 
 $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
-       $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
+       $(QUIET_LINK)$(HOSTCC) $(OBJTOOL_IN) $(OBJTOOL_LDFLAGS) -o $@
+
+
+$(LIBSUBCMD_OUTPUT):
+       $(Q)$(MKDIR) -p $@
 
+$(LIBSUBCMD): fixdep $(LIBSUBCMD_OUTPUT) FORCE
+       $(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \
+               DESTDIR=$(LIBSUBCMD_OUTPUT) prefix= subdir= \
+               $(HOST_OVERRIDES) EXTRA_CFLAGS="$(OBJTOOL_CFLAGS)" \
+               $@ install_headers
 
-$(LIBSUBCMD): fixdep FORCE
-       $(Q)$(MAKE) -C $(SUBCMD_SRCDIR) OUTPUT=$(LIBSUBCMD_OUTPUT)
+$(LIBSUBCMD)-clean:
+       $(call QUIET_CLEAN, libsubcmd)
+       $(Q)$(RM) -r -- $(LIBSUBCMD_OUTPUT)
 
-clean:
+clean: $(LIBSUBCMD)-clean
        $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL)
        $(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
-       $(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep $(LIBSUBCMD)
+       $(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep
 
 FORCE:
 
index 9c653805a08a97cdbb84d0dd88c2a6b26069c7f4..53b55690f32041a4c83fcab976fab04ac21474e0 100644
@@ -41,38 +41,36 @@ const char *arch_ret_insn(int len)
 
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
                            unsigned long offset, unsigned int maxlen,
-                           unsigned int *len, enum insn_type *type,
-                           unsigned long *immediate,
-                           struct list_head *ops_list)
+                           struct instruction *insn)
 {
        unsigned int opcode;
        enum insn_type typ;
        unsigned long imm;
-       u32 insn;
+       u32 ins;
 
-       insn = bswap_if_needed(file->elf, *(u32 *)(sec->data->d_buf + offset));
-       opcode = insn >> 26;
+       ins = bswap_if_needed(file->elf, *(u32 *)(sec->data->d_buf + offset));
+       opcode = ins >> 26;
        typ = INSN_OTHER;
        imm = 0;
 
        switch (opcode) {
        case 18: /* b[l][a] */
-               if ((insn & 3) == 1) /* bl */
+               if ((ins & 3) == 1) /* bl */
                        typ = INSN_CALL;
 
-               imm = insn & 0x3fffffc;
+               imm = ins & 0x3fffffc;
                if (imm & 0x2000000)
                        imm -= 0x4000000;
                break;
        }
 
        if (opcode == 1)
-               *len = 8;
+               insn->len = 8;
        else
-               *len = 4;
+               insn->len = 4;
 
-       *type = typ;
-       *immediate = imm;
+       insn->type = typ;
+       insn->immediate = imm;
 
        return 0;
 }
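The retained immediate handling in the PowerPC hunk above is a 26-bit sign extension of the I-form branch offset: ins & 0x3fffffc keeps bits 2..25 (the low AA/LK bits are masked off), and bit 25 (0x2000000) is the sign. A standalone sketch of the arithmetic:

#include <stdio.h>
#include <stdint.h>

/* Mirror of the imm computation in arch_decode_instruction() above. */
static long branch_offset(uint32_t ins)
{
	long imm = ins & 0x3fffffc;	/* 26-bit field, AA/LK bits masked off */

	if (imm & 0x2000000)		/* sign bit of the field */
		imm -= 0x4000000;	/* extend to a negative offset */
	return imm;
}

int main(void)
{
	/* 0x4bfffffd encodes "bl .-4": field 0x3fffffc extends to -4 */
	printf("%ld\n", branch_offset(0x4bfffffd));
	return 0;
}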
index e7b030f7e2a5bddf02aae6d41cc2080b0857c335..9ef024fd648c1b39e039fc2ffb62a2aa8c344756 100644
@@ -105,7 +105,7 @@ bool arch_pc_relative_reloc(struct reloc *reloc)
 #define ADD_OP(op) \
        if (!(op = calloc(1, sizeof(*op)))) \
                return -1; \
-       else for (list_add_tail(&op->list, ops_list); op; op = NULL)
+       else for (*ops_list = op, ops_list = &op->next; op; op = NULL)
 
 /*
  * Helpers to decode ModRM/SIB:
@@ -146,12 +146,11 @@ static bool has_notrack_prefix(struct insn *insn)
 
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
                            unsigned long offset, unsigned int maxlen,
-                           unsigned int *len, enum insn_type *type,
-                           unsigned long *immediate,
-                           struct list_head *ops_list)
+                           struct instruction *insn)
 {
+       struct stack_op **ops_list = &insn->stack_ops;
        const struct elf *elf = file->elf;
-       struct insn insn;
+       struct insn ins;
        int x86_64, ret;
        unsigned char op1, op2, op3, prefix,
                      rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
@@ -165,42 +164,42 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
        if (x86_64 == -1)
                return -1;
 
-       ret = insn_decode(&insn, sec->data->d_buf + offset, maxlen,
+       ret = insn_decode(&ins, sec->data->d_buf + offset, maxlen,
                          x86_64 ? INSN_MODE_64 : INSN_MODE_32);
        if (ret < 0) {
                WARN("can't decode instruction at %s:0x%lx", sec->name, offset);
                return -1;
        }
 
-       *len = insn.length;
-       *type = INSN_OTHER;
+       insn->len = ins.length;
+       insn->type = INSN_OTHER;
 
-       if (insn.vex_prefix.nbytes)
+       if (ins.vex_prefix.nbytes)
                return 0;
 
-       prefix = insn.prefixes.bytes[0];
+       prefix = ins.prefixes.bytes[0];
 
-       op1 = insn.opcode.bytes[0];
-       op2 = insn.opcode.bytes[1];
-       op3 = insn.opcode.bytes[2];
+       op1 = ins.opcode.bytes[0];
+       op2 = ins.opcode.bytes[1];
+       op3 = ins.opcode.bytes[2];
 
-       if (insn.rex_prefix.nbytes) {
-               rex = insn.rex_prefix.bytes[0];
+       if (ins.rex_prefix.nbytes) {
+               rex = ins.rex_prefix.bytes[0];
                rex_w = X86_REX_W(rex) >> 3;
                rex_r = X86_REX_R(rex) >> 2;
                rex_x = X86_REX_X(rex) >> 1;
                rex_b = X86_REX_B(rex);
        }
 
-       if (insn.modrm.nbytes) {
-               modrm = insn.modrm.bytes[0];
+       if (ins.modrm.nbytes) {
+               modrm = ins.modrm.bytes[0];
                modrm_mod = X86_MODRM_MOD(modrm);
                modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r;
                modrm_rm  = X86_MODRM_RM(modrm)  + 8*rex_b;
        }
 
-       if (insn.sib.nbytes) {
-               sib = insn.sib.bytes[0];
+       if (ins.sib.nbytes) {
+               sib = ins.sib.bytes[0];
                /* sib_scale = X86_SIB_SCALE(sib); */
                sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
                sib_base  = X86_SIB_BASE(sib)  + 8*rex_b;
@@ -254,7 +253,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
                break;
 
        case 0x70 ... 0x7f:
-               *type = INSN_JUMP_CONDITIONAL;
+               insn->type = INSN_JUMP_CONDITIONAL;
                break;
 
        case 0x80 ... 0x83:
@@ -278,7 +277,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
                if (!rm_is_reg(CFI_SP))
                        break;
 
-               imm = insn.immediate.value;
+               imm = ins.immediate.value;
                if (op1 & 2) { /* sign extend */
                        if (op1 & 1) { /* imm32 */
                                imm <<= 32;
@@ -309,7 +308,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
                        ADD_OP(op) {
                                op->src.type = OP_SRC_AND;
                                op->src.reg = CFI_SP;
-                               op->src.offset = insn.immediate.value;
+                               op->src.offset = ins.immediate.value;
                                op->dest.type = OP_DEST_REG;
                                op->dest.reg = CFI_SP;
                        }
@@ -356,7 +355,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
                                        op->src.reg = CFI_SP;
                                        op->dest.type = OP_DEST_REG_INDIRECT;
                                        op->dest.reg = modrm_rm;
-                                       op->dest.offset = insn.displacement.value;
+                                       op->dest.offset = ins.displacement.value;
                                }
                                break;
                        }
@@ -389,7 +388,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
                                op->src.reg = modrm_reg;
                                op->dest.type = OP_DEST_REG_INDIRECT;
                                op->dest.reg = CFI_BP;
-                               op->dest.offset = insn.displacement.value;
+                               op->dest.offset = ins.displacement.value;
                        }
                        break;
                }
@@ -402,7 +401,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
                                op->src.reg = modrm_reg;
                                op->dest.type = OP_DEST_REG_INDIRECT;
                                op->dest.reg = CFI_SP;
-                               op->dest.offset = insn.displacement.value;
+                               op->dest.offset = ins.displacement.value;
                        }
                        break;
                }
@@ -419,7 +418,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
                        ADD_OP(op) {
                                op->src.type = OP_SRC_REG_INDIRECT;
                                op->src.reg = CFI_BP;
-                               op->src.offset = insn.displacement.value;
+                               op->src.offset = ins.displacement.value;
                                op->dest.type = OP_DEST_REG;
                                op->dest.reg = modrm_reg;
                        }
@@ -432,7 +431,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
                        ADD_OP(op) {
                                op->src.type = OP_SRC_REG_INDIRECT;
                                op->src.reg = CFI_SP;
-                               op->src.offset = insn.displacement.value;
+                               op->src.offset = ins.displacement.value;
                                op->dest.type = OP_DEST_REG;
                                op->dest.reg = modrm_reg;
                        }
@@ -464,7 +463,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 
                /* lea disp(%src), %dst */
                ADD_OP(op) {
-                       op->src.offset = insn.displacement.value;
+                       op->src.offset = ins.displacement.value;
                        if (!op->src.offset) {
                                /* lea (%src), %dst */
                                op->src.type = OP_SRC_REG;
@@ -487,7 +486,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
                break;
 
        case 0x90:
-               *type = INSN_NOP;
+               insn->type = INSN_NOP;
                break;
 
        case 0x9c:
@@ -511,39 +510,39 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
                if (op2 == 0x01) {
 
                        if (modrm == 0xca)
-                               *type = INSN_CLAC;
+                               insn->type = INSN_CLAC;
                        else if (modrm == 0xcb)
-                               *type = INSN_STAC;
+                               insn->type = INSN_STAC;
 
                } else if (op2 >= 0x80 && op2 <= 0x8f) {
 
-                       *type = INSN_JUMP_CONDITIONAL;
+                       insn->type = INSN_JUMP_CONDITIONAL;
 
                } else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
                           op2 == 0x35) {
 
                        /* sysenter, sysret */
-                       *type = INSN_CONTEXT_SWITCH;
+                       insn->type = INSN_CONTEXT_SWITCH;
 
                } else if (op2 == 0x0b || op2 == 0xb9) {
 
                        /* ud2 */
-                       *type = INSN_BUG;
+                       insn->type = INSN_BUG;
 
                } else if (op2 == 0x0d || op2 == 0x1f) {
 
                        /* nopl/nopw */
-                       *type = INSN_NOP;
+                       insn->type = INSN_NOP;
 
                } else if (op2 == 0x1e) {
 
                        if (prefix == 0xf3 && (modrm == 0xfa || modrm == 0xfb))
-                               *type = INSN_ENDBR;
+                               insn->type = INSN_ENDBR;
 
 
                } else if (op2 == 0x38 && op3 == 0xf8) {
-                       if (insn.prefixes.nbytes == 1 &&
-                           insn.prefixes.bytes[0] == 0xf2) {
+                       if (ins.prefixes.nbytes == 1 &&
+                           ins.prefixes.bytes[0] == 0xf2) {
                                /* ENQCMD cannot be used in the kernel. */
                                WARN("ENQCMD instruction at %s:%lx", sec->name,
                                     offset);
@@ -591,29 +590,29 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 
        case 0xcc:
                /* int3 */
-               *type = INSN_TRAP;
+               insn->type = INSN_TRAP;
                break;
 
        case 0xe3:
                /* jecxz/jrcxz */
-               *type = INSN_JUMP_CONDITIONAL;
+               insn->type = INSN_JUMP_CONDITIONAL;
                break;
 
        case 0xe9:
        case 0xeb:
-               *type = INSN_JUMP_UNCONDITIONAL;
+               insn->type = INSN_JUMP_UNCONDITIONAL;
                break;
 
        case 0xc2:
        case 0xc3:
-               *type = INSN_RETURN;
+               insn->type = INSN_RETURN;
                break;
 
        case 0xc7: /* mov imm, r/m */
                if (!opts.noinstr)
                        break;
 
-               if (insn.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) {
+               if (ins.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) {
                        struct reloc *immr, *disp;
                        struct symbol *func;
                        int idx;
@@ -661,17 +660,17 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 
        case 0xca: /* retf */
        case 0xcb: /* retf */
-               *type = INSN_CONTEXT_SWITCH;
+               insn->type = INSN_CONTEXT_SWITCH;
                break;
 
        case 0xe0: /* loopne */
        case 0xe1: /* loope */
        case 0xe2: /* loop */
-               *type = INSN_JUMP_CONDITIONAL;
+               insn->type = INSN_JUMP_CONDITIONAL;
                break;
 
        case 0xe8:
-               *type = INSN_CALL;
+               insn->type = INSN_CALL;
                /*
                 * For the impact on the stack, a CALL behaves like
                 * a PUSH of an immediate value (the return address).
@@ -683,30 +682,30 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
                break;
 
        case 0xfc:
-               *type = INSN_CLD;
+               insn->type = INSN_CLD;
                break;
 
        case 0xfd:
-               *type = INSN_STD;
+               insn->type = INSN_STD;
                break;
 
        case 0xff:
                if (modrm_reg == 2 || modrm_reg == 3) {
 
-                       *type = INSN_CALL_DYNAMIC;
-                       if (has_notrack_prefix(&insn))
+                       insn->type = INSN_CALL_DYNAMIC;
+                       if (has_notrack_prefix(&ins))
                                WARN("notrack prefix found at %s:0x%lx", sec->name, offset);
 
                } else if (modrm_reg == 4) {
 
-                       *type = INSN_JUMP_DYNAMIC;
-                       if (has_notrack_prefix(&insn))
+                       insn->type = INSN_JUMP_DYNAMIC;
+                       if (has_notrack_prefix(&ins))
                                WARN("notrack prefix found at %s:0x%lx", sec->name, offset);
 
                } else if (modrm_reg == 5) {
 
                        /* jmpf */
-                       *type = INSN_CONTEXT_SWITCH;
+                       insn->type = INSN_CONTEXT_SWITCH;
 
                } else if (modrm_reg == 6) {
 
@@ -723,7 +722,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
                break;
        }
 
-       *immediate = insn.immediate.nbytes ? insn.immediate.value : 0;
+       insn->immediate = ins.immediate.nbytes ? ins.immediate.value : 0;
 
        return 0;
 }
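ADD_OP above now appends to a singly linked stack_ops list through a pointer-to-pointer cursor instead of list_add_tail(). A self-contained sketch of that idiom (payload simplified to an int):

#include <stdio.h>
#include <stdlib.h>

struct stack_op {
	struct stack_op *next;
	int val;			/* stand-in for the dest/src payload */
};

int main(void)
{
	struct stack_op *head = NULL;
	struct stack_op **ops_list = &head;	/* where the next node gets linked */

	for (int i = 0; i < 3; i++) {
		struct stack_op *op = calloc(1, sizeof(*op));
		if (!op)
			return 1;
		op->val = i;
		*ops_list = op;		/* link at the tail, O(1) */
		ops_list = &op->next;	/* advance the cursor */
	}

	for (struct stack_op *op = head; op; op = op->next)
		printf("%d\n", op->val);
	return 0;
}

This is the same shape as the macro's else-branch: *ops_list = op, then ops_list = &op->next.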
index a4f39407bf59a76dc0d6c4301f215d8506b4aabd..7c175198d09fc7a510713c26649af960a867a130 100644
@@ -65,7 +65,7 @@ static int parse_hacks(const struct option *opt, const char *str, int unset)
        return found ? 0 : -1;
 }
 
-const struct option check_options[] = {
+static const struct option check_options[] = {
        OPT_GROUP("Actions:"),
        OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr,skylake", "patch toolchain bugs/limitations", parse_hacks),
        OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
index 35374812afdc409ca5d2b0b6df91ae8ad8b34dc7..f937be1afe65ca9c968038c701b927dd4126f409 100644
@@ -23,7 +23,7 @@
 #include <linux/static_call_types.h>
 
 struct alternative {
-       struct list_head list;
+       struct alternative *next;
        struct instruction *insn;
        bool skip_orig;
 };
@@ -47,27 +47,29 @@ struct instruction *find_insn(struct objtool_file *file,
        return NULL;
 }
 
-static struct instruction *next_insn_same_sec(struct objtool_file *file,
-                                             struct instruction *insn)
+struct instruction *next_insn_same_sec(struct objtool_file *file,
+                                      struct instruction *insn)
 {
-       struct instruction *next = list_next_entry(insn, list);
+       if (insn->idx == INSN_CHUNK_MAX)
+               return find_insn(file, insn->sec, insn->offset + insn->len);
 
-       if (!next || &next->list == &file->insn_list || next->sec != insn->sec)
+       insn++;
+       if (!insn->len)
                return NULL;
 
-       return next;
+       return insn;
 }
 
 static struct instruction *next_insn_same_func(struct objtool_file *file,
                                               struct instruction *insn)
 {
-       struct instruction *next = list_next_entry(insn, list);
+       struct instruction *next = next_insn_same_sec(file, insn);
        struct symbol *func = insn_func(insn);
 
        if (!func)
                return NULL;
 
-       if (&next->list != &file->insn_list && insn_func(next) == func)
+       if (next && insn_func(next) == func)
                return next;
 
        /* Check if we're already in the subfunction: */
@@ -78,17 +80,35 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
        return find_insn(file, func->cfunc->sec, func->cfunc->offset);
 }
 
+static struct instruction *prev_insn_same_sec(struct objtool_file *file,
+                                             struct instruction *insn)
+{
+       if (insn->idx == 0) {
+               if (insn->prev_len)
+                       return find_insn(file, insn->sec, insn->offset - insn->prev_len);
+               return NULL;
+       }
+
+       return insn - 1;
+}
+
 static struct instruction *prev_insn_same_sym(struct objtool_file *file,
-                                              struct instruction *insn)
+                                             struct instruction *insn)
 {
-       struct instruction *prev = list_prev_entry(insn, list);
+       struct instruction *prev = prev_insn_same_sec(file, insn);
 
-       if (&prev->list != &file->insn_list && insn_func(prev) == insn_func(insn))
+       if (prev && insn_func(prev) == insn_func(insn))
                return prev;
 
        return NULL;
 }
 
+#define for_each_insn(file, insn)                                      \
+       for (struct section *__sec, *__fake = (struct section *)1;      \
+            __fake; __fake = NULL)                                     \
+               for_each_sec(file, __sec)                               \
+                       sec_for_each_insn(file, __sec, insn)
+
 #define func_for_each_insn(file, func, insn)                           \
        for (insn = find_insn(file, func->sec, func->offset);           \
             insn;                                                      \
@@ -96,16 +116,13 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
 
 #define sym_for_each_insn(file, sym, insn)                             \
        for (insn = find_insn(file, sym->sec, sym->offset);             \
-            insn && &insn->list != &file->insn_list &&                 \
-               insn->sec == sym->sec &&                                \
-               insn->offset < sym->offset + sym->len;                  \
-            insn = list_next_entry(insn, list))
+            insn && insn->offset < sym->offset + sym->len;             \
+            insn = next_insn_same_sec(file, insn))
 
 #define sym_for_each_insn_continue_reverse(file, sym, insn)            \
-       for (insn = list_prev_entry(insn, list);                        \
-            &insn->list != &file->insn_list &&                         \
-               insn->sec == sym->sec && insn->offset >= sym->offset;   \
-            insn = list_prev_entry(insn, list))
+       for (insn = prev_insn_same_sec(file, insn);                     \
+            insn && insn->offset >= sym->offset;                       \
+            insn = prev_insn_same_sec(file, insn))
 
 #define sec_for_each_insn_from(file, insn)                             \
        for (; insn; insn = next_insn_same_sec(file, insn))
@@ -114,16 +131,34 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
        for (insn = next_insn_same_sec(file, insn); insn;               \
             insn = next_insn_same_sec(file, insn))
 
+static inline struct symbol *insn_call_dest(struct instruction *insn)
+{
+       if (insn->type == INSN_JUMP_DYNAMIC ||
+           insn->type == INSN_CALL_DYNAMIC)
+               return NULL;
+
+       return insn->_call_dest;
+}
+
+static inline struct reloc *insn_jump_table(struct instruction *insn)
+{
+       if (insn->type == INSN_JUMP_DYNAMIC ||
+           insn->type == INSN_CALL_DYNAMIC)
+               return insn->_jump_table;
+
+       return NULL;
+}
+
 static bool is_jump_table_jump(struct instruction *insn)
 {
        struct alt_group *alt_group = insn->alt_group;
 
-       if (insn->jump_table)
+       if (insn_jump_table(insn))
                return true;
 
        /* Retpoline alternative for a jump table? */
        return alt_group && alt_group->orig_group &&
-              alt_group->orig_group->first_insn->jump_table;
+              insn_jump_table(alt_group->orig_group->first_insn);
 }
 
 static bool is_sibling_call(struct instruction *insn)
@@ -137,8 +172,8 @@ static bool is_sibling_call(struct instruction *insn)
                        return !is_jump_table_jump(insn);
        }
 
-       /* add_jump_destinations() sets insn->call_dest for sibling calls. */
-       return (is_static_jump(insn) && insn->call_dest);
+       /* add_jump_destinations() sets insn_call_dest(insn) for sibling calls. */
+       return (is_static_jump(insn) && insn_call_dest(insn));
 }
 
 /*
@@ -274,8 +309,8 @@ static void init_insn_state(struct objtool_file *file, struct insn_state *state,
 
        /*
         * We need the full vmlinux for noinstr validation, otherwise we can
-        * not correctly determine insn->call_dest->sec (external symbols do
-        * not have a section).
+        * not correctly determine insn_call_dest(insn)->sec (external symbols
+        * do not have a section).
         */
        if (opts.link && opts.noinstr && sec)
                state->noinstr = sec->noinstr;
@@ -366,6 +401,9 @@ static int decode_instructions(struct objtool_file *file)
        int ret;
 
        for_each_sec(file, sec) {
+               struct instruction *insns = NULL;
+               u8 prev_len = 0;
+               u8 idx = 0;
 
                if (!(sec->sh.sh_flags & SHF_EXECINSTR))
                        continue;
@@ -391,26 +429,31 @@ static int decode_instructions(struct objtool_file *file)
                        sec->init = true;
 
                for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
-                       insn = malloc(sizeof(*insn));
-                       if (!insn) {
-                               WARN("malloc failed");
-                               return -1;
+                       if (!insns || idx == INSN_CHUNK_MAX) {
+                               insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE);
+                               if (!insns) {
+                                       WARN("malloc failed");
+                                       return -1;
+                               }
+                               idx = 0;
+                       } else {
+                               idx++;
                        }
-                       memset(insn, 0, sizeof(*insn));
-                       INIT_LIST_HEAD(&insn->alts);
-                       INIT_LIST_HEAD(&insn->stack_ops);
-                       INIT_LIST_HEAD(&insn->call_node);
+                       insn = &insns[idx];
+                       insn->idx = idx;
 
+                       INIT_LIST_HEAD(&insn->call_node);
                        insn->sec = sec;
                        insn->offset = offset;
+                       insn->prev_len = prev_len;
 
                        ret = arch_decode_instruction(file, sec, offset,
                                                      sec->sh.sh_size - offset,
-                                                     &insn->len, &insn->type,
-                                                     &insn->immediate,
-                                                     &insn->stack_ops);
+                                                     insn);
                        if (ret)
-                               goto err;
+                               return ret;
+
+                       prev_len = insn->len;
 
                        /*
                         * By default, "ud2" is a dead end unless otherwise
@@ -421,10 +464,11 @@ static int decode_instructions(struct objtool_file *file)
                                insn->dead_end = true;
 
                        hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset));
-                       list_add_tail(&insn->list, &file->insn_list);
                        nr_insns++;
                }
 
+//             printf("%s: last chunk used: %d\n", sec->name, (int)idx);
+
                list_for_each_entry(func, &sec->symbol_list, list) {
                        if (func->type != STT_NOTYPE && func->type != STT_FUNC)
                                continue;
@@ -467,10 +511,6 @@ static int decode_instructions(struct objtool_file *file)
                printf("nr_insns: %lu\n", nr_insns);
 
        return 0;
-
-err:
-       free(insn);
-       return ret;
 }
 
 /*
@@ -585,7 +625,7 @@ static int add_dead_ends(struct objtool_file *file)
                }
                insn = find_insn(file, reloc->sym->sec, reloc->addend);
                if (insn)
-                       insn = list_prev_entry(insn, list);
+                       insn = prev_insn_same_sec(file, insn);
                else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
                        insn = find_last_insn(file, reloc->sym->sec);
                        if (!insn) {
@@ -620,7 +660,7 @@ reachable:
                }
                insn = find_insn(file, reloc->sym->sec, reloc->addend);
                if (insn)
-                       insn = list_prev_entry(insn, list);
+                       insn = prev_insn_same_sec(file, insn);
                else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
                        insn = find_last_insn(file, reloc->sym->sec);
                        if (!insn) {
@@ -682,7 +722,7 @@ static int create_static_call_sections(struct objtool_file *file)
                        return -1;
 
                /* find key symbol */
-               key_name = strdup(insn->call_dest->name);
+               key_name = strdup(insn_call_dest(insn)->name);
                if (!key_name) {
                        perror("strdup");
                        return -1;
@@ -690,6 +730,7 @@ static int create_static_call_sections(struct objtool_file *file)
                if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR,
                            STATIC_CALL_TRAMP_PREFIX_LEN)) {
                        WARN("static_call: trampoline name malformed: %s", key_name);
+                       free(key_name);
                        return -1;
                }
                tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN;
@@ -699,6 +740,7 @@ static int create_static_call_sections(struct objtool_file *file)
                if (!key_sym) {
                        if (!opts.module) {
                                WARN("static_call: can't find static_call_key symbol: %s", tmp);
+                               free(key_name);
                                return -1;
                        }
 
@@ -711,7 +753,7 @@ static int create_static_call_sections(struct objtool_file *file)
                         * trampoline address.  This is fixed up in
                         * static_call_add_module().
                         */
-                       key_sym = insn->call_dest;
+                       key_sym = insn_call_dest(insn);
                }
                free(key_name);
 
@@ -856,8 +898,15 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file)
        list_for_each_entry(insn, &file->endbr_list, call_node) {
 
                int *site = (int *)sec->data->d_buf + idx;
+               struct symbol *sym = insn->sym;
                *site = 0;
 
+               if (opts.module && sym && sym->type == STT_FUNC &&
+                   insn->offset == sym->offset &&
+                   (!strcmp(sym->name, "init_module") ||
+                    !strcmp(sym->name, "cleanup_module")))
+                       WARN("%s(): not an indirect call target", sym->name);
+
                if (elf_add_reloc_to_insn(file->elf, sec,
                                          idx * sizeof(int),
                                          R_X86_64_PC32,
@@ -1302,43 +1351,42 @@ __weak bool arch_is_rethunk(struct symbol *sym)
        return false;
 }
 
-#define NEGATIVE_RELOC ((void *)-1L)
-
 static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
 {
-       if (insn->reloc == NEGATIVE_RELOC)
+       struct reloc *reloc;
+
+       if (insn->no_reloc)
                return NULL;
 
-       if (!insn->reloc) {
-               if (!file)
-                       return NULL;
+       if (!file)
+               return NULL;
 
-               insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-                                                      insn->offset, insn->len);
-               if (!insn->reloc) {
-                       insn->reloc = NEGATIVE_RELOC;
-                       return NULL;
-               }
+       reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+                                        insn->offset, insn->len);
+       if (!reloc) {
+               insn->no_reloc = 1;
+               return NULL;
        }
 
-       return insn->reloc;
+       return reloc;
 }
 
 static void remove_insn_ops(struct instruction *insn)
 {
-       struct stack_op *op, *tmp;
+       struct stack_op *op, *next;
 
-       list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
-               list_del(&op->list);
+       for (op = insn->stack_ops; op; op = next) {
+               next = op->next;
                free(op);
        }
+       insn->stack_ops = NULL;
 }
 
 static void annotate_call_site(struct objtool_file *file,
                               struct instruction *insn, bool sibling)
 {
        struct reloc *reloc = insn_reloc(file, insn);
-       struct symbol *sym = insn->call_dest;
+       struct symbol *sym = insn_call_dest(insn);
 
        if (!sym)
                sym = reloc->sym;
@@ -1423,7 +1471,7 @@ static void annotate_call_site(struct objtool_file *file,
 static void add_call_dest(struct objtool_file *file, struct instruction *insn,
                          struct symbol *dest, bool sibling)
 {
-       insn->call_dest = dest;
+       insn->_call_dest = dest;
        if (!dest)
                return;
 
@@ -1681,12 +1729,12 @@ static int add_call_destinations(struct objtool_file *file)
                        if (insn->ignore)
                                continue;
 
-                       if (!insn->call_dest) {
+                       if (!insn_call_dest(insn)) {
                                WARN_FUNC("unannotated intra-function call", insn->sec, insn->offset);
                                return -1;
                        }
 
-                       if (insn_func(insn) && insn->call_dest->type != STT_FUNC) {
+                       if (insn_func(insn) && insn_call_dest(insn)->type != STT_FUNC) {
                                WARN_FUNC("unsupported call to non-function",
                                          insn->sec, insn->offset);
                                return -1;
@@ -1724,36 +1772,50 @@ static int handle_group_alt(struct objtool_file *file,
                            struct instruction *orig_insn,
                            struct instruction **new_insn)
 {
-       struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL;
+       struct instruction *last_new_insn = NULL, *insn, *nop = NULL;
        struct alt_group *orig_alt_group, *new_alt_group;
        unsigned long dest_off;
 
-
-       orig_alt_group = malloc(sizeof(*orig_alt_group));
+       orig_alt_group = orig_insn->alt_group;
        if (!orig_alt_group) {
-               WARN("malloc failed");
-               return -1;
-       }
-       orig_alt_group->cfi = calloc(special_alt->orig_len,
-                                    sizeof(struct cfi_state *));
-       if (!orig_alt_group->cfi) {
-               WARN("calloc failed");
-               return -1;
-       }
+               struct instruction *last_orig_insn = NULL;
 
-       last_orig_insn = NULL;
-       insn = orig_insn;
-       sec_for_each_insn_from(file, insn) {
-               if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
-                       break;
+               orig_alt_group = malloc(sizeof(*orig_alt_group));
+               if (!orig_alt_group) {
+                       WARN("malloc failed");
+                       return -1;
+               }
+               orig_alt_group->cfi = calloc(special_alt->orig_len,
+                                            sizeof(struct cfi_state *));
+               if (!orig_alt_group->cfi) {
+                       WARN("calloc failed");
+                       return -1;
+               }
 
-               insn->alt_group = orig_alt_group;
-               last_orig_insn = insn;
-       }
-       orig_alt_group->orig_group = NULL;
-       orig_alt_group->first_insn = orig_insn;
-       orig_alt_group->last_insn = last_orig_insn;
+               insn = orig_insn;
+               sec_for_each_insn_from(file, insn) {
+                       if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
+                               break;
 
+                       insn->alt_group = orig_alt_group;
+                       last_orig_insn = insn;
+               }
+               orig_alt_group->orig_group = NULL;
+               orig_alt_group->first_insn = orig_insn;
+               orig_alt_group->last_insn = last_orig_insn;
+               orig_alt_group->nop = NULL;
+       } else {
+               if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len -
+                   orig_alt_group->first_insn->offset != special_alt->orig_len) {
+                       WARN_FUNC("weirdly overlapping alternative! %ld != %d",
+                                 orig_insn->sec, orig_insn->offset,
+                                 orig_alt_group->last_insn->offset +
+                                 orig_alt_group->last_insn->len -
+                                 orig_alt_group->first_insn->offset,
+                                 special_alt->orig_len);
+                       return -1;
+               }
+       }
 
        new_alt_group = malloc(sizeof(*new_alt_group));
        if (!new_alt_group) {
@@ -1775,8 +1837,6 @@ static int handle_group_alt(struct objtool_file *file,
                        return -1;
                }
                memset(nop, 0, sizeof(*nop));
-               INIT_LIST_HEAD(&nop->alts);
-               INIT_LIST_HEAD(&nop->stack_ops);
 
                nop->sec = special_alt->new_sec;
                nop->offset = special_alt->new_off + special_alt->new_len;
@@ -1830,7 +1890,7 @@ static int handle_group_alt(struct objtool_file *file,
 
                dest_off = arch_jump_destination(insn);
                if (dest_off == special_alt->new_off + special_alt->new_len) {
-                       insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
+                       insn->jump_dest = next_insn_same_sec(file, orig_alt_group->last_insn);
                        if (!insn->jump_dest) {
                                WARN_FUNC("can't find alternative jump destination",
                                          insn->sec, insn->offset);
@@ -1845,12 +1905,11 @@ static int handle_group_alt(struct objtool_file *file,
                return -1;
        }
 
-       if (nop)
-               list_add(&nop->list, &last_new_insn->list);
 end:
        new_alt_group->orig_group = orig_alt_group;
        new_alt_group->first_insn = *new_insn;
-       new_alt_group->last_insn = nop ? : last_new_insn;
+       new_alt_group->last_insn = last_new_insn;
+       new_alt_group->nop = nop;
        new_alt_group->cfi = orig_alt_group->cfi;
        return 0;
 }
@@ -1900,7 +1959,7 @@ static int handle_jump_alt(struct objtool_file *file,
        else
                file->jl_long++;
 
-       *new_insn = list_next_entry(orig_insn, list);
+       *new_insn = next_insn_same_sec(file, orig_insn);
        return 0;
 }
 
@@ -1974,7 +2033,8 @@ static int add_special_section_alts(struct objtool_file *file)
                alt->insn = new_insn;
                alt->skip_orig = special_alt->skip_orig;
                orig_insn->ignore_alts |= special_alt->skip_alt;
-               list_add_tail(&alt->list, &orig_insn->alts);
+               alt->next = orig_insn->alts;
+               orig_insn->alts = alt;
 
                list_del(&special_alt->list);
                free(special_alt);
@@ -2033,7 +2093,8 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
                }
 
                alt->insn = dest_insn;
-               list_add_tail(&alt->list, &insn->alts);
+               alt->next = insn->alts;
+               insn->alts = alt;
                prev_offset = reloc->offset;
        }
 
@@ -2123,7 +2184,7 @@ static void mark_func_jump_tables(struct objtool_file *file,
                reloc = find_jump_table(file, func, insn);
                if (reloc) {
                        reloc->jump_table_start = true;
-                       insn->jump_table = reloc;
+                       insn->_jump_table = reloc;
                }
        }
 }
@@ -2135,10 +2196,10 @@ static int add_func_jump_tables(struct objtool_file *file,
        int ret;
 
        func_for_each_insn(file, func, insn) {
-               if (!insn->jump_table)
+               if (!insn_jump_table(insn))
                        continue;
 
-               ret = add_jump_table(file, insn, insn->jump_table);
+               ret = add_jump_table(file, insn, insn_jump_table(insn));
                if (ret)
                        return ret;
        }
@@ -2271,6 +2332,7 @@ static int read_unwind_hints(struct objtool_file *file)
 
                cfi.cfa.offset = bswap_if_needed(file->elf, hint->sp_offset);
                cfi.type = hint->type;
+               cfi.signal = hint->signal;
                cfi.end = hint->end;
 
                insn->cfi = cfi_hash_find_or_add(&cfi);
@@ -2610,8 +2672,8 @@ static int decode_sections(struct objtool_file *file)
 static bool is_fentry_call(struct instruction *insn)
 {
        if (insn->type == INSN_CALL &&
-           insn->call_dest &&
-           insn->call_dest->fentry)
+           insn_call_dest(insn) &&
+           insn_call_dest(insn)->fentry)
                return true;
 
        return false;
@@ -3206,8 +3268,12 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn
                alt_cfi[group_off] = insn->cfi;
        } else {
                if (cficmp(alt_cfi[group_off], insn->cfi)) {
-                       WARN_FUNC("stack layout conflict in alternatives",
-                                 insn->sec, insn->offset);
+                       struct alt_group *orig_group = insn->alt_group->orig_group ?: insn->alt_group;
+                       struct instruction *orig = orig_group->first_insn;
+                       char *where = offstr(insn->sec, insn->offset);
+                       WARN_FUNC("stack layout conflict in alternatives: %s",
+                                 orig->sec, orig->offset, where);
+                       free(where);
                        return -1;
                }
        }
@@ -3221,7 +3287,7 @@ static int handle_insn_ops(struct instruction *insn,
 {
        struct stack_op *op;
 
-       list_for_each_entry(op, &insn->stack_ops, list) {
+       for (op = insn->stack_ops; op; op = op->next) {
 
                if (update_cfi_state(insn, next_insn, &state->cfi, op))
                        return 1;
@@ -3318,8 +3384,8 @@ static inline const char *call_dest_name(struct instruction *insn)
        struct reloc *rel;
        int idx;
 
-       if (insn->call_dest)
-               return insn->call_dest->name;
+       if (insn_call_dest(insn))
+               return insn_call_dest(insn)->name;
 
        rel = insn_reloc(NULL, insn);
        if (rel && !strcmp(rel->sym->name, "pv_ops")) {
@@ -3401,13 +3467,13 @@ static int validate_call(struct objtool_file *file,
                         struct insn_state *state)
 {
        if (state->noinstr && state->instr <= 0 &&
-           !noinstr_call_dest(file, insn, insn->call_dest)) {
+           !noinstr_call_dest(file, insn, insn_call_dest(insn))) {
                WARN_FUNC("call to %s() leaves .noinstr.text section",
                                insn->sec, insn->offset, call_dest_name(insn));
                return 1;
        }
 
-       if (state->uaccess && !func_uaccess_safe(insn->call_dest)) {
+       if (state->uaccess && !func_uaccess_safe(insn_call_dest(insn))) {
                WARN_FUNC("call to %s() with UACCESS enabled",
                                insn->sec, insn->offset, call_dest_name(insn));
                return 1;
@@ -3485,11 +3551,28 @@ static struct instruction *next_insn_to_validate(struct objtool_file *file,
         * Simulate the fact that alternatives are patched in-place.  When the
         * end of a replacement alt_group is reached, redirect objtool flow to
         * the end of the original alt_group.
+        *
+        * insn->alts->insn -> alt_group->first_insn
+        *                     ...
+        *                     alt_group->last_insn
+        *                     [alt_group->nop]      -> next(orig_group->last_insn)
         */
-       if (alt_group && insn == alt_group->last_insn && alt_group->orig_group)
-               return next_insn_same_sec(file, alt_group->orig_group->last_insn);
+       if (alt_group) {
+               if (alt_group->nop) {
+                       /* ->nop implies ->orig_group */
+                       if (insn == alt_group->last_insn)
+                               return alt_group->nop;
+                       if (insn == alt_group->nop)
+                               goto next_orig;
+               }
+               if (insn == alt_group->last_insn && alt_group->orig_group)
+                       goto next_orig;
+       }
 
        return next_insn_same_sec(file, insn);
+
+next_orig:
+       return next_insn_same_sec(file, alt_group->orig_group->last_insn);
 }
 
 /*
@@ -3590,10 +3673,10 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
                if (propagate_alt_cfi(file, insn))
                        return 1;
 
-               if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+               if (!insn->ignore_alts && insn->alts) {
                        bool skip_orig = false;
 
-                       list_for_each_entry(alt, &insn->alts, list) {
+                       for (alt = insn->alts; alt; alt = alt->next) {
                                if (alt->skip_orig)
                                        skip_orig = true;
 
@@ -3740,11 +3823,25 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
        return 0;
 }
 
+static int validate_unwind_hint(struct objtool_file *file,
+                                 struct instruction *insn,
+                                 struct insn_state *state)
+{
+       if (insn->hint && !insn->visited && !insn->ignore) {
+               int ret = validate_branch(file, insn_func(insn), insn, *state);
+               if (ret && opts.backtrace)
+                       BT_FUNC("<=== (hint)", insn);
+               return ret;
+       }
+
+       return 0;
+}
+
 static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
 {
        struct instruction *insn;
        struct insn_state state;
-       int ret, warnings = 0;
+       int warnings = 0;
 
        if (!file->hints)
                return 0;
@@ -3752,22 +3849,11 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
        init_insn_state(file, &state, sec);
 
        if (sec) {
-               insn = find_insn(file, sec, 0);
-               if (!insn)
-                       return 0;
+               sec_for_each_insn(file, sec, insn)
+                       warnings += validate_unwind_hint(file, insn, &state);
        } else {
-               insn = list_first_entry(&file->insn_list, typeof(*insn), list);
-       }
-
-       while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) {
-               if (insn->hint && !insn->visited && !insn->ignore) {
-                       ret = validate_branch(file, insn_func(insn), insn, state);
-                       if (ret && opts.backtrace)
-                               BT_FUNC("<=== (hint)", insn);
-                       warnings += ret;
-               }
-
-               insn = list_next_entry(insn, list);
+               for_each_insn(file, insn)
+                       warnings += validate_unwind_hint(file, insn, &state);
        }
 
        return warnings;
@@ -3792,11 +3878,11 @@ static int validate_entry(struct objtool_file *file, struct instruction *insn)
 
                insn->visited |= VISITED_ENTRY;
 
-               if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+               if (!insn->ignore_alts && insn->alts) {
                        struct alternative *alt;
                        bool skip_orig = false;
 
-                       list_for_each_entry(alt, &insn->alts, list) {
+                       for (alt = insn->alts; alt; alt = alt->next) {
                                if (alt->skip_orig)
                                        skip_orig = true;
 
@@ -3845,11 +3931,11 @@ static int validate_entry(struct objtool_file *file, struct instruction *insn)
 
                        /* fallthrough */
                case INSN_CALL:
-                       dest = find_insn(file, insn->call_dest->sec,
-                                        insn->call_dest->offset);
+                       dest = find_insn(file, insn_call_dest(insn)->sec,
+                                        insn_call_dest(insn)->offset);
                        if (!dest) {
                                WARN("Unresolved function after linking!?: %s",
-                                    insn->call_dest->name);
+                                    insn_call_dest(insn)->name);
                                return -1;
                        }
 
@@ -3950,13 +4036,13 @@ static int validate_retpoline(struct objtool_file *file)
 static bool is_kasan_insn(struct instruction *insn)
 {
        return (insn->type == INSN_CALL &&
-               !strcmp(insn->call_dest->name, "__asan_handle_no_return"));
+               !strcmp(insn_call_dest(insn)->name, "__asan_handle_no_return"));
 }
 
 static bool is_ubsan_insn(struct instruction *insn)
 {
        return (insn->type == INSN_CALL &&
-               !strcmp(insn->call_dest->name,
+               !strcmp(insn_call_dest(insn)->name,
                        "__ubsan_handle_builtin_unreachable"));
 }
 
@@ -4033,8 +4119,9 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
         *
         * It may also insert a UD2 after calling a __noreturn function.
         */
-       prev_insn = list_prev_entry(insn, list);
-       if ((prev_insn->dead_end || dead_end_function(file, prev_insn->call_dest)) &&
+       prev_insn = prev_insn_same_sec(file, insn);
+       if ((prev_insn->dead_end ||
+            dead_end_function(file, insn_call_dest(prev_insn))) &&
            (insn->type == INSN_BUG ||
             (insn->type == INSN_JUMP_UNCONDITIONAL &&
              insn->jump_dest && insn->jump_dest->type == INSN_BUG)))
@@ -4064,7 +4151,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
                if (insn->offset + insn->len >= insn_func(insn)->offset + insn_func(insn)->len)
                        break;
 
-               insn = list_next_entry(insn, list);
+               insn = next_insn_same_sec(file, insn);
        }
 
        return false;
@@ -4077,10 +4164,10 @@ static int add_prefix_symbol(struct objtool_file *file, struct symbol *func,
                return 0;
 
        for (;;) {
-               struct instruction *prev = list_prev_entry(insn, list);
+               struct instruction *prev = prev_insn_same_sec(file, insn);
                u64 offset;
 
-               if (&prev->list == &file->insn_list)
+               if (!prev)
                        break;
 
                if (prev->type != INSN_NOP)
@@ -4479,7 +4566,7 @@ int check(struct objtool_file *file)
 
        warnings += ret;
 
-       if (list_empty(&file->insn_list))
+       if (!nr_insns)
                goto out;
 
        if (opts.retpoline) {
@@ -4588,7 +4675,7 @@ int check(struct objtool_file *file)
                warnings += ret;
        }
 
-       if (opts.orc && !list_empty(&file->insn_list)) {
+       if (opts.orc && nr_insns) {
                ret = orc_create(file);
                if (ret < 0)
                        goto out;
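Several hunks above replace the global insn_list with chunked arrays: decode_instructions() calloc()s INSN_CHUNK_SIZE instructions at a time, and next/prev navigation is pointer arithmetic until idx hits a chunk edge. A minimal model of the forward lookup (not the real struct layout):

#include <stddef.h>

#define INSN_CHUNK_BITS	8
#define INSN_CHUNK_SIZE	(1 << INSN_CHUNK_BITS)
#define INSN_CHUNK_MAX	(INSN_CHUNK_SIZE - 1)

struct insn {
	unsigned int idx : INSN_CHUNK_BITS;	/* slot within the chunk */
	unsigned char len;			/* 0 marks an unused trailing slot */
};

static struct insn *next_in_chunk(struct insn *insn)
{
	if (insn->idx == INSN_CHUNK_MAX)
		return NULL;	/* real code falls back to find_insn() here */

	insn++;
	return insn->len ? insn : NULL;
}

prev_insn_same_sec() is the symmetric case, using prev_len to hop backwards across a chunk boundary.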
index 64443a7f4bbf9f1b35ff858233f24d7927c6a21d..6806ce01d933470b2138d2b659946af18ea76a5e 100644
@@ -284,13 +284,13 @@ static int read_sections(struct elf *elf)
            !elf_alloc_hash(section_name, sections_nr))
                return -1;
 
+       elf->section_data = calloc(sections_nr, sizeof(*sec));
+       if (!elf->section_data) {
+               perror("calloc");
+               return -1;
+       }
        for (i = 0; i < sections_nr; i++) {
-               sec = malloc(sizeof(*sec));
-               if (!sec) {
-                       perror("malloc");
-                       return -1;
-               }
-               memset(sec, 0, sizeof(*sec));
+               sec = &elf->section_data[i];
 
                INIT_LIST_HEAD(&sec->symbol_list);
                INIT_LIST_HEAD(&sec->reloc_list);
@@ -422,13 +422,13 @@ static int read_symbols(struct elf *elf)
            !elf_alloc_hash(symbol_name, symbols_nr))
                return -1;
 
+       elf->symbol_data = calloc(symbols_nr, sizeof(*sym));
+       if (!elf->symbol_data) {
+               perror("calloc");
+               return -1;
+       }
        for (i = 0; i < symbols_nr; i++) {
-               sym = malloc(sizeof(*sym));
-               if (!sym) {
-                       perror("malloc");
-                       return -1;
-               }
-               memset(sym, 0, sizeof(*sym));
+               sym = &elf->symbol_data[i];
 
                sym->idx = i;
 
@@ -918,13 +918,13 @@ static int read_relocs(struct elf *elf)
                sec->base->reloc = sec;
 
                nr_reloc = 0;
+               sec->reloc_data = calloc(sec->sh.sh_size / sec->sh.sh_entsize, sizeof(*reloc));
+               if (!sec->reloc_data) {
+                       perror("calloc");
+                       return -1;
+               }
                for (i = 0; i < sec->sh.sh_size / sec->sh.sh_entsize; i++) {
-                       reloc = malloc(sizeof(*reloc));
-                       if (!reloc) {
-                               perror("malloc");
-                               return -1;
-                       }
-                       memset(reloc, 0, sizeof(*reloc));
+                       reloc = &sec->reloc_data[i];
                        switch (sec->sh.sh_type) {
                        case SHT_REL:
                                if (read_rel_reloc(sec, i, reloc, &symndx))
@@ -1453,16 +1453,16 @@ void elf_close(struct elf *elf)
                list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) {
                        list_del(&sym->list);
                        hash_del(&sym->hash);
-                       free(sym);
                }
                list_for_each_entry_safe(reloc, tmpreloc, &sec->reloc_list, list) {
                        list_del(&reloc->list);
                        hash_del(&reloc->hash);
-                       free(reloc);
                }
                list_del(&sec->list);
-               free(sec);
+               free(sec->reloc_data);
        }
 
+       free(elf->symbol_data);
+       free(elf->section_data);
        free(elf);
 }
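The elf.c changes above trade per-element malloc()+memset() for one calloc() per table, with matching bulk frees in elf_close(). A tiny sketch of the pattern:

#include <stdio.h>
#include <stdlib.h>

struct section {
	int idx;	/* stand-in for the real fields */
};

int main(void)
{
	int sections_nr = 64;

	/* one zeroed allocation instead of sections_nr malloc/memset pairs */
	struct section *section_data = calloc(sections_nr, sizeof(*section_data));
	if (!section_data) {
		perror("calloc");
		return 1;
	}

	for (int i = 0; i < sections_nr; i++)
		section_data[i].idx = i;

	printf("last idx: %d\n", section_data[sections_nr - 1].idx);

	free(section_data);	/* single free replaces per-element frees */
	return 0;
}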
index 4ecb480131c7cb5e863362afa14840ee9bfabd04..2b6d2ce4f9a5bfd7a9cdb2d343c662996ed10ead 100644
@@ -62,9 +62,9 @@ struct op_src {
 };
 
 struct stack_op {
+       struct stack_op *next;
        struct op_dest dest;
        struct op_src src;
-       struct list_head list;
 };
 
 struct instruction;
@@ -75,9 +75,7 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state);
 
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
                            unsigned long offset, unsigned int maxlen,
-                           unsigned int *len, enum insn_type *type,
-                           unsigned long *immediate,
-                           struct list_head *ops_list);
+                           struct instruction *insn);
 
 bool arch_callee_saved_reg(unsigned char reg);
 
index fa45044e38630c5608e2690c3e59ac7fbe5e9135..2a108e648b7a67b3d249f841578bde6da9113836 100644
@@ -7,8 +7,6 @@
 
 #include <subcmd/parse-options.h>
 
-extern const struct option check_options[];
-
 struct opts {
        /* actions: */
        bool dump_orc;
index f11d1ac1dadf1bd190d0a603cfe612da253049a5..b1258e79a1b7e34262475961a20ea8399ce48055 100644 (file)
@@ -34,6 +34,7 @@ struct cfi_state {
        unsigned char type;
        bool bp_scratch;
        bool drap;
+       bool signal;
        bool end;
 };
 
index acd7fae593484cdee64a2183669042e7b4cbccc1..3e7c7004f7df27f6887369f7bd0b277f59a02669 100644 (file)
@@ -27,7 +27,7 @@ struct alt_group {
        struct alt_group *orig_group;
 
        /* First and last instructions in the group */
-       struct instruction *first_insn, *last_insn;
+       struct instruction *first_insn, *last_insn, *nop;
 
        /*
         * Byte-offset-addressed len-sized array of pointers to CFI structs.
@@ -36,39 +36,46 @@ struct alt_group {
        struct cfi_state **cfi;
 };
 
+#define INSN_CHUNK_BITS                8
+#define INSN_CHUNK_SIZE                (1 << INSN_CHUNK_BITS)
+#define INSN_CHUNK_MAX         (INSN_CHUNK_SIZE - 1)
+
 struct instruction {
-       struct list_head list;
        struct hlist_node hash;
        struct list_head call_node;
        struct section *sec;
        unsigned long offset;
-       unsigned int len;
-       enum insn_type type;
        unsigned long immediate;
 
-       u16 dead_end            : 1,
-          ignore               : 1,
-          ignore_alts          : 1,
-          hint                 : 1,
-          save                 : 1,
-          restore              : 1,
-          retpoline_safe       : 1,
-          noendbr              : 1,
-          entry                : 1;
-               /* 7 bit hole */
-
+       u8 len;
+       u8 prev_len;
+       u8 type;
        s8 instr;
-       u8 visited;
+
+       u32 idx                 : INSN_CHUNK_BITS,
+           dead_end            : 1,
+           ignore              : 1,
+           ignore_alts         : 1,
+           hint                : 1,
+           save                : 1,
+           restore             : 1,
+           retpoline_safe      : 1,
+           noendbr             : 1,
+           entry               : 1,
+           visited             : 4,
+           no_reloc            : 1;
+               /* 10 bit hole */
 
        struct alt_group *alt_group;
-       struct symbol *call_dest;
        struct instruction *jump_dest;
        struct instruction *first_jump_src;
-       struct reloc *jump_table;
-       struct reloc *reloc;
-       struct list_head alts;
+       union {
+               struct symbol *_call_dest;
+               struct reloc *_jump_table;
+       };
+       struct alternative *alts;
        struct symbol *sym;
-       struct list_head stack_ops;
+       struct stack_op *stack_ops;
        struct cfi_state *cfi;
 };
 
@@ -107,13 +114,11 @@ static inline bool is_jump(struct instruction *insn)
 struct instruction *find_insn(struct objtool_file *file,
                              struct section *sec, unsigned long offset);
 
-#define for_each_insn(file, insn)                                      \
-       list_for_each_entry(insn, &file->insn_list, list)
+struct instruction *next_insn_same_sec(struct objtool_file *file, struct instruction *insn);
 
-#define sec_for_each_insn(file, sec, insn)                             \
-       for (insn = find_insn(file, sec, 0);                            \
-            insn && &insn->list != &file->insn_list &&                 \
-                       insn->sec == sec;                               \
-            insn = list_next_entry(insn, list))
+#define sec_for_each_insn(file, _sec, insn)                            \
+       for (insn = find_insn(file, _sec, 0);                           \
+            insn && insn->sec == _sec;                                 \
+            insn = next_insn_same_sec(file, insn))
 
 #endif /* _CHECK_H */
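
[editor's note: with the per-instruction list_head gone, sec_for_each_insn() iterates via next_insn_same_sec(), and the new 8-bit idx bitfield addresses an instruction within an INSN_CHUNK_SIZE-sized chunk. A toy illustration of that chunk/index split under the INSN_CHUNK_* macros above; the arithmetic is illustrative, not objtool's code:]

    #include <stdio.h>

    #define INSN_CHUNK_BITS  8
    #define INSN_CHUNK_SIZE  (1 << INSN_CHUNK_BITS)
    #define INSN_CHUNK_MAX   (INSN_CHUNK_SIZE - 1)

    int main(void)
    {
            /* A flat instruction number splits into a chunk number and an
             * index small enough for the 8-bit idx bitfield. */
            unsigned int nr = 1000;
            unsigned int chunk = nr >> INSN_CHUNK_BITS;   /* 3 */
            unsigned int idx   = nr &  INSN_CHUNK_MAX;    /* 232 */

            printf("insn %u -> chunk %u, idx %u\n", nr, chunk, idx);
            return 0;
    }
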
index bb60fd42b46f48553eb9013d0e5e9477717f6fac..ad0024da262b067128abc3507f21e150dbd74310 100644 (file)
@@ -39,6 +39,7 @@ struct section {
        char *name;
        int idx;
        bool changed, text, rodata, noinstr, init, truncate;
+       struct reloc *reloc_data;
 };
 
 struct symbol {
@@ -49,12 +50,11 @@ struct symbol {
        GElf_Sym sym;
        struct section *sec;
        char *name;
-       unsigned int idx;
-       unsigned char bind, type;
+       unsigned int idx, len;
        unsigned long offset;
-       unsigned int len;
        unsigned long __subtree_last;
        struct symbol *pfunc, *cfunc, *alias;
+       unsigned char bind, type;
        u8 uaccess_safe      : 1;
        u8 static_call_tramp : 1;
        u8 retpoline_thunk   : 1;
@@ -104,6 +104,9 @@ struct elf {
        struct hlist_head *section_hash;
        struct hlist_head *section_name_hash;
        struct hlist_head *reloc_hash;
+
+       struct section *section_data;
+       struct symbol *symbol_data;
 };
 
 #define OFFSET_STRIDE_BITS     4
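
[editor's note: the struct symbol hunk groups the two unsigned char members together and merges idx/len into one declaration; placing small members adjacent to each other avoids alignment padding between larger fields. A standalone illustration of the effect on a 64-bit target, with stand-in types:]

    #include <stdio.h>

    struct scattered {              /* chars split by longs: padded to 32 bytes */
            unsigned long a;
            unsigned char b;
            unsigned long c;
            unsigned char d;
    };

    struct grouped {                /* small members adjacent: 24 bytes */
            unsigned long a;
            unsigned long c;
            unsigned char b;
            unsigned char d;
    };

    int main(void)
    {
            printf("scattered: %zu bytes, grouped: %zu bytes\n",
                   sizeof(struct scattered), sizeof(struct grouped));
            return 0;
    }
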
index 6b40977bcdb1a041a28c8c7710b62bbc8e4a10c9..94a33ee7b36306f85530efe149277c0c8ddf9061 100644 (file)
@@ -21,7 +21,6 @@ struct pv_state {
 
 struct objtool_file {
        struct elf *elf;
-       struct list_head insn_list;
        DECLARE_HASHTABLE(insn_hash, 20);
        struct list_head retpoline_call_list;
        struct list_head return_thunk_list;
index dc4721e190023296af0e09761d0e1f40b32ddd8c..86d4af9c5aa9dc83cd543a140ab1979979a1d6fa 100644 (file)
@@ -19,6 +19,7 @@ struct special_alt {
        bool skip_orig;
        bool skip_alt;
        bool jump_or_nop;
+       u8 key_addend;
 
        struct section *orig_sec;
        unsigned long orig_off;
@@ -27,7 +28,6 @@ struct special_alt {
        unsigned long new_off;
 
        unsigned int orig_len, new_len; /* group only */
-       u8 key_addend;
 };
 
 int special_get_alts(struct elf *elf, struct list_head *alts);
index 6affd8067f8375970be4e12a6abe69cef3902a57..c54f7235c5d94157011a5e202955c539a9182a26 100644 (file)
@@ -99,7 +99,6 @@ struct objtool_file *objtool_open_read(const char *_objname)
                return NULL;
        }
 
-       INIT_LIST_HEAD(&file.insn_list);
        hash_init(file.insn_hash);
        INIT_LIST_HEAD(&file.retpoline_call_list);
        INIT_LIST_HEAD(&file.return_thunk_list);
index 4f1211fec82ce0bb77eb0f4d236ff8e30ddd2e59..2d8ebdcd1db3c18b43d44ff6f97ebaf268d668e5 100644 (file)
@@ -211,8 +211,8 @@ int orc_dump(const char *_objname)
 
                print_reg(orc[i].bp_reg, bswap_if_needed(&dummy_elf, orc[i].bp_offset));
 
-               printf(" type:%s end:%d\n",
-                      orc_type_name(orc[i].type), orc[i].end);
+               printf(" type:%s signal:%d end:%d\n",
+                      orc_type_name(orc[i].type), orc[i].signal, orc[i].end);
        }
 
        elf_end(elf);
index 1f22b7ebae588be26a33ec6524c6f098926b828e..57a4527d598825ef077d0c3043b584394a51540e 100644 (file)
@@ -27,6 +27,7 @@ static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi,
        }
 
        orc->end = cfi->end;
+       orc->signal = cfi->signal;
 
        if (cfi->cfa.base == CFI_UNDEFINED) {
                orc->sp_reg = ORC_REG_UNDEFINED;
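
[editor's note: init_orc_entry() now copies the new signal flag from the CFI state into the generated ORC entry, matching the extra field orc_dump prints above; as the name suggests, the bit distinguishes frames set up by signal/exception entry from ordinary call frames. A reduced sketch of the copy, with the structs abbreviated to the two flags involved:]

    #include <stdio.h>

    struct cfi_demo { unsigned char signal, end; };
    struct orc_demo { unsigned char signal, end; };

    /* The real structs also carry register and offset state. */
    static void init_entry(struct orc_demo *orc, const struct cfi_demo *cfi)
    {
            orc->end    = cfi->end;
            orc->signal = cfi->signal;   /* new bit propagated to the unwinder */
    }

    int main(void)
    {
            struct cfi_demo cfi = { .signal = 1 };
            struct orc_demo orc;

            init_entry(&orc, &cfi);
            printf("type:... signal:%d end:%d\n", orc.signal, orc.end);
            return 0;
    }
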
index 9c8d827f69afb5840765d3a0d6b410e41b6119f8..baa85c31526b389e49bb3f5da20947ca2af9f3cd 100644 (file)
@@ -26,7 +26,7 @@ struct special_entry {
        unsigned char key; /* jump_label key */
 };
 
-struct special_entry entries[] = {
+static const struct special_entry entries[] = {
        {
                .sec = ".altinstructions",
                .group = true,
@@ -65,7 +65,7 @@ static void reloc_to_sec_off(struct reloc *reloc, struct section **sec,
        *off = reloc->sym->offset + reloc->addend;
 }
 
-static int get_alt_entry(struct elf *elf, struct special_entry *entry,
+static int get_alt_entry(struct elf *elf, const struct special_entry *entry,
                         struct section *sec, int idx,
                         struct special_alt *alt)
 {
@@ -139,7 +139,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
  */
 int special_get_alts(struct elf *elf, struct list_head *alts)
 {
-       struct special_entry *entry;
+       const struct special_entry *entry;
        struct section *sec;
        unsigned int nr_entries;
        struct special_alt *alt;
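
[editor's note: marking entries[] static const takes it out of the global namespace, lets the compiler place it in read-only memory, and forces every consumer down to a const-qualified pointer, which is why get_alt_entry() and special_get_alts() change in the same hunk. A minimal parallel with hypothetical names:]

    #include <stdio.h>

    struct entry_demo { const char *sec; int group; };

    static const struct entry_demo demo_entries[] = {
            { .sec = ".altinstructions", .group = 1 },
            { .sec = ".smp_locks",       .group = 0 },
            {},
    };

    /* Iteration now takes a const pointer, mirroring the updated
     * get_alt_entry() signature. */
    static int count_entries(void)
    {
            int n = 0;
            for (const struct entry_demo *e = demo_entries; e->sec; e++)
                    n++;
            return n;
    }

    int main(void)
    {
            printf("%d entries\n", count_entries());
            return 0;
    }
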
index 0c6c7f45688784b59cd7e85032c8d1d9a3a12925..1c72d07cb9fed7237bb91568826e77f49be9bc06 100644 (file)
@@ -5,7 +5,7 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
                                   -e s/s390x/s390/ -e s/parisc64/parisc/ \
                                   -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
                                   -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \
-                                  -e s/riscv.*/riscv/)
+                                  -e s/riscv.*/riscv/ -e s/loongarch.*/loongarch/)
 
 ifndef ARCH
 ARCH := $(HOSTARCH)
@@ -34,6 +34,15 @@ ifeq ($(ARCH),sh64)
        SRCARCH := sh
 endif
 
+# Additional ARCH settings for loongarch
+ifeq ($(ARCH),loongarch32)
+       SRCARCH := loongarch
+endif
+
+ifeq ($(ARCH),loongarch64)
+       SRCARCH := loongarch
+endif
+
 LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
 ifeq ($(LP64), 1)
   IS_64_BIT := 1
index 09ad5a4ebd1f1c59ba6541031ce12ef0cb1ee591..591104e79812ef2918c23f84b692790fe519f8fc 100644 (file)
@@ -6,7 +6,7 @@
 #include <linux/capability.h>
 
 struct kernel_cap_struct {
-       __u32 cap[_LINUX_CAPABILITY_U32S_3];
+       __u64 val;
 } __attribute__((preserve_access_index));
 
 struct cred {
@@ -19,14 +19,13 @@ SEC("lsm.s/userns_create")
 int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
 {
        struct kernel_cap_struct caps = cred->cap_effective;
-       int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN);
-       __u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN);
+       __u64 cap_mask = BIT_LL(CAP_SYS_ADMIN);
 
        if (ret)
                return 0;
 
        ret = -EPERM;
-       if (caps.cap[cap_index] & cap_mask)
+       if (caps.val & cap_mask)
                return 0;
 
        return -EPERM;
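
[editor's note: with kernel capabilities collapsed into a single __u64, the BPF program no longer needs the CAP_TO_INDEX/CAP_TO_MASK pair; one 64-bit mask test suffices. A userspace-compilable sketch of the same test; BIT_LL is written out here and the _DEMO names are stand-ins, the real program relies on the BPF headers:]

    #include <stdio.h>

    #define CAP_SYS_ADMIN_DEMO 21           /* UAPI value of CAP_SYS_ADMIN */
    #define BIT_LL_DEMO(nr)    (1ULL << (nr))

    struct kernel_cap_demo { unsigned long long val; };

    /* Single mask test replaces the old index-into-__u32-array lookup. */
    static int has_sys_admin(const struct kernel_cap_demo *caps)
    {
            return !!(caps->val & BIT_LL_DEMO(CAP_SYS_ADMIN_DEMO));
    }

    int main(void)
    {
            struct kernel_cap_demo caps = {
                    .val = BIT_LL_DEMO(CAP_SYS_ADMIN_DEMO),
            };

            printf("CAP_SYS_ADMIN: %d\n", has_sys_admin(&caps));
            return 0;
    }
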
index 45974156522217e844d490ffc689375484333eac..a4f8e7c53c1f73550b3a9675ff25494d8ff7b9ca 100644 (file)
@@ -28,6 +28,9 @@ s390*)
 mips*)
   ARG1=%r4
 ;;
+loongarch*)
+  ARG1=%r4
+;;
 *)
   echo "Please implement other architecture here"
   exit_untested
index d4662c8cf407f17e46f0d6d9db1dd431290bc761..1df61e13a812b789d11a06cc9a3ddc517a72a359 100644 (file)
@@ -40,6 +40,10 @@ mips*)
   GOODREG=%r4
   BADREG=%r12
 ;;
+loongarch*)
+  GOODREG=%r4
+  BADREG=%r12
+;;
 *)
   echo "Please implement other architecture here"
   exit_untested
index 9c5a55abca6b688a29375afda91d5c6937783fb4..5b5cef445b5490350b9a7da03496f209a1fb02b8 100644 (file)
@@ -17,5 +17,6 @@ CONFIG_FTRACE_SYSCALLS=y
 CONFIG_FUNCTION_TRACER=y
 CONFIG_HIDRAW=y
 CONFIG_HID=y
+CONFIG_HID_BPF=y
 CONFIG_INPUT_EVDEV=y
 CONFIG_UHID=y
index 66c5be25c13d03892d878b4f58154f657daa54c9..48e52f995a98c84c7c94c0c20cf87ff66fac2c1f 100755 (executable)
@@ -240,7 +240,7 @@ check_expected_one()
        fi
 
        stdbuf -o0 -e0 printf "\tExpected value for '%s': '%s', got '%s'.\n" \
-               "${var}" "${!var}" "${!exp}"
+               "${var}" "${!exp}" "${!var}"
        return 1
 }
 
index 61386e499b77daac56b29cce11b7453d40ac6f3e..43ec36b179dc092f404f18a9773bbf39c90fd8ef 100644 (file)
@@ -128,6 +128,8 @@ struct seccomp_data {
 #  define __NR_seccomp 277
 # elif defined(__csky__)
 #  define __NR_seccomp 277
+# elif defined(__loongarch__)
+#  define __NR_seccomp 277
 # elif defined(__hppa__)
 #  define __NR_seccomp 338
 # elif defined(__powerpc__)
@@ -1755,6 +1757,10 @@ TEST_F(TRACE_poke, getpid_runs_normally)
                                    NT_ARM_SYSTEM_CALL, &__v)); \
        } while (0)
 # define SYSCALL_RET(_regs)    (_regs).regs[0]
+#elif defined(__loongarch__)
+# define ARCH_REGS             struct user_pt_regs
+# define SYSCALL_NUM(_regs)    (_regs).regs[11]
+# define SYSCALL_RET(_regs)    (_regs).regs[4]
 #elif defined(__riscv) && __riscv_xlen == 64
 # define ARCH_REGS             struct user_regs_struct
 # define SYSCALL_NUM(_regs)    (_regs).a7
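
[editor's note: for loongarch the selftest reads the syscall number from regs[11] (register a7) and the return value from regs[4] (a0), per the LoongArch syscall convention. A hedged sketch of how such macros get used with PTRACE_GETREGSET, assuming the usual UAPI headers; the _DEMO names and get_syscall_nr() are illustrative, not the test's code, and the block only builds on a loongarch toolchain:]

    #include <sys/types.h>
    #include <sys/ptrace.h>
    #include <sys/uio.h>
    #include <elf.h>

    #if defined(__loongarch__)
    # include <asm/ptrace.h>
    # define ARCH_REGS_DEMO           struct user_pt_regs
    # define SYSCALL_NUM_DEMO(_regs)  (_regs).regs[11]  /* a7: syscall nr */
    # define SYSCALL_RET_DEMO(_regs)  (_regs).regs[4]   /* a0: return value */

    /* Fetch the tracee's syscall number, as the selftest does on every
     * architecture via its SYSCALL_NUM() macro. */
    static long get_syscall_nr(pid_t pid)
    {
            ARCH_REGS_DEMO regs;
            struct iovec iov = {
                    .iov_base = &regs,
                    .iov_len  = sizeof(regs),
            };

            if (ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov) < 0)
                    return -1;
            return SYSCALL_NUM_DEMO(regs);
    }
    #endif
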