Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 19 Apr 2024 20:04:21 +0000 (13:04 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 19 Apr 2024 20:04:21 +0000 (13:04 -0700)
Pull arm64 fixes from Catalin Marinas:

 - Fix a kernel fault during page table walking in huge_pte_alloc() with
   PGTABLE_LEVELS=5 due to using p4d_offset() instead of p4d_alloc()

 - head.S fix and cleanup to disable the MMU before toggling the
   HCR_EL2.E2H bit when entering the kernel with the MMU on from the EFI
   stub. Changing this bit (currently from VHE to nVHE) causes some
   system registers as well as page table descriptors to be interpreted
   differently, potentially resulting in spurious MMU faults

 - Fix translation fault in swsusp_save() accessing MEMBLOCK_NOMAP
   memory ranges due to kernel_page_present() returning true in most
   configurations other than rodata_full == true,
   CONFIG_DEBUG_PAGEALLOC=y or CONFIG_KFENCE=y

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: hibernate: Fix level3 translation fault in swsusp_save()
  arm64/head: Disable MMU at EL2 before clearing HCR_EL2.E2H
  arm64/head: Drop unnecessary pre-disable-MMU workaround
  arm64/hugetlb: Fix page table walk in huge_pte_alloc()

505 files changed:
.mailmap
CREDITS
Documentation/admin-guide/hw-vuln/spectre.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml
Documentation/driver-api/virtio/writing_virtio_drivers.rst
Documentation/filesystems/bcachefs/index.rst [new file with mode: 0644]
Documentation/filesystems/index.rst
Documentation/mm/page_owner.rst
MAINTAINERS
Makefile
arch/Kconfig
arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
arch/arm/mach-omap2/board-n8x0.c
arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
arch/loongarch/boot/dts/loongson-2k1000.dtsi
arch/loongarch/boot/dts/loongson-2k2000-ref.dts
arch/loongarch/boot/dts/loongson-2k2000.dtsi
arch/loongarch/include/asm/addrspace.h
arch/loongarch/include/asm/io.h
arch/loongarch/include/asm/kfence.h
arch/loongarch/include/asm/page.h
arch/loongarch/mm/mmap.c
arch/loongarch/mm/pgtable.c
arch/mips/include/asm/ptrace.h
arch/mips/kernel/asm-offsets.c
arch/mips/kernel/ptrace.c
arch/mips/kernel/scall32-o32.S
arch/mips/kernel/scall64-n32.S
arch/mips/kernel/scall64-n64.S
arch/mips/kernel/scall64-o32.S
arch/s390/kernel/entry.S
arch/x86/Kconfig
arch/x86/entry/common.c
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/entry/syscall_32.c
arch/x86/entry/syscall_64.c
arch/x86/entry/syscall_x32.c
arch/x86/events/core.c
arch/x86/hyperv/hv_apic.c
arch/x86/hyperv/hv_proc.c
arch/x86/include/asm/apic.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/syscall.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/scattered.c
arch/x86/kernel/cpu/topology.c
arch/x86/kernel/cpu/topology_amd.c
arch/x86/kvm/reverse_cpuid.h
arch/x86/kvm/vmx/vmenter.S
arch/x86/kvm/x86.c
block/blk-cgroup.c
block/blk-cgroup.h
block/blk-core.c
block/blk-iocost.c
block/blk-settings.c
drivers/accel/ivpu/ivpu_drv.c
drivers/accel/ivpu/ivpu_drv.h
drivers/accel/ivpu/ivpu_hw.h
drivers/accel/ivpu/ivpu_hw_37xx.c
drivers/accel/ivpu/ivpu_hw_40xx.c
drivers/accel/ivpu/ivpu_ipc.c
drivers/accel/ivpu/ivpu_mmu.c
drivers/accel/ivpu/ivpu_pm.c
drivers/acpi/scan.c
drivers/ata/ahci.c
drivers/ata/libata-core.c
drivers/ata/libata-scsi.c
drivers/cache/sifive_ccache.c
drivers/char/random.c
drivers/cxl/acpi.c
drivers/cxl/core/cdat.c
drivers/cxl/core/mbox.c
drivers/cxl/core/port.c
drivers/cxl/core/regs.c
drivers/cxl/cxl.h
drivers/cxl/cxlmem.h
drivers/firmware/arm_ffa/driver.c
drivers/firmware/arm_scmi/powercap.c
drivers/firmware/arm_scmi/raw_mode.c
drivers/gpio/gpio-crystalcove.c
drivers/gpio/gpio-lpc32xx.c
drivers/gpio/gpio-wcove.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
drivers/gpu/drm/amd/amdgpu/soc21.c
drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
drivers/gpu/drm/amd/display/dc/core/dc_state.c
drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
drivers/gpu/drm/ast/ast_dp.c
drivers/gpu/drm/drm_client_modeset.c
drivers/gpu/drm/i915/display/intel_cdclk.c
drivers/gpu/drm/i915/display/intel_cdclk.h
drivers/gpu/drm/i915/display/intel_ddi.c
drivers/gpu/drm/i915/display/intel_dp.c
drivers/gpu/drm/i915/display/intel_dp_hdcp.c
drivers/gpu/drm/i915/display/intel_psr.c
drivers/gpu/drm/i915/display/intel_vrr.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gt/uc/intel_uc.c
drivers/gpu/drm/msm/adreno/a6xx_gpu.c
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h
drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
drivers/gpu/drm/msm/dp/dp_display.c
drivers/gpu/drm/msm/msm_fb.c
drivers/gpu/drm/msm/msm_kms.c
drivers/gpu/drm/nouveau/nouveau_bios.c
drivers/gpu/drm/nouveau/nouveau_dp.c
drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c
drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
drivers/gpu/drm/panel/panel-novatek-nt36672e.c
drivers/gpu/drm/panel/panel-visionox-rm69299.c
drivers/gpu/drm/panfrost/panfrost_mmu.c
drivers/gpu/drm/qxl/qxl_release.c
drivers/gpu/drm/radeon/pptable.h
drivers/gpu/drm/radeon/radeon_atombios.c
drivers/gpu/drm/ttm/ttm_pool.c
drivers/gpu/drm/v3d/v3d_irq.c
drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
drivers/gpu/drm/xe/display/intel_fb_bo.c
drivers/gpu/drm/xe/display/xe_display.c
drivers/gpu/drm/xe/regs/xe_engine_regs.h
drivers/gpu/drm/xe/xe_hwmon.c
drivers/gpu/drm/xe/xe_lrc.c
drivers/gpu/drm/xe/xe_migrate.c
drivers/gpu/drm/xe/xe_vm.c
drivers/gpu/host1x/bus.c
drivers/hv/channel.c
drivers/hv/connection.c
drivers/hv/vmbus_drv.c
drivers/iommu/amd/init.c
drivers/iommu/amd/iommu.c
drivers/iommu/intel/iommu.c
drivers/iommu/intel/perfmon.c
drivers/iommu/intel/svm.c
drivers/iommu/mtk_iommu.c
drivers/iommu/mtk_iommu_v1.c
drivers/irqchip/irq-gic-v3-its.c
drivers/isdn/mISDN/socket.c
drivers/md/raid1.c
drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c
drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h
drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c
drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c
drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c
drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c
drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c
drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h
drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c
drivers/mmc/host/omap.c
drivers/net/dsa/mt7530.c
drivers/net/dsa/mt7530.h
drivers/net/ethernet/amazon/ena/ena_com.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/amazon/ena/ena_xdp.c
drivers/net/ethernet/amd/pds_core/core.c
drivers/net/ethernet/amd/pds_core/core.h
drivers/net/ethernet/amd/pds_core/dev.c
drivers/net/ethernet/amd/pds_core/main.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
drivers/net/ethernet/intel/ice/ice_tc_lib.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
drivers/net/ethernet/marvell/octeontx2/nic/qos.c
drivers/net/ethernet/mediatek/mtk_wed.c
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
drivers/net/ethernet/micrel/ks8851.h
drivers/net/ethernet/micrel/ks8851_common.c
drivers/net/ethernet/micrel/ks8851_par.c
drivers/net/ethernet/micrel/ks8851_spi.c
drivers/net/ethernet/microchip/sparx5/sparx5_port.c
drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
drivers/net/ethernet/realtek/r8169.h
drivers/net/ethernet/realtek/r8169_leds.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
drivers/net/ethernet/stmicro/stmmac/mmc.h
drivers/net/ethernet/stmicro/stmmac/mmc_core.c
drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/ti/am65-cpsw-nuss.c
drivers/net/geneve.c
drivers/net/hyperv/netvsc.c
drivers/net/tun.c
drivers/net/usb/ax88179_178a.c
drivers/net/usb/qmi_wwan.c
drivers/net/virtio_net.c
drivers/pci/quirks.c
drivers/platform/chrome/cros_ec_uart.c
drivers/platform/x86/acer-wmi.c
drivers/platform/x86/amd/pmc/pmc-quirks.c
drivers/platform/x86/amd/pmf/Makefile
drivers/platform/x86/amd/pmf/acpi.c
drivers/platform/x86/amd/pmf/core.c
drivers/platform/x86/amd/pmf/pmf-quirks.c [new file with mode: 0644]
drivers/platform/x86/amd/pmf/pmf.h
drivers/platform/x86/intel/hid.c
drivers/platform/x86/intel/speed_select_if/isst_if_common.c
drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
drivers/platform/x86/intel/vbtn.c
drivers/platform/x86/lg-laptop.c
drivers/platform/x86/toshiba_acpi.c
drivers/pwm/pwm-dwc-core.c
drivers/pwm/pwm-dwc.c
drivers/pwm/pwm-dwc.h
drivers/s390/cio/device.c
drivers/s390/cio/device_fsm.c
drivers/s390/cio/device_ops.c
drivers/s390/cio/qdio_main.c
drivers/s390/net/ism_drv.c
drivers/scsi/hisi_sas/hisi_sas_main.c
drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
drivers/scsi/qla2xxx/qla_edif.c
drivers/scsi/scsi_lib.c
drivers/scsi/sg.c
drivers/target/target_core_configfs.c
drivers/thermal/thermal_debugfs.c
drivers/ufs/host/ufs-qcom.c
drivers/uio/uio_hv_generic.c
drivers/vhost/vhost.c
drivers/virt/vmgenid.c
drivers/virtio/virtio.c
fs/bcachefs/acl.c
fs/bcachefs/backpointers.c
fs/bcachefs/backpointers.h
fs/bcachefs/bcachefs.h
fs/bcachefs/bcachefs_format.h
fs/bcachefs/bkey.h
fs/bcachefs/bkey_methods.c
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_io.c
fs/bcachefs/btree_iter.h
fs/bcachefs/btree_journal_iter.c
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_locking.c
fs/bcachefs/btree_node_scan.c
fs/bcachefs/btree_trans_commit.c
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_interior.h
fs/bcachefs/btree_write_buffer.c
fs/bcachefs/buckets.h
fs/bcachefs/chardev.c
fs/bcachefs/checksum.c
fs/bcachefs/checksum.h
fs/bcachefs/compress.h
fs/bcachefs/data_update.c
fs/bcachefs/debug.c
fs/bcachefs/ec.c
fs/bcachefs/ec.h
fs/bcachefs/extents.c
fs/bcachefs/eytzinger.c
fs/bcachefs/eytzinger.h
fs/bcachefs/fs-io-direct.c
fs/bcachefs/fs-io.c
fs/bcachefs/journal_io.c
fs/bcachefs/journal_reclaim.c
fs/bcachefs/journal_types.h
fs/bcachefs/opts.c
fs/bcachefs/opts.h
fs/bcachefs/recovery.c
fs/bcachefs/recovery_passes.c
fs/bcachefs/sb-downgrade.c
fs/bcachefs/sb-errors_types.h
fs/bcachefs/sb-members.c
fs/bcachefs/sb-members.h
fs/bcachefs/snapshot.c
fs/bcachefs/super-io.c
fs/bcachefs/super.c
fs/bcachefs/super_types.h
fs/bcachefs/sysfs.c
fs/bcachefs/tests.c
fs/bcachefs/util.h
fs/btrfs/delayed-inode.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/qgroup.c
fs/btrfs/root-tree.c
fs/btrfs/root-tree.h
fs/btrfs/transaction.c
fs/ceph/addr.c
fs/ceph/caps.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/kernfs/file.c
fs/nfsd/nfs4xdr.c
fs/nilfs2/dir.c
fs/proc/bootconfig.c
fs/smb/client/cached_dir.c
fs/smb/client/cifsglob.h
fs/smb/client/connect.c
fs/smb/client/fs_context.c
fs/smb/client/fs_context.h
fs/smb/client/inode.c
fs/smb/client/misc.c
fs/smb/client/smb2ops.c
fs/smb/client/smb2pdu.c
fs/squashfs/inode.c
fs/tracefs/event_inode.c
fs/zonefs/super.c
include/acpi/acpi_bus.h
include/asm-generic/bug.h
include/asm-generic/hyperv-tlfs.h
include/asm-generic/mshyperv.h
include/linux/bootconfig.h
include/linux/compiler.h
include/linux/dma-fence.h
include/linux/gfp_types.h
include/linux/gpio/property.h
include/linux/hyperv.h
include/linux/io_uring_types.h
include/linux/irqflags.h
include/linux/mm.h
include/linux/randomize_kstack.h
include/linux/rwbase_rt.h
include/linux/rwsem.h
include/linux/shmem_fs.h
include/linux/sockptr.h
include/linux/swapops.h
include/linux/u64_stats_sync.h
include/linux/udp.h
include/linux/virtio.h
include/net/addrconf.h
include/net/bluetooth/bluetooth.h
include/net/ip_tunnels.h
include/net/netfilter/nf_flow_table.h
include/net/netfilter/nf_tables.h
include/net/sch_generic.h
include/trace/events/rpcgss.h
include/uapi/linux/vhost.h
init/main.c
io_uring/io_uring.c
io_uring/net.c
kernel/cpu.c
kernel/dma/swiotlb.c
kernel/fork.c
kernel/kprobes.c
kernel/power/suspend.c
kernel/time/tick-common.c
kernel/time/tick-sched.c
kernel/trace/Kconfig
kernel/trace/ring_buffer.c
kernel/trace/trace_events.c
lib/bootconfig.c
lib/checksum_kunit.c
lib/test_ubsan.c
mm/gup.c
mm/huge_memory.c
mm/hugetlb.c
mm/internal.h
mm/madvise.c
mm/memory-failure.c
mm/page_owner.c
mm/shmem.c
net/batman-adv/translation-table.c
net/bluetooth/hci_request.c
net/bluetooth/hci_sock.c
net/bluetooth/hci_sync.c
net/bluetooth/iso.c
net/bluetooth/l2cap_core.c
net/bluetooth/l2cap_sock.c
net/bluetooth/rfcomm/sock.c
net/bluetooth/sco.c
net/bridge/br_input.c
net/bridge/br_netfilter_hooks.c
net/bridge/br_private.h
net/bridge/netfilter/nf_conntrack_bridge.c
net/core/dev.c
net/ipv4/fib_frontend.c
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/route.c
net/ipv6/addrconf.c
net/ipv6/ip6_fib.c
net/ipv6/netfilter/ip6_tables.c
net/netfilter/nf_flow_table_inet.c
net/netfilter/nf_flow_table_ip.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_lookup.c
net/netfilter/nft_set_bitmap.c
net/netfilter/nft_set_hash.c
net/netfilter/nft_set_pipapo.c
net/netfilter/nft_set_rbtree.c
net/nfc/llcp_sock.c
net/openvswitch/conntrack.c
net/sched/sch_generic.c
net/unix/af_unix.c
net/unix/garbage.c
net/xdp/xsk.c
scripts/gcc-plugins/stackleak_plugin.c
sound/core/seq/seq_ump_convert.c
sound/pci/hda/patch_realtek.c
sound/pci/hda/tas2781_hda_i2c.c
tools/hv/hv_kvp_daemon.c
tools/include/linux/kernel.h
tools/include/linux/mm.h
tools/include/linux/panic.h [new file with mode: 0644]
tools/power/x86/turbostat/turbostat.8
tools/power/x86/turbostat/turbostat.c
tools/testing/cxl/test/cxl.c
tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
tools/testing/selftests/kselftest.h
tools/testing/selftests/kselftest_harness.h
tools/testing/selftests/net/tcp_ao/lib/proc.c
tools/testing/selftests/net/tcp_ao/lib/setup.c
tools/testing/selftests/net/tcp_ao/rst.c
tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
tools/testing/selftests/net/udpgso.c
tools/testing/selftests/timers/posix_timers.c
tools/testing/selftests/timers/valid-adjtimex.c
tools/testing/selftests/turbostat/defcolumns.py [new file with mode: 0755]

index 8284692f9610715fa2bc04f8771cb45d1b5ebf88..625b496bf5f45100b8c563349eb077059cfa0138 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -446,7 +446,8 @@ Mythri P K <mythripk@ti.com>
 Nadav Amit <nadav.amit@gmail.com> <namit@vmware.com>
 Nadav Amit <nadav.amit@gmail.com> <namit@cs.technion.ac.il>
 Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
-Naoya Horiguchi <naoya.horiguchi@nec.com> <n-horiguchi@ah.jp.nec.com>
+Naoya Horiguchi <nao.horiguchi@gmail.com> <n-horiguchi@ah.jp.nec.com>
+Naoya Horiguchi <nao.horiguchi@gmail.com> <naoya.horiguchi@nec.com>
 Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
 Neeraj Upadhyay <quic_neeraju@quicinc.com> <neeraju@codeaurora.org>
 Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
diff --git a/CREDITS b/CREDITS
index c55c5a0ee4ff65e244eb3a9de9aeb35515bc2381..0107047f807bfc01a0c5e7ad380e15a5ddc95776 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -3146,6 +3146,10 @@ S: Triftstra=DFe 55
 S: 13353 Berlin
 S: Germany
 
+N: Gustavo Pimental
+E: gustavo.pimentel@synopsys.com
+D: PCI driver for Synopsys DesignWare
+
 N: Emanuel Pirker
 E: epirker@edu.uni-klu.ac.at
 D: AIC5800 IEEE 1394, RAW I/O on 1394
index cce768afec6bed11a961643dcdc2d1ae97848684..25a04cda4c2c054864fa1792d98d9f095ea56a17 100644 (file)
@@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically,
 the BHB might be shared across privilege levels even in the presence of
 Enhanced IBRS.
 
-Currently the only known real-world BHB attack vector is via
-unprivileged eBPF. Therefore, it's highly recommended to not enable
-unprivileged eBPF, especially when eIBRS is used (without retpolines).
-For a full mitigation against BHB attacks, it's recommended to use
-retpolines (or eIBRS combined with retpolines).
+Previously the only known real-world BHB attack vector was via unprivileged
+eBPF. Further research has found attacks that don't require unprivileged eBPF.
+For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
+use the BHB clearing sequence.
 
 Attack scenarios
 ----------------
@@ -430,6 +429,23 @@ The possible values in this file are:
   'PBRSB-eIBRS: Not affected'  CPU is not affected by PBRSB
   ===========================  =======================================================
 
+  - Branch History Injection (BHI) protection status:
+
+.. list-table::
+
+ * - BHI: Not affected
+   - System is not affected
+ * - BHI: Retpoline
+   - System is protected by retpoline
+ * - BHI: BHI_DIS_S
+   - System is protected by BHI_DIS_S
+ * - BHI: SW loop, KVM SW loop
+   - System is protected by software clearing sequence
+ * - BHI: Vulnerable
+   - System is vulnerable to BHI
+ * - BHI: Vulnerable, KVM: SW loop
+   - System is vulnerable; KVM is protected by software clearing sequence
+
 Full mitigation might require a microcode update from the CPU
 vendor. When the necessary microcode is not available, the kernel will
 report vulnerability.
@@ -484,7 +500,11 @@ Spectre variant 2
 
    Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
    boot, by setting the IBRS bit, and they're automatically protected against
-   Spectre v2 variant attacks.
+   some Spectre v2 variant attacks. The BHB can still influence the choice of
+   indirect branch predictor entry, and although branch predictor entries are
+   isolated between modes when eIBRS is enabled, the BHB itself is not isolated
+   between modes. Systems which support BHI_DIS_S will set it to protect against
+   BHI attacks.
 
    On Intel's enhanced IBRS systems, this includes cross-thread branch target
    injections on SMT systems (STIBP). In other words, Intel eIBRS enables
@@ -638,6 +658,18 @@ kernel command line.
                spectre_v2=off. Spectre variant 1 mitigations
                cannot be disabled.
 
+       spectre_bhi=
+
+               [X86] Control mitigation of Branch History Injection
+               (BHI) vulnerability.  This setting affects the deployment
+               of the HW BHI control and the SW BHB clearing sequence.
+
+               on
+                       (default) Enable the HW or SW mitigation as
+                       needed.
+               off
+                       Disable the mitigation.
+
 For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
 
 Mitigation selection guide
index 623fce7d5fcd0c4392432908e21aaba5134e3aa0..902ecd92a29fbe83df18d32d1a8fe652c8277132 100644 (file)
                                               retbleed=off [X86]
                                               spec_rstack_overflow=off [X86]
                                               spec_store_bypass_disable=off [X86,PPC]
+                                              spectre_bhi=off [X86]
                                               spectre_v2_user=off [X86]
                                               srbds=off [X86,INTEL]
                                               ssbd=force-off [ARM64]
        sonypi.*=       [HW] Sony Programmable I/O Control Device driver
                        See Documentation/admin-guide/laptops/sonypi.rst
 
+       spectre_bhi=    [X86] Control mitigation of Branch History Injection
+                       (BHI) vulnerability.  This setting affects the
+                       deployment of the HW BHI control and the SW BHB
+                       clearing sequence.
+
+                       on   - (default) Enable the HW or SW mitigation
+                              as needed.
+                       off  - Disable the mitigation.
+
        spectre_v2=     [X86,EARLY] Control mitigation of Spectre variant 2
                        (indirect branch speculation) vulnerability.
                        The default operation protects the kernel from
index c0d6a4fdff97e37f31ecc763347497aea9450780..e6dc5494baee29a7171c11ac074159e6a08f8627 100644 (file)
@@ -53,6 +53,15 @@ patternProperties:
       compatible:
         const: qcom,sm8150-dpu
 
+  "^displayport-controller@[0-9a-f]+$":
+    type: object
+    additionalProperties: true
+
+    properties:
+      compatible:
+        contains:
+          const: qcom,sm8150-dp
+
   "^dsi@[0-9a-f]+$":
     type: object
     additionalProperties: true
index afcdeed4e88af625ea4f0f371cc11ffdbe824859..bc813fe74faba5ae50bc81ecb2f75f9e1d8803c9 100644 (file)
@@ -52,6 +52,9 @@ properties:
       - const: main
       - const: mm
 
+  power-domains:
+    maxItems: 1
+
 required:
   - compatible
   - reg
index e14c58796d250116107041b1be3e40aafa564656..e5de6f5d061a7c2162bc6fac628e542389602be3 100644 (file)
@@ -97,7 +97,6 @@ like this::
 
        static struct virtio_driver virtio_dummy_driver = {
                .driver.name =  KBUILD_MODNAME,
-               .driver.owner = THIS_MODULE,
                .id_table =     id_table,
                .probe =        virtio_dummy_probe,
                .remove =       virtio_dummy_remove,
diff --git a/Documentation/filesystems/bcachefs/index.rst b/Documentation/filesystems/bcachefs/index.rst
new file mode 100644 (file)
index 0000000..e2bd61c
--- /dev/null
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+bcachefs Documentation
+======================
+
+.. toctree::
+   :maxdepth: 2
+   :numbered:
+
+   errorcodes
index 0ea1e44fa02823ffd51f4739a3a9aab635a35bbe..1f9b4c905a6a7c0646fca9764829151582eb6e7c 100644 (file)
@@ -69,6 +69,7 @@ Documentation for filesystem implementations.
    afs
    autofs
    autofs-mount-control
+   bcachefs/index
    befs
    bfs
    btrfs
index 0d0334cd51798b63af73cb86f891c07e1c7e587c..3a45a20fc05a1f90a67b5b61e6bbb654145928a3 100644 (file)
@@ -24,10 +24,10 @@ fragmentation statistics can be obtained through gfp flag information of
 each page. It is already implemented and activated if page owner is
 enabled. Other usages are more than welcome.
 
-It can also be used to show all the stacks and their outstanding
-allocations, which gives us a quick overview of where the memory is going
-without the need to screen through all the pages and match the allocation
-and free operation.
+It can also be used to show all the stacks and their current number of
+allocated base pages, which gives us a quick overview of where the memory
+is going without the need to screen through all the pages and match the
+allocation and free operation.
 
 page owner is disabled by default. So, if you'd like to use it, you need
 to add "page_owner=on" to your boot cmdline. If the kernel is built
@@ -75,42 +75,45 @@ Usage
 
        cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks.txt
        cat stacks.txt
-        prep_new_page+0xa9/0x120
-        get_page_from_freelist+0x7e6/0x2140
-        __alloc_pages+0x18a/0x370
-        new_slab+0xc8/0x580
-        ___slab_alloc+0x1f2/0xaf0
-        __slab_alloc.isra.86+0x22/0x40
-        kmem_cache_alloc+0x31b/0x350
-        __khugepaged_enter+0x39/0x100
-        dup_mmap+0x1c7/0x5ce
-        copy_process+0x1afe/0x1c90
-        kernel_clone+0x9a/0x3c0
-        __do_sys_clone+0x66/0x90
-        do_syscall_64+0x7f/0x160
-        entry_SYSCALL_64_after_hwframe+0x6c/0x74
-       stack_count: 234
+        post_alloc_hook+0x177/0x1a0
+        get_page_from_freelist+0xd01/0xd80
+        __alloc_pages+0x39e/0x7e0
+        allocate_slab+0xbc/0x3f0
+        ___slab_alloc+0x528/0x8a0
+        kmem_cache_alloc+0x224/0x3b0
+        sk_prot_alloc+0x58/0x1a0
+        sk_alloc+0x32/0x4f0
+        inet_create+0x427/0xb50
+        __sock_create+0x2e4/0x650
+        inet_ctl_sock_create+0x30/0x180
+        igmp_net_init+0xc1/0x130
+        ops_init+0x167/0x410
+        setup_net+0x304/0xa60
+        copy_net_ns+0x29b/0x4a0
+        create_new_namespaces+0x4a1/0x820
+       nr_base_pages: 16
        ...
        ...
        echo 7000 > /sys/kernel/debug/page_owner_stacks/count_threshold
        cat /sys/kernel/debug/page_owner_stacks/show_stacks> stacks_7000.txt
        cat stacks_7000.txt
-        prep_new_page+0xa9/0x120
-        get_page_from_freelist+0x7e6/0x2140
-        __alloc_pages+0x18a/0x370
-        alloc_pages_mpol+0xdf/0x1e0
-        folio_alloc+0x14/0x50
-        filemap_alloc_folio+0xb0/0x100
-        page_cache_ra_unbounded+0x97/0x180
-        filemap_fault+0x4b4/0x1200
-        __do_fault+0x2d/0x110
-        do_pte_missing+0x4b0/0xa30
-        __handle_mm_fault+0x7fa/0xb70
-        handle_mm_fault+0x125/0x300
-        do_user_addr_fault+0x3c9/0x840
-        exc_page_fault+0x68/0x150
-        asm_exc_page_fault+0x22/0x30
-       stack_count: 8248
+        post_alloc_hook+0x177/0x1a0
+        get_page_from_freelist+0xd01/0xd80
+        __alloc_pages+0x39e/0x7e0
+        alloc_pages_mpol+0x22e/0x490
+        folio_alloc+0xd5/0x110
+        filemap_alloc_folio+0x78/0x230
+        page_cache_ra_order+0x287/0x6f0
+        filemap_get_pages+0x517/0x1160
+        filemap_read+0x304/0x9f0
+        xfs_file_buffered_read+0xe6/0x1d0 [xfs]
+        xfs_file_read_iter+0x1f0/0x380 [xfs]
+        __kernel_read+0x3b9/0x730
+        kernel_read_file+0x309/0x4d0
+        __do_sys_finit_module+0x381/0x730
+        do_syscall_64+0x8d/0x150
+        entry_SYSCALL_64_after_hwframe+0x62/0x6a
+       nr_base_pages: 20824
        ...
 
        cat /sys/kernel/debug/page_owner > page_owner_full.txt
index aea47e04c3a52aa6774a090c2bd17555306c4a02..ee9cc2b40409ea90bf9b483df91e3690b82a3f68 100644 (file)
@@ -2191,7 +2191,6 @@ N:        mxs
 
 ARM/FREESCALE LAYERSCAPE ARM ARCHITECTURE
 M:     Shawn Guo <shawnguo@kernel.org>
-M:     Li Yang <leoyang.li@nxp.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git
@@ -2708,7 +2707,7 @@ F:        sound/soc/rockchip/
 N:     rockchip
 
 ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 R:     Alim Akhtar <alim.akhtar@samsung.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-samsung-soc@vger.kernel.org
@@ -3573,6 +3572,7 @@ S:        Supported
 C:     irc://irc.oftc.net/bcache
 T:     git https://evilpiepirate.org/git/bcachefs.git
 F:     fs/bcachefs/
+F:     Documentation/filesystems/bcachefs/
 
 BDISP ST MEDIA DRIVER
 M:     Fabien Dessenne <fabien.dessenne@foss.st.com>
@@ -4869,7 +4869,6 @@ F:        drivers/power/supply/cw2015_battery.c
 CEPH COMMON CODE (LIBCEPH)
 M:     Ilya Dryomov <idryomov@gmail.com>
 M:     Xiubo Li <xiubli@redhat.com>
-R:     Jeff Layton <jlayton@kernel.org>
 L:     ceph-devel@vger.kernel.org
 S:     Supported
 W:     http://ceph.com/
@@ -4881,7 +4880,6 @@ F:        net/ceph/
 CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH)
 M:     Xiubo Li <xiubli@redhat.com>
 M:     Ilya Dryomov <idryomov@gmail.com>
-R:     Jeff Layton <jlayton@kernel.org>
 L:     ceph-devel@vger.kernel.org
 S:     Supported
 W:     http://ceph.com/
@@ -5557,7 +5555,7 @@ F:        drivers/cpuidle/cpuidle-big_little.c
 CPUIDLE DRIVER - ARM EXYNOS
 M:     Daniel Lezcano <daniel.lezcano@linaro.org>
 M:     Kukjin Kim <kgene@kernel.org>
-R:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-pm@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
 S:     Maintained
@@ -8523,7 +8521,6 @@ S:        Maintained
 F:     drivers/video/fbdev/fsl-diu-fb.*
 
 FREESCALE DMA DRIVER
-M:     Li Yang <leoyang.li@nxp.com>
 M:     Zhang Wei <zw@zh-kernel.org>
 L:     linuxppc-dev@lists.ozlabs.org
 S:     Maintained
@@ -8688,10 +8685,9 @@ F:       drivers/soc/fsl/qe/tsa.h
 F:     include/dt-bindings/soc/cpm1-fsl,tsa.h
 
 FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
-M:     Li Yang <leoyang.li@nxp.com>
 L:     netdev@vger.kernel.org
 L:     linuxppc-dev@lists.ozlabs.org
-S:     Maintained
+S:     Orphan
 F:     drivers/net/ethernet/freescale/ucc_geth*
 
 FREESCALE QUICC ENGINE UCC HDLC DRIVER
@@ -8708,10 +8704,9 @@ S:       Maintained
 F:     drivers/tty/serial/ucc_uart.c
 
 FREESCALE SOC DRIVERS
-M:     Li Yang <leoyang.li@nxp.com>
 L:     linuxppc-dev@lists.ozlabs.org
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:     Maintained
+S:     Orphan
 F:     Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml
 F:     Documentation/devicetree/bindings/soc/fsl/
 F:     drivers/soc/fsl/
@@ -8745,10 +8740,9 @@ F:       Documentation/devicetree/bindings/sound/fsl,qmc-audio.yaml
 F:     sound/soc/fsl/fsl_qmc_audio.c
 
 FREESCALE USB PERIPHERAL DRIVERS
-M:     Li Yang <leoyang.li@nxp.com>
 L:     linux-usb@vger.kernel.org
 L:     linuxppc-dev@lists.ozlabs.org
-S:     Maintained
+S:     Orphan
 F:     drivers/usb/gadget/udc/fsl*
 
 FREESCALE USB PHY DRIVER
@@ -9000,7 +8994,7 @@ F:        drivers/i2c/muxes/i2c-mux-gpio.c
 F:     include/linux/platform_data/i2c-mux-gpio.h
 
 GENERIC GPIO RESET DRIVER
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 S:     Maintained
 F:     drivers/reset/reset-gpio.c
 
@@ -10030,7 +10024,7 @@ F:      drivers/media/platform/st/sti/hva
 
 HWPOISON MEMORY FAILURE HANDLING
 M:     Miaohe Lin <linmiaohe@huawei.com>
-R:     Naoya Horiguchi <naoya.horiguchi@nec.com>
+R:     Naoya Horiguchi <nao.horiguchi@gmail.com>
 L:     linux-mm@kvack.org
 S:     Maintained
 F:     mm/hwpoison-inject.c
@@ -13295,7 +13289,7 @@ F:      drivers/iio/adc/max11205.c
 
 MAXIM MAX17040 FAMILY FUEL GAUGE DRIVERS
 R:     Iskren Chernev <iskren.chernev@gmail.com>
-R:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R:     Krzysztof Kozlowski <krzk@kernel.org>
 R:     Marek Szyprowski <m.szyprowski@samsung.com>
 R:     Matheus Castello <matheus@castello.eng.br>
 L:     linux-pm@vger.kernel.org
@@ -13305,7 +13299,7 @@ F:      drivers/power/supply/max17040_battery.c
 
 MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS
 R:     Hans de Goede <hdegoede@redhat.com>
-R:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R:     Krzysztof Kozlowski <krzk@kernel.org>
 R:     Marek Szyprowski <m.szyprowski@samsung.com>
 R:     Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
 R:     Purism Kernel Team <kernel@puri.sm>
@@ -13363,7 +13357,7 @@ F:      Documentation/devicetree/bindings/power/supply/maxim,max77976.yaml
 F:     drivers/power/supply/max77976_charger.c
 
 MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-pm@vger.kernel.org
 S:     Maintained
 B:     mailto:linux-samsung-soc@vger.kernel.org
@@ -13374,7 +13368,7 @@ F:      drivers/power/supply/max77693_charger.c
 
 MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
 M:     Chanwoo Choi <cw00.choi@samsung.com>
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
 B:     mailto:linux-samsung-soc@vger.kernel.org
@@ -14158,7 +14152,7 @@ F:      mm/mm_init.c
 F:     tools/testing/memblock/
 
 MEMORY CONTROLLER DRIVERS
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
 B:     mailto:krzysztof.kozlowski@linaro.org
@@ -15539,7 +15533,7 @@ F:      include/uapi/linux/nexthop.h
 F:     net/ipv4/nexthop.c
 
 NFC SUBSYSTEM
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/net/nfc/
@@ -15916,7 +15910,7 @@ F:      Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml
 F:     drivers/regulator/pf8x00-regulator.c
 
 NXP PTN5150A CC LOGIC AND EXTCON DRIVER
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml
@@ -16527,7 +16521,7 @@ K:      of_overlay_remove
 
 OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
 M:     Rob Herring <robh@kernel.org>
-M:     Krzysztof Kozlowski <krzysztof.kozlowski+dt@linaro.org>
+M:     Krzysztof Kozlowski <krzk+dt@kernel.org>
 M:     Conor Dooley <conor+dt@kernel.org>
 L:     devicetree@vger.kernel.org
 S:     Maintained
@@ -16974,7 +16968,6 @@ F:      drivers/pci/controller/dwc/pci-exynos.c
 
 PCI DRIVER FOR SYNOPSYS DESIGNWARE
 M:     Jingoo Han <jingoohan1@gmail.com>
-M:     Gustavo Pimentel <gustavo.pimentel@synopsys.com>
 M:     Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
 L:     linux-pci@vger.kernel.org
 S:     Maintained
@@ -17485,7 +17478,7 @@ F:      Documentation/devicetree/bindings/pinctrl/renesas,*
 F:     drivers/pinctrl/renesas/
 
 PIN CONTROLLER - SAMSUNG
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 M:     Sylwester Nawrocki <s.nawrocki@samsung.com>
 R:     Alim Akhtar <alim.akhtar@samsung.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -19453,7 +19446,7 @@ F:      Documentation/devicetree/bindings/sound/samsung*
 F:     sound/soc/samsung/
 
 SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-crypto@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
 S:     Maintained
@@ -19488,7 +19481,7 @@ S:      Maintained
 F:     drivers/platform/x86/samsung-laptop.c
 
 SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-kernel@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
 S:     Maintained
@@ -19514,7 +19507,7 @@ F:      drivers/media/platform/samsung/s3c-camif/
 F:     include/media/drv-intf/s3c_camif.h
 
 SAMSUNG S3FWRN5 NFC DRIVER
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 S:     Maintained
 F:     Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
 F:     drivers/nfc/s3fwrn5
@@ -19535,7 +19528,7 @@ S:      Supported
 F:     drivers/media/i2c/s5k5baf.c
 
 SAMSUNG S5P Security SubSystem (SSS) DRIVER
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 M:     Vladimir Zapolskiy <vz@mleia.com>
 L:     linux-crypto@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
@@ -19557,7 +19550,7 @@ F:      Documentation/devicetree/bindings/media/samsung,fimc.yaml
 F:     drivers/media/platform/samsung/exynos4-is/
 
 SAMSUNG SOC CLOCK DRIVERS
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 M:     Sylwester Nawrocki <s.nawrocki@samsung.com>
 M:     Chanwoo Choi <cw00.choi@samsung.com>
 R:     Alim Akhtar <alim.akhtar@samsung.com>
@@ -19589,7 +19582,7 @@ F:      drivers/net/ethernet/samsung/sxgbe/
 
 SAMSUNG THERMAL DRIVER
 M:     Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-pm@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
 S:     Maintained
@@ -22577,6 +22570,7 @@ Q:      https://patchwork.kernel.org/project/linux-pm/list/
 B:     https://bugzilla.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat
 F:     tools/power/x86/turbostat/
+F:     tools/testing/selftests/turbostat/
 
 TW5864 VIDEO4LINUX DRIVER
 M:     Bluecherry Maintainers <maintainers@bluecherrydvr.com>
@@ -23785,7 +23779,7 @@ S:      Orphan
 F:     drivers/mmc/host/vub300.c
 
 W1 DALLAS'S 1-WIRE BUS
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 S:     Maintained
 F:     Documentation/devicetree/bindings/w1/
 F:     Documentation/w1/
index e1bf12891cb0e4a7471d60cf4b2eb0050d600d2f..59d8a7f95d0a863e4308853f9e01baa9fb8fed84 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 9
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*
index 9f066785bb71d93ca5da01a22d15ed2effba5901..65afb1de48b36e843bd665a91df6b33badfd23d1 100644 (file)
@@ -1172,12 +1172,12 @@ config PAGE_SIZE_LESS_THAN_256KB
 
 config PAGE_SHIFT
        int
-       default 12 if PAGE_SIZE_4KB
-       default 13 if PAGE_SIZE_8KB
-       default 14 if PAGE_SIZE_16KB
-       default 15 if PAGE_SIZE_32KB
-       default 16 if PAGE_SIZE_64KB
-       default 18 if PAGE_SIZE_256KB
+       default 12 if PAGE_SIZE_4KB
+       default 13 if PAGE_SIZE_8KB
+       default 14 if PAGE_SIZE_16KB
+       default 15 if PAGE_SIZE_32KB
+       default 16 if PAGE_SIZE_64KB
+       default 18 if PAGE_SIZE_256KB
 
 # This allows to use a set of generic functions to determine mmap base
 # address by giving priority to top-down scheme only if the process
index 1235a71c6abe96564059010e214f87304d7d4e8c..52869e68f833c4d8f7cefdcefeadba9b8b78f87a 100644 (file)
        bus-width = <4>;
        no-1-8-v;
        no-sdio;
-       no-emmc;
+       no-mmc;
        status = "okay";
 };
 
index ba7231b364bb8c76296e953bbfa450bc49c1293a..7bab113ca6da79ed3941e7d6550fecfd31687f25 100644 (file)
                                remote-endpoint = <&mipi_from_sensor>;
                                clock-lanes = <0>;
                                data-lanes = <1>;
+                               link-frequencies = /bits/ 64 <330000000>;
                        };
                };
        };
index 31755a378c7364b5b5a055fa59b8796600a898a9..ff2a4a4d822047168008446e6c1835bd7358e789 100644 (file)
@@ -79,10 +79,8 @@ static struct musb_hdrc_platform_data tusb_data = {
 static struct gpiod_lookup_table tusb_gpio_table = {
        .dev_id = "musb-tusb",
        .table = {
-               GPIO_LOOKUP("gpio-0-15", 0, "enable",
-                           GPIO_ACTIVE_HIGH),
-               GPIO_LOOKUP("gpio-48-63", 10, "int",
-                           GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP("gpio-0-31", 0, "enable", GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP("gpio-32-63", 26, "int", GPIO_ACTIVE_HIGH),
                { }
        },
 };
@@ -140,12 +138,11 @@ static int slot1_cover_open;
 static int slot2_cover_open;
 static struct device *mmc_device;
 
-static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
+static struct gpiod_lookup_table nokia800_mmc_gpio_table = {
        .dev_id = "mmci-omap.0",
        .table = {
                /* Slot switch, GPIO 96 */
-               GPIO_LOOKUP("gpio-80-111", 16,
-                           "switch", GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
                { }
        },
 };
@@ -153,12 +150,12 @@ static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
 static struct gpiod_lookup_table nokia810_mmc_gpio_table = {
        .dev_id = "mmci-omap.0",
        .table = {
+               /* Slot switch, GPIO 96 */
+               GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
                /* Slot index 1, VSD power, GPIO 23 */
-               GPIO_LOOKUP_IDX("gpio-16-31", 7,
-                               "vsd", 1, GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP_IDX("gpio-0-31", 23, "vsd", 1, GPIO_ACTIVE_HIGH),
                /* Slot index 1, VIO power, GPIO 9 */
-               GPIO_LOOKUP_IDX("gpio-0-15", 9,
-                               "vio", 1, GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP_IDX("gpio-0-31", 9, "vio", 1, GPIO_ACTIVE_HIGH),
                { }
        },
 };
@@ -415,8 +412,6 @@ static struct omap_mmc_platform_data *mmc_data[OMAP24XX_NR_MMC];
 
 static void __init n8x0_mmc_init(void)
 {
-       gpiod_add_lookup_table(&nokia8xx_mmc_gpio_table);
-
        if (board_is_n810()) {
                mmc1_data.slots[0].name = "external";
 
@@ -429,6 +424,8 @@ static void __init n8x0_mmc_init(void)
                mmc1_data.slots[1].name = "internal";
                mmc1_data.slots[1].ban_openended = 1;
                gpiod_add_lookup_table(&nokia810_mmc_gpio_table);
+       } else {
+               gpiod_add_lookup_table(&nokia800_mmc_gpio_table);
        }
 
        mmc1_data.nr_slots = 2;
index 3c42240e78e245fe54ab5c637d9fa071dc2c0b34..4aaf5a0c1ed8af6f7f845be079c9297f35d2d72b 100644 (file)
@@ -41,7 +41,7 @@ conn_subsys: bus@5b000000 {
                interrupts = <GIC_SPI 267 IRQ_TYPE_LEVEL_HIGH>;
                fsl,usbphy = <&usbphy1>;
                fsl,usbmisc = <&usbmisc1 0>;
-               clocks = <&usb2_lpcg 0>;
+               clocks = <&usb2_lpcg IMX_LPCG_CLK_6>;
                ahb-burst-config = <0x0>;
                tx-burst-size-dword = <0x10>;
                rx-burst-size-dword = <0x10>;
@@ -58,7 +58,7 @@ conn_subsys: bus@5b000000 {
        usbphy1: usbphy@5b100000 {
                compatible = "fsl,imx7ulp-usbphy";
                reg = <0x5b100000 0x1000>;
-               clocks = <&usb2_lpcg 1>;
+               clocks = <&usb2_lpcg IMX_LPCG_CLK_7>;
                power-domains = <&pd IMX_SC_R_USB_0_PHY>;
                status = "disabled";
        };
@@ -67,8 +67,8 @@ conn_subsys: bus@5b000000 {
                interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_HIGH>;
                reg = <0x5b010000 0x10000>;
                clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>,
-                        <&sdhc0_lpcg IMX_LPCG_CLK_0>,
-                        <&sdhc0_lpcg IMX_LPCG_CLK_5>;
+                        <&sdhc0_lpcg IMX_LPCG_CLK_5>,
+                        <&sdhc0_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "ahb", "per";
                power-domains = <&pd IMX_SC_R_SDHC_0>;
                status = "disabled";
@@ -78,8 +78,8 @@ conn_subsys: bus@5b000000 {
                interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
                reg = <0x5b020000 0x10000>;
                clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>,
-                        <&sdhc1_lpcg IMX_LPCG_CLK_0>,
-                        <&sdhc1_lpcg IMX_LPCG_CLK_5>;
+                        <&sdhc1_lpcg IMX_LPCG_CLK_5>,
+                        <&sdhc1_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "ahb", "per";
                power-domains = <&pd IMX_SC_R_SDHC_1>;
                fsl,tuning-start-tap = <20>;
@@ -91,8 +91,8 @@ conn_subsys: bus@5b000000 {
                interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>;
                reg = <0x5b030000 0x10000>;
                clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>,
-                        <&sdhc2_lpcg IMX_LPCG_CLK_0>,
-                        <&sdhc2_lpcg IMX_LPCG_CLK_5>;
+                        <&sdhc2_lpcg IMX_LPCG_CLK_5>,
+                        <&sdhc2_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "ahb", "per";
                power-domains = <&pd IMX_SC_R_SDHC_2>;
                status = "disabled";
index cab3468b1875ee885f32a842f92d56cc0b744998..f7a91d43a0ffe10e85e2b1e71ff6751c314b6ef7 100644 (file)
@@ -28,8 +28,8 @@ dma_subsys: bus@5a000000 {
                #size-cells = <0>;
                interrupts = <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&spi0_lpcg 0>,
-                        <&spi0_lpcg 1>;
+               clocks = <&spi0_lpcg IMX_LPCG_CLK_0>,
+                        <&spi0_lpcg IMX_LPCG_CLK_4>;
                clock-names = "per", "ipg";
                assigned-clocks = <&clk IMX_SC_R_SPI_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <60000000>;
@@ -44,8 +44,8 @@ dma_subsys: bus@5a000000 {
                #size-cells = <0>;
                interrupts = <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&spi1_lpcg 0>,
-                        <&spi1_lpcg 1>;
+               clocks = <&spi1_lpcg IMX_LPCG_CLK_0>,
+                        <&spi1_lpcg IMX_LPCG_CLK_4>;
                clock-names = "per", "ipg";
                assigned-clocks = <&clk IMX_SC_R_SPI_1 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <60000000>;
@@ -60,8 +60,8 @@ dma_subsys: bus@5a000000 {
                #size-cells = <0>;
                interrupts = <GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&spi2_lpcg 0>,
-                        <&spi2_lpcg 1>;
+               clocks = <&spi2_lpcg IMX_LPCG_CLK_0>,
+                        <&spi2_lpcg IMX_LPCG_CLK_4>;
                clock-names = "per", "ipg";
                assigned-clocks = <&clk IMX_SC_R_SPI_2 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <60000000>;
@@ -76,8 +76,8 @@ dma_subsys: bus@5a000000 {
                #size-cells = <0>;
                interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&spi3_lpcg 0>,
-                        <&spi3_lpcg 1>;
+               clocks = <&spi3_lpcg IMX_LPCG_CLK_0>,
+                        <&spi3_lpcg IMX_LPCG_CLK_4>;
                clock-names = "per", "ipg";
                assigned-clocks = <&clk IMX_SC_R_SPI_3 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <60000000>;
@@ -145,8 +145,8 @@ dma_subsys: bus@5a000000 {
                compatible = "fsl,imx8qxp-pwm", "fsl,imx27-pwm";
                reg = <0x5a190000 0x1000>;
                interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
-               clocks = <&adma_pwm_lpcg 1>,
-                        <&adma_pwm_lpcg 0>;
+               clocks = <&adma_pwm_lpcg IMX_LPCG_CLK_4>,
+                        <&adma_pwm_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "per";
                assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
@@ -355,8 +355,8 @@ dma_subsys: bus@5a000000 {
                reg = <0x5a880000 0x10000>;
                interrupts = <GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&adc0_lpcg 0>,
-                        <&adc0_lpcg 1>;
+               clocks = <&adc0_lpcg IMX_LPCG_CLK_0>,
+                        <&adc0_lpcg IMX_LPCG_CLK_4>;
                clock-names = "per", "ipg";
                assigned-clocks = <&clk IMX_SC_R_ADC_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
@@ -370,8 +370,8 @@ dma_subsys: bus@5a000000 {
                reg = <0x5a890000 0x10000>;
                interrupts = <GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&adc1_lpcg 0>,
-                        <&adc1_lpcg 1>;
+               clocks = <&adc1_lpcg IMX_LPCG_CLK_0>,
+                        <&adc1_lpcg IMX_LPCG_CLK_4>;
                clock-names = "per", "ipg";
                assigned-clocks = <&clk IMX_SC_R_ADC_1 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
@@ -384,8 +384,8 @@ dma_subsys: bus@5a000000 {
                reg = <0x5a8d0000 0x10000>;
                interrupts = <GIC_SPI 235 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&can0_lpcg 1>,
-                        <&can0_lpcg 0>;
+               clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+                        <&can0_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "per";
                assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <40000000>;
@@ -405,8 +405,8 @@ dma_subsys: bus@5a000000 {
                 * CAN1 shares CAN0's clock and to enable CAN0's clock it
                 * has to be powered on.
                 */
-               clocks = <&can0_lpcg 1>,
-                        <&can0_lpcg 0>;
+               clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+                        <&can0_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "per";
                assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <40000000>;
@@ -426,8 +426,8 @@ dma_subsys: bus@5a000000 {
                 * CAN2 shares CAN0's clock and to enable CAN0's clock it
                 * has to be powered on.
                 */
-               clocks = <&can0_lpcg 1>,
-                        <&can0_lpcg 0>;
+               clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+                        <&can0_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "per";
                assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <40000000>;
index 7e510b21bbac555b38cede99f97b4edc177bf520..764c1a08e3b118841299d99a5cecb29a095e2f66 100644 (file)
@@ -25,8 +25,8 @@ lsio_subsys: bus@5d000000 {
                compatible = "fsl,imx27-pwm";
                reg = <0x5d000000 0x10000>;
                clock-names = "ipg", "per";
-               clocks = <&pwm0_lpcg 4>,
-                        <&pwm0_lpcg 1>;
+               clocks = <&pwm0_lpcg IMX_LPCG_CLK_6>,
+                        <&pwm0_lpcg IMX_LPCG_CLK_1>;
                assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
                #pwm-cells = <3>;
@@ -38,8 +38,8 @@ lsio_subsys: bus@5d000000 {
                compatible = "fsl,imx27-pwm";
                reg = <0x5d010000 0x10000>;
                clock-names = "ipg", "per";
-               clocks = <&pwm1_lpcg 4>,
-                        <&pwm1_lpcg 1>;
+               clocks = <&pwm1_lpcg IMX_LPCG_CLK_6>,
+                        <&pwm1_lpcg IMX_LPCG_CLK_1>;
                assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
                #pwm-cells = <3>;
@@ -51,8 +51,8 @@ lsio_subsys: bus@5d000000 {
                compatible = "fsl,imx27-pwm";
                reg = <0x5d020000 0x10000>;
                clock-names = "ipg", "per";
-               clocks = <&pwm2_lpcg 4>,
-                        <&pwm2_lpcg 1>;
+               clocks = <&pwm2_lpcg IMX_LPCG_CLK_6>,
+                        <&pwm2_lpcg IMX_LPCG_CLK_1>;
                assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
                #pwm-cells = <3>;
@@ -64,8 +64,8 @@ lsio_subsys: bus@5d000000 {
                compatible = "fsl,imx27-pwm";
                reg = <0x5d030000 0x10000>;
                clock-names = "ipg", "per";
-               clocks = <&pwm3_lpcg 4>,
-                        <&pwm3_lpcg 1>;
+               clocks = <&pwm3_lpcg IMX_LPCG_CLK_6>,
+                        <&pwm3_lpcg IMX_LPCG_CLK_1>;
                assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
                #pwm-cells = <3>;
index 41c79d2ebdd6201dc10278204c064a4c01c71709..f24b14744799e16bb1145738bfb18fd8343c00ee 100644 (file)
@@ -14,6 +14,7 @@
                pinctrl-0 = <&pinctrl_usbcon1>;
                type = "micro";
                label = "otg";
+               vbus-supply = <&reg_usb1_vbus>;
                id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
 
                port {
 };
 
 &usb3_phy0 {
-       vbus-supply = <&reg_usb1_vbus>;
        status = "okay";
 };
 
index d5c400b355af564123497cd1805e0b0ad56ded21..f5491a608b2f3793ca410871fda7e5005db661e1 100644 (file)
@@ -14,6 +14,7 @@
                pinctrl-0 = <&pinctrl_usbcon1>;
                type = "micro";
                label = "otg";
+               vbus-supply = <&reg_usb1_vbus>;
                id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
 
                port {
 };
 
 &usb3_phy0 {
-       vbus-supply = <&reg_usb1_vbus>;
        status = "okay";
 };
 
index 11626fae5f97f3a9b2c94528d1957fdc73f9aac8..aa9f28c4431d0249cce852026eda7a9a7cad3ff0 100644 (file)
 };
 
 &flexcan2 {
-       clocks = <&can1_lpcg 1>,
-                <&can1_lpcg 0>;
+       clocks = <&can1_lpcg IMX_LPCG_CLK_4>,
+                <&can1_lpcg IMX_LPCG_CLK_0>;
        assigned-clocks = <&clk IMX_SC_R_CAN_1 IMX_SC_PM_CLK_PER>;
        fsl,clk-source = /bits/ 8 <1>;
 };
 
 &flexcan3 {
-       clocks = <&can2_lpcg 1>,
-                <&can2_lpcg 0>;
+       clocks = <&can2_lpcg IMX_LPCG_CLK_4>,
+                <&can2_lpcg IMX_LPCG_CLK_0>;
        assigned-clocks = <&clk IMX_SC_R_CAN_2 IMX_SC_PM_CLK_PER>;
        fsl,clk-source = /bits/ 8 <1>;
 };
index 49a70f8c3cab22b758dd290a9fab2374a62abae9..b6aeb1f70e2a038ac2eb3bfe6c402bd37b4dcd6a 100644 (file)
                #size-cells = <2>;
                dma-coherent;
 
+               isa@18000000 {
+                       compatible = "isa";
+                       #size-cells = <1>;
+                       #address-cells = <2>;
+                       ranges = <1 0x0 0x0 0x18000000 0x4000>;
+               };
+
                liointc0: interrupt-controller@1fe01400 {
                        compatible = "loongson,liointc-2.0";
                        reg = <0x0 0x1fe01400 0x0 0x40>,
index dca91caf895e3cd9e428e75b91da9392bfb49d82..74b99bd234cc38df9a087915280e86ddb5bd56d4 100644 (file)
 
 &gmac0 {
        status = "okay";
+
+       phy-mode = "gmii";
+       phy-handle = <&phy0>;
+       mdio {
+               compatible = "snps,dwmac-mdio";
+               #address-cells = <1>;
+               #size-cells = <0>;
+               phy0: ethernet-phy@0 {
+                       reg = <2>;
+               };
+       };
 };
 
 &gmac1 {
        status = "okay";
+
+       phy-mode = "gmii";
+       phy-handle = <&phy1>;
+       mdio {
+               compatible = "snps,dwmac-mdio";
+               #address-cells = <1>;
+               #size-cells = <0>;
+               phy1: ethernet-phy@1 {
+                       reg = <2>;
+               };
+       };
 };
 
 &gmac2 {
        status = "okay";
+
+       phy-mode = "rgmii";
+       phy-handle = <&phy2>;
+       mdio {
+               compatible = "snps,dwmac-mdio";
+               #address-cells = <1>;
+               #size-cells = <0>;
+               phy2: ethernet-phy@2 {
+                       reg = <0>;
+               };
+       };
 };
index a231949b5f553a3814f48f6875e65ac2ed73d09a..9eab2d02cbe8bff12a26ce11dd7ac1543b7c1f82 100644 (file)
                #address-cells = <2>;
                #size-cells = <2>;
 
+               isa@18400000 {
+                       compatible = "isa";
+                       #size-cells = <1>;
+                       #address-cells = <2>;
+                       ranges = <1 0x0 0x0 0x18400000 0x4000>;
+               };
+
                pmc: power-management@100d0000 {
                        compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon";
                        reg = <0x0 0x100d0000 0x0 0x58>;
                msi: msi-controller@1fe01140 {
                        compatible = "loongson,pch-msi-1.0";
                        reg = <0x0 0x1fe01140 0x0 0x8>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
                        msi-controller;
                        loongson,msi-base-vec = <64>;
                        loongson,msi-num-vecs = <192>;
                        #address-cells = <3>;
                        #size-cells = <2>;
                        device_type = "pci";
+                       msi-parent = <&msi>;
                        bus-range = <0x0 0xff>;
-                       ranges = <0x01000000 0x0 0x00008000 0x0 0x18400000 0x0 0x00008000>,
+                       ranges = <0x01000000 0x0 0x00008000 0x0 0x18408000 0x0 0x00008000>,
                                 <0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>;
 
                        gmac0: ethernet@3,0 {
                                reg = <0x1800 0x0 0x0 0x0 0x0>;
-                               interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+                               interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
+                                            <13 IRQ_TYPE_LEVEL_HIGH>;
+                               interrupt-names = "macirq", "eth_lpi";
                                interrupt-parent = <&pic>;
                                status = "disabled";
                        };
 
                        gmac1: ethernet@3,1 {
                                reg = <0x1900 0x0 0x0 0x0 0x0>;
-                               interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+                               interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
+                                            <15 IRQ_TYPE_LEVEL_HIGH>;
+                               interrupt-names = "macirq", "eth_lpi";
                                interrupt-parent = <&pic>;
                                status = "disabled";
                        };
 
                        gmac2: ethernet@3,2 {
                                reg = <0x1a00 0x0 0x0 0x0 0x0>;
-                               interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
+                               interrupts = <17 IRQ_TYPE_LEVEL_HIGH>,
+                                            <18 IRQ_TYPE_LEVEL_HIGH>;
+                               interrupt-names = "macirq", "eth_lpi";
                                interrupt-parent = <&pic>;
                                status = "disabled";
                        };
index b24437e28c6eda457b2be003b51ad3809600f7cc..7bd47d65bf7a048fda5183ed6844d5dbc129b232 100644 (file)
@@ -11,6 +11,7 @@
 #define _ASM_ADDRSPACE_H
 
 #include <linux/const.h>
+#include <linux/sizes.h>
 
 #include <asm/loongarch.h>
 
index 4a8adcca329b81e4f289dd7825fb15dbf2f4f7a9..c2f9979b2979e5e92e791e3f8304975db9e929c9 100644 (file)
 #include <asm/pgtable-bits.h>
 #include <asm/string.h>
 
-/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page)     ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
 extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size);
 extern void __init early_iounmap(void __iomem *addr, unsigned long size);
 
@@ -73,6 +68,21 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
 
 #define __io_aw() mmiowb()
 
+#ifdef CONFIG_KFENCE
+#define virt_to_phys(kaddr)                                                            \
+({                                                                                     \
+       (likely((unsigned long)kaddr < vm_map_base)) ? __pa((unsigned long)kaddr) :     \
+       page_to_phys(tlb_virt_to_page((unsigned long)kaddr)) + offset_in_page((unsigned long)kaddr);\
+})
+
+#define phys_to_virt(paddr)                                                            \
+({                                                                                     \
+       extern char *__kfence_pool;                                                     \
+       (unlikely(__kfence_pool == NULL)) ? __va((unsigned long)paddr) :                \
+       page_address(phys_to_page((unsigned long)paddr)) + offset_in_page((unsigned long)paddr);\
+})
+#endif
+
 #include <asm-generic/io.h>
 
 #define ARCH_HAS_VALID_PHYS_ADDR_RANGE
index 6c82aea1c99398c46484a77cc28da1316799affb..a6a5760da3a3323641e3fa422f3da87cdb4b66f8 100644 (file)
@@ -16,6 +16,7 @@
 static inline bool arch_kfence_init_pool(void)
 {
        int err;
+       char *kaddr, *vaddr;
        char *kfence_pool = __kfence_pool;
        struct vm_struct *area;
 
@@ -35,6 +36,14 @@ static inline bool arch_kfence_init_pool(void)
                return false;
        }
 
+       kaddr = kfence_pool;
+       vaddr = __kfence_pool;
+       while (kaddr < kfence_pool + KFENCE_POOL_SIZE) {
+               set_page_address(virt_to_page(kaddr), vaddr);
+               kaddr += PAGE_SIZE;
+               vaddr += PAGE_SIZE;
+       }
+
        return true;
 }
 
index 44027060c54a28bd34a80f538135491e3ebc758a..e85df33f11c77212c2e8ec8e6b3f1dbb955bc622 100644 (file)
@@ -78,7 +78,26 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 struct page *dmw_virt_to_page(unsigned long kaddr);
 struct page *tlb_virt_to_page(unsigned long kaddr);
 
-#define virt_to_pfn(kaddr)     PFN_DOWN(PHYSADDR(kaddr))
+#define pfn_to_phys(pfn)       __pfn_to_phys(pfn)
+#define phys_to_pfn(paddr)     __phys_to_pfn(paddr)
+
+#define page_to_phys(page)     pfn_to_phys(page_to_pfn(page))
+#define phys_to_page(paddr)    pfn_to_page(phys_to_pfn(paddr))
+
+#ifndef CONFIG_KFENCE
+
+#define page_to_virt(page)     __va(page_to_phys(page))
+#define virt_to_page(kaddr)    phys_to_page(__pa(kaddr))
+
+#else
+
+#define WANT_PAGE_VIRTUAL
+
+#define page_to_virt(page)                                                             \
+({                                                                                     \
+       extern char *__kfence_pool;                                                     \
+       (__kfence_pool == NULL) ? __va(page_to_phys(page)) : page_address(page);        \
+})
 
 #define virt_to_page(kaddr)                                                            \
 ({                                                                                     \
@@ -86,6 +105,11 @@ struct page *tlb_virt_to_page(unsigned long kaddr);
        dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
 })
 
+#endif
+
+#define pfn_to_virt(pfn)       page_to_virt(pfn_to_page(pfn))
+#define virt_to_pfn(kaddr)     page_to_pfn(virt_to_page(kaddr))
+
 extern int __virt_addr_valid(volatile void *kaddr);
 #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
 
index a9630a81b38abbfc575ea4174af049ccd5a9a888..89af7c12e8c08d4faab2919cf22034b5ab0f5a6b 100644 (file)
@@ -4,6 +4,7 @@
  */
 #include <linux/export.h>
 #include <linux/io.h>
+#include <linux/kfence.h>
 #include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
@@ -111,6 +112,9 @@ int __virt_addr_valid(volatile void *kaddr)
 {
        unsigned long vaddr = (unsigned long)kaddr;
 
+       if (is_kfence_address((void *)kaddr))
+               return 1;
+
        if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base))
                return 0;
 
index 2aae72e638713a658475e6fb82fc73eae0fc3469..bda018150000e66b906420ea7e3a5f79472ca352 100644 (file)
 
 struct page *dmw_virt_to_page(unsigned long kaddr)
 {
-       return pfn_to_page(virt_to_pfn(kaddr));
+       return phys_to_page(__pa(kaddr));
 }
 EXPORT_SYMBOL(dmw_virt_to_page);
 
 struct page *tlb_virt_to_page(unsigned long kaddr)
 {
-       return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
+       return phys_to_page(pfn_to_phys(pte_pfn(*virt_to_kpte(kaddr))));
 }
 EXPORT_SYMBOL(tlb_virt_to_page);
 
index d14d0e37ad02ddf10b42cfed590c65f97f8de424..4a2b40ce39e0911d74806b2db54d69a9735d33ef 100644 (file)
@@ -159,7 +159,7 @@ extern unsigned long exception_ip(struct pt_regs *regs);
 #define exception_ip(regs) exception_ip(regs)
 #define profile_pc(regs) instruction_pointer(regs)
 
-extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall);
+extern asmlinkage long syscall_trace_enter(struct pt_regs *regs);
 extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
 
 extern void die(const char *, struct pt_regs *) __noreturn;
index d1b11f66f748f06483edbc08e48d1b4e5e684156..cb1045ebab0621ad2c8c59eaebe96b13d47e4514 100644 (file)
@@ -101,6 +101,7 @@ void output_thread_info_defines(void)
        OFFSET(TI_CPU, thread_info, cpu);
        OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
        OFFSET(TI_REGS, thread_info, regs);
+       OFFSET(TI_SYSCALL, thread_info, syscall);
        DEFINE(_THREAD_SIZE, THREAD_SIZE);
        DEFINE(_THREAD_MASK, THREAD_MASK);
        DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE);
index 59288c13b581b89ccb46214c7be02126a017dab2..61503a36067e9ef15c2ff7598256c6fd1de6ac8d 100644 (file)
@@ -1317,16 +1317,13 @@ long arch_ptrace(struct task_struct *child, long request,
  * Notification of system call entry/exit
  * - triggered by current->work.syscall_trace
  */
-asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
+asmlinkage long syscall_trace_enter(struct pt_regs *regs)
 {
        user_exit();
 
-       current_thread_info()->syscall = syscall;
-
        if (test_thread_flag(TIF_SYSCALL_TRACE)) {
                if (ptrace_report_syscall_entry(regs))
                        return -1;
-               syscall = current_thread_info()->syscall;
        }
 
 #ifdef CONFIG_SECCOMP
@@ -1335,7 +1332,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
                struct seccomp_data sd;
                unsigned long args[6];
 
-               sd.nr = syscall;
+               sd.nr = current_thread_info()->syscall;
                sd.arch = syscall_get_arch(current);
                syscall_get_arguments(current, regs, args);
                for (i = 0; i < 6; i++)
@@ -1345,23 +1342,23 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
                ret = __secure_computing(&sd);
                if (ret == -1)
                        return ret;
-               syscall = current_thread_info()->syscall;
        }
 #endif
 
        if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
                trace_sys_enter(regs, regs->regs[2]);
 
-       audit_syscall_entry(syscall, regs->regs[4], regs->regs[5],
+       audit_syscall_entry(current_thread_info()->syscall,
+                           regs->regs[4], regs->regs[5],
                            regs->regs[6], regs->regs[7]);
 
        /*
         * Negative syscall numbers are mistaken for rejected syscalls, but
         * won't have had the return value set appropriately, so we do so now.
         */
-       if (syscall < 0)
+       if (current_thread_info()->syscall < 0)
                syscall_set_return_value(current, regs, -ENOSYS, 0);
-       return syscall;
+       return current_thread_info()->syscall;
 }
 
 /*
index 18dc9b34505614d2bc84767479a3e9972c1ba8ad..2c604717e63080b1c1949a080bfadf1cab94acd6 100644 (file)
@@ -77,6 +77,18 @@ loads_done:
        PTR_WD  load_a7, bad_stack_a7
        .previous
 
+       /*
+        * syscall number is in v0 unless we called syscall(__NR_###)
+        * where the real syscall number is in a0
+        */
+       subu    t2, v0,  __NR_O32_Linux
+       bnez    t2, 1f /* __NR_syscall at offset 0 */
+       LONG_S  a0, TI_SYSCALL($28)     # Save a0 as syscall number
+       b       2f
+1:
+       LONG_S  v0, TI_SYSCALL($28)     # Save v0 as syscall number
+2:
+
        lw      t0, TI_FLAGS($28)       # syscall tracing enabled?
        li      t1, _TIF_WORK_SYSCALL_ENTRY
        and     t0, t1
@@ -114,16 +126,7 @@ syscall_trace_entry:
        SAVE_STATIC
        move    a0, sp
 
-       /*
-        * syscall number is in v0 unless we called syscall(__NR_###)
-        * where the real syscall number is in a0
-        */
-       move    a1, v0
-       subu    t2, v0,  __NR_O32_Linux
-       bnez    t2, 1f /* __NR_syscall at offset 0 */
-       lw      a1, PT_R4(sp)
-
-1:     jal     syscall_trace_enter
+       jal     syscall_trace_enter
 
        bltz    v0, 1f                  # seccomp failed? Skip syscall
 
index 97456b2ca7dc32f13cac9a5843a3adea89735318..97788859238c344a64d1f75f2fdd6c2a4bc58006 100644 (file)
@@ -44,6 +44,8 @@ NESTED(handle_sysn32, PT_SIZE, sp)
 
        sd      a3, PT_R26(sp)          # save a3 for syscall restarting
 
+       LONG_S  v0, TI_SYSCALL($28)     # Store syscall number
+
        li      t1, _TIF_WORK_SYSCALL_ENTRY
        LONG_L  t0, TI_FLAGS($28)       # syscall tracing enabled?
        and     t0, t1, t0
@@ -72,7 +74,6 @@ syscall_common:
 n32_syscall_trace_entry:
        SAVE_STATIC
        move    a0, sp
-       move    a1, v0
        jal     syscall_trace_enter
 
        bltz    v0, 1f                  # seccomp failed? Skip syscall
index e6264aa62e457f02b8a50df8b266a58b8361717d..be11ea5cc67e043c8a20fe0fecb4a0414b589ee9 100644 (file)
@@ -46,6 +46,8 @@ NESTED(handle_sys64, PT_SIZE, sp)
 
        sd      a3, PT_R26(sp)          # save a3 for syscall restarting
 
+       LONG_S  v0, TI_SYSCALL($28)     # Store syscall number
+
        li      t1, _TIF_WORK_SYSCALL_ENTRY
        LONG_L  t0, TI_FLAGS($28)       # syscall tracing enabled?
        and     t0, t1, t0
@@ -82,7 +84,6 @@ n64_syscall_exit:
 syscall_trace_entry:
        SAVE_STATIC
        move    a0, sp
-       move    a1, v0
        jal     syscall_trace_enter
 
        bltz    v0, 1f                  # seccomp failed? Skip syscall
index d3c2616cba22690bffd63b4521dc0f0ea7216315..7a5abb73e53127876af7e9d5f13dae2f8b08c3e8 100644 (file)
@@ -79,6 +79,22 @@ loads_done:
        PTR_WD  load_a7, bad_stack_a7
        .previous
 
+       /*
+        * absolute syscall number is in v0 unless we called syscall(__NR_###)
+        * where the real syscall number is in a0
+        * note: NR_syscall is the first O32 syscall but the macro is
+        * only defined when compiling with -mabi=32 (CONFIG_32BIT)
+        * therefore __NR_O32_Linux is used (4000)
+        */
+
+       subu    t2, v0,  __NR_O32_Linux
+       bnez    t2, 1f /* __NR_syscall at offset 0 */
+       LONG_S  a0, TI_SYSCALL($28)     # Save a0 as syscall number
+       b       2f
+1:
+       LONG_S  v0, TI_SYSCALL($28)     # Save v0 as syscall number
+2:
+
        li      t1, _TIF_WORK_SYSCALL_ENTRY
        LONG_L  t0, TI_FLAGS($28)       # syscall tracing enabled?
        and     t0, t1, t0
@@ -113,22 +129,7 @@ trace_a_syscall:
        sd      a7, PT_R11(sp)          # For indirect syscalls
 
        move    a0, sp
-       /*
-        * absolute syscall number is in v0 unless we called syscall(__NR_###)
-        * where the real syscall number is in a0
-        * note: NR_syscall is the first O32 syscall but the macro is
-        * only defined when compiling with -mabi=32 (CONFIG_32BIT)
-        * therefore __NR_O32_Linux is used (4000)
-        */
-       .set    push
-       .set    reorder
-       subu    t1, v0,  __NR_O32_Linux
-       move    a1, v0
-       bnez    t1, 1f /* __NR_syscall at offset 0 */
-       ld      a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
-       .set    pop
-
-1:     jal     syscall_trace_enter
+       jal     syscall_trace_enter
 
        bltz    v0, 1f                  # seccomp failed? Skip syscall
 
index 3dc85638bc63b7d96eb3ebc25c558cff967395c6..6a1e0fbbaa15b325c898548afe1189905d4b0520 100644 (file)
@@ -340,7 +340,8 @@ SYM_CODE_START(pgm_check_handler)
        mvc     __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
        stctg   %c1,%c1,__PT_CR1(%r11)
 #if IS_ENABLED(CONFIG_KVM)
-       lg      %r12,__LC_GMAP
+       ltg     %r12,__LC_GMAP
+       jz      5f
        clc     __GMAP_ASCE(8,%r12), __PT_CR1(%r11)
        jne     5f
        BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST
index 4fff6ed46e902cfbe723cf5ed5ce517e2d131891..4474bf32d0a4970daec7fad3f12f8aa4a9e43871 100644 (file)
@@ -2633,6 +2633,16 @@ config MITIGATION_RFDS
          stored in floating point, vector and integer registers.
          See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
 
+config MITIGATION_SPECTRE_BHI
+       bool "Mitigate Spectre-BHB (Branch History Injection)"
+       depends on CPU_SUP_INTEL
+       default y
+       help
+         Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
+         where the branch history buffer is poisoned to speculatively steer
+         indirect branches.
+         See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+
 endif
 
 config ARCH_HAS_ADD_PAGES
index 6356060caaf311af8370ccaeb69aab85847b62d1..6de50b80702e61087c432faf580a0ab063d22507 100644 (file)
@@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
 
        if (likely(unr < NR_syscalls)) {
                unr = array_index_nospec(unr, NR_syscalls);
-               regs->ax = sys_call_table[unr](regs);
+               regs->ax = x64_sys_call(regs, unr);
                return true;
        }
        return false;
@@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
 
        if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
                xnr = array_index_nospec(xnr, X32_NR_syscalls);
-               regs->ax = x32_sys_call_table[xnr](regs);
+               regs->ax = x32_sys_call(regs, xnr);
                return true;
        }
        return false;
@@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
 
        if (likely(unr < IA32_NR_syscalls)) {
                unr = array_index_nospec(unr, IA32_NR_syscalls);
-               regs->ax = ia32_sys_call_table[unr](regs);
+               regs->ax = ia32_sys_call(regs, unr);
        } else if (nr != -1) {
                regs->ax = __ia32_sys_ni_syscall(regs);
        }
@@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void)
 }
 
 /**
- * int80_emulation - 32-bit legacy syscall entry
+ * do_int80_emulation - 32-bit legacy syscall C entry from asm
  *
  * This entry point can be used by 32-bit and 64-bit programs to perform
  * 32-bit system calls.  Instances of INT $0x80 can be found inline in
@@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void)
  *   eax:                              system call number
  *   ebx, ecx, edx, esi, edi, ebp:     arg1 - arg 6
  */
-DEFINE_IDTENTRY_RAW(int80_emulation)
+__visible noinstr void do_int80_emulation(struct pt_regs *regs)
 {
        int nr;
 
index 8af2a26b24f6a9783f9bb348cd67c15e1c3799c8..1b5be07f86698a3b634a0d83b6578775781d1739 100644 (file)
@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
        /* clobbers %rax, make sure it is after saving the syscall nr */
        IBRS_ENTER
        UNTRAIN_RET
+       CLEAR_BRANCH_HISTORY
 
        call    do_syscall_64           /* returns with IRQs disabled */
 
@@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
        call    make_task_dead
 SYM_CODE_END(rewind_stack_and_make_dead)
 .popsection
+
+/*
+ * This sequence executes branches in order to remove user branch information
+ * from the branch history tracker in the Branch Predictor, therefore removing
+ * user influence on subsequent BTB lookups.
+ *
+ * It should be used on parts prior to Alder Lake. Newer parts should use the
+ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
+ * virtualized on newer hardware the VMM should protect against BHI attacks by
+ * setting BHI_DIS_S for the guests.
+ *
+ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
+ * and not clearing the branch history. The call tree looks like:
+ *
+ * call 1
+ *    call 2
+ *      call 2
+ *        call 2
+ *          call 2
+ *           call 2
+ *           ret
+ *         ret
+ *        ret
+ *      ret
+ *    ret
+ * ret
+ *
+ * This means that the stack is non-constant and ORC can't unwind it with %rsp
+ * alone.  Therefore we unconditionally set up the frame pointer, which allows
+ * ORC to unwind properly.
+ *
+ * The alignment is for performance and not for safety, and may be safely
+ * refactored in the future if needed.
+ */
+SYM_FUNC_START(clear_bhb_loop)
+       push    %rbp
+       mov     %rsp, %rbp
+       movl    $5, %ecx
+       ANNOTATE_INTRA_FUNCTION_CALL
+       call    1f
+       jmp     5f
+       .align 64, 0xcc
+       ANNOTATE_INTRA_FUNCTION_CALL
+1:     call    2f
+       RET
+       .align 64, 0xcc
+2:     movl    $5, %eax
+3:     jmp     4f
+       nop
+4:     sub     $1, %eax
+       jnz     3b
+       sub     $1, %ecx
+       jnz     1b
+       RET
+5:     lfence
+       pop     %rbp
+       RET
+SYM_FUNC_END(clear_bhb_loop)
+EXPORT_SYMBOL_GPL(clear_bhb_loop)
+STACK_FRAME_NON_STANDARD(clear_bhb_loop)
index eabf48c4d4b4c30367792f5d9a0b158a9ecf8a04..c779046cc3fe792658a984648328000535812dea 100644 (file)
@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
 
        IBRS_ENTER
        UNTRAIN_RET
+       CLEAR_BRANCH_HISTORY
 
        /*
         * SYSENTER doesn't filter flags, so we need to clear NT and AC
@@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
 
        IBRS_ENTER
        UNTRAIN_RET
+       CLEAR_BRANCH_HISTORY
 
        movq    %rsp, %rdi
        call    do_fast_syscall_32
@@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
        ANNOTATE_NOENDBR
        int3
 SYM_CODE_END(entry_SYSCALL_compat)
+
+/*
+ * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
+ * point to C routines, however since this is a system call interface the branch
+ * history needs to be scrubbed to protect against BHI attacks, and that
+ * scrubbing needs to take place in assembly code prior to entering any C
+ * routines.
+ */
+SYM_CODE_START(int80_emulation)
+       ANNOTATE_NOENDBR
+       UNWIND_HINT_FUNC
+       CLEAR_BRANCH_HISTORY
+       jmp do_int80_emulation
+SYM_CODE_END(int80_emulation)
index 8cfc9bc73e7f8b21f748367256a78df3dc5e5b4a..c2235bae17ef665098342c323a24e4b388c169cb 100644 (file)
 #include <asm/syscalls_32.h>
 #undef __SYSCALL
 
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
+#ifdef CONFIG_X86_32
 #define __SYSCALL(nr, sym) __ia32_##sym,
-
-__visible const sys_call_ptr_t ia32_sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
 #include <asm/syscalls_32.h>
 };
+#undef __SYSCALL
+#endif
+
+#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
+
+long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+       switch (nr) {
+       #include <asm/syscalls_32.h>
+       default: return __ia32_sys_ni_syscall(regs);
+       }
+};
index be120eec1fc9f95c69c23074bcd3fbc355b90d47..33b3f09e6f151e11faca1c9d13f0eb4917f3392b 100644 (file)
 #include <asm/syscalls_64.h>
 #undef __SYSCALL
 
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
 #define __SYSCALL(nr, sym) __x64_##sym,
-
-asmlinkage const sys_call_ptr_t sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
 #include <asm/syscalls_64.h>
 };
+#undef __SYSCALL
+
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+
+long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+       switch (nr) {
+       #include <asm/syscalls_64.h>
+       default: return __x64_sys_ni_syscall(regs);
+       }
+};
index bdd0e03a1265d23e474c5c45e1bd64e7b14b7b79..03de4a93213182c6fa5809b077a54ea51be411ea 100644 (file)
 #include <asm/syscalls_x32.h>
 #undef __SYSCALL
 
-#define __SYSCALL(nr, sym) __x64_##sym,
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
 
-asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
-#include <asm/syscalls_x32.h>
+long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+       switch (nr) {
+       #include <asm/syscalls_x32.h>
+       default: return __x64_sys_ni_syscall(regs);
+       }
 };
index 09050641ce5d3c02ad099d8faabbe5e98fe57570..5b0dd07b1ef19e915c1553eb13ca1c20ef1814ff 100644 (file)
@@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
        while (++i < cpuc->n_events) {
                cpuc->event_list[i-1] = cpuc->event_list[i];
                cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+               cpuc->assign[i-1] = cpuc->assign[i];
        }
        cpuc->event_constraint[i-1] = NULL;
        --cpuc->n_events;
index 5fc45543e95502cf16607e69e891c6e282136b30..0569f579338b516b22fe447248ae1ae4e4880a03 100644 (file)
@@ -105,7 +105,7 @@ static bool cpu_is_self(int cpu)
  * IPI implementation on Hyper-V.
  */
 static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
-               bool exclude_self)
+                              bool exclude_self)
 {
        struct hv_send_ipi_ex *ipi_arg;
        unsigned long flags;
@@ -132,8 +132,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
        if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) {
                ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
 
-               nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask,
-                               exclude_self ? cpu_is_self : NULL);
+               nr_bank = cpumask_to_vpset_skip(&ipi_arg->vp_set, mask,
+                                               exclude_self ? cpu_is_self : NULL);
 
                /*
                 * 'nr_bank <= 0' means some CPUs in cpumask can't be
@@ -147,7 +147,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
        }
 
        status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
-                             ipi_arg, NULL);
+                                    ipi_arg, NULL);
 
 ipi_mask_ex_done:
        local_irq_restore(flags);
@@ -155,7 +155,7 @@ ipi_mask_ex_done:
 }
 
 static bool __send_ipi_mask(const struct cpumask *mask, int vector,
-               bool exclude_self)
+                           bool exclude_self)
 {
        int cur_cpu, vcpu, this_cpu = smp_processor_id();
        struct hv_send_ipi ipi_arg;
@@ -181,7 +181,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
                        return false;
        }
 
-       if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+       if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
                return false;
 
        /*
@@ -218,7 +218,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
        }
 
        status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
-                                    ipi_arg.cpu_mask);
+                                       ipi_arg.cpu_mask);
        return hv_result_success(status);
 
 do_ex_hypercall:
@@ -241,7 +241,7 @@ static bool __send_ipi_one(int cpu, int vector)
                        return false;
        }
 
-       if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+       if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
                return false;
 
        if (vp >= 64)
index 68a0843d4750f765b50dd303c82bc445f442646e..3fa1f2ee7b0d0630df03675bddfdad0c40ad411d 100644 (file)
@@ -3,7 +3,6 @@
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <linux/clockchips.h>
-#include <linux/acpi.h>
 #include <linux/hyperv.h>
 #include <linux/slab.h>
 #include <linux/cpuhotplug.h>
@@ -116,12 +115,11 @@ free_buf:
 
 int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
 {
-       struct hv_add_logical_processor_in *input;
-       struct hv_add_logical_processor_out *output;
+       struct hv_input_add_logical_processor *input;
+       struct hv_output_add_logical_processor *output;
        u64 status;
        unsigned long flags;
        int ret = HV_STATUS_SUCCESS;
-       int pxm = node_to_pxm(node);
 
        /*
         * When adding a logical processor, the hypervisor may return
@@ -137,11 +135,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
 
                input->lp_index = lp_index;
                input->apic_id = apic_id;
-               input->flags = 0;
-               input->proximity_domain_info.domain_id = pxm;
-               input->proximity_domain_info.flags.reserved = 0;
-               input->proximity_domain_info.flags.proximity_info_valid = 1;
-               input->proximity_domain_info.flags.proximity_preferred = 1;
+               input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
                status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR,
                                         input, output);
                local_irq_restore(flags);
@@ -166,7 +160,6 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
        u64 status;
        unsigned long irq_flags;
        int ret = HV_STATUS_SUCCESS;
-       int pxm = node_to_pxm(node);
 
        /* Root VPs don't seem to need pages deposited */
        if (partition_id != hv_current_partition_id) {
@@ -185,14 +178,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
                input->vp_index = vp_index;
                input->flags = flags;
                input->subnode_type = HvSubnodeAny;
-               if (node != NUMA_NO_NODE) {
-                       input->proximity_domain_info.domain_id = pxm;
-                       input->proximity_domain_info.flags.reserved = 0;
-                       input->proximity_domain_info.flags.proximity_info_valid = 1;
-                       input->proximity_domain_info.flags.proximity_preferred = 1;
-               } else {
-                       input->proximity_domain_info.as_uint64 = 0;
-               }
+               input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
                status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL);
                local_irq_restore(irq_flags);
 
index 94ce0f7c9d3a26cd2b766a60042a0b941b3fe0d2..e6ab0cf15ed573b3acfd5fce79bc20cfce7c493a 100644 (file)
@@ -13,6 +13,7 @@
 #include <asm/mpspec.h>
 #include <asm/msr.h>
 #include <asm/hardirq.h>
+#include <asm/io.h>
 
 #define ARCH_APICTIMER_STOPS_ON_C3     1
 
@@ -98,7 +99,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
 
 static inline u32 native_apic_mem_read(u32 reg)
 {
-       return *((volatile u32 *)(APIC_BASE + reg));
+       return readl((void __iomem *)(APIC_BASE + reg));
 }
 
 static inline void native_apic_mem_eoi(void)
index a38f8f9ba65729125234814c08547498e4e3b8bc..3c7434329661c66e7c34283f0a3f2c59a87f8044 100644 (file)
 
 /*
  * Extended auxiliary flags: Linux defined - for features scattered in various
- * CPUID levels like 0x80000022, etc.
+ * CPUID levels like 0x80000022, etc and Linux defined features.
  *
  * Reuse free bits when adding new feature flags!
  */
 #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP     (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL           (21*32+ 2) /* "" BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW       (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
 
 /*
  * BUG word(s)
 #define X86_BUG_SRSO                   X86_BUG(1*32 + 0) /* AMD SRSO bug */
 #define X86_BUG_DIV0                   X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
 #define X86_BUG_RFDS                   X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI                    X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
 #endif /* _ASM_X86_CPUFEATURES_H */
index 05956bd8bacf50e35f463c13720a38735fe8b1b5..e72c2b87295799af9d44eb84f59d095f4f90acfd 100644 (file)
 #define SPEC_CTRL_SSBD                 BIT(SPEC_CTRL_SSBD_SHIFT)       /* Speculative Store Bypass Disable */
 #define SPEC_CTRL_RRSBA_DIS_S_SHIFT    6          /* Disable RRSBA behavior */
 #define SPEC_CTRL_RRSBA_DIS_S          BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_BHI_DIS_S_SHIFT      10         /* Disable Branch History Injection behavior */
+#define SPEC_CTRL_BHI_DIS_S            BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
 
 /* A mask for bits which the kernel toggles when controlling mitigations */
 #define SPEC_CTRL_MITIGATIONS_MASK     (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
-                                                       | SPEC_CTRL_RRSBA_DIS_S)
+                                                       | SPEC_CTRL_RRSBA_DIS_S \
+                                                       | SPEC_CTRL_BHI_DIS_S)
 
 #define MSR_IA32_PRED_CMD              0x00000049 /* Prediction Command */
 #define PRED_CMD_IBPB                  BIT(0)     /* Indirect Branch Prediction Barrier */
                                                 * are restricted to targets in
                                                 * kernel.
                                                 */
+#define ARCH_CAP_BHI_NO                        BIT(20) /*
+                                                * CPU is not affected by Branch
+                                                * History Injection.
+                                                */
 #define ARCH_CAP_PBRSB_NO              BIT(24) /*
                                                 * Not susceptible to Post-Barrier
                                                 * Return Stack Buffer Predictions.
index 170c89ed22fcd3a27106d166a9e7f5a5d1fadf80..ff5f1ecc7d1e6512fcc34f4a6e5df5976e9087f0 100644 (file)
        ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
 .endm
 
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+       ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+
+.macro CLEAR_BRANCH_HISTORY_VMEXIT
+       ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#define CLEAR_BRANCH_HISTORY_VMEXIT
+#endif
+
 #else /* __ASSEMBLY__ */
 
 #define ANNOTATE_RETPOLINE_SAFE                                        \
@@ -368,6 +381,10 @@ extern void srso_alias_return_thunk(void);
 extern void entry_untrain_ret(void);
 extern void entry_ibpb(void);
 
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
 extern void (*x86_return_thunk)(void);
 
 extern void __warn_thunk(void);
index f44e2f9ab65d779f35bac9c5e58dd8b694778efc..2fc7bc3863ff6f7a932ac2ee05682a2ba71f3308 100644 (file)
 #include <asm/thread_info.h>   /* for TS_COMPAT */
 #include <asm/unistd.h>
 
+/* This is used purely for kernel/trace/trace_syscalls.c */
 typedef long (*sys_call_ptr_t)(const struct pt_regs *);
 extern const sys_call_ptr_t sys_call_table[];
 
-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#else
 /*
  * These may not exist, but still put the prototypes in so we
  * can use IS_ENABLED().
  */
-extern const sys_call_ptr_t ia32_sys_call_table[];
-extern const sys_call_ptr_t x32_sys_call_table[];
-#endif
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
 
 /*
  * Only the low 32 bits of orig_ax are meaningful, so we return int.
@@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
 }
 
 bool do_syscall_64(struct pt_regs *regs, int nr);
+void do_int80_emulation(struct pt_regs *regs);
 
 #endif /* CONFIG_X86_32 */
 
index a42d8a6f7149588bc74213268733003bf7ccf470..c342c4aa9c6848c607238dad1ff07105737d5873 100644 (file)
@@ -1687,11 +1687,11 @@ static int x2apic_state;
 
 static bool x2apic_hw_locked(void)
 {
-       u64 ia32_cap;
+       u64 x86_arch_cap_msr;
        u64 msr;
 
-       ia32_cap = x86_read_arch_cap_msr();
-       if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) {
+       x86_arch_cap_msr = x86_read_arch_cap_msr();
+       if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
                rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
                return (msr & LEGACY_XAPIC_DISABLED);
        }
index 9bf17c9c29dad2e3f3c38c07253accc667cadea3..cb9eece55904d049edc600960bdaa0db58765459 100644 (file)
@@ -535,7 +535,6 @@ clear_sev:
 
 static void early_init_amd(struct cpuinfo_x86 *c)
 {
-       u64 value;
        u32 dummy;
 
        if (c->x86 >= 0xf)
@@ -603,20 +602,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 
        early_detect_mem_encrypt(c);
 
-       /* Re-enable TopologyExtensions if switched off by BIOS */
-       if (c->x86 == 0x15 &&
-           (c->x86_model >= 0x10 && c->x86_model <= 0x6f) &&
-           !cpu_has(c, X86_FEATURE_TOPOEXT)) {
-
-               if (msr_set_bit(0xc0011005, 54) > 0) {
-                       rdmsrl(0xc0011005, value);
-                       if (value & BIT_64(54)) {
-                               set_cpu_cap(c, X86_FEATURE_TOPOEXT);
-                               pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
-                       }
-               }
-       }
-
        if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) {
                if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB))
                        setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
index e7ba936d798b8198f5837118d5bb33d40389ccc7..ca295b0c1eeee05b812c27bb88bd814dba3c1f00 100644 (file)
@@ -61,6 +61,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current);
 u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
 EXPORT_SYMBOL_GPL(x86_pred_cmd);
 
+static u64 __ro_after_init x86_arch_cap_msr;
+
 static DEFINE_MUTEX(spec_ctrl_mutex);
 
 void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;
@@ -144,6 +146,8 @@ void __init cpu_select_mitigations(void)
                x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
        }
 
+       x86_arch_cap_msr = x86_read_arch_cap_msr();
+
        /* Select the proper CPU mitigations before patching alternatives: */
        spectre_v1_select_mitigation();
        spectre_v2_select_mitigation();
@@ -301,8 +305,6 @@ static const char * const taa_strings[] = {
 
 static void __init taa_select_mitigation(void)
 {
-       u64 ia32_cap;
-
        if (!boot_cpu_has_bug(X86_BUG_TAA)) {
                taa_mitigation = TAA_MITIGATION_OFF;
                return;
@@ -341,9 +343,8 @@ static void __init taa_select_mitigation(void)
         * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
         * update is required.
         */
-       ia32_cap = x86_read_arch_cap_msr();
-       if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
-           !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
+       if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) &&
+           !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))
                taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
 
        /*
@@ -401,8 +402,6 @@ static const char * const mmio_strings[] = {
 
 static void __init mmio_select_mitigation(void)
 {
-       u64 ia32_cap;
-
        if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
             boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
             cpu_mitigations_off()) {
@@ -413,8 +412,6 @@ static void __init mmio_select_mitigation(void)
        if (mmio_mitigation == MMIO_MITIGATION_OFF)
                return;
 
-       ia32_cap = x86_read_arch_cap_msr();
-
        /*
         * Enable CPU buffer clear mitigation for host and VMM, if also affected
         * by MDS or TAA. Otherwise, enable mitigation for VMM only.
@@ -437,7 +434,7 @@ static void __init mmio_select_mitigation(void)
         * be propagated to uncore buffers, clearing the Fill buffers on idle
         * is required irrespective of SMT state.
         */
-       if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
+       if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
                static_branch_enable(&mds_idle_clear);
 
        /*
@@ -447,10 +444,10 @@ static void __init mmio_select_mitigation(void)
         * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
         * affected systems.
         */
-       if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
+       if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
            (boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
             boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
-            !(ia32_cap & ARCH_CAP_MDS_NO)))
+            !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)))
                mmio_mitigation = MMIO_MITIGATION_VERW;
        else
                mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
@@ -508,7 +505,7 @@ static void __init rfds_select_mitigation(void)
        if (rfds_mitigation == RFDS_MITIGATION_OFF)
                return;
 
-       if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR)
+       if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
                setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
        else
                rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
@@ -659,8 +656,6 @@ void update_srbds_msr(void)
 
 static void __init srbds_select_mitigation(void)
 {
-       u64 ia32_cap;
-
        if (!boot_cpu_has_bug(X86_BUG_SRBDS))
                return;
 
@@ -669,8 +664,7 @@ static void __init srbds_select_mitigation(void)
         * are only exposed to SRBDS when TSX is enabled or when CPU is affected
         * by Processor MMIO Stale Data vulnerability.
         */
-       ia32_cap = x86_read_arch_cap_msr();
-       if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
+       if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
            !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
                srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
        else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
@@ -813,7 +807,7 @@ static void __init gds_select_mitigation(void)
        /* Will verify below that mitigation _can_ be disabled */
 
        /* No microcode */
-       if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+       if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) {
                if (gds_mitigation == GDS_MITIGATION_FORCE) {
                        /*
                         * This only needs to be done on the boot CPU so do it
@@ -1544,20 +1538,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
        return SPECTRE_V2_RETPOLINE;
 }
 
+static bool __ro_after_init rrsba_disabled;
+
 /* Disable in-kernel use of non-RSB RET predictors */
 static void __init spec_ctrl_disable_kernel_rrsba(void)
 {
-       u64 ia32_cap;
+       if (rrsba_disabled)
+               return;
 
-       if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+       if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) {
+               rrsba_disabled = true;
                return;
+       }
 
-       ia32_cap = x86_read_arch_cap_msr();
+       if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+               return;
 
-       if (ia32_cap & ARCH_CAP_RRSBA) {
-               x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
-               update_spec_ctrl(x86_spec_ctrl_base);
-       }
+       x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
+       update_spec_ctrl(x86_spec_ctrl_base);
+       rrsba_disabled = true;
 }
 
 static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
@@ -1607,6 +1606,73 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
        dump_stack();
 }
 
+/*
+ * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by
+ * branch history in userspace. Not needed if BHI_NO is set.
+ */
+static bool __init spec_ctrl_bhi_dis(void)
+{
+       if (!boot_cpu_has(X86_FEATURE_BHI_CTRL))
+               return false;
+
+       x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
+       update_spec_ctrl(x86_spec_ctrl_base);
+       setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW);
+
+       return true;
+}
+
+enum bhi_mitigations {
+       BHI_MITIGATION_OFF,
+       BHI_MITIGATION_ON,
+};
+
+static enum bhi_mitigations bhi_mitigation __ro_after_init =
+       IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF;
+
+static int __init spectre_bhi_parse_cmdline(char *str)
+{
+       if (!str)
+               return -EINVAL;
+
+       if (!strcmp(str, "off"))
+               bhi_mitigation = BHI_MITIGATION_OFF;
+       else if (!strcmp(str, "on"))
+               bhi_mitigation = BHI_MITIGATION_ON;
+       else
+               pr_err("Ignoring unknown spectre_bhi option (%s)", str);
+
+       return 0;
+}
+early_param("spectre_bhi", spectre_bhi_parse_cmdline);
+
+static void __init bhi_select_mitigation(void)
+{
+       if (bhi_mitigation == BHI_MITIGATION_OFF)
+               return;
+
+       /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
+       if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
+               spec_ctrl_disable_kernel_rrsba();
+               if (rrsba_disabled)
+                       return;
+       }
+
+       if (spec_ctrl_bhi_dis())
+               return;
+
+       if (!IS_ENABLED(CONFIG_X86_64))
+               return;
+
+       /* Mitigate KVM by default */
+       setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+       pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
+
+       /* Mitigate syscalls when the mitigation is forced =on */
+       setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
+       pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
        enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -1718,6 +1784,9 @@ static void __init spectre_v2_select_mitigation(void)
            mode == SPECTRE_V2_RETPOLINE)
                spec_ctrl_disable_kernel_rrsba();
 
+       if (boot_cpu_has(X86_BUG_BHI))
+               bhi_select_mitigation();
+
        spectre_v2_enabled = mode;
        pr_info("%s\n", spectre_v2_strings[mode]);
 
@@ -1832,8 +1901,6 @@ static void update_indir_branch_cond(void)
 /* Update the static key controlling the MDS CPU buffer clear in idle */
 static void update_mds_branch_idle(void)
 {
-       u64 ia32_cap = x86_read_arch_cap_msr();
-
        /*
         * Enable the idle clearing if SMT is active on CPUs which are
         * affected only by MSBDS and not any other MDS variant.
@@ -1848,7 +1915,7 @@ static void update_mds_branch_idle(void)
        if (sched_smt_active()) {
                static_branch_enable(&mds_idle_clear);
        } else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
-                  (ia32_cap & ARCH_CAP_FBSDP_NO)) {
+                  (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) {
                static_branch_disable(&mds_idle_clear);
        }
 }
@@ -2695,15 +2762,15 @@ static char *stibp_state(void)
 
        switch (spectre_v2_user_stibp) {
        case SPECTRE_V2_USER_NONE:
-               return ", STIBP: disabled";
+               return "; STIBP: disabled";
        case SPECTRE_V2_USER_STRICT:
-               return ", STIBP: forced";
+               return "; STIBP: forced";
        case SPECTRE_V2_USER_STRICT_PREFERRED:
-               return ", STIBP: always-on";
+               return "; STIBP: always-on";
        case SPECTRE_V2_USER_PRCTL:
        case SPECTRE_V2_USER_SECCOMP:
                if (static_key_enabled(&switch_to_cond_stibp))
-                       return ", STIBP: conditional";
+                       return "; STIBP: conditional";
        }
        return "";
 }
@@ -2712,10 +2779,10 @@ static char *ibpb_state(void)
 {
        if (boot_cpu_has(X86_FEATURE_IBPB)) {
                if (static_key_enabled(&switch_mm_always_ibpb))
-                       return ", IBPB: always-on";
+                       return "; IBPB: always-on";
                if (static_key_enabled(&switch_mm_cond_ibpb))
-                       return ", IBPB: conditional";
-               return ", IBPB: disabled";
+                       return "; IBPB: conditional";
+               return "; IBPB: disabled";
        }
        return "";
 }
@@ -2725,14 +2792,30 @@ static char *pbrsb_eibrs_state(void)
        if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
                if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
                    boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
-                       return ", PBRSB-eIBRS: SW sequence";
+                       return "; PBRSB-eIBRS: SW sequence";
                else
-                       return ", PBRSB-eIBRS: Vulnerable";
+                       return "; PBRSB-eIBRS: Vulnerable";
        } else {
-               return ", PBRSB-eIBRS: Not affected";
+               return "; PBRSB-eIBRS: Not affected";
        }
 }
 
+static const char *spectre_bhi_state(void)
+{
+       if (!boot_cpu_has_bug(X86_BUG_BHI))
+               return "; BHI: Not affected";
+       else if  (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW))
+               return "; BHI: BHI_DIS_S";
+       else if  (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
+               return "; BHI: SW loop, KVM: SW loop";
+       else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && rrsba_disabled)
+               return "; BHI: Retpoline";
+       else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
+               return "; BHI: Vulnerable, KVM: SW loop";
+
+       return "; BHI: Vulnerable";
+}
+
 static ssize_t spectre_v2_show_state(char *buf)
 {
        if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
@@ -2745,13 +2828,15 @@ static ssize_t spectre_v2_show_state(char *buf)
            spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
                return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
 
-       return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
+       return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
                          spectre_v2_strings[spectre_v2_enabled],
                          ibpb_state(),
-                         boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+                         boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
                          stibp_state(),
-                         boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+                         boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
                          pbrsb_eibrs_state(),
+                         spectre_bhi_state(),
+                         /* this should always be at the end */
                          spectre_v2_module_string());
 }
 
index 5c1e6d6be267af3e7b489e9f71937e7be6b25448..605c26c009c8ac61c8560231ea6b35d2381ff2aa 100644 (file)
@@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #define NO_SPECTRE_V2          BIT(8)
 #define NO_MMIO                        BIT(9)
 #define NO_EIBRS_PBRSB         BIT(10)
+#define NO_BHI                 BIT(11)
 
 #define VULNWL(vendor, family, model, whitelist)       \
        X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
        VULNWL_INTEL(ATOM_TREMONT_D,            NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
 
        /* AMD Family 0xf - 0x12 */
-       VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
-       VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
-       VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
-       VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+       VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+       VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+       VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+       VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
 
        /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
-       VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
-       VULNWL_HYGON(X86_FAMILY_ANY,    NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+       VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
+       VULNWL_HYGON(X86_FAMILY_ANY,    NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
 
        /* Zhaoxin Family 7 */
-       VULNWL(CENTAUR, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
-       VULNWL(ZHAOXIN, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+       VULNWL(CENTAUR, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
+       VULNWL(ZHAOXIN, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
        {}
 };
 
@@ -1283,25 +1284,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi
 
 u64 x86_read_arch_cap_msr(void)
 {
-       u64 ia32_cap = 0;
+       u64 x86_arch_cap_msr = 0;
 
        if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);
 
-       return ia32_cap;
+       return x86_arch_cap_msr;
 }
 
-static bool arch_cap_mmio_immune(u64 ia32_cap)
+static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr)
 {
-       return (ia32_cap & ARCH_CAP_FBSDP_NO &&
-               ia32_cap & ARCH_CAP_PSDP_NO &&
-               ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
+       return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO &&
+               x86_arch_cap_msr & ARCH_CAP_PSDP_NO &&
+               x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO);
 }
 
-static bool __init vulnerable_to_rfds(u64 ia32_cap)
+static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
 {
        /* The "immunity" bit trumps everything else: */
-       if (ia32_cap & ARCH_CAP_RFDS_NO)
+       if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO)
                return false;
 
        /*
@@ -1309,7 +1310,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
         * indicate that mitigation is needed because guest is running on a
         * vulnerable hardware or may migrate to such hardware:
         */
-       if (ia32_cap & ARCH_CAP_RFDS_CLEAR)
+       if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
                return true;
 
        /* Only consult the blacklist when there is no enumeration: */
@@ -1318,11 +1319,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
 
 static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 {
-       u64 ia32_cap = x86_read_arch_cap_msr();
+       u64 x86_arch_cap_msr = x86_read_arch_cap_msr();
 
        /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
        if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
-           !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
+           !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO))
                setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
 
        if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
@@ -1334,7 +1335,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
                setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
 
        if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
-           !(ia32_cap & ARCH_CAP_SSB_NO) &&
+           !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) &&
           !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
                setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
 
@@ -1345,17 +1346,17 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
         * Don't use AutoIBRS when SNP is enabled because it degrades host
         * userspace indirect branch performance.
         */
-       if ((ia32_cap & ARCH_CAP_IBRS_ALL) ||
+       if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) ||
            (cpu_has(c, X86_FEATURE_AUTOIBRS) &&
             !cpu_feature_enabled(X86_FEATURE_SEV_SNP))) {
                setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
                if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
-                   !(ia32_cap & ARCH_CAP_PBRSB_NO))
+                   !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO))
                        setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
        }
 
        if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
-           !(ia32_cap & ARCH_CAP_MDS_NO)) {
+           !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) {
                setup_force_cpu_bug(X86_BUG_MDS);
                if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
                        setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
@@ -1374,9 +1375,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
         * TSX_CTRL check alone is not sufficient for cases when the microcode
         * update is not present or running as guest that don't get TSX_CTRL.
         */
-       if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
+       if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) &&
            (cpu_has(c, X86_FEATURE_RTM) ||
-            (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
+            (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)))
                setup_force_cpu_bug(X86_BUG_TAA);
 
        /*
@@ -1402,7 +1403,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
         * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
         * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
         */
-       if (!arch_cap_mmio_immune(ia32_cap)) {
+       if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
                if (cpu_matches(cpu_vuln_blacklist, MMIO))
                        setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
                else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
@@ -1410,7 +1411,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
        }
 
        if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
-               if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
+               if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA))
                        setup_force_cpu_bug(X86_BUG_RETBLEED);
        }
 
@@ -1428,18 +1429,25 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
         * disabling AVX2. The only way to do this in HW is to clear XCR0[2],
         * which means that AVX will be disabled.
         */
-       if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+       if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) &&
            boot_cpu_has(X86_FEATURE_AVX))
                setup_force_cpu_bug(X86_BUG_GDS);
 
-       if (vulnerable_to_rfds(ia32_cap))
+       if (vulnerable_to_rfds(x86_arch_cap_msr))
                setup_force_cpu_bug(X86_BUG_RFDS);
 
+       /* When virtualized, eIBRS could be hidden, assume vulnerable */
+       if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
+           !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+           (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
+            boot_cpu_has(X86_FEATURE_HYPERVISOR)))
+               setup_force_cpu_bug(X86_BUG_BHI);
+
        if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
                return;
 
        /* Rogue Data Cache Load? No! */
-       if (ia32_cap & ARCH_CAP_RDCL_NO)
+       if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO)
                return;
 
        setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
index a515328d9d7d88b802f588bf678d098e0ba53b86..af5aa2c754c22226080870967d6c410067c86447 100644 (file)
@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
        { X86_FEATURE_EPB,              CPUID_ECX,  3, 0x00000006, 0 },
        { X86_FEATURE_INTEL_PPIN,       CPUID_EBX,  0, 0x00000007, 1 },
        { X86_FEATURE_RRSBA_CTRL,       CPUID_EDX,  2, 0x00000007, 2 },
+       { X86_FEATURE_BHI_CTRL,         CPUID_EDX,  4, 0x00000007, 2 },
        { X86_FEATURE_CQM_LLC,          CPUID_EDX,  1, 0x0000000f, 0 },
        { X86_FEATURE_CQM_OCCUP_LLC,    CPUID_EDX,  0, 0x0000000f, 1 },
        { X86_FEATURE_CQM_MBM_TOTAL,    CPUID_EDX,  1, 0x0000000f, 1 },
index aaca8d235dc2bbee08ab6de2bdb91b231963a7f6..d17c9b71eb4a253eac42acee5a49f3811c83aaff 100644 (file)
@@ -123,7 +123,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
        early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
        early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
 #endif
-       set_cpu_possible(cpu, true);
        set_cpu_present(cpu, true);
 }
 
@@ -210,7 +209,11 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
                topo_info.nr_disabled_cpus++;
        }
 
-       /* Register present and possible CPUs in the domain maps */
+       /*
+        * Register present and possible CPUs in the domain
+        * maps. cpu_possible_map will be updated in
+        * topology_init_possible_cpus() after enumeration is done.
+        */
        for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
                set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
 }
index 1a8b3ad493afef8eeeea65fe5dba8673517f1240..a7aa6eff4ae5ba26206208479f7530721eebda2d 100644 (file)
@@ -29,11 +29,21 @@ static bool parse_8000_0008(struct topo_scan *tscan)
        if (!sft)
                sft = get_count_order(ecx.cpu_nthreads + 1);
 
-       topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1);
+       /*
+        * cpu_nthreads describes the number of threads in the package
+        * sft is the number of APIC ID bits per package
+        *
+        * As the number of actual threads per core is not described in
+        * this leaf, just set the CORE domain shift and let the later
+        * parsers set SMT shift. Assume one thread per core by default
+        * which is correct if there are no other CPUID leafs to parse.
+        */
+       topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1);
+       topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1);
        return true;
 }
 
-static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id)
+static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id)
 {
        /*
         * Starting with Fam 17h the DIE domain could probably be used to
@@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
        tscan->c->topo.initial_apicid = leaf.ext_apic_id;
 
        /*
-        * If leaf 0xb is available, then SMT shift is set already. If not
-        * take it from ecx.threads_per_core and use topo_update_dom() -
-        * topology_set_dom() would propagate and overwrite the already
-        * propagated CORE level.
+        * If leaf 0xb is available, then the domain shifts are set
+        * already and nothing to do here.
         */
        if (!has_0xb) {
+               /*
+                * Leaf 0x80000008 set the CORE domain shift already.
+                * Update the SMT domain, but do not propagate it.
+                */
                unsigned int nthreads = leaf.core_nthreads + 1;
 
                topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads);
@@ -109,13 +121,13 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
 
 static bool parse_fam10h_node_id(struct topo_scan *tscan)
 {
-       struct {
-               union {
+       union {
+               struct {
                        u64     node_id         :  3,
                                nodes_per_pkg   :  3,
                                unused          : 58;
-                       u64     msr;
                };
+               u64             msr;
        } nid;
 
        if (!boot_cpu_has(X86_FEATURE_NODEID_MSR))
@@ -135,6 +147,26 @@ static void legacy_set_llc(struct topo_scan *tscan)
        tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN];
 }
 
+static void topoext_fixup(struct topo_scan *tscan)
+{
+       struct cpuinfo_x86 *c = tscan->c;
+       u64 msrval;
+
+       /* Try to re-enable TopologyExtensions if switched off by BIOS */
+       if (cpu_has(c, X86_FEATURE_TOPOEXT) || c->x86_vendor != X86_VENDOR_AMD ||
+           c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f)
+               return;
+
+       if (msr_set_bit(0xc0011005, 54) <= 0)
+               return;
+
+       rdmsrl(0xc0011005, msrval);
+       if (msrval & BIT_64(54)) {
+               set_cpu_cap(c, X86_FEATURE_TOPOEXT);
+               pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
+       }
+}
+
 static void parse_topology_amd(struct topo_scan *tscan)
 {
        bool has_0xb = false;
@@ -164,6 +196,7 @@ static void parse_topology_amd(struct topo_scan *tscan)
 void cpu_parse_topology_amd(struct topo_scan *tscan)
 {
        tscan->amd_nodes_per_pkg = 1;
+       topoext_fixup(tscan);
        parse_topology_amd(tscan);
 
        if (tscan->amd_nodes_per_pkg > 1)
index 58ac8d69c94bd124001861a1b2e06de8f3fd41b8..2f4e155080badc5efdbcc93fbc909c5bbcf70094 100644 (file)
@@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs {
 #define X86_FEATURE_IPRED_CTRL         KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
 #define KVM_X86_FEATURE_RRSBA_CTRL     KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
 #define X86_FEATURE_DDPD_U             KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
-#define X86_FEATURE_BHI_CTRL           KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
+#define KVM_X86_FEATURE_BHI_CTRL       KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
 #define X86_FEATURE_MCDT_NO            KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
 
 /* CPUID level 0x80000007 (EDX). */
@@ -128,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
        KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
        KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
        KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
+       KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
        default:
                return x86_feature;
        }
index 2bfbf758d06110f49c71a22c1f54da9d9499669a..f6986dee6f8c7c52622857f131adf766d1528121 100644 (file)
@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
 
        call vmx_spec_ctrl_restore_host
 
+       CLEAR_BRANCH_HISTORY_VMEXIT
+
        /* Put return value in AX */
        mov %_ASM_BX, %_ASM_AX
 
index 47d9f03b7778373393b9853fe32b153dadd9de29..984ea2089efc3132154527508976879a4e11cb10 100644 (file)
@@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
         ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
         ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
         ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
-        ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
+        ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
 
 static u64 kvm_get_arch_capabilities(void)
 {
index bdbb557feb5a0ec949e7ac8cde0e87b6d4055f5b..059467086b13123b26630c1e84942980f3001216 100644 (file)
@@ -1409,6 +1409,12 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
        return 0;
 }
 
+void blkg_init_queue(struct request_queue *q)
+{
+       INIT_LIST_HEAD(&q->blkg_list);
+       mutex_init(&q->blkcg_mutex);
+}
+
 int blkcg_init_disk(struct gendisk *disk)
 {
        struct request_queue *q = disk->queue;
@@ -1416,9 +1422,6 @@ int blkcg_init_disk(struct gendisk *disk)
        bool preloaded;
        int ret;
 
-       INIT_LIST_HEAD(&q->blkg_list);
-       mutex_init(&q->blkcg_mutex);
-
        new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
        if (!new_blkg)
                return -ENOMEM;
index 78b74106bf10c5cbadd655e2da6b2f21416c0622..90b3959d88cfa4a13026b7262001dd1cb030dcf5 100644 (file)
@@ -189,6 +189,7 @@ struct blkcg_policy {
 extern struct blkcg blkcg_root;
 extern bool blkcg_debug_stats;
 
+void blkg_init_queue(struct request_queue *q);
 int blkcg_init_disk(struct gendisk *disk);
 void blkcg_exit_disk(struct gendisk *disk);
 
@@ -482,6 +483,7 @@ struct blkcg {
 };
 
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline void blkg_init_queue(struct request_queue *q) { }
 static inline int blkcg_init_disk(struct gendisk *disk) { return 0; }
 static inline void blkcg_exit_disk(struct gendisk *disk) { }
 static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
index a16b5abdbbf56f44611d34fd238c0ee3a00d72f5..b795ac177281ad7adec63528d53def2fff1139a5 100644 (file)
@@ -442,6 +442,8 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
        init_waitqueue_head(&q->mq_freeze_wq);
        mutex_init(&q->mq_freeze_lock);
 
+       blkg_init_queue(q);
+
        /*
         * Init percpu_ref in atomic mode so that it's faster to shutdown.
         * See blk_register_queue() for details.
@@ -1195,6 +1197,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
        if (unlikely(!rq_list_empty(plug->cached_rq)))
                blk_mq_free_plug_rqs(plug);
 
+       plug->cur_ktime = 0;
        current->flags &= ~PF_BLOCK_TS;
 }
 
index 9a85bfbbc45a018e941cd0b778ab612a54cdea09..baa20c85799d54a86df05aa412c2e38849a800b4 100644 (file)
@@ -1347,7 +1347,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
 {
        struct ioc *ioc = iocg->ioc;
        struct blkcg_gq *blkg = iocg_to_blkg(iocg);
-       u64 tdelta, delay, new_delay;
+       u64 tdelta, delay, new_delay, shift;
        s64 vover, vover_pct;
        u32 hwa;
 
@@ -1362,8 +1362,9 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
 
        /* calculate the current delay in effect - 1/2 every second */
        tdelta = now->now - iocg->delay_at;
-       if (iocg->delay)
-               delay = iocg->delay >> div64_u64(tdelta, USEC_PER_SEC);
+       shift = div64_u64(tdelta, USEC_PER_SEC);
+       if (iocg->delay && shift < BITS_PER_LONG)
+               delay = iocg->delay >> shift;
        else
                delay = 0;
 
index cdbaef159c4bc3e2f713ac8541a36450271678e7..d2731843f2fccb481eda94e1a1dc980051d2486a 100644 (file)
@@ -182,17 +182,13 @@ static int blk_validate_limits(struct queue_limits *lim)
                return -EINVAL;
 
        /*
-        * Devices that require a virtual boundary do not support scatter/gather
-        * I/O natively, but instead require a descriptor list entry for each
-        * page (which might not be identical to the Linux PAGE_SIZE).  Because
-        * of that they are not limited by our notion of "segment size".
+        * Stacking device may have both virtual boundary and max segment
+        * size limit, so allow this setting now, and long-term the two
+        * might need to move out of stacking limits since we have immutable
+        * bvec and lower layer bio splitting is supposed to handle the two
+        * correctly.
         */
-       if (lim->virt_boundary_mask) {
-               if (WARN_ON_ONCE(lim->max_segment_size &&
-                                lim->max_segment_size != UINT_MAX))
-                       return -EINVAL;
-               lim->max_segment_size = UINT_MAX;
-       } else {
+       if (!lim->virt_boundary_mask) {
                /*
                 * The maximum segment size has an odd historic 64k default that
                 * drivers probably should override.  Just like the I/O size we
index 39f6d1b98fd6a50d5d9df2defe305a23b36f9bcf..51d3f1a55d024cf5600ebd833bdf8ef5ee0627c1 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  */
 
 #include <linux/firmware.h>
@@ -131,22 +131,6 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
        return 0;
 }
 
-static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate)
-{
-       int ret;
-
-       ret = ivpu_rpm_get_if_active(vdev);
-       if (ret < 0)
-               return ret;
-
-       *clk_rate = ret ? ivpu_hw_reg_pll_freq_get(vdev) : 0;
-
-       if (ret)
-               ivpu_rpm_put(vdev);
-
-       return 0;
-}
-
 static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
        struct ivpu_file_priv *file_priv = file->driver_priv;
@@ -170,7 +154,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
                args->value = vdev->platform;
                break;
        case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
-               ret = ivpu_get_core_clock_rate(vdev, &args->value);
+               args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
                break;
        case DRM_IVPU_PARAM_NUM_CONTEXTS:
                args->value = ivpu_get_context_count(vdev);
@@ -387,12 +371,15 @@ int ivpu_shutdown(struct ivpu_device *vdev)
 {
        int ret;
 
-       ivpu_prepare_for_reset(vdev);
+       /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
+       pci_save_state(to_pci_dev(vdev->drm.dev));
 
        ret = ivpu_hw_power_down(vdev);
        if (ret)
                ivpu_warn(vdev, "Failed to power down HW: %d\n", ret);
 
+       pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+
        return ret;
 }
 
@@ -530,7 +517,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
        vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
        vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
        atomic64_set(&vdev->unique_id_counter, 0);
-       xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
+       xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
        xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
        xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
        lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
@@ -560,11 +547,11 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
        /* Power up early so the rest of init code can access VPU registers */
        ret = ivpu_hw_power_up(vdev);
        if (ret)
-               goto err_power_down;
+               goto err_shutdown;
 
        ret = ivpu_mmu_global_context_init(vdev);
        if (ret)
-               goto err_power_down;
+               goto err_shutdown;
 
        ret = ivpu_mmu_init(vdev);
        if (ret)
@@ -601,10 +588,8 @@ err_mmu_rctx_fini:
        ivpu_mmu_reserved_context_fini(vdev);
 err_mmu_gctx_fini:
        ivpu_mmu_global_context_fini(vdev);
-err_power_down:
-       ivpu_hw_power_down(vdev);
-       if (IVPU_WA(d3hot_after_power_off))
-               pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+err_shutdown:
+       ivpu_shutdown(vdev);
 err_xa_destroy:
        xa_destroy(&vdev->db_xa);
        xa_destroy(&vdev->submitted_jobs_xa);
@@ -628,9 +613,8 @@ static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev)
 static void ivpu_dev_fini(struct ivpu_device *vdev)
 {
        ivpu_pm_disable(vdev);
+       ivpu_prepare_for_reset(vdev);
        ivpu_shutdown(vdev);
-       if (IVPU_WA(d3hot_after_power_off))
-               pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
 
        ivpu_jobs_abort_all(vdev);
        ivpu_job_done_consumer_fini(vdev);
index 7be0500d9bb8919574b02066b8389c56c6c83f05..bb4374d0eaecc9a25d2f6b28056aa5d8d762bd15 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  */
 
 #ifndef __IVPU_DRV_H__
@@ -90,7 +90,6 @@
 struct ivpu_wa_table {
        bool punit_disabled;
        bool clear_runtime_mem;
-       bool d3hot_after_power_off;
        bool interrupt_clear_with_0;
        bool disable_clock_relinquish;
        bool disable_d0i3_msg;
index b2909168a0a6902b4fb061910796ac19d5caf6e1..094c659d2800b127bf1c616e34973673c1f55061 100644 (file)
@@ -21,6 +21,7 @@ struct ivpu_hw_ops {
        u32 (*profiling_freq_get)(struct ivpu_device *vdev);
        void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable);
        u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
+       u32 (*ratio_to_freq)(struct ivpu_device *vdev, u32 ratio);
        u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
        u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
        u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev);
@@ -130,6 +131,11 @@ static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev)
        return vdev->hw->ops->reg_pll_freq_get(vdev);
 };
 
+static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+       return vdev->hw->ops->ratio_to_freq(vdev, ratio);
+}
+
 static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev)
 {
        return vdev->hw->ops->reg_telemetry_offset_get(vdev);
index 9a0c9498baba293cece13e9584f21f7b2067c681..bd25e2d9fb0f45a35d9ef9ca7ca16f14aa151521 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  */
 
 #include "ivpu_drv.h"
@@ -75,7 +75,6 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)
 {
        vdev->wa.punit_disabled = false;
        vdev->wa.clear_runtime_mem = false;
-       vdev->wa.d3hot_after_power_off = true;
 
        REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, BUTTRESS_ALL_IRQ_MASK);
        if (REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) == BUTTRESS_ALL_IRQ_MASK) {
@@ -86,7 +85,6 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)
 
        IVPU_PRINT_WA(punit_disabled);
        IVPU_PRINT_WA(clear_runtime_mem);
-       IVPU_PRINT_WA(d3hot_after_power_off);
        IVPU_PRINT_WA(interrupt_clear_with_0);
 }
 
@@ -805,12 +803,12 @@ static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool ena
        /* Profiling freq - is a debug feature. Unavailable on VPU 37XX. */
 }
 
-static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
+static u32 ivpu_hw_37xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
 {
        u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
        u32 cpu_clock;
 
-       if ((config & 0xff) == PLL_RATIO_4_3)
+       if ((vdev->hw->config & 0xff) == PLL_RATIO_4_3)
                cpu_clock = pll_clock * 2 / 4;
        else
                cpu_clock = pll_clock * 2 / 5;
@@ -829,7 +827,7 @@ static u32 ivpu_hw_37xx_reg_pll_freq_get(struct ivpu_device *vdev)
        if (!ivpu_is_silicon(vdev))
                return PLL_SIMULATION_FREQ;
 
-       return ivpu_hw_37xx_pll_to_freq(pll_curr_ratio, vdev->hw->config);
+       return ivpu_hw_37xx_ratio_to_freq(vdev, pll_curr_ratio);
 }
 
 static u32 ivpu_hw_37xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
@@ -1052,6 +1050,7 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = {
        .profiling_freq_get = ivpu_hw_37xx_profiling_freq_get,
        .profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive,
        .reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get,
+       .ratio_to_freq = ivpu_hw_37xx_ratio_to_freq,
        .reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get,
        .reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get,
        .reg_telemetry_enable_get = ivpu_hw_37xx_reg_telemetry_enable_get,
index e4eddbf5d11c250bb8ddd2a27843242166896217..b0b88d4c89264a0a95f18edc9b140d720c89279d 100644 (file)
@@ -980,6 +980,11 @@ static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev)
        return PLL_RATIO_TO_FREQ(pll_curr_ratio);
 }
 
+static u32 ivpu_hw_40xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+       return PLL_RATIO_TO_FREQ(ratio);
+}
+
 static u32 ivpu_hw_40xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
 {
        return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET);
@@ -1230,6 +1235,7 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = {
        .profiling_freq_get = ivpu_hw_40xx_profiling_freq_get,
        .profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive,
        .reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get,
+       .ratio_to_freq = ivpu_hw_40xx_ratio_to_freq,
        .reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get,
        .reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get,
        .reg_telemetry_enable_get = ivpu_hw_40xx_reg_telemetry_enable_get,
index 04ac4b9840fbe56341e1552c2783715a83b58e7c..56ff067f63e29559d2e0605645c97bb1a0391142 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  */
 
 #include <linux/genalloc.h>
@@ -501,7 +501,11 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
        spin_lock_init(&ipc->cons_lock);
        INIT_LIST_HEAD(&ipc->cons_list);
        INIT_LIST_HEAD(&ipc->cb_msg_list);
-       drmm_mutex_init(&vdev->drm, &ipc->lock);
+       ret = drmm_mutex_init(&vdev->drm, &ipc->lock);
+       if (ret) {
+               ivpu_err(vdev, "Failed to initialize ipc->lock, ret %d\n", ret);
+               goto err_free_rx;
+       }
        ivpu_ipc_reset(vdev);
        return 0;
 
index 91bd640655ab363b51df17a25cb9589293adc804..2e46b322c4505ea5f18997d0ef969f43239f72c8 100644 (file)
@@ -278,7 +278,7 @@ static const char *ivpu_mmu_event_to_str(u32 cmd)
        case IVPU_MMU_EVT_F_VMS_FETCH:
                return "Fetch of VMS caused external abort";
        default:
-               return "Unknown CMDQ command";
+               return "Unknown event";
        }
 }
 
@@ -286,15 +286,15 @@ static const char *ivpu_mmu_cmdq_err_to_str(u32 err)
 {
        switch (err) {
        case IVPU_MMU_CERROR_NONE:
-               return "No CMDQ Error";
+               return "No error";
        case IVPU_MMU_CERROR_ILL:
                return "Illegal command";
        case IVPU_MMU_CERROR_ABT:
-               return "External abort on CMDQ read";
+               return "External abort on command queue read";
        case IVPU_MMU_CERROR_ATC_INV_SYNC:
                return "Sync failed to complete ATS invalidation";
        default:
-               return "Unknown CMDQ Error";
+               return "Unknown error";
        }
 }
 
index 7cce1c928a7f4e8386344fd81d58e7893f72c050..4f5ea466731ffe6b5b2ea178ae907274f26f5b62 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  */
 
 #include <linux/highmem.h>
@@ -58,14 +58,11 @@ static int ivpu_suspend(struct ivpu_device *vdev)
 {
        int ret;
 
-       /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
-       pci_save_state(to_pci_dev(vdev->drm.dev));
+       ivpu_prepare_for_reset(vdev);
 
        ret = ivpu_shutdown(vdev);
        if (ret)
-               ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);
-
-       pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+               ivpu_err(vdev, "Failed to shutdown NPU: %d\n", ret);
 
        return ret;
 }
@@ -74,10 +71,10 @@ static int ivpu_resume(struct ivpu_device *vdev)
 {
        int ret;
 
-       pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
+retry:
        pci_restore_state(to_pci_dev(vdev->drm.dev));
+       pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
 
-retry:
        ret = ivpu_hw_power_up(vdev);
        if (ret) {
                ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
@@ -100,6 +97,7 @@ err_mmu_disable:
        ivpu_mmu_disable(vdev);
 err_power_down:
        ivpu_hw_power_down(vdev);
+       pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
 
        if (!ivpu_fw_is_cold_boot(vdev)) {
                ivpu_pm_prepare_cold_boot(vdev);
index 7c157bf926956be5cabd6db7c708ff87759c7879..d1464324de9519cdb96e026f3733170788bb786d 100644 (file)
@@ -1843,7 +1843,8 @@ static void acpi_scan_dep_init(struct acpi_device *adev)
                        if (dep->honor_dep)
                                adev->flags.honor_deps = 1;
 
-                       adev->dep_unmet++;
+                       if (!dep->met)
+                               adev->dep_unmet++;
                }
        }
 }
index 562302e2e57ce5a2651575ad1620b1725d654f6a..6548f10e61d9c72ca89180e011f8e495058302a1 100644 (file)
@@ -666,6 +666,87 @@ static int mobile_lpm_policy = -1;
 module_param(mobile_lpm_policy, int, 0644);
 MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets");
 
+static char *ahci_mask_port_map;
+module_param_named(mask_port_map, ahci_mask_port_map, charp, 0444);
+MODULE_PARM_DESC(mask_port_map,
+                "32-bits port map masks to ignore controllers ports. "
+                "Valid values are: "
+                "\"<mask>\" to apply the same mask to all AHCI controller "
+                "devices, and \"<pci_dev>=<mask>,<pci_dev>=<mask>,...\" to "
+                "specify different masks for the controllers specified, "
+                "where <pci_dev> is the PCI ID of an AHCI controller in the "
+                "form \"domain:bus:dev.func\"");
+
+static void ahci_apply_port_map_mask(struct device *dev,
+                                    struct ahci_host_priv *hpriv, char *mask_s)
+{
+       unsigned int mask;
+
+       if (kstrtouint(mask_s, 0, &mask)) {
+               dev_err(dev, "Invalid port map mask\n");
+               return;
+       }
+
+       hpriv->mask_port_map = mask;
+}
+
+static void ahci_get_port_map_mask(struct device *dev,
+                                  struct ahci_host_priv *hpriv)
+{
+       char *param, *end, *str, *mask_s;
+       char *name;
+
+       if (!strlen(ahci_mask_port_map))
+               return;
+
+       str = kstrdup(ahci_mask_port_map, GFP_KERNEL);
+       if (!str)
+               return;
+
+       /* Handle single mask case */
+       if (!strchr(str, '=')) {
+               ahci_apply_port_map_mask(dev, hpriv, str);
+               goto free;
+       }
+
+       /*
+        * Mask list case: parse the parameter to apply the mask only if
+        * the device name matches.
+        */
+       param = str;
+       end = param + strlen(param);
+       while (param && param < end && *param) {
+               name = param;
+               param = strchr(name, '=');
+               if (!param)
+                       break;
+
+               *param = '\0';
+               param++;
+               if (param >= end)
+                       break;
+
+               if (strcmp(dev_name(dev), name) != 0) {
+                       param = strchr(param, ',');
+                       if (param)
+                               param++;
+                       continue;
+               }
+
+               mask_s = param;
+               param = strchr(mask_s, ',');
+               if (param) {
+                       *param = '\0';
+                       param++;
+               }
+
+               ahci_apply_port_map_mask(dev, hpriv, mask_s);
+       }
+
+free:
+       kfree(str);
+}
+
 static void ahci_pci_save_initial_config(struct pci_dev *pdev,
                                         struct ahci_host_priv *hpriv)
 {
@@ -688,6 +769,10 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev,
                          "Disabling your PATA port. Use the boot option 'ahci.marvell_enable=0' to avoid this.\n");
        }
 
+       /* Handle port map masks passed as module parameter. */
+       if (ahci_mask_port_map)
+               ahci_get_port_map_mask(&pdev->dev, hpriv);
+
        ahci_save_initial_config(&pdev->dev, hpriv);
 }
 
index be3412cdb22e78a1d663337698f07b07c66727e4..c449d60d9bb962c80ac7e196d08dd722d2c6950b 100644 (file)
@@ -2539,7 +2539,7 @@ static void ata_dev_config_cdl(struct ata_device *dev)
        bool cdl_enabled;
        u64 val;
 
-       if (ata_id_major_version(dev->id) < 12)
+       if (ata_id_major_version(dev->id) < 11)
                goto not_supported;
 
        if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE) ||
index 2f4c58837641077f3ad91974cd11affbe6dcd1e8..e954976891a9f502930a3a7ffc5f31df113d2326 100644 (file)
@@ -4745,7 +4745,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
                         * bail out.
                         */
                        if (ap->pflags & ATA_PFLAG_SUSPENDED)
-                               goto unlock;
+                               goto unlock_ap;
 
                        if (!sdev)
                                continue;
@@ -4758,7 +4758,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
                        if (do_resume) {
                                ret = scsi_resume_device(sdev);
                                if (ret == -EWOULDBLOCK)
-                                       goto unlock;
+                                       goto unlock_scan;
                                dev->flags &= ~ATA_DFLAG_RESUMING;
                        }
                        ret = scsi_rescan_device(sdev);
@@ -4766,12 +4766,13 @@ void ata_scsi_dev_rescan(struct work_struct *work)
                        spin_lock_irqsave(ap->lock, flags);
 
                        if (ret)
-                               goto unlock;
+                               goto unlock_ap;
                }
        }
 
-unlock:
+unlock_ap:
        spin_unlock_irqrestore(ap->lock, flags);
+unlock_scan:
        mutex_unlock(&ap->scsi_scan_mutex);
 
        /* Reschedule with a delay if scsi_rescan_device() returned an error */
index 89ed6cd6b059ebb0af77dcc0d2b83a72fe995dc4..e9cc8b4786fbfb9eba5d3c1d8c06c3d08477a132 100644 (file)
@@ -15,6 +15,8 @@
 #include <linux/of_address.h>
 #include <linux/device.h>
 #include <linux/bitfield.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
 #include <asm/cacheflush.h>
 #include <asm/cacheinfo.h>
 #include <asm/dma-noncoherent.h>
@@ -247,13 +249,49 @@ static irqreturn_t ccache_int_handler(int irq, void *device)
        return IRQ_HANDLED;
 }
 
+static int sifive_ccache_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       unsigned long quirks;
+       int intr_num, rc;
+
+       quirks = (unsigned long)device_get_match_data(dev);
+
+       intr_num = platform_irq_count(pdev);
+       if (!intr_num)
+               return dev_err_probe(dev, -ENODEV, "No interrupts property\n");
+
+       for (int i = 0; i < intr_num; i++) {
+               if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR))
+                       continue;
+
+               g_irq[i] = platform_get_irq(pdev, i);
+               if (g_irq[i] < 0)
+                       return g_irq[i];
+
+               rc = devm_request_irq(dev, g_irq[i], ccache_int_handler, 0, "ccache_ecc", NULL);
+               if (rc)
+                       return dev_err_probe(dev, rc, "Could not request IRQ %d\n", g_irq[i]);
+       }
+
+       return 0;
+}
+
+static struct platform_driver sifive_ccache_driver = {
+       .probe  = sifive_ccache_probe,
+       .driver = {
+               .name           = "sifive_ccache",
+               .of_match_table = sifive_ccache_ids,
+       },
+};
+
 static int __init sifive_ccache_init(void)
 {
        struct device_node *np;
        struct resource res;
-       int i, rc, intr_num;
        const struct of_device_id *match;
        unsigned long quirks;
+       int rc;
 
        np = of_find_matching_node_and_match(NULL, sifive_ccache_ids, &match);
        if (!np)
@@ -277,28 +315,6 @@ static int __init sifive_ccache_init(void)
                goto err_unmap;
        }
 
-       intr_num = of_property_count_u32_elems(np, "interrupts");
-       if (!intr_num) {
-               pr_err("No interrupts property\n");
-               rc = -ENODEV;
-               goto err_unmap;
-       }
-
-       for (i = 0; i < intr_num; i++) {
-               g_irq[i] = irq_of_parse_and_map(np, i);
-
-               if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR))
-                       continue;
-
-               rc = request_irq(g_irq[i], ccache_int_handler, 0, "ccache_ecc",
-                                NULL);
-               if (rc) {
-                       pr_err("Could not request IRQ %d\n", g_irq[i]);
-                       goto err_free_irq;
-               }
-       }
-       of_node_put(np);
-
 #ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS
        if (quirks & QUIRK_NONSTANDARD_CACHE_OPS) {
                riscv_cbom_block_size = SIFIVE_CCACHE_LINE_SIZE;
@@ -315,11 +331,15 @@ static int __init sifive_ccache_init(void)
 #ifdef CONFIG_DEBUG_FS
        setup_sifive_debug();
 #endif
+
+       rc = platform_driver_register(&sifive_ccache_driver);
+       if (rc)
+               goto err_unmap;
+
+       of_node_put(np);
+
        return 0;
 
-err_free_irq:
-       while (--i >= 0)
-               free_irq(g_irq[i], NULL);
 err_unmap:
        iounmap(ccache_base);
 err_node_put:
index 456be28ba67cb476846c83c532e7bd04e521463f..2597cb43f43871dc0dc629c13b0b0ee3acf1398a 100644 (file)
@@ -702,7 +702,7 @@ static void extract_entropy(void *buf, size_t len)
 
 static void __cold _credit_init_bits(size_t bits)
 {
-       static struct execute_work set_ready;
+       static DECLARE_WORK(set_ready, crng_set_ready);
        unsigned int new, orig, add;
        unsigned long flags;
 
@@ -718,8 +718,8 @@ static void __cold _credit_init_bits(size_t bits)
 
        if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) {
                crng_reseed(NULL); /* Sets crng_init to CRNG_READY under base_crng.lock. */
-               if (static_key_initialized)
-                       execute_in_process_context(crng_set_ready, &set_ready);
+               if (static_key_initialized && system_unbound_wq)
+                       queue_work(system_unbound_wq, &set_ready);
                atomic_notifier_call_chain(&random_ready_notifier, 0, NULL);
                wake_up_interruptible(&crng_init_wait);
                kill_fasync(&fasync, SIGIO, POLL_IN);
@@ -890,8 +890,8 @@ void __init random_init(void)
 
        /*
         * If we were initialized by the cpu or bootloader before jump labels
-        * are initialized, then we should enable the static branch here, where
-        * it's guaranteed that jump labels have been initialized.
+        * or workqueues are initialized, then we should enable the static
+        * branch here, where it's guaranteed that these have been initialized.
         */
        if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY)
                crng_set_ready(NULL);
index af5cb818f84d6bf566e6c0a84763d8239d64700f..cb8c155a2c9b3dbdcbf00f198c5783b9559f8a89 100644 (file)
@@ -525,22 +525,11 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport)
 {
        struct acpi_device *hb = to_cxl_host_bridge(NULL, dev);
        u32 uid;
-       int rc;
 
        if (kstrtou32(acpi_device_uid(hb), 0, &uid))
                return -EINVAL;
 
-       rc = acpi_get_genport_coordinates(uid, dport->hb_coord);
-       if (rc < 0)
-               return rc;
-
-       /* Adjust back to picoseconds from nanoseconds */
-       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
-               dport->hb_coord[i].read_latency *= 1000;
-               dport->hb_coord[i].write_latency *= 1000;
-       }
-
-       return 0;
+       return acpi_get_genport_coordinates(uid, dport->coord);
 }
 
 static int add_host_bridge_dport(struct device *match, void *arg)
index eddbbe21450ca9dca5e71bf6ec14866cde0935d3..bb83867d9fec985634bb9b03652f1eaa34fc8a22 100644 (file)
 struct dsmas_entry {
        struct range dpa_range;
        u8 handle;
-       struct access_coordinate coord;
+       struct access_coordinate coord[ACCESS_COORDINATE_MAX];
 
        int entries;
        int qos_class;
 };
 
+static u32 cdat_normalize(u16 entry, u64 base, u8 type)
+{
+       u32 value;
+
+       /*
+        * Check for invalid and overflow values
+        */
+       if (entry == 0xffff || !entry)
+               return 0;
+       else if (base > (UINT_MAX / (entry)))
+               return 0;
+
+       /*
+        * CDAT fields follow the format of HMAT fields. See table 5 Device
+        * Scoped Latency and Bandwidth Information Structure in Coherent Device
+        * Attribute Table (CDAT) Specification v1.01.
+        */
+       value = entry * base;
+       switch (type) {
+       case ACPI_HMAT_ACCESS_LATENCY:
+       case ACPI_HMAT_READ_LATENCY:
+       case ACPI_HMAT_WRITE_LATENCY:
+               value = DIV_ROUND_UP(value, 1000);
+               break;
+       default:
+               break;
+       }
+       return value;
+}
+
 static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
                              const unsigned long end)
 {
@@ -58,8 +88,8 @@ static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
        return 0;
 }
 
-static void cxl_access_coordinate_set(struct access_coordinate *coord,
-                                     int access, unsigned int val)
+static void __cxl_access_coordinate_set(struct access_coordinate *coord,
+                                       int access, unsigned int val)
 {
        switch (access) {
        case ACPI_HMAT_ACCESS_LATENCY:
@@ -85,6 +115,13 @@ static void cxl_access_coordinate_set(struct access_coordinate *coord,
        }
 }
 
+static void cxl_access_coordinate_set(struct access_coordinate *coord,
+                                     int access, unsigned int val)
+{
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+               __cxl_access_coordinate_set(&coord[i], access, val);
+}
+
 static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
                               const unsigned long end)
 {
@@ -97,7 +134,6 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
        __le16 le_val;
        u64 val;
        u16 len;
-       int rc;
 
        len = le16_to_cpu((__force __le16)hdr->length);
        if (len != size || (unsigned long)hdr + len > end) {
@@ -124,12 +160,10 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
 
        le_base = (__force __le64)dslbis->entry_base_unit;
        le_val = (__force __le16)dslbis->entry[0];
-       rc = check_mul_overflow(le64_to_cpu(le_base),
-                               le16_to_cpu(le_val), &val);
-       if (rc)
-               pr_warn("DSLBIS value overflowed.\n");
+       val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
+                            dslbis->data_type);
 
-       cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val);
+       cxl_access_coordinate_set(dent->coord, dslbis->data_type, val);
 
        return 0;
 }
@@ -163,25 +197,18 @@ static int cxl_cdat_endpoint_process(struct cxl_port *port,
 static int cxl_port_perf_data_calculate(struct cxl_port *port,
                                        struct xarray *dsmas_xa)
 {
-       struct access_coordinate ep_c;
-       struct access_coordinate coord[ACCESS_COORDINATE_MAX];
+       struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
        struct dsmas_entry *dent;
        int valid_entries = 0;
        unsigned long index;
        int rc;
 
-       rc = cxl_endpoint_get_perf_coordinates(port, &ep_c);
+       rc = cxl_endpoint_get_perf_coordinates(port, ep_c);
        if (rc) {
                dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n");
                return rc;
        }
 
-       rc = cxl_hb_get_perf_coordinates(port, coord);
-       if (rc)  {
-               dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
-               return rc;
-       }
-
        struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
 
        if (!cxl_root)
@@ -193,18 +220,10 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
        xa_for_each(dsmas_xa, index, dent) {
                int qos_class;
 
-               cxl_coordinates_combine(&dent->coord, &dent->coord, &ep_c);
-               /*
-                * Keeping the host bridge coordinates separate from the dsmas
-                * coordinates in order to allow calculation of access class
-                * 0 and 1 for region later.
-                */
-               cxl_coordinates_combine(&coord[ACCESS_COORDINATE_CPU],
-                                       &coord[ACCESS_COORDINATE_CPU],
-                                       &dent->coord);
+               cxl_coordinates_combine(dent->coord, dent->coord, ep_c);
                dent->entries = 1;
                rc = cxl_root->ops->qos_class(cxl_root,
-                                             &coord[ACCESS_COORDINATE_CPU],
+                                             &dent->coord[ACCESS_COORDINATE_CPU],
                                              1, &qos_class);
                if (rc != 1)
                        continue;
@@ -222,14 +241,17 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
 static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
                              struct cxl_dpa_perf *dpa_perf)
 {
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+               dpa_perf->coord[i] = dent->coord[i];
        dpa_perf->dpa_range = dent->dpa_range;
-       dpa_perf->coord = dent->coord;
        dpa_perf->qos_class = dent->qos_class;
        dev_dbg(dev,
                "DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
                dent->dpa_range.start, dpa_perf->qos_class,
-               dent->coord.read_bandwidth, dent->coord.write_bandwidth,
-               dent->coord.read_latency, dent->coord.write_latency);
+               dent->coord[ACCESS_COORDINATE_CPU].read_bandwidth,
+               dent->coord[ACCESS_COORDINATE_CPU].write_bandwidth,
+               dent->coord[ACCESS_COORDINATE_CPU].read_latency,
+               dent->coord[ACCESS_COORDINATE_CPU].write_latency);
 }
 
 static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
@@ -461,17 +483,16 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
 
                le_base = (__force __le64)tbl->sslbis_header.entry_base_unit;
                le_val = (__force __le16)tbl->entries[i].latency_or_bandwidth;
-
-               if (check_mul_overflow(le64_to_cpu(le_base),
-                                      le16_to_cpu(le_val), &val))
-                       dev_warn(dev, "SSLBIS value overflowed!\n");
+               val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
+                                    sslbis->data_type);
 
                xa_for_each(&port->dports, index, dport) {
                        if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT ||
-                           dsp_id == dport->port_id)
-                               cxl_access_coordinate_set(&dport->sw_coord,
+                           dsp_id == dport->port_id) {
+                               cxl_access_coordinate_set(dport->coord,
                                                          sslbis->data_type,
                                                          val);
+                       }
                }
        }
 
@@ -493,6 +514,21 @@ void cxl_switch_parse_cdat(struct cxl_port *port)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL);
 
+static void __cxl_coordinates_combine(struct access_coordinate *out,
+                                     struct access_coordinate *c1,
+                                     struct access_coordinate *c2)
+{
+               if (c1->write_bandwidth && c2->write_bandwidth)
+                       out->write_bandwidth = min(c1->write_bandwidth,
+                                                  c2->write_bandwidth);
+               out->write_latency = c1->write_latency + c2->write_latency;
+
+               if (c1->read_bandwidth && c2->read_bandwidth)
+                       out->read_bandwidth = min(c1->read_bandwidth,
+                                                 c2->read_bandwidth);
+               out->read_latency = c1->read_latency + c2->read_latency;
+}
+
 /**
  * cxl_coordinates_combine - Combine the two input coordinates
  *
@@ -504,15 +540,8 @@ void cxl_coordinates_combine(struct access_coordinate *out,
                             struct access_coordinate *c1,
                             struct access_coordinate *c2)
 {
-               if (c1->write_bandwidth && c2->write_bandwidth)
-                       out->write_bandwidth = min(c1->write_bandwidth,
-                                                  c2->write_bandwidth);
-               out->write_latency = c1->write_latency + c2->write_latency;
-
-               if (c1->read_bandwidth && c2->read_bandwidth)
-                       out->read_bandwidth = min(c1->read_bandwidth,
-                                                 c2->read_bandwidth);
-               out->read_latency = c1->read_latency + c2->read_latency;
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+               __cxl_coordinates_combine(&out[i], &c1[i], &c2[i]);
 }
 
 MODULE_IMPORT_NS(CXL);
@@ -521,17 +550,13 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
                                    struct cxl_endpoint_decoder *cxled)
 {
        struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
-       struct cxl_port *port = cxlmd->endpoint;
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
-       struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
-       struct access_coordinate coord;
        struct range dpa = {
                        .start = cxled->dpa_res->start,
                        .end = cxled->dpa_res->end,
        };
        struct cxl_dpa_perf *perf;
-       int rc;
 
        switch (cxlr->mode) {
        case CXL_DECODER_RAM:
@@ -549,35 +574,16 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
        if (!range_contains(&perf->dpa_range, &dpa))
                return;
 
-       rc = cxl_hb_get_perf_coordinates(port, hb_coord);
-       if (rc)  {
-               dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
-               return;
-       }
-
        for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
-               /* Pickup the host bridge coords */
-               cxl_coordinates_combine(&coord, &hb_coord[i], &perf->coord);
-
                /* Get total bandwidth and the worst latency for the cxl region */
                cxlr->coord[i].read_latency = max_t(unsigned int,
                                                    cxlr->coord[i].read_latency,
-                                                   coord.read_latency);
+                                                   perf->coord[i].read_latency);
                cxlr->coord[i].write_latency = max_t(unsigned int,
                                                     cxlr->coord[i].write_latency,
-                                                    coord.write_latency);
-               cxlr->coord[i].read_bandwidth += coord.read_bandwidth;
-               cxlr->coord[i].write_bandwidth += coord.write_bandwidth;
-
-               /*
-                * Convert latency to nanosec from picosec to be consistent
-                * with the resulting latency coordinates computed by the
-                * HMAT_REPORTING code.
-                */
-               cxlr->coord[i].read_latency =
-                       DIV_ROUND_UP(cxlr->coord[i].read_latency, 1000);
-               cxlr->coord[i].write_latency =
-                       DIV_ROUND_UP(cxlr->coord[i].write_latency, 1000);
+                                                    perf->coord[i].write_latency);
+               cxlr->coord[i].read_bandwidth += perf->coord[i].read_bandwidth;
+               cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth;
        }
 }
 
index 9adda4795eb786b8658b573dd1e79befbad52255..f0f54aeccc872b50311a14958ddf874860af7982 100644 (file)
@@ -915,7 +915,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
 
                payload->handles[i++] = gen->hdr.handle;
                dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
-                       le16_to_cpu(payload->handles[i]));
+                       le16_to_cpu(payload->handles[i - 1]));
 
                if (i == max_handles) {
                        payload->nr_recs = i;
@@ -958,13 +958,14 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
                .payload_in = &log_type,
                .size_in = sizeof(log_type),
                .payload_out = payload,
-               .size_out = mds->payload_size,
                .min_out = struct_size(payload, records, 0),
        };
 
        do {
                int rc, i;
 
+               mbox_cmd.size_out = mds->payload_size;
+
                rc = cxl_internal_send_cmd(mds, &mbox_cmd);
                if (rc) {
                        dev_err_ratelimited(dev,
index 2b0cab556072f560420f7f7bf4d0bcddd0a01b4a..762783bb091afc8a40883c9ab2ee9c0f39e37219 100644 (file)
@@ -2133,36 +2133,44 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
 }
 EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
 
-/**
- * cxl_hb_get_perf_coordinates - Retrieve performance numbers between initiator
- *                              and host bridge
- *
- * @port: endpoint cxl_port
- * @coord: output access coordinates
- *
- * Return: errno on failure, 0 on success.
- */
-int cxl_hb_get_perf_coordinates(struct cxl_port *port,
-                               struct access_coordinate *coord)
+static void add_latency(struct access_coordinate *c, long latency)
 {
-       struct cxl_port *iter = port;
-       struct cxl_dport *dport;
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               c[i].write_latency += latency;
+               c[i].read_latency += latency;
+       }
+}
 
-       if (!is_cxl_endpoint(port))
-               return -EINVAL;
+static bool coordinates_valid(struct access_coordinate *c)
+{
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               if (c[i].read_bandwidth && c[i].write_bandwidth &&
+                   c[i].read_latency && c[i].write_latency)
+                       continue;
+               return false;
+       }
 
-       dport = iter->parent_dport;
-       while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
-               iter = to_cxl_port(iter->dev.parent);
-               dport = iter->parent_dport;
+       return true;
+}
+
+static void set_min_bandwidth(struct access_coordinate *c, unsigned int bw)
+{
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               c[i].write_bandwidth = min(c[i].write_bandwidth, bw);
+               c[i].read_bandwidth = min(c[i].read_bandwidth, bw);
        }
+}
 
-       coord[ACCESS_COORDINATE_LOCAL] =
-               dport->hb_coord[ACCESS_COORDINATE_LOCAL];
-       coord[ACCESS_COORDINATE_CPU] =
-               dport->hb_coord[ACCESS_COORDINATE_CPU];
+static void set_access_coordinates(struct access_coordinate *out,
+                                  struct access_coordinate *in)
+{
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+               out[i] = in[i];
+}
 
-       return 0;
+static bool parent_port_is_cxl_root(struct cxl_port *port)
+{
+       return is_cxl_root(to_cxl_port(port->dev.parent));
 }
 
 /**
@@ -2176,35 +2184,53 @@ int cxl_hb_get_perf_coordinates(struct cxl_port *port,
 int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
                                      struct access_coordinate *coord)
 {
-       struct access_coordinate c = {
-               .read_bandwidth = UINT_MAX,
-               .write_bandwidth = UINT_MAX,
+       struct access_coordinate c[] = {
+               {
+                       .read_bandwidth = UINT_MAX,
+                       .write_bandwidth = UINT_MAX,
+               },
+               {
+                       .read_bandwidth = UINT_MAX,
+                       .write_bandwidth = UINT_MAX,
+               },
        };
        struct cxl_port *iter = port;
        struct cxl_dport *dport;
        struct pci_dev *pdev;
        unsigned int bw;
+       bool is_cxl_root;
 
        if (!is_cxl_endpoint(port))
                return -EINVAL;
 
-       dport = iter->parent_dport;
-
        /*
-        * Exit the loop when the parent port of the current port is cxl root.
-        * The iterative loop starts at the endpoint and gathers the
-        * latency of the CXL link from the current iter to the next downstream
-        * port each iteration. If the parent is cxl root then there is
-        * nothing to gather.
+        * Exit the loop when the parent port of the current iter port is cxl
+        * root. The iterative loop starts at the endpoint and gathers the
+        * latency of the CXL link from the current device/port to the connected
+        * downstream port each iteration.
         */
-       while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
-               cxl_coordinates_combine(&c, &c, &dport->sw_coord);
-               c.write_latency += dport->link_latency;
-               c.read_latency += dport->link_latency;
-
-               iter = to_cxl_port(iter->dev.parent);
+       do {
                dport = iter->parent_dport;
-       }
+               iter = to_cxl_port(iter->dev.parent);
+               is_cxl_root = parent_port_is_cxl_root(iter);
+
+               /*
+                * There's no valid access_coordinate for a root port since RPs do not
+                * have CDAT and therefore needs to be skipped.
+                */
+               if (!is_cxl_root) {
+                       if (!coordinates_valid(dport->coord))
+                               return -EINVAL;
+                       cxl_coordinates_combine(c, c, dport->coord);
+               }
+               add_latency(c, dport->link_latency);
+       } while (!is_cxl_root);
+
+       dport = iter->parent_dport;
+       /* Retrieve HB coords */
+       if (!coordinates_valid(dport->coord))
+               return -EINVAL;
+       cxl_coordinates_combine(c, c, dport->coord);
 
        /* Get the calculated PCI paths bandwidth */
        pdev = to_pci_dev(port->uport_dev->parent);
@@ -2213,10 +2239,8 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
                return -ENXIO;
        bw /= BITS_PER_BYTE;
 
-       c.write_bandwidth = min(c.write_bandwidth, bw);
-       c.read_bandwidth = min(c.read_bandwidth, bw);
-
-       *coord = c;
+       set_min_bandwidth(c, bw);
+       set_access_coordinates(coord, c);
 
        return 0;
 }
index 372786f809555f66509186c3e3476af2fad0d7f8..3c42f984eeafaa54af79ac280cd24c0df62f944f 100644 (file)
@@ -271,6 +271,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL);
 static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
                                struct cxl_register_map *map)
 {
+       u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
        int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
        u64 offset = ((u64)reg_hi << 32) |
                     (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
@@ -278,11 +279,11 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
        if (offset > pci_resource_len(pdev, bar)) {
                dev_warn(&pdev->dev,
                         "BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar,
-                        &pdev->resource[bar], &offset, map->reg_type);
+                        &pdev->resource[bar], &offset, reg_type);
                return false;
        }
 
-       map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
+       map->reg_type = reg_type;
        map->resource = pci_resource_start(pdev, bar) + offset;
        map->max_size = pci_resource_len(pdev, bar) - offset;
        return true;
index 534e25e2f0a48197a0588abd8a46d996bb333ed8..036d17db68e0068752277adf0e5b56c7b526e566 100644 (file)
@@ -663,8 +663,7 @@ struct cxl_rcrb_info {
  * @rch: Indicate whether this dport was enumerated in RCH or VH mode
  * @port: reference to cxl_port that contains this downstream port
  * @regs: Dport parsed register blocks
- * @sw_coord: access coordinates (performance) for switch from CDAT
- * @hb_coord: access coordinates (performance) from ACPI generic port (host bridge)
+ * @coord: access coordinates (bandwidth and latency performance attributes)
  * @link_latency: calculated PCIe downstream latency
  */
 struct cxl_dport {
@@ -675,8 +674,7 @@ struct cxl_dport {
        bool rch;
        struct cxl_port *port;
        struct cxl_regs regs;
-       struct access_coordinate sw_coord;
-       struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
+       struct access_coordinate coord[ACCESS_COORDINATE_MAX];
        long link_latency;
 };
 
@@ -884,8 +882,6 @@ void cxl_switch_parse_cdat(struct cxl_port *port);
 
 int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
                                      struct access_coordinate *coord);
-int cxl_hb_get_perf_coordinates(struct cxl_port *port,
-                               struct access_coordinate *coord);
 void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
                                    struct cxl_endpoint_decoder *cxled);
 
index 20fb3b35e89e0473ee8ad42dcd17407086fb8cdb..36cee9c30cebd20488ec5afd216187ef82497e54 100644 (file)
@@ -401,7 +401,7 @@ enum cxl_devtype {
  */
 struct cxl_dpa_perf {
        struct range dpa_range;
-       struct access_coordinate coord;
+       struct access_coordinate coord[ACCESS_COORDINATE_MAX];
        int qos_class;
 };
 
index f2556a8e940156bc4f9d34ae5dc92aac837b688a..9bc2e10381afd9cc6f97d6dd50510c8daa092b5b 100644 (file)
@@ -790,7 +790,7 @@ static void ffa_notification_info_get(void)
 
                        part_id = packed_id_list[ids_processed++];
 
-                       if (!ids_count[list]) { /* Global Notification */
+                       if (ids_count[list] == 1) { /* Global Notification */
                                __do_sched_recv_cb(part_id, 0, false);
                                continue;
                        }
index ea9201e7044cbdbfea4d12bb5ac2390330c5d911..1fa79bba492e880fea5af80a038eddf4cce7c003 100644 (file)
@@ -736,7 +736,7 @@ static void scmi_powercap_domain_init_fc(const struct scmi_protocol_handle *ph,
        ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL,
                                   POWERCAP_PAI_GET, 4, domain,
                                   &fc[POWERCAP_FC_PAI].get_addr, NULL,
-                                  &fc[POWERCAP_PAI_GET].rate_limit);
+                                  &fc[POWERCAP_FC_PAI].rate_limit);
 
        *p_fc = fc;
 }
index 350573518503355f6abaa4d24cbcac6368e8930c..130d13e9cd6beb93498469fae489b05e5ba1dfab 100644 (file)
@@ -921,7 +921,7 @@ static int scmi_dbg_raw_mode_open(struct inode *inode, struct file *filp)
        rd->raw = raw;
        filp->private_data = rd;
 
-       return 0;
+       return nonseekable_open(inode, filp);
 }
 
 static int scmi_dbg_raw_mode_release(struct inode *inode, struct file *filp)
@@ -950,6 +950,7 @@ static const struct file_operations scmi_dbg_raw_mode_reset_fops = {
        .open = scmi_dbg_raw_mode_open,
        .release = scmi_dbg_raw_mode_release,
        .write = scmi_dbg_raw_mode_reset_write,
+       .llseek = no_llseek,
        .owner = THIS_MODULE,
 };
 
@@ -959,6 +960,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_fops = {
        .read = scmi_dbg_raw_mode_message_read,
        .write = scmi_dbg_raw_mode_message_write,
        .poll = scmi_dbg_raw_mode_message_poll,
+       .llseek = no_llseek,
        .owner = THIS_MODULE,
 };
 
@@ -975,6 +977,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_async_fops = {
        .read = scmi_dbg_raw_mode_message_read,
        .write = scmi_dbg_raw_mode_message_async_write,
        .poll = scmi_dbg_raw_mode_message_poll,
+       .llseek = no_llseek,
        .owner = THIS_MODULE,
 };
 
@@ -998,6 +1001,7 @@ static const struct file_operations scmi_dbg_raw_mode_notification_fops = {
        .release = scmi_dbg_raw_mode_release,
        .read = scmi_test_dbg_raw_mode_notif_read,
        .poll = scmi_test_dbg_raw_mode_notif_poll,
+       .llseek = no_llseek,
        .owner = THIS_MODULE,
 };
 
@@ -1021,6 +1025,7 @@ static const struct file_operations scmi_dbg_raw_mode_errors_fops = {
        .release = scmi_dbg_raw_mode_release,
        .read = scmi_test_dbg_raw_mode_errors_read,
        .poll = scmi_test_dbg_raw_mode_errors_poll,
+       .llseek = no_llseek,
        .owner = THIS_MODULE,
 };
 
index 1ee62cd58582b6496f0536fa7c45e2dc0305797f..25db014494a4de9bb8c44d0b2bd39d8786c3bb59 100644 (file)
@@ -92,7 +92,7 @@ static inline int to_reg(int gpio, enum ctrl_register reg_type)
                case 0x5e:
                        return GPIOPANELCTL;
                default:
-                       return -EOPNOTSUPP;
+                       return -ENOTSUPP;
                }
        }
 
index 5ef8af8249806aa6c1b226ed4ab9219cca91d936..c097e310c9e841044a3ef214444170721d116537 100644 (file)
@@ -529,6 +529,7 @@ static const struct of_device_id lpc32xx_gpio_of_match[] = {
        { .compatible = "nxp,lpc3220-gpio", },
        { },
 };
+MODULE_DEVICE_TABLE(of, lpc32xx_gpio_of_match);
 
 static struct platform_driver lpc32xx_gpio_driver = {
        .driver         = {
index c18b6b47384f1b8b9a3a26c3ac7c5f125e82d365..94ca9d03c0949453abf3ad82e013698a7a97ffda 100644 (file)
@@ -104,7 +104,7 @@ static inline int to_reg(int gpio, enum ctrl_register type)
        unsigned int reg = type == CTRL_IN ? GPIO_IN_CTRL_BASE : GPIO_OUT_CTRL_BASE;
 
        if (gpio >= WCOVE_GPIO_NUM)
-               return -EOPNOTSUPP;
+               return -ENOTSUPP;
 
        return reg + gpio;
 }
index 9c62552bec344e370996a028d809934e4a6f4420..b3b84647207ed47463e004e2c72745c6120857d1 100644 (file)
@@ -210,6 +210,7 @@ extern int amdgpu_async_gfx_ring;
 extern int amdgpu_mcbp;
 extern int amdgpu_discovery;
 extern int amdgpu_mes;
+extern int amdgpu_mes_log_enable;
 extern int amdgpu_mes_kiq;
 extern int amdgpu_noretry;
 extern int amdgpu_force_asic_type;
index 0a4b09709cfb149078c6284f2a0908cbde928430..ec888fc6ead8df0ce52ec00439e5f22ca7f4e9ff 100644 (file)
@@ -819,7 +819,7 @@ retry:
 
        p->bytes_moved += ctx.bytes_moved;
        if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-           amdgpu_bo_in_cpu_visible_vram(bo))
+           amdgpu_res_cpu_visible(adev, bo->tbo.resource))
                p->bytes_moved_vis += ctx.bytes_moved;
 
        if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
index aa16d51dd8421b38a0a34fcf89263ffbf08af4fd..7753a2e64d4114a280afc99beb341f4af8f4ffac 100644 (file)
@@ -4135,18 +4135,22 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                                        adev->ip_blocks[i].status.hw = true;
                                }
                        }
+               } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+                                  !amdgpu_device_has_display_hardware(adev)) {
+                                       r = psp_gpu_reset(adev);
                } else {
-                       tmp = amdgpu_reset_method;
-                       /* It should do a default reset when loading or reloading the driver,
-                        * regardless of the module parameter reset_method.
-                        */
-                       amdgpu_reset_method = AMD_RESET_METHOD_NONE;
-                       r = amdgpu_asic_reset(adev);
-                       amdgpu_reset_method = tmp;
-                       if (r) {
-                               dev_err(adev->dev, "asic reset on init failed\n");
-                               goto failed;
-                       }
+                               tmp = amdgpu_reset_method;
+                               /* It should do a default reset when loading or reloading the driver,
+                                * regardless of the module parameter reset_method.
+                                */
+                               amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+                               r = amdgpu_asic_reset(adev);
+                               amdgpu_reset_method = tmp;
+               }
+
+               if (r) {
+                 dev_err(adev->dev, "asic reset on init failed\n");
+                 goto failed;
                }
        }
 
index fdd36fb027ab6aa04b31c790af80596bb7da0427..ac5bf01fe8d2a9e9741d00981683b0e32b02f4eb 100644 (file)
@@ -1896,6 +1896,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
                amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
                break;
        case IP_VERSION(14, 0, 0):
+       case IP_VERSION(14, 0, 1):
                amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
                break;
        default:
index 80b9642f2bc4f25c69e9f30c70138f073e0c6cd2..e4277298cf1aad3518025b898162ea6224e874de 100644 (file)
@@ -195,6 +195,7 @@ int amdgpu_async_gfx_ring = 1;
 int amdgpu_mcbp = -1;
 int amdgpu_discovery = -1;
 int amdgpu_mes;
+int amdgpu_mes_log_enable = 0;
 int amdgpu_mes_kiq;
 int amdgpu_noretry = -1;
 int amdgpu_force_asic_type = -1;
@@ -667,6 +668,15 @@ MODULE_PARM_DESC(mes,
        "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)");
 module_param_named(mes, amdgpu_mes, int, 0444);
 
+/**
+ * DOC: mes_log_enable (int)
+ * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_log_enable,
+       "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444);
+
 /**
  * DOC: mes_kiq (int)
  * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq.
index 4b3000c21ef2c59cba09ca39e3dc5421208049d5..e4742b65032d1dce16db69ea086c86dd4895e610 100644 (file)
@@ -304,12 +304,15 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
                dma_fence_set_error(finished, -ECANCELED);
 
        if (finished->error < 0) {
-               DRM_INFO("Skip scheduling IBs!\n");
+               dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)",
+                       ring->name);
        } else {
                r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
                                       &fence);
                if (r)
-                       DRM_ERROR("Error scheduling IBs (%d)\n", r);
+                       dev_err(adev->dev,
+                               "Error scheduling IBs (%d) in ring(%s)", r,
+                               ring->name);
        }
 
        job->job_run_counter++;
index a98e03e0a51f1f741895d253f896e76de29f9aec..a00cf4756ad0e2f371742e760183882773a80243 100644 (file)
@@ -102,7 +102,10 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
 {
        int r;
 
-       r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+       if (!amdgpu_mes_log_enable)
+               return 0;
+
+       r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_GTT,
                                    &adev->mes.event_log_gpu_obj,
                                    &adev->mes.event_log_gpu_addr,
@@ -1549,12 +1552,11 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
        uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
 
        seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
-                    mem, PAGE_SIZE, false);
+                    mem, AMDGPU_MES_LOG_BUFFER_SIZE, false);
 
        return 0;
 }
 
-
 DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
 
 #endif
@@ -1565,7 +1567,7 @@ void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
 #if defined(CONFIG_DEBUG_FS)
        struct drm_minor *minor = adev_to_drm(adev)->primary;
        struct dentry *root = minor->debugfs_root;
-       if (adev->enable_mes)
+       if (adev->enable_mes && amdgpu_mes_log_enable)
                debugfs_create_file("amdgpu_mes_event_log", 0444, root,
                                    adev, &amdgpu_debugfs_mes_event_log_fops);
 
index 7d4f93fea937ae1d82ebd95af9cad8dc71586034..4c8fc3117ef8948627ef6a83cb7f603de2991662 100644 (file)
@@ -52,6 +52,7 @@ enum amdgpu_mes_priority_level {
 
 #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
 #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */
+#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */
 
 struct amdgpu_mes_funcs;
 
index 010b0cb7693c9c3be5608f192cb19e583a285893..2099159a693fa02e7c508c3aecdc9f695498cecd 100644 (file)
@@ -617,8 +617,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
                return r;
 
        if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-           bo->tbo.resource->mem_type == TTM_PL_VRAM &&
-           amdgpu_bo_in_cpu_visible_vram(bo))
+           amdgpu_res_cpu_visible(adev, bo->tbo.resource))
                amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
                                             ctx.bytes_moved);
        else
@@ -1272,23 +1271,25 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
 void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
                          struct amdgpu_mem_stats *stats)
 {
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+       struct ttm_resource *res = bo->tbo.resource;
        uint64_t size = amdgpu_bo_size(bo);
        struct drm_gem_object *obj;
        unsigned int domain;
        bool shared;
 
        /* Abort if the BO doesn't currently have a backing store */
-       if (!bo->tbo.resource)
+       if (!res)
                return;
 
        obj = &bo->tbo.base;
        shared = drm_gem_object_is_shared_for_memory_stats(obj);
 
-       domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
+       domain = amdgpu_mem_type_to_domain(res->mem_type);
        switch (domain) {
        case AMDGPU_GEM_DOMAIN_VRAM:
                stats->vram += size;
-               if (amdgpu_bo_in_cpu_visible_vram(bo))
+               if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
                        stats->visible_vram += size;
                if (shared)
                        stats->vram_shared += size;
@@ -1389,10 +1390,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
        /* Remember that this BO was accessed by the CPU */
        abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 
-       if (bo->resource->mem_type != TTM_PL_VRAM)
-               return 0;
-
-       if (amdgpu_bo_in_cpu_visible_vram(abo))
+       if (amdgpu_res_cpu_visible(adev, bo->resource))
                return 0;
 
        /* Can't move a pinned BO to visible VRAM */
@@ -1415,7 +1413,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 
        /* this should never happen */
        if (bo->resource->mem_type == TTM_PL_VRAM &&
-           !amdgpu_bo_in_cpu_visible_vram(abo))
+           !amdgpu_res_cpu_visible(adev, bo->resource))
                return VM_FAULT_SIGBUS;
 
        ttm_bo_move_to_lru_tail_unlocked(bo);
@@ -1579,6 +1577,7 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
  */
 u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
 {
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        struct dma_buf_attachment *attachment;
        struct dma_buf *dma_buf;
        const char *placement;
@@ -1587,10 +1586,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
 
        if (dma_resv_trylock(bo->tbo.base.resv)) {
                unsigned int domain;
+
                domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
                switch (domain) {
                case AMDGPU_GEM_DOMAIN_VRAM:
-                       if (amdgpu_bo_in_cpu_visible_vram(bo))
+                       if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
                                placement = "VRAM VISIBLE";
                        else
                                placement = "VRAM";
index be679c42b0b8cb5d127910803e79593910c72952..fa03d9e4874cc65b39e038014ab15fc4e58ba858 100644 (file)
@@ -250,28 +250,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
        return drm_vma_node_offset_addr(&bo->tbo.base.vma_node);
 }
 
-/**
- * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
- */
-static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
-{
-       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-       struct amdgpu_res_cursor cursor;
-
-       if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM)
-               return false;
-
-       amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
-       while (cursor.remaining) {
-               if (cursor.start < adev->gmc.visible_vram_size)
-                       return true;
-
-               amdgpu_res_next(&cursor, cursor.size);
-       }
-
-       return false;
-}
-
 /**
  * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
  */
index fc418e670fdae27b699bdbefce8051ab128ab76c..1d71729e3f6bcef2c02f9e1ce252dc6cd6461b94 100644 (file)
@@ -133,7 +133,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 
                } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
                           !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
-                          amdgpu_bo_in_cpu_visible_vram(abo)) {
+                          amdgpu_res_cpu_visible(adev, bo->resource)) {
 
                        /* Try evicting to the CPU inaccessible part of VRAM
                         * first, but only set GTT as busy placement, so this
@@ -403,40 +403,55 @@ error:
        return r;
 }
 
-/*
- * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+/**
+ * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
+ * @adev: amdgpu device
+ * @res: the resource to check
  *
- * Called by amdgpu_bo_move()
+ * Returns: true if the full resource is CPU visible, false otherwise.
  */
-static bool amdgpu_mem_visible(struct amdgpu_device *adev,
-                              struct ttm_resource *mem)
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+                           struct ttm_resource *res)
 {
-       u64 mem_size = (u64)mem->size;
        struct amdgpu_res_cursor cursor;
-       u64 end;
 
-       if (mem->mem_type == TTM_PL_SYSTEM ||
-           mem->mem_type == TTM_PL_TT)
+       if (!res)
+               return false;
+
+       if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
+           res->mem_type == AMDGPU_PL_PREEMPT)
                return true;
-       if (mem->mem_type != TTM_PL_VRAM)
+
+       if (res->mem_type != TTM_PL_VRAM)
                return false;
 
-       amdgpu_res_first(mem, 0, mem_size, &cursor);
-       end = cursor.start + cursor.size;
+       amdgpu_res_first(res, 0, res->size, &cursor);
        while (cursor.remaining) {
+               if ((cursor.start + cursor.size) >= adev->gmc.visible_vram_size)
+                       return false;
                amdgpu_res_next(&cursor, cursor.size);
+       }
 
-               if (!cursor.remaining)
-                       break;
+       return true;
+}
 
-               /* ttm_resource_ioremap only supports contiguous memory */
-               if (end != cursor.start)
-                       return false;
+/*
+ * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_res_copyable(struct amdgpu_device *adev,
+                               struct ttm_resource *mem)
+{
+       if (!amdgpu_res_cpu_visible(adev, mem))
+               return false;
 
-               end = cursor.start + cursor.size;
-       }
+       /* ttm_resource_ioremap only supports contiguous memory */
+       if (mem->mem_type == TTM_PL_VRAM &&
+           !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
+               return false;
 
-       return end <= adev->gmc.visible_vram_size;
+       return true;
 }
 
 /*
@@ -529,8 +544,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 
        if (r) {
                /* Check that all memory is CPU accessible */
-               if (!amdgpu_mem_visible(adev, old_mem) ||
-                   !amdgpu_mem_visible(adev, new_mem)) {
+               if (!amdgpu_res_copyable(adev, old_mem) ||
+                   !amdgpu_res_copyable(adev, new_mem)) {
                        pr_err("Move buffer fallback to memcpy unavailable\n");
                        return r;
                }
@@ -557,7 +572,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
                                     struct ttm_resource *mem)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
-       size_t bus_size = (size_t)mem->size;
 
        switch (mem->mem_type) {
        case TTM_PL_SYSTEM:
@@ -568,9 +582,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
                break;
        case TTM_PL_VRAM:
                mem->bus.offset = mem->start << PAGE_SHIFT;
-               /* check if it's visible */
-               if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
-                       return -EINVAL;
 
                if (adev->mman.aper_base_kaddr &&
                    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
index 65ec82141a8e012e8ba42b0bb627f1a4f504c465..32cf6b6f6efd96873c294648714f2c78f6ff9ec3 100644 (file)
@@ -139,6 +139,9 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
 int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
                                      uint64_t start);
 
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+                           struct ttm_resource *res);
+
 int amdgpu_ttm_init(struct amdgpu_device *adev);
 void amdgpu_ttm_fini(struct amdgpu_device *adev);
 void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
index 4299ce386322e7cea27232ae05a1222f62f5a850..94089069c9ada61aa61b7c2b28601b764d47c172 100644 (file)
@@ -1613,6 +1613,37 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
        trace_amdgpu_vm_bo_map(bo_va, mapping);
 }
 
+/* Validate operation parameters to prevent potential abuse */
+static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
+                                         struct amdgpu_bo *bo,
+                                         uint64_t saddr,
+                                         uint64_t offset,
+                                         uint64_t size)
+{
+       uint64_t tmp, lpfn;
+
+       if (saddr & AMDGPU_GPU_PAGE_MASK
+           || offset & AMDGPU_GPU_PAGE_MASK
+           || size & AMDGPU_GPU_PAGE_MASK)
+               return -EINVAL;
+
+       if (check_add_overflow(saddr, size, &tmp)
+           || check_add_overflow(offset, size, &tmp)
+           || size == 0 /* which also leads to end < begin */)
+               return -EINVAL;
+
+       /* make sure object fit at this offset */
+       if (bo && offset + size > amdgpu_bo_size(bo))
+               return -EINVAL;
+
+       /* Ensure last pfn not exceed max_pfn */
+       lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT;
+       if (lpfn >= adev->vm_manager.max_pfn)
+               return -EINVAL;
+
+       return 0;
+}
+
 /**
  * amdgpu_vm_bo_map - map bo inside a vm
  *
@@ -1639,21 +1670,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
        struct amdgpu_bo *bo = bo_va->base.bo;
        struct amdgpu_vm *vm = bo_va->base.vm;
        uint64_t eaddr;
+       int r;
 
-       /* validate the parameters */
-       if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
-               return -EINVAL;
-       if (saddr + size <= saddr || offset + size <= offset)
-               return -EINVAL;
-
-       /* make sure object fit at this offset */
-       eaddr = saddr + size - 1;
-       if ((bo && offset + size > amdgpu_bo_size(bo)) ||
-           (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
-               return -EINVAL;
+       r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+       if (r)
+               return r;
 
        saddr /= AMDGPU_GPU_PAGE_SIZE;
-       eaddr /= AMDGPU_GPU_PAGE_SIZE;
+       eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
 
        tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
        if (tmp) {
@@ -1706,17 +1730,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
        uint64_t eaddr;
        int r;
 
-       /* validate the parameters */
-       if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
-               return -EINVAL;
-       if (saddr + size <= saddr || offset + size <= offset)
-               return -EINVAL;
-
-       /* make sure object fit at this offset */
-       eaddr = saddr + size - 1;
-       if ((bo && offset + size > amdgpu_bo_size(bo)) ||
-           (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
-               return -EINVAL;
+       r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+       if (r)
+               return r;
 
        /* Allocate all the needed memory */
        mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
@@ -1730,7 +1746,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
        }
 
        saddr /= AMDGPU_GPU_PAGE_SIZE;
-       eaddr /= AMDGPU_GPU_PAGE_SIZE;
+       eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
 
        mapping->start = saddr;
        mapping->last = eaddr;
@@ -1817,10 +1833,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
        struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
        LIST_HEAD(removed);
        uint64_t eaddr;
+       int r;
+
+       r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size);
+       if (r)
+               return r;
 
-       eaddr = saddr + size - 1;
        saddr /= AMDGPU_GPU_PAGE_SIZE;
-       eaddr /= AMDGPU_GPU_PAGE_SIZE;
+       eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
 
        /* Allocate all the needed memory */
        before = kzalloc(sizeof(*before), GFP_KERNEL);
index d6f808acfb17b79d98664d0fedaa95d8e29a4270..fbb43ae7624f44ebd13ddbe5a78865ea2dba10ab 100644 (file)
@@ -62,6 +62,11 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
        adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
 }
 
+static bool aqua_vanjaram_xcp_vcn_shared(struct amdgpu_device *adev)
+{
+       return (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst);
+}
+
 static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
                             uint32_t inst_idx, struct amdgpu_ring *ring)
 {
@@ -87,7 +92,7 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
        case AMDGPU_RING_TYPE_VCN_ENC:
        case AMDGPU_RING_TYPE_VCN_JPEG:
                ip_blk = AMDGPU_XCP_VCN;
-               if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+               if (aqua_vanjaram_xcp_vcn_shared(adev))
                        inst_mask = 1 << (inst_idx * 2);
                break;
        default:
@@ -140,10 +145,12 @@ static int aqua_vanjaram_xcp_sched_list_update(
 
                aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
 
-               /* VCN is shared by two partitions under CPX MODE */
+               /* VCN may be shared by two partitions under CPX MODE in certain
+                * configs.
+                */
                if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
-                       ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
-                       adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+                    ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
+                   aqua_vanjaram_xcp_vcn_shared(adev))
                        aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
        }
 
index 1770e496c1b7ce21198fdb80d3051c4c961e9b5f..f7325b02a191f726196d4ad0ac6fa3d090ab9977 100644 (file)
@@ -1635,7 +1635,7 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
                        active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
        }
 
-       active_rb_bitmap |= global_active_rb_bitmap;
+       active_rb_bitmap &= global_active_rb_bitmap;
        adev->gfx.config.backend_enable_mask = active_rb_bitmap;
        adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
 }
@@ -5465,6 +5465,7 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
        /* Make sure that we can't skip the SET_Q_MODE packets when the VM
         * changed in any way.
         */
+       ring->set_q_mode_offs = 0;
        ring->set_q_mode_ptr = NULL;
 }
 
index 072c478665ade1a838f810bfadc10b32bf44a5eb..63f281a9984d986961d70511c83b6e65272979b7 100644 (file)
@@ -411,8 +411,11 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
        mes_set_hw_res_pkt.enable_reg_active_poll = 1;
        mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
        mes_set_hw_res_pkt.oversubscription_timer = 50;
-       mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
-       mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
+       if (amdgpu_mes_log_enable) {
+               mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+               mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
+                                       mes->event_log_gpu_addr;
+       }
 
        return mes_v11_0_submit_pkt_and_poll_completion(mes,
                        &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
index 34237a1b1f2e45c40989c2070bdc0ae071ee0c4b..82eab49be82bb99807e5caabf5079b32dfd4cb26 100644 (file)
@@ -1602,19 +1602,9 @@ static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev,
        u32 sdma_cntl;
 
        sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
-       switch (state) {
-       case AMDGPU_IRQ_STATE_DISABLE:
-               sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL,
-                                         DRAM_ECC_INT_ENABLE, 0);
-               WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
-               break;
-       /* sdma ecc interrupt is enabled by default
-        * driver doesn't need to do anything to
-        * enable the interrupt */
-       case AMDGPU_IRQ_STATE_ENABLE:
-       default:
-               break;
-       }
+       sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE,
+                                       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+       WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
 
        return 0;
 }
index 581a3bd11481cc8d44a4f22188551d5a2803cff5..43ca63fe85ac3b0f9236a27766f3a78f42c2fbfb 100644 (file)
@@ -457,10 +457,8 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev)
 {
        switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
        case IP_VERSION(11, 0, 0):
-               return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC);
        case IP_VERSION(11, 0, 2):
        case IP_VERSION(11, 0, 3):
-               return false;
        default:
                return true;
        }
@@ -722,7 +720,10 @@ static int soc21_common_early_init(void *handle)
                        AMD_PG_SUPPORT_VCN |
                        AMD_PG_SUPPORT_JPEG |
                        AMD_PG_SUPPORT_GFX_PG;
-               adev->external_rev_id = adev->rev_id + 0x1;
+               if (adev->rev_id == 0)
+                       adev->external_rev_id = 0x1;
+               else
+                       adev->external_rev_id = adev->rev_id + 0x10;
                break;
        case IP_VERSION(11, 5, 1):
                adev->cg_flags =
@@ -869,10 +870,35 @@ static int soc21_common_suspend(void *handle)
        return soc21_common_hw_fini(adev);
 }
 
+static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
+{
+       u32 sol_reg1, sol_reg2;
+
+       /* Will reset for the following suspend abort cases.
+        * 1) Only reset dGPU side.
+        * 2) S3 suspend got aborted and TOS is active.
+        */
+       if (!(adev->flags & AMD_IS_APU) && adev->in_s3 &&
+           !adev->suspend_complete) {
+               sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+               msleep(100);
+               sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+               return (sol_reg1 != sol_reg2);
+       }
+
+       return false;
+}
+
 static int soc21_common_resume(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       if (soc21_need_reset_on_resume(adev)) {
+               dev_info(adev->dev, "S3 suspend aborted, resetting...");
+               soc21_asic_reset(adev);
+       }
+
        return soc21_common_hw_init(adev);
 }
 
index 84368cf1e17535c16c031ce6677f53769f9e8f94..bd57896ab85d565770bd75484b5443de9891d601 100644 (file)
@@ -225,6 +225,8 @@ static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch)
 
        WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size);
 
+       ring->wptr = 0;
+
        data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE);
        data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK);
        WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data);
index f9631f4b1a02ca5121d7b382fe128c47c7718ec5..55aa74cbc5325e23451aa255dd5ce016e0aa4df8 100644 (file)
@@ -779,8 +779,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
         * nodes, but not more than args->num_of_nodes as that is
         * the amount of memory allocated by user
         */
-       pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
-                               args->num_of_nodes), GFP_KERNEL);
+       pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),
+                    GFP_KERNEL);
        if (!pa)
                return -ENOMEM;
 
index 041ec3de55e72f24a6cce44e8a75682bf3381531..719d6d365e15016abca596bb7d9d1994b6e54996 100644 (file)
@@ -960,7 +960,6 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
 {
        struct kfd_node *node;
        int i;
-       int count;
 
        if (!kfd->init_complete)
                return;
@@ -968,12 +967,10 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
        /* for runtime suspend, skip locking kfd */
        if (!run_pm) {
                mutex_lock(&kfd_processes_mutex);
-               count = ++kfd_locked;
-               mutex_unlock(&kfd_processes_mutex);
-
                /* For first KFD device suspend all the KFD processes */
-               if (count == 1)
+               if (++kfd_locked == 1)
                        kfd_suspend_all_processes();
+               mutex_unlock(&kfd_processes_mutex);
        }
 
        for (i = 0; i < kfd->num_nodes; i++) {
@@ -984,7 +981,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
 
 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 {
-       int ret, count, i;
+       int ret, i;
 
        if (!kfd->init_complete)
                return 0;
@@ -998,12 +995,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
        /* for runtime resume, skip unlocking kfd */
        if (!run_pm) {
                mutex_lock(&kfd_processes_mutex);
-               count = --kfd_locked;
-               mutex_unlock(&kfd_processes_mutex);
-
-               WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
-               if (count == 0)
+               if (--kfd_locked == 0)
                        ret = kfd_resume_all_processes();
+               WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error");
+               mutex_unlock(&kfd_processes_mutex);
        }
 
        return ret;
index f4d395e38683db7c85f3a7f5fc922e93b1222f88..0b655555e1678643fb84fa8b3e1640cd35a9a74e 100644 (file)
@@ -2001,6 +2001,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
                dev_err(dev, "HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
                while (halt_if_hws_hang)
                        schedule();
+               kfd_hws_hang(dqm);
                return -ETIME;
        }
 
index 717a60d7a4ea953b8dfc369b09d855ad74b49659..b79986412cd839bc89741a0b3bc1986daa2b10e4 100644 (file)
@@ -819,9 +819,9 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
        mutex_lock(&kfd_processes_mutex);
 
        if (kfd_is_locked()) {
-               mutex_unlock(&kfd_processes_mutex);
                pr_debug("KFD is locked! Cannot create process");
-               return ERR_PTR(-EINVAL);
+               process = ERR_PTR(-EINVAL);
+               goto out;
        }
 
        /* A prior open of /dev/kfd could have already created the process. */
index 71d2d44681b218fc5146f3354464ba0f9c08610e..6d2f60c61decc36711953fa5b0dd67888c652a32 100644 (file)
@@ -148,6 +148,9 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
 #define FIRMWARE_DCN_35_DMUB "amdgpu/dcn_3_5_dmcub.bin"
 MODULE_FIRMWARE(FIRMWARE_DCN_35_DMUB);
 
+#define FIRMWARE_DCN_351_DMUB "amdgpu/dcn_3_5_1_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_351_DMUB);
+
 /* Number of bytes in PSP header for firmware. */
 #define PSP_HEADER_BYTES 0x100
 
@@ -3044,6 +3047,10 @@ static int dm_resume(void *handle)
        /* Do mst topology probing after resuming cached state*/
        drm_connector_list_iter_begin(ddev, &iter);
        drm_for_each_connector_iter(connector, &iter) {
+
+               if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+                       continue;
+
                aconnector = to_amdgpu_dm_connector(connector);
                if (aconnector->dc_link->type != dc_connection_mst_branch ||
                    aconnector->mst_root)
@@ -4820,9 +4827,11 @@ static int dm_init_microcode(struct amdgpu_device *adev)
                fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB;
                break;
        case IP_VERSION(3, 5, 0):
-       case IP_VERSION(3, 5, 1):
                fw_name_dmub = FIRMWARE_DCN_35_DMUB;
                break;
+       case IP_VERSION(3, 5, 1):
+               fw_name_dmub = FIRMWARE_DCN_351_DMUB;
+               break;
        default:
                /* ASIC doesn't support DMUB. */
                return 0;
@@ -5921,6 +5930,9 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
                &aconnector->base.probed_modes :
                &aconnector->base.modes;
 
+       if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+               return NULL;
+
        if (aconnector->freesync_vid_base.clock != 0)
                return &aconnector->freesync_vid_base;
 
@@ -6306,19 +6318,16 @@ create_stream_for_sink(struct drm_connector *connector,
        if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
                mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket);
 
-       if (stream->link->psr_settings.psr_feature_enabled || stream->link->replay_settings.replay_feature_enabled) {
+       if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+           stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+           stream->signal == SIGNAL_TYPE_EDP) {
                //
                // should decide stream support vsc sdp colorimetry capability
                // before building vsc info packet
                //
-               stream->use_vsc_sdp_for_colorimetry = false;
-               if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
-                       stream->use_vsc_sdp_for_colorimetry =
-                               aconnector->dc_sink->is_vsc_sdp_colorimetry_supported;
-               } else {
-                       if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
-                               stream->use_vsc_sdp_for_colorimetry = true;
-               }
+               stream->use_vsc_sdp_for_colorimetry = stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 &&
+                                                     stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED;
+
                if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
                        tf = TRANSFER_FUNC_GAMMA_22;
                mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
@@ -8762,10 +8771,10 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev,
                if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
                        continue;
 
+notify:
                if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
                        continue;
 
-notify:
                aconnector = to_amdgpu_dm_connector(connector);
 
                mutex_lock(&adev->dm.audio_lock);
index 16e72d623630caa74e22bcb6052b162c3ff8f6c6..08c494a7a21bad10929eb3f367a465349bc9ae5c 100644 (file)
@@ -76,10 +76,8 @@ static int amdgpu_dm_wb_encoder_atomic_check(struct drm_encoder *encoder,
 
 static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector)
 {
-       struct drm_device *dev = connector->dev;
-
-       return drm_add_modes_noedid(connector, dev->mode_config.max_width,
-                                   dev->mode_config.max_height);
+       /* Maximum resolution supported by DWB */
+       return drm_add_modes_noedid(connector, 3840, 2160);
 }
 
 static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector,
index 12f3e8aa46d8dfae21b5dd1e9f4ef167ee314f2d..6ad4f4efec5dd3e684428a0fb5b3c7b4a5234075 100644 (file)
@@ -99,20 +99,25 @@ static int dcn316_get_active_display_cnt_wa(
        return display_count;
 }
 
-static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
+static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+               bool safe_to_lower, bool disable)
 {
        struct dc *dc = clk_mgr_base->ctx->dc;
        int i;
 
        for (i = 0; i < dc->res_pool->pipe_count; ++i) {
-               struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+               struct pipe_ctx *pipe = safe_to_lower
+                       ? &context->res_ctx.pipe_ctx[i]
+                       : &dc->current_state->res_ctx.pipe_ctx[i];
 
                if (pipe->top_pipe || pipe->prev_odm_pipe)
                        continue;
-               if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
-                                    dc_is_virtual_signal(pipe->stream->signal))) {
+               if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) ||
+                                    !pipe->stream->link_enc)) {
                        if (disable) {
-                               pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+                               if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+                                       pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+
                                reset_sync_context_for_pipe(dc, context, i);
                        } else
                                pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
@@ -207,11 +212,11 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
        }
 
        if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
-               dcn316_disable_otg_wa(clk_mgr_base, context, true);
+               dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
 
                clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
                dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
-               dcn316_disable_otg_wa(clk_mgr_base, context, false);
+               dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
 
                update_dispclk = true;
        }
index 101fe96287cb480bf9ee142ceb998a84ab1027f8..d9c5692c86c21ac15b85af1ba0cae92f4274a255 100644 (file)
 #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK            0x00000007L
 #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK            0x000F0000L
 
+#define regCLK5_0_CLK5_spll_field_8                            0x464b
+#define regCLK5_0_CLK5_spll_field_8_BASE_IDX   0
+
+#define CLK5_0_CLK5_spll_field_8__spll_ssc_en__SHIFT   0xd
+#define CLK5_0_CLK5_spll_field_8__spll_ssc_en_MASK             0x00002000L
+
 #define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0
 
 #define REG(reg_name) \
@@ -411,6 +417,17 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs
 {
 }
 
+static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+{
+       struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+       struct dc_context *ctx = clk_mgr->base.ctx;
+       uint32_t ssc_enable;
+
+       REG_GET(CLK5_0_CLK5_spll_field_8, spll_ssc_en, &ssc_enable);
+
+       return ssc_enable == 1;
+}
+
 static void init_clk_states(struct clk_mgr *clk_mgr)
 {
        struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
@@ -428,7 +445,16 @@ static void init_clk_states(struct clk_mgr *clk_mgr)
 
 void dcn35_init_clocks(struct clk_mgr *clk_mgr)
 {
+       struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
        init_clk_states(clk_mgr);
+
+       // to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk
+       if (dcn35_is_spll_ssc_enabled(clk_mgr))
+               clk_mgr->dp_dto_source_clock_in_khz =
+                       dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz);
+       else
+               clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz;
+
 }
 static struct clk_bw_params dcn35_bw_params = {
        .vram_type = Ddr4MemType,
@@ -517,6 +543,28 @@ static DpmClocks_t_dcn35 dummy_clocks;
 
 static struct dcn35_watermarks dummy_wms = { 0 };
 
+static struct dcn35_ss_info_table ss_info_table = {
+       .ss_divider = 1000,
+       .ss_percentage = {0, 0, 375, 375, 375}
+};
+
+static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+{
+       struct dc_context *ctx = clk_mgr->base.ctx;
+       uint32_t clock_source;
+
+       REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
+       // If it's DFS mode, clock_source is 0.
+       if (dcn35_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) {
+               clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
+
+               if (clk_mgr->dprefclk_ss_percentage != 0) {
+                       clk_mgr->ss_on_dprefclk = true;
+                       clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
+               }
+       }
+}
+
 static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table)
 {
        int i, num_valid_sets;
@@ -1061,6 +1109,8 @@ void dcn35_clk_mgr_construct(
        dce_clock_read_ss_info(&clk_mgr->base);
        /*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/
 
+       dcn35_read_ss_info_from_lut(&clk_mgr->base);
+
        clk_mgr->base.base.bw_params = &dcn35_bw_params;
 
        if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
index 5cc7f8da209c599f7585e8f10e499ef2118f34ff..61986e5cb491967643b61832c8e35dd7a4818d41 100644 (file)
@@ -436,6 +436,15 @@ bool dc_state_add_plane(
                goto out;
        }
 
+       if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm)
+               /* ODM combine could prevent us from supporting more planes;
+                * we will reset the ODM slice count back to 1 when all planes
+                * have been removed, to maximize the number of planes
+                * supported when new planes are added.
+                */
+               resource_update_pipes_for_stream_with_slice_count(
+                               state, dc->current_state, dc->res_pool, stream, 1);
+
        otg_master_pipe = resource_get_otg_master_for_stream(
                        &state->res_ctx, stream);
        if (otg_master_pipe)
index 970644b695cd4f1d96f166cc1786987b460cdafd..b5e0289d2fe82aed149fab851ebc1b73213406ac 100644 (file)
@@ -976,7 +976,10 @@ static bool dcn31_program_pix_clk(
        struct bp_pixel_clock_parameters bp_pc_params = {0};
        enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
 
-       if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
+       // Apply the SSC (spread spectrum clocking) adjusted dpref clock for eDP only.
+       if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0
+               && pix_clk_params->signal_type == SIGNAL_TYPE_EDP
+               && encoding == DP_8b_10b_ENCODING)
                dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
        // For these signal types Driver to program DP_DTO without calling VBIOS Command table
        if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) {
@@ -1093,9 +1096,6 @@ static bool get_pixel_clk_frequency_100hz(
        unsigned int modulo_hz = 0;
        unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
 
-       if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
-               dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
-
        if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) {
                clock_hz = REG_READ(PHASE[inst]);
 
index e224a028d68accaf083a76a93eb7f0cdb940aedf..8a0460e86309775e83775093b04527f022e4a91c 100644 (file)
@@ -248,14 +248,12 @@ void dcn32_link_encoder_construct(
        enc10->base.hpd_source = init_data->hpd_source;
        enc10->base.connector = init_data->connector;
 
-       enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
-
-       enc10->base.features = *enc_features;
        if (enc10->base.connector.id == CONNECTOR_ID_USBC)
                enc10->base.features.flags.bits.DP_IS_USB_C = 1;
 
-       if (enc10->base.connector.id == CONNECTOR_ID_USBC)
-               enc10->base.features.flags.bits.DP_IS_USB_C = 1;
+       enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+
+       enc10->base.features = *enc_features;
 
        enc10->base.transmitter = init_data->transmitter;
 
index 81e349d5835bbed499f03ef6eb33e5210c83d64b..da94e5309fbaf0f8e06a4a1aad4ce431a8d9f2cc 100644 (file)
@@ -184,6 +184,8 @@ void dcn35_link_encoder_construct(
        enc10->base.hpd_source = init_data->hpd_source;
        enc10->base.connector = init_data->connector;
 
+       if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+               enc10->base.features.flags.bits.DP_IS_USB_C = 1;
 
        enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
 
@@ -238,8 +240,6 @@ void dcn35_link_encoder_construct(
        }
 
        enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
-       if (enc10->base.connector.id == CONNECTOR_ID_USBC)
-               enc10->base.features.flags.bits.DP_IS_USB_C = 1;
 
        if (bp_funcs->get_connector_speed_cap_info)
                result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios,
index f07a4c7e48bc23ed0d2351aef46ef38907ee265f..52eab8fccb7f16e9b1f02d541c030f5736b79ace 100644 (file)
@@ -267,9 +267,6 @@ static void optc32_setup_manual_trigger(struct timing_generator *optc)
                                OTG_V_TOTAL_MAX_SEL, 1,
                                OTG_FORCE_LOCK_ON_EVENT, 0,
                                OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */
-
-               // Setup manual flow control for EOF via TRIG_A
-               optc->funcs->setup_manual_trigger(optc);
        }
 }
 
index 246b211b1e85f74d362efac0e38384a2cafb59fc..65333141b1c1b05645f9ba374896dc3ee5a682e5 100644 (file)
@@ -735,7 +735,7 @@ static int smu_early_init(void *handle)
        smu->adev = adev;
        smu->pm_enabled = !!amdgpu_dpm;
        smu->is_apu = false;
-       smu->smu_baco.state = SMU_BACO_STATE_EXIT;
+       smu->smu_baco.state = SMU_BACO_STATE_NONE;
        smu->smu_baco.platform_support = false;
        smu->user_dpm_profile.fan_mode = -1;
 
@@ -1966,10 +1966,25 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
        return 0;
 }
 
+static int smu_reset_mp1_state(struct smu_context *smu)
+{
+       struct amdgpu_device *adev = smu->adev;
+       int ret = 0;
+
+       if ((!adev->in_runpm) && (!adev->in_suspend) &&
+               (!amdgpu_in_reset(adev)) && amdgpu_ip_version(adev, MP1_HWIP, 0) ==
+                                                                       IP_VERSION(13, 0, 10) &&
+               !amdgpu_device_has_display_hardware(adev))
+               ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
+
+       return ret;
+}
+
 static int smu_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct smu_context *smu = adev->powerplay.pp_handle;
+       int ret;
 
        if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
                return 0;
@@ -1987,7 +2002,15 @@ static int smu_hw_fini(void *handle)
 
        adev->pm.dpm_enabled = false;
 
-       return smu_smc_hw_cleanup(smu);
+       ret = smu_smc_hw_cleanup(smu);
+       if (ret)
+               return ret;
+
+       ret = smu_reset_mp1_state(smu);
+       if (ret)
+               return ret;
+
+       return 0;
 }
 
 static void smu_late_fini(void *handle)
index a870bdd49a4e3cd4741e1fe852c7a337117451fb..1fa81575788c545a39178275ab036cbe6dcdb0c9 100644 (file)
@@ -424,6 +424,7 @@ enum smu_reset_mode {
 enum smu_baco_state {
        SMU_BACO_STATE_ENTER = 0,
        SMU_BACO_STATE_EXIT,
+       SMU_BACO_STATE_NONE,
 };
 
 struct smu_baco_context {
index 5bb7a63c0602b79012017bb9cfc7705fb581b38d..97522c0852589d63a84009a518b0af4719021ba5 100644 (file)
@@ -144,6 +144,37 @@ typedef struct {
   uint32_t MaxGfxClk;
 } DpmClocks_t;
 
+//Freq in MHz
+//Voltage in millivolts with 2 fractional bits
+typedef struct {
+  uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+  uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+  uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+  uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+  uint32_t VClocks0[NUM_VCN_DPM_LEVELS];
+  uint32_t VClocks1[NUM_VCN_DPM_LEVELS];
+  uint32_t DClocks0[NUM_VCN_DPM_LEVELS];
+  uint32_t DClocks1[NUM_VCN_DPM_LEVELS];
+  uint32_t VPEClocks[NUM_VPE_DPM_LEVELS];
+  uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS];
+  uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS];
+  uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+  MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS];
+
+  uint8_t  NumDcfClkLevelsEnabled;
+  uint8_t  NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+  uint8_t  NumSocClkLevelsEnabled;
+  uint8_t  Vcn0ClkLevelsEnabled;     //Applies to both Vclk0 and Dclk0
+  uint8_t  Vcn1ClkLevelsEnabled;     //Applies to both Vclk1 and Dclk1
+  uint8_t  VpeClkLevelsEnabled;
+  uint8_t  NumMemPstatesEnabled;
+  uint8_t  NumFclkLevelsEnabled;
+  uint8_t  spare;
+
+  uint32_t MinGfxClk;
+  uint32_t MaxGfxClk;
+} DpmClocks_t_v14_0_1;
+
 typedef struct {
   uint16_t CoreFrequency[16];          //Target core frequency [MHz]
   uint16_t CorePower[16];              //CAC calculated core power [mW]
@@ -224,7 +255,7 @@ typedef enum {
 #define TABLE_CUSTOM_DPM            2 // Called by Driver
 #define TABLE_BIOS_GPIO_CONFIG      3 // Called by BIOS
 #define TABLE_DPMCLOCKS             4 // Called by Driver and VBIOS
-#define TABLE_SPARE0                5 // Unused
+#define TABLE_MOMENTARY_PM          5 // Called by Tools
 #define TABLE_MODERN_STDBY          6 // Called by Tools for Modern Standby Log
 #define TABLE_SMU_METRICS           7 // Called by Driver and SMF/PMF
 #define TABLE_COUNT                 8
index 356e0f57a426ffa051fb40611947d9b50355ad87..ddb62586008319ba7c95758e562ca4118ddb5f48 100644 (file)
@@ -42,7 +42,7 @@
 #define FEATURE_EDC_BIT                      7
 #define FEATURE_PLL_POWER_DOWN_BIT           8
 #define FEATURE_VDDOFF_BIT                   9
-#define FEATURE_VCN_DPM_BIT                 10
+#define FEATURE_VCN_DPM_BIT                 10   /* this is for both VCN0 and VCN1 */
 #define FEATURE_DS_MPM_BIT                  11
 #define FEATURE_FCLK_DPM_BIT                12
 #define FEATURE_SOCCLK_DPM_BIT              13
@@ -56,9 +56,9 @@
 #define FEATURE_DS_GFXCLK_BIT               21
 #define FEATURE_DS_SOCCLK_BIT               22
 #define FEATURE_DS_LCLK_BIT                 23
-#define FEATURE_LOW_POWER_DCNCLKS_BIT       24  // for all DISP clks
+#define FEATURE_LOW_POWER_DCNCLKS_BIT       24
 #define FEATURE_DS_SHUBCLK_BIT              25
-#define FEATURE_SPARE0_BIT                  26  //SPARE
+#define FEATURE_RESERVED0_BIT               26
 #define FEATURE_ZSTATES_BIT                 27
 #define FEATURE_IOMMUL2_PG_BIT              28
 #define FEATURE_DS_FCLK_BIT                 29
@@ -66,8 +66,8 @@
 #define FEATURE_DS_MP1CLK_BIT               31
 #define FEATURE_WHISPER_MODE_BIT            32
 #define FEATURE_SMU_LOW_POWER_BIT           33
-#define FEATURE_SMART_L3_RINSER_BIT         34
-#define FEATURE_SPARE1_BIT                  35  //SPARE
+#define FEATURE_RESERVED1_BIT               34  /* v14_0_0 SMART_L3_RINSER; v14_0_1 RESERVED1 */
+#define FEATURE_GFX_DEM_BIT                 35  /* v14_0_0 SPARE; v14_0_1 GFX_DEM */
 #define FEATURE_PSI_BIT                     36
 #define FEATURE_PROCHOT_BIT                 37
 #define FEATURE_CPUOFF_BIT                  38
 #define FEATURE_PERF_LIMIT_BIT              42
 #define FEATURE_CORE_DLDO_BIT               43
 #define FEATURE_DVO_BIT                     44
-#define FEATURE_DS_VCN_BIT                  45
+#define FEATURE_DS_VCN_BIT                  45  /* v14_0_1 this is for both VCN0 and VCN1 */
 #define FEATURE_CPPC_BIT                    46
 #define FEATURE_CPPC_PREFERRED_CORES        47
 #define FEATURE_DF_CSTATES_BIT              48
-#define FEATURE_SPARE2_BIT                  49  //SPARE
+#define FEATURE_FAST_PSTATE_CLDO_BIT        49  /* v14_0_0 SPARE */
 #define FEATURE_ATHUB_PG_BIT                50
 #define FEATURE_VDDOFF_ECO_BIT              51
 #define FEATURE_ZSTATES_ECO_BIT             52
@@ -93,8 +93,8 @@
 #define FEATURE_DS_IPUCLK_BIT               58
 #define FEATURE_DS_VPECLK_BIT               59
 #define FEATURE_VPE_DPM_BIT                 60
-#define FEATURE_SPARE_61                    61
-#define FEATURE_FP_DIDT                     62
+#define FEATURE_SMART_L3_RINSER_BIT         61  /* v14_0_0 SPARE*/
+#define FEATURE_PCC_BIT                     62  /* v14_0_0 FP_DIDT v14_0_1 PCC_BIT */
 #define NUM_FEATURES                        63
 
 // Firmware Header/Footer
@@ -151,6 +151,43 @@ typedef struct {
   // MP1_EXT_SCRATCH7 = RTOS Current Job
 } FwStatus_t;
 
+typedef struct {
+  // MP1_EXT_SCRATCH0
+  uint32_t DpmHandlerID         : 8;
+  uint32_t ActivityMonitorID    : 8;
+  uint32_t DpmTimerID           : 8;
+  uint32_t DpmHubID             : 4;
+  uint32_t DpmHubTask           : 4;
+  // MP1_EXT_SCRATCH1
+  uint32_t CclkSyncStatus       : 8;
+  uint32_t ZstateStatus         : 4;
+  uint32_t Cpu1VddOff           : 4;
+  uint32_t DstateFun            : 4;
+  uint32_t DstateDev            : 4;
+  uint32_t GfxOffStatus         : 2;
+  uint32_t Cpu0Off              : 2;
+  uint32_t Cpu1Off              : 2;
+  uint32_t Cpu0VddOff           : 2;
+  // MP1_EXT_SCRATCH2
+  uint32_t P2JobHandler         :32;
+  // MP1_EXT_SCRATCH3
+  uint32_t PostCode             :32;
+  // MP1_EXT_SCRATCH4
+  uint32_t MsgPortBusy          :15;
+  uint32_t RsmuPmiP1Pending     : 1;
+  uint32_t RsmuPmiP2PendingCnt  : 8;
+  uint32_t DfCstateExitPending  : 1;
+  uint32_t Pc6EntryPending      : 1;
+  uint32_t Pc6ExitPending       : 1;
+  uint32_t WarmResetPending     : 1;
+  uint32_t Mp0ClkPending        : 1;
+  uint32_t InWhisperMode        : 1;
+  uint32_t spare2               : 2;
+  // MP1_EXT_SCRATCH5
+  uint32_t IdleMask             :32;
+  // MP1_EXT_SCRATCH6 = RTOS threads' status
+  // MP1_EXT_SCRATCH7 = RTOS Current Job
+} FwStatus_t_v14_0_1;
 
 #pragma pack(pop)
 
index ca7ce4251482dbdf22b5ea39a5e6ca55e763896d..c4dc5881d8df0953054cf6972d88f212e0c6872c 100644 (file)
 #define PPSMC_MSG_SetHardMinSocclkByFreq        0x13 ///< Set hard min for SOC CLK
 #define PPSMC_MSG_SetSoftMinFclk                0x14 ///< Set hard min for FCLK
 #define PPSMC_MSG_SetSoftMinVcn0                0x15 ///< Set soft min for VCN0 clocks (VCLK0 and DCLK0)
-
 #define PPSMC_MSG_EnableGfxImu                  0x16 ///< Enable GFX IMU
-
-#define PPSMC_MSG_spare_0x17                    0x17
-#define PPSMC_MSG_spare_0x18                    0x18
+#define PPSMC_MSG_spare_0x17                    0x17 ///< Get GFX clock frequency
+#define PPSMC_MSG_spare_0x18                    0x18 ///< Get FCLK frequency
 #define PPSMC_MSG_AllowGfxOff                   0x19 ///< Inform PMFW of allowing GFXOFF entry
 #define PPSMC_MSG_DisallowGfxOff                0x1A ///< Inform PMFW of disallowing GFXOFF entry
 #define PPSMC_MSG_SetSoftMaxGfxClk              0x1B ///< Set soft max for GFX CLK
 #define PPSMC_MSG_SetHardMinGfxClk              0x1C ///< Set hard min for GFX CLK
-
 #define PPSMC_MSG_SetSoftMaxSocclkByFreq        0x1D ///< Set soft max for SOC CLK
 #define PPSMC_MSG_SetSoftMaxFclkByFreq          0x1E ///< Set soft max for FCLK
 #define PPSMC_MSG_SetSoftMaxVcn0                0x1F ///< Set soft max for VCN0 clocks (VCLK0 and DCLK0)
-#define PPSMC_MSG_spare_0x20                    0x20
+#define PPSMC_MSG_spare_0x20                    0x20 ///< Set power limit percentage
 #define PPSMC_MSG_PowerDownJpeg0                0x21 ///< Power down Jpeg of VCN0
 #define PPSMC_MSG_PowerUpJpeg0                  0x22 ///< Power up Jpeg of VCN0; VCN0 is power gated by default
-
 #define PPSMC_MSG_SetHardMinFclkByFreq          0x23 ///< Set hard min for FCLK
 #define PPSMC_MSG_SetSoftMinSocclkByFreq        0x24 ///< Set soft min for SOC CLK
 #define PPSMC_MSG_AllowZstates                  0x25 ///< Inform PMFM of allowing Zstate entry, i.e. no Miracast activity
@@ -99,8 +95,8 @@
 #define PPSMC_MSG_PowerUpIspByTile              0x2A ///< This message is used to power up ISP tiles and enable the ISP DPM
 #define PPSMC_MSG_SetHardMinIspiclkByFreq       0x2B ///< Set HardMin by frequency for ISPICLK
 #define PPSMC_MSG_SetHardMinIspxclkByFreq       0x2C ///< Set HardMin by frequency for ISPXCLK
-#define PPSMC_MSG_PowerDownUmsch                0x2D ///< Power down VCN.UMSCH (aka VSCH) scheduler
-#define PPSMC_MSG_PowerUpUmsch                  0x2E ///< Power up VCN.UMSCH (aka VSCH) scheduler
+#define PPSMC_MSG_PowerDownUmsch                0x2D ///< Power down VCN0.UMSCH (aka VSCH) scheduler
+#define PPSMC_MSG_PowerUpUmsch                  0x2E ///< Power up VCN0.UMSCH (aka VSCH) scheduler
 #define PPSMC_Message_IspStutterOn_MmhubPgDis   0x2F ///< ISP StutterOn mmHub PgDis
 #define PPSMC_Message_IspStutterOff_MmhubPgEn   0x30 ///< ISP StufferOff mmHub PgEn
 #define PPSMC_MSG_PowerUpVpe                    0x31 ///< Power up VPE
 #define PPSMC_MSG_DisableLSdma                  0x35 ///< Disable LSDMA
 #define PPSMC_MSG_SetSoftMaxVpe                 0x36 ///<
 #define PPSMC_MSG_SetSoftMinVpe                 0x37 ///<
-#define PPSMC_Message_Count                     0x38 ///< Total number of PPSMC messages
+#define PPSMC_MSG_AllocMALLCache                0x38 ///< Allocating MALL Cache
+#define PPSMC_MSG_ReleaseMALLCache              0x39 ///< Releasing MALL Cache
+#define PPSMC_Message_Count                     0x3A ///< Total number of PPSMC messages
 /** @}*/
 
 /**
index 3f7463c1c1a91948588ae8ece2fd6c4cbffb1406..4af1985ae44668edf74b40c4f26dbd1bcd83c376 100644 (file)
@@ -27,6 +27,7 @@
 
 #define SMU14_DRIVER_IF_VERSION_INV 0xFFFFFFFF
 #define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x7
+#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_1 0x6
 #define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x1
 
 #define FEATURE_MASK(feature) (1ULL << feature)
index 9c03296f92cdd41c868406dfd861bf56a77c2e81..67117ced7c6ae65405fb3a5338743d31270e8cd3 100644 (file)
@@ -2751,7 +2751,13 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
 
        switch (mp1_state) {
        case PP_MP1_STATE_UNLOAD:
-               ret = smu_cmn_set_mp1_state(smu, mp1_state);
+               ret = smu_cmn_send_smc_msg_with_param(smu,
+                                                                                         SMU_MSG_PrepareMp1ForUnload,
+                                                                                         0x55, NULL);
+
+               if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
+                       ret = smu_v13_0_disable_pmfw_state(smu);
+
                break;
        default:
                /* Ignore others */
index bb98156b2fa1d5fff3d71bcea59b2b63f9265b9e..949131bd1ecb215c960b7aabb9ad690da715d90c 100644 (file)
@@ -226,8 +226,18 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en)
        struct amdgpu_device *adev = smu->adev;
        int ret = 0;
 
-       if (!en && !adev->in_s0ix)
+       if (!en && !adev->in_s0ix) {
+               /* Adds a GFX reset as workaround just before sending the
+                * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering
+                * an invalid state.
+                */
+               ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
+                                                     SMU_RESET_MODE_2, NULL);
+               if (ret)
+                       return ret;
+
                ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
+       }
 
        return ret;
 }
index 9e39f99154f94df84495dbce069e2651f2b7f104..07a65e005785d6d0fceddd2564d63e84d08e755e 100644 (file)
@@ -234,7 +234,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
                smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
                break;
        case IP_VERSION(14, 0, 1):
-               smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
+               smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_1;
                break;
 
        default:
index d6de6d97286c6990e24c79b318f533168c967bd0..63399c00cc28ffaa88725068496f35625b9807cc 100644 (file)
@@ -161,7 +161,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu)
 
        SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t),
                PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
-       SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, sizeof(DpmClocks_t),
+       SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)),
                PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
        SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t),
                PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
@@ -171,7 +171,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu)
                goto err0_out;
        smu_table->metrics_time = 0;
 
-       smu_table->clocks_table = kzalloc(sizeof(DpmClocks_t), GFP_KERNEL);
+       smu_table->clocks_table = kzalloc(max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)), GFP_KERNEL);
        if (!smu_table->clocks_table)
                goto err1_out;
 
@@ -593,6 +593,60 @@ static int smu_v14_0_0_mode2_reset(struct smu_context *smu)
        return ret;
 }
 
+static int smu_v14_0_1_get_dpm_freq_by_index(struct smu_context *smu,
+                                               enum smu_clk_type clk_type,
+                                               uint32_t dpm_level,
+                                               uint32_t *freq)
+{
+       DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+       if (!clk_table || clk_type >= SMU_CLK_COUNT)
+               return -EINVAL;
+
+       switch (clk_type) {
+       case SMU_SOCCLK:
+               if (dpm_level >= clk_table->NumSocClkLevelsEnabled)
+                       return -EINVAL;
+               *freq = clk_table->SocClocks[dpm_level];
+               break;
+       case SMU_VCLK:
+               if (dpm_level >= clk_table->Vcn0ClkLevelsEnabled)
+                       return -EINVAL;
+               *freq = clk_table->VClocks0[dpm_level];
+               break;
+       case SMU_DCLK:
+               if (dpm_level >= clk_table->Vcn0ClkLevelsEnabled)
+                       return -EINVAL;
+               *freq = clk_table->DClocks0[dpm_level];
+               break;
+       case SMU_VCLK1:
+               if (dpm_level >= clk_table->Vcn1ClkLevelsEnabled)
+                       return -EINVAL;
+               *freq = clk_table->VClocks1[dpm_level];
+               break;
+       case SMU_DCLK1:
+               if (dpm_level >= clk_table->Vcn1ClkLevelsEnabled)
+                       return -EINVAL;
+               *freq = clk_table->DClocks1[dpm_level];
+               break;
+       case SMU_UCLK:
+       case SMU_MCLK:
+               if (dpm_level >= clk_table->NumMemPstatesEnabled)
+                       return -EINVAL;
+               *freq = clk_table->MemPstateTable[dpm_level].MemClk;
+               break;
+       case SMU_FCLK:
+               if (dpm_level >= clk_table->NumFclkLevelsEnabled)
+                       return -EINVAL;
+               *freq = clk_table->FclkClocks_Freq[dpm_level];
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int smu_v14_0_0_get_dpm_freq_by_index(struct smu_context *smu,
                                                enum smu_clk_type clk_type,
                                                uint32_t dpm_level,
@@ -637,6 +691,19 @@ static int smu_v14_0_0_get_dpm_freq_by_index(struct smu_context *smu,
        return 0;
 }
 
+static int smu_v14_0_common_get_dpm_freq_by_index(struct smu_context *smu,
+                                               enum smu_clk_type clk_type,
+                                               uint32_t dpm_level,
+                                               uint32_t *freq)
+{
+       if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+               smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
+       else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+               smu_v14_0_1_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
+
+       return 0;
+}
+
 static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
                                                enum smu_clk_type clk_type)
 {
@@ -657,6 +724,8 @@ static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
                break;
        case SMU_VCLK:
        case SMU_DCLK:
+       case SMU_VCLK1:
+       case SMU_DCLK1:
                feature_id = SMU_FEATURE_VCN_DPM_BIT;
                break;
        default:
@@ -666,6 +735,126 @@ static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
        return smu_cmn_feature_is_enabled(smu, feature_id);
 }
 
+static int smu_v14_0_1_get_dpm_ultimate_freq(struct smu_context *smu,
+                                                       enum smu_clk_type clk_type,
+                                                       uint32_t *min,
+                                                       uint32_t *max)
+{
+       DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+       uint32_t clock_limit;
+       uint32_t max_dpm_level, min_dpm_level;
+       int ret = 0;
+
+       if (!smu_v14_0_0_clk_dpm_is_enabled(smu, clk_type)) {
+               switch (clk_type) {
+               case SMU_MCLK:
+               case SMU_UCLK:
+                       clock_limit = smu->smu_table.boot_values.uclk;
+                       break;
+               case SMU_FCLK:
+                       clock_limit = smu->smu_table.boot_values.fclk;
+                       break;
+               case SMU_GFXCLK:
+               case SMU_SCLK:
+                       clock_limit = smu->smu_table.boot_values.gfxclk;
+                       break;
+               case SMU_SOCCLK:
+                       clock_limit = smu->smu_table.boot_values.socclk;
+                       break;
+               case SMU_VCLK:
+               case SMU_VCLK1:
+                       clock_limit = smu->smu_table.boot_values.vclk;
+                       break;
+               case SMU_DCLK:
+               case SMU_DCLK1:
+                       clock_limit = smu->smu_table.boot_values.dclk;
+                       break;
+               default:
+                       clock_limit = 0;
+                       break;
+               }
+
+               /* clock in Mhz unit */
+               if (min)
+                       *min = clock_limit / 100;
+               if (max)
+                       *max = clock_limit / 100;
+
+               return 0;
+       }
+
+       if (max) {
+               switch (clk_type) {
+               case SMU_GFXCLK:
+               case SMU_SCLK:
+                       *max = clk_table->MaxGfxClk;
+                       break;
+               case SMU_MCLK:
+               case SMU_UCLK:
+               case SMU_FCLK:
+                       max_dpm_level = 0;
+                       break;
+               case SMU_SOCCLK:
+                       max_dpm_level = clk_table->NumSocClkLevelsEnabled - 1;
+                       break;
+               case SMU_VCLK:
+               case SMU_DCLK:
+                       max_dpm_level = clk_table->Vcn0ClkLevelsEnabled - 1;
+                       break;
+               case SMU_VCLK1:
+               case SMU_DCLK1:
+                       max_dpm_level = clk_table->Vcn1ClkLevelsEnabled - 1;
+                       break;
+               default:
+                       ret = -EINVAL;
+                       goto failed;
+               }
+
+               if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
+                       ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
+                       if (ret)
+                               goto failed;
+               }
+       }
+
+       if (min) {
+               switch (clk_type) {
+               case SMU_GFXCLK:
+               case SMU_SCLK:
+                       *min = clk_table->MinGfxClk;
+                       break;
+               case SMU_MCLK:
+               case SMU_UCLK:
+                       min_dpm_level = clk_table->NumMemPstatesEnabled - 1;
+                       break;
+               case SMU_FCLK:
+                       min_dpm_level = clk_table->NumFclkLevelsEnabled - 1;
+                       break;
+               case SMU_SOCCLK:
+                       min_dpm_level = 0;
+                       break;
+               case SMU_VCLK:
+               case SMU_DCLK:
+               case SMU_VCLK1:
+               case SMU_DCLK1:
+                       min_dpm_level = 0;
+                       break;
+               default:
+                       ret = -EINVAL;
+                       goto failed;
+               }
+
+               if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
+                       ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
+                       if (ret)
+                               goto failed;
+               }
+       }
+
+failed:
+       return ret;
+}
+
 static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
                                                        enum smu_clk_type clk_type,
                                                        uint32_t *min,
@@ -736,7 +925,7 @@ static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
                }
 
                if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
-                       ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
+                       ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
                        if (ret)
                                goto failed;
                }
@@ -768,7 +957,7 @@ static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
                }
 
                if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
-                       ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
+                       ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
                        if (ret)
                                goto failed;
                }
@@ -778,6 +967,19 @@ failed:
        return ret;
 }
 
+static int smu_v14_0_common_get_dpm_ultimate_freq(struct smu_context *smu,
+                                                       enum smu_clk_type clk_type,
+                                                       uint32_t *min,
+                                                       uint32_t *max)
+{
+       if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+               smu_v14_0_0_get_dpm_ultimate_freq(smu, clk_type, min, max);
+       else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+               smu_v14_0_1_get_dpm_ultimate_freq(smu, clk_type, min, max);
+
+       return 0;
+}
+
 static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu,
                                            enum smu_clk_type clk_type,
                                            uint32_t *value)
@@ -811,6 +1013,37 @@ static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu,
        return smu_v14_0_0_get_smu_metrics_data(smu, member_type, value);
 }
 
+static int smu_v14_0_1_get_dpm_level_count(struct smu_context *smu,
+                                          enum smu_clk_type clk_type,
+                                          uint32_t *count)
+{
+       DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+       switch (clk_type) {
+       case SMU_SOCCLK:
+               *count = clk_table->NumSocClkLevelsEnabled;
+               break;
+       case SMU_VCLK:
+       case SMU_DCLK:
+               *count = clk_table->Vcn0ClkLevelsEnabled;
+               break;
+       case SMU_VCLK1:
+       case SMU_DCLK1:
+               *count = clk_table->Vcn1ClkLevelsEnabled;
+               break;
+       case SMU_MCLK:
+               *count = clk_table->NumMemPstatesEnabled;
+               break;
+       case SMU_FCLK:
+               *count = clk_table->NumFclkLevelsEnabled;
+               break;
+       default:
+               break;
+       }
+
+       return 0;
+}
+
 static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu,
                                           enum smu_clk_type clk_type,
                                           uint32_t *count)
@@ -840,6 +1073,18 @@ static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu,
        return 0;
 }
 
+static int smu_v14_0_common_get_dpm_level_count(struct smu_context *smu,
+                                          enum smu_clk_type clk_type,
+                                          uint32_t *count)
+{
+       if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+               smu_v14_0_0_get_dpm_level_count(smu, clk_type, count);
+       else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+               smu_v14_0_1_get_dpm_level_count(smu, clk_type, count);
+
+       return 0;
+}
+
 static int smu_v14_0_0_print_clk_levels(struct smu_context *smu,
                                        enum smu_clk_type clk_type, char *buf)
 {
@@ -866,18 +1111,20 @@ static int smu_v14_0_0_print_clk_levels(struct smu_context *smu,
        case SMU_SOCCLK:
        case SMU_VCLK:
        case SMU_DCLK:
+       case SMU_VCLK1:
+       case SMU_DCLK1:
        case SMU_MCLK:
        case SMU_FCLK:
                ret = smu_v14_0_0_get_current_clk_freq(smu, clk_type, &cur_value);
                if (ret)
                        break;
 
-               ret = smu_v14_0_0_get_dpm_level_count(smu, clk_type, &count);
+               ret = smu_v14_0_common_get_dpm_level_count(smu, clk_type, &count);
                if (ret)
                        break;
 
                for (i = 0; i < count; i++) {
-                       ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, i, &value);
+                       ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, i, &value);
                        if (ret)
                                break;
 
@@ -940,8 +1187,13 @@ static int smu_v14_0_0_set_soft_freq_limited_range(struct smu_context *smu,
                break;
        case SMU_VCLK:
        case SMU_DCLK:
-               msg_set_min = SMU_MSG_SetHardMinVcn;
-               msg_set_max = SMU_MSG_SetSoftMaxVcn;
+               msg_set_min = SMU_MSG_SetHardMinVcn0;
+               msg_set_max = SMU_MSG_SetSoftMaxVcn0;
+               break;
+       case SMU_VCLK1:
+       case SMU_DCLK1:
+               msg_set_min = SMU_MSG_SetHardMinVcn1;
+               msg_set_max = SMU_MSG_SetSoftMaxVcn1;
                break;
        default:
                return -EINVAL;
@@ -971,11 +1223,11 @@ static int smu_v14_0_0_force_clk_levels(struct smu_context *smu,
        case SMU_FCLK:
        case SMU_VCLK:
        case SMU_DCLK:
-               ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
+               ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
                if (ret)
                        break;
 
-               ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq);
+               ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq);
                if (ret)
                        break;
 
@@ -1000,25 +1252,25 @@ static int smu_v14_0_0_set_performance_level(struct smu_context *smu,
 
        switch (level) {
        case AMD_DPM_FORCED_LEVEL_HIGH:
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max);
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max);
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max);
                sclk_min = sclk_max;
                fclk_min = fclk_max;
                socclk_min = socclk_max;
                break;
        case AMD_DPM_FORCED_LEVEL_LOW:
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL);
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL);
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL);
                sclk_max = sclk_min;
                fclk_max = fclk_min;
                socclk_max = socclk_min;
                break;
        case AMD_DPM_FORCED_LEVEL_AUTO:
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max);
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max);
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max);
                break;
        case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
        case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
@@ -1067,6 +1319,18 @@ static int smu_v14_0_0_set_performance_level(struct smu_context *smu,
        return ret;
 }
 
+static int smu_v14_0_1_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
+{
+       DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+       smu->gfx_default_hard_min_freq = clk_table->MinGfxClk;
+       smu->gfx_default_soft_max_freq = clk_table->MaxGfxClk;
+       smu->gfx_actual_hard_min_freq = 0;
+       smu->gfx_actual_soft_max_freq = 0;
+
+       return 0;
+}
+
 static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
 {
        DpmClocks_t *clk_table = smu->smu_table.clocks_table;
@@ -1079,6 +1343,16 @@ static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *sm
        return 0;
 }
 
+static int smu_v14_0_common_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
+{
+       if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+               smu_v14_0_0_set_fine_grain_gfx_freq_parameters(smu);
+       else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+               smu_v14_0_1_set_fine_grain_gfx_freq_parameters(smu);
+
+       return 0;
+}
+
 static int smu_v14_0_0_set_vpe_enable(struct smu_context *smu,
                                      bool enable)
 {
@@ -1095,6 +1369,25 @@ static int smu_v14_0_0_set_umsch_mm_enable(struct smu_context *smu,
                                               0, NULL);
 }
 
+static int smu_14_0_1_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+       DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+       uint8_t idx;
+
+       /* Only the Clock information of SOC and VPE is copied to provide VPE DPM settings for use. */
+       for (idx = 0; idx < NUM_SOCCLK_DPM_LEVELS; idx++) {
+               clock_table->SocClocks[idx].Freq = (idx < clk_table->NumSocClkLevelsEnabled) ? clk_table->SocClocks[idx]:0;
+               clock_table->SocClocks[idx].Vol = 0;
+       }
+
+       for (idx = 0; idx < NUM_VPE_DPM_LEVELS; idx++) {
+               clock_table->VPEClocks[idx].Freq = (idx < clk_table->VpeClkLevelsEnabled) ? clk_table->VPEClocks[idx]:0;
+               clock_table->VPEClocks[idx].Vol = 0;
+       }
+
+       return 0;
+}
+
 static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
 {
        DpmClocks_t *clk_table = smu->smu_table.clocks_table;
@@ -1114,6 +1407,16 @@ static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *
        return 0;
 }
 
+static int smu_v14_0_common_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+       if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+               smu_14_0_0_get_dpm_table(smu, clock_table);
+       else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+               smu_14_0_1_get_dpm_table(smu, clock_table);
+
+       return 0;
+}
+
 static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
        .check_fw_status = smu_v14_0_check_fw_status,
        .check_fw_version = smu_v14_0_check_fw_version,
@@ -1135,16 +1438,16 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
        .set_driver_table_location = smu_v14_0_set_driver_table_location,
        .gfx_off_control = smu_v14_0_gfx_off_control,
        .mode2_reset = smu_v14_0_0_mode2_reset,
-       .get_dpm_ultimate_freq = smu_v14_0_0_get_dpm_ultimate_freq,
+       .get_dpm_ultimate_freq = smu_v14_0_common_get_dpm_ultimate_freq,
        .od_edit_dpm_table = smu_v14_0_od_edit_dpm_table,
        .print_clk_levels = smu_v14_0_0_print_clk_levels,
        .force_clk_levels = smu_v14_0_0_force_clk_levels,
        .set_performance_level = smu_v14_0_0_set_performance_level,
-       .set_fine_grain_gfx_freq_parameters = smu_v14_0_0_set_fine_grain_gfx_freq_parameters,
+       .set_fine_grain_gfx_freq_parameters = smu_v14_0_common_set_fine_grain_gfx_freq_parameters,
        .set_gfx_power_up_by_imu = smu_v14_0_set_gfx_power_up_by_imu,
        .dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable,
        .dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable,
-       .get_dpm_clock_table = smu_14_0_0_get_dpm_table,
+       .get_dpm_clock_table = smu_v14_0_common_get_dpm_table,
 };
 
 static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu)
index ebb6d8ebd44eb6f70480b9655e6f253e41c77c04..1e9259416980ec49cce1b7fc080f562f002e29c5 100644 (file)
@@ -180,6 +180,7 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on)
 {
        struct ast_device *ast = to_ast_device(dev);
        u8 video_on_off = on;
+       u32 i = 0;
 
        // Video On/Off
        ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xE3, (u8) ~AST_DP_VIDEO_ENABLE, on);
@@ -192,6 +193,8 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on)
                                                ASTDP_MIRROR_VIDEO_ENABLE) != video_on_off) {
                        // wait 1 ms
                        mdelay(1);
+                       if (++i > 200)
+                               break;
                }
        }
 }
index 871e4e2129d6daac8dadcb3262227451c59296c8..0683a129b36285cc96c25d57d3115cb111fc2003 100644 (file)
@@ -777,6 +777,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
        unsigned int total_modes_count = 0;
        struct drm_client_offset *offsets;
        unsigned int connector_count = 0;
+       /* points to modes protected by mode_config.mutex */
        struct drm_display_mode **modes;
        struct drm_crtc **crtcs;
        int i, ret = 0;
@@ -845,7 +846,6 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
                drm_client_pick_crtcs(client, connectors, connector_count,
                                      crtcs, modes, 0, width, height);
        }
-       mutex_unlock(&dev->mode_config.mutex);
 
        drm_client_modeset_release(client);
 
@@ -875,6 +875,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
                        modeset->y = offset->y;
                }
        }
+       mutex_unlock(&dev->mode_config.mutex);
 
        mutex_unlock(&client->modeset_mutex);
 out:
index ed89b86ea625aaa408064916b982ffa92f9ef4b5..f672bfd70d455156aed1a17c2fb4929c7771962f 100644 (file)
@@ -2534,7 +2534,8 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state)
                intel_atomic_get_old_cdclk_state(state);
        const struct intel_cdclk_state *new_cdclk_state =
                intel_atomic_get_new_cdclk_state(state);
-       enum pipe pipe = new_cdclk_state->pipe;
+       struct intel_cdclk_config cdclk_config;
+       enum pipe pipe;
 
        if (!intel_cdclk_changed(&old_cdclk_state->actual,
                                 &new_cdclk_state->actual))
@@ -2543,12 +2544,25 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state)
        if (IS_DG2(i915))
                intel_cdclk_pcode_pre_notify(state);
 
-       if (pipe == INVALID_PIPE ||
-           old_cdclk_state->actual.cdclk <= new_cdclk_state->actual.cdclk) {
-               drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+       if (new_cdclk_state->disable_pipes) {
+               cdclk_config = new_cdclk_state->actual;
+               pipe = INVALID_PIPE;
+       } else {
+               if (new_cdclk_state->actual.cdclk >= old_cdclk_state->actual.cdclk) {
+                       cdclk_config = new_cdclk_state->actual;
+                       pipe = new_cdclk_state->pipe;
+               } else {
+                       cdclk_config = old_cdclk_state->actual;
+                       pipe = INVALID_PIPE;
+               }
 
-               intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
+               cdclk_config.voltage_level = max(new_cdclk_state->actual.voltage_level,
+                                                old_cdclk_state->actual.voltage_level);
        }
+
+       drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+
+       intel_set_cdclk(i915, &cdclk_config, pipe);
 }
 
 /**
@@ -2566,7 +2580,7 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state)
                intel_atomic_get_old_cdclk_state(state);
        const struct intel_cdclk_state *new_cdclk_state =
                intel_atomic_get_new_cdclk_state(state);
-       enum pipe pipe = new_cdclk_state->pipe;
+       enum pipe pipe;
 
        if (!intel_cdclk_changed(&old_cdclk_state->actual,
                                 &new_cdclk_state->actual))
@@ -2575,12 +2589,15 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state)
        if (IS_DG2(i915))
                intel_cdclk_pcode_post_notify(state);
 
-       if (pipe != INVALID_PIPE &&
-           old_cdclk_state->actual.cdclk > new_cdclk_state->actual.cdclk) {
-               drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+       if (!new_cdclk_state->disable_pipes &&
+           new_cdclk_state->actual.cdclk < old_cdclk_state->actual.cdclk)
+               pipe = new_cdclk_state->pipe;
+       else
+               pipe = INVALID_PIPE;
+
+       drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
 
-               intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
-       }
+       intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
 }
 
 static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state)
@@ -3058,6 +3075,7 @@ static struct intel_global_state *intel_cdclk_duplicate_state(struct intel_globa
                return NULL;
 
        cdclk_state->pipe = INVALID_PIPE;
+       cdclk_state->disable_pipes = false;
 
        return &cdclk_state->base;
 }
@@ -3236,6 +3254,8 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state)
                if (ret)
                        return ret;
 
+               new_cdclk_state->disable_pipes = true;
+
                drm_dbg_kms(&dev_priv->drm,
                            "Modeset required for cdclk change\n");
        }
index 48fd7d39e0cd9c4f6d57970f35531b91aa6ab055..71bc032bfef16efd359757373f256dfc88bd86fc 100644 (file)
@@ -51,6 +51,9 @@ struct intel_cdclk_state {
 
        /* bitmask of active pipes */
        u8 active_pipes;
+
+       /* update cdclk with pipes disabled */
+       bool disable_pipes;
 };
 
 int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state);
index c587a8efeafcf5e561429d925c2893208908e03f..c17462b4c2ac1930a085eff2256f8642b9ce8830 100644 (file)
@@ -4256,7 +4256,12 @@ static bool m_n_equal(const struct intel_link_m_n *m_n_1,
 static bool crtcs_port_sync_compatible(const struct intel_crtc_state *crtc_state1,
                                       const struct intel_crtc_state *crtc_state2)
 {
+       /*
+        * FIXME the modeset sequence is currently wrong and
+        * can't deal with bigjoiner + port sync at the same time.
+        */
        return crtc_state1->hw.active && crtc_state2->hw.active &&
+               !crtc_state1->bigjoiner_pipes && !crtc_state2->bigjoiner_pipes &&
                crtc_state1->output_types == crtc_state2->output_types &&
                crtc_state1->output_format == crtc_state2->output_format &&
                crtc_state1->lane_count == crtc_state2->lane_count &&
index abd62bebc46d0e58d5bc78d8f4500ddcbc6098f1..e583515f9b25a33da4825d10cf42a9f73fa17990 100644 (file)
@@ -2725,7 +2725,11 @@ intel_dp_drrs_compute_config(struct intel_connector *connector,
                intel_panel_downclock_mode(connector, &pipe_config->hw.adjusted_mode);
        int pixel_clock;
 
-       if (has_seamless_m_n(connector))
+       /*
+        * FIXME all joined pipes share the same transcoder.
+        * Need to account for that when updating M/N live.
+        */
+       if (has_seamless_m_n(connector) && !pipe_config->bigjoiner_pipes)
                pipe_config->update_m_n = true;
 
        if (!can_enable_drrs(connector, pipe_config, downclock_mode)) {
index b98a87883fefb016be68ceb72a408258868b55ec..9db43bd81ce2fabe51963e129f135d3e8dd71fa7 100644 (file)
@@ -691,12 +691,15 @@ int intel_dp_hdcp_get_remote_capability(struct intel_connector *connector,
        u8 bcaps;
        int ret;
 
+       *hdcp_capable = false;
+       *hdcp2_capable = false;
        if (!intel_encoder_is_mst(connector->encoder))
                return -EINVAL;
 
        ret =  _intel_dp_hdcp2_get_capability(aux, hdcp2_capable);
        if (ret)
-               return ret;
+               drm_dbg_kms(&i915->drm,
+                           "HDCP2 DPCD capability read failed err: %d\n", ret);
 
        ret = intel_dp_hdcp_read_bcaps(aux, i915, &bcaps);
        if (ret)
index b6e539f1342c29ad97f5f46de8b51d9a358375bb..aabd018bd73743ff354353506b2ce007268a88c5 100644 (file)
@@ -1422,6 +1422,17 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
                return;
        }
 
+       /*
+        * FIXME figure out what is wrong with PSR+bigjoiner and
+        * fix it. Presumably something related to the fact that
+        * PSR is a transcoder level feature.
+        */
+       if (crtc_state->bigjoiner_pipes) {
+               drm_dbg_kms(&dev_priv->drm,
+                           "PSR disabled due to bigjoiner\n");
+               return;
+       }
+
        if (CAN_PANEL_REPLAY(intel_dp))
                crtc_state->has_panel_replay = true;
        else
index eb5bd0743902065d9b4bcac060d1fd340d448069..f542ee1db1d97047eedfffff76c04cbbaf3435ea 100644 (file)
@@ -117,6 +117,13 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state,
        const struct drm_display_info *info = &connector->base.display_info;
        int vmin, vmax;
 
+       /*
+        * FIXME all joined pipes share the same transcoder.
+        * Need to account for that during VRR toggle/push/etc.
+        */
+       if (crtc_state->bigjoiner_pipes)
+               return;
+
        if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
                return;
 
index f3dcae4b9d455ed37d3cc3fd1635760cd9e264af..0f83c6d4376ffba646279586479f1710161d6633 100644 (file)
@@ -1403,14 +1403,17 @@ static void guc_cancel_busyness_worker(struct intel_guc *guc)
         * Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
         * every possible call stack is unfeasible. It would be too intrusive to many
         * areas that really don't care about the GuC backend. However, there is the
-        * 'reset_in_progress' flag available, so just use that.
+        * I915_RESET_BACKOFF flag and the gt->reset.mutex can be tested for is_locked.
+        * So just use those. Note that testing both is required due to the hideously
+        * complex nature of the i915 driver's reset code paths.
         *
         * And note that in the case of a reset occurring during driver unload
-        * (wedge_on_fini), skipping the cancel in _prepare (when the reset flag is set
-        * is fine because there is another cancel in _finish (when the reset flag is
-        * not).
+        * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini (when the
+        * reset flag/mutex are set) is fine because there is another explicit cancel in
+        * intel_guc_submission_fini (when the reset flag/mutex are not).
         */
-       if (guc_to_gt(guc)->uc.reset_in_progress)
+       if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) ||
+           test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags))
                cancel_delayed_work(&guc->timestamp.work);
        else
                cancel_delayed_work_sync(&guc->timestamp.work);
@@ -1424,8 +1427,6 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
        unsigned long flags;
        ktime_t unused;
 
-       guc_cancel_busyness_worker(guc);
-
        spin_lock_irqsave(&guc->timestamp.lock, flags);
 
        guc_update_pm_timestamp(guc, &unused);
@@ -2004,13 +2005,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
 
 void intel_guc_submission_reset_finish(struct intel_guc *guc)
 {
-       /*
-        * Ensure the busyness worker gets cancelled even on a fatal wedge.
-        * Note that reset_prepare is not allowed to because it confuses lockdep.
-        */
-       if (guc_submission_initialized(guc))
-               guc_cancel_busyness_worker(guc);
-
        /* Reset called during driver load or during wedge? */
        if (unlikely(!guc_submission_initialized(guc) ||
                     !intel_guc_is_fw_running(guc) ||
@@ -2136,6 +2130,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
        if (!guc->submission_initialized)
                return;
 
+       guc_fini_engine_stats(guc);
        guc_flush_destroyed_contexts(guc);
        guc_lrc_desc_pool_destroy_v69(guc);
        i915_sched_engine_put(guc->sched_engine);
index 6dfe5d9456c69e06987be23367c243bb1f8f908e..399bc319180b042cdcf78e2415b16ef52d980c61 100644 (file)
@@ -637,6 +637,10 @@ void intel_uc_reset_finish(struct intel_uc *uc)
 {
        struct intel_guc *guc = &uc->guc;
 
+       /*
+        * NB: The wedge code path results in prepare -> prepare -> finish -> finish.
+        * So this function is sometimes called with the in-progress flag not set.
+        */
        uc->reset_in_progress = false;
 
        /* Firmware expected to be running when this function is called */
index 0674aca0f8a3f593bad4dbe929be4260f5a6219a..cf0b1de1c07124d2fe45d2f7f220f5cebed71227 100644 (file)
@@ -1377,6 +1377,10 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
        if (adreno_is_a618(gpu))
                gpu->ubwc_config.highest_bank_bit = 14;
 
+       if (adreno_is_a619(gpu))
+               /* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
+               gpu->ubwc_config.highest_bank_bit = 13;
+
        if (adreno_is_a619_holi(gpu))
                gpu->ubwc_config.highest_bank_bit = 13;
 
index 1f5245fc2cdc6ca6ffd109fa6844eda84f79cd32..a847a0f7a73c9f61fde92fcf75f36a4f37dadf07 100644 (file)
@@ -852,7 +852,7 @@ static void a6xx_get_shader_block(struct msm_gpu *gpu,
                        (block->type << 8) | i);
 
                in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
-                       block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
+                       block->size, out);
 
                out += block->size * sizeof(u32);
        }
index 9a9f7092c526a630c8cb8099e7ae0921d6b1d3a1..a3e60ac70689e7f8af8813d978626cd7d4c9fb3e 100644 (file)
@@ -324,6 +324,7 @@ static const struct dpu_wb_cfg x1e80100_wb[] = {
        },
 };
 
+/* TODO: INTF 3, 8 and 7 are used for MST, marked as INTF_NONE for now */
 static const struct dpu_intf_cfg x1e80100_intf[] = {
        {
                .name = "intf_0", .id = INTF_0,
@@ -358,8 +359,8 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
                .name = "intf_3", .id = INTF_3,
                .base = 0x37000, .len = 0x280,
                .features = INTF_SC7280_MASK,
-               .type = INTF_DP,
-               .controller_id = MSM_DP_CONTROLLER_1,
+               .type = INTF_NONE,
+               .controller_id = MSM_DP_CONTROLLER_0,   /* pair with intf_0 for DP MST */
                .prog_fetch_lines_worst_case = 24,
                .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30),
                .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31),
@@ -368,7 +369,7 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
                .base = 0x38000, .len = 0x280,
                .features = INTF_SC7280_MASK,
                .type = INTF_DP,
-               .controller_id = MSM_DP_CONTROLLER_2,
+               .controller_id = MSM_DP_CONTROLLER_1,
                .prog_fetch_lines_worst_case = 24,
                .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 20),
                .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 21),
@@ -381,6 +382,33 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
                .prog_fetch_lines_worst_case = 24,
                .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 22),
                .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 23),
+       }, {
+               .name = "intf_6", .id = INTF_6,
+               .base = 0x3A000, .len = 0x280,
+               .features = INTF_SC7280_MASK,
+               .type = INTF_DP,
+               .controller_id = MSM_DP_CONTROLLER_2,
+               .prog_fetch_lines_worst_case = 24,
+               .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17),
+               .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16),
+       }, {
+               .name = "intf_7", .id = INTF_7,
+               .base = 0x3b000, .len = 0x280,
+               .features = INTF_SC7280_MASK,
+               .type = INTF_NONE,
+               .controller_id = MSM_DP_CONTROLLER_2,   /* pair with intf_6 for DP MST */
+               .prog_fetch_lines_worst_case = 24,
+               .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 18),
+               .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 19),
+       }, {
+               .name = "intf_8", .id = INTF_8,
+               .base = 0x3c000, .len = 0x280,
+               .features = INTF_SC7280_MASK,
+               .type = INTF_NONE,
+               .controller_id = MSM_DP_CONTROLLER_1,   /* pair with intf_4 for DP MST */
+               .prog_fetch_lines_worst_case = 24,
+               .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12),
+               .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13),
        },
 };
 
index ef871239adb2a37e11c6d364d85f7384403459ee..68fae048a9a837410eb6051f9af52a6e0c399585 100644 (file)
@@ -459,15 +459,15 @@ int dpu_core_perf_debugfs_init(struct dpu_kms *dpu_kms, struct dentry *parent)
                        &perf->core_clk_rate);
        debugfs_create_u32("enable_bw_release", 0600, entry,
                        (u32 *)&perf->enable_bw_release);
-       debugfs_create_u32("threshold_low", 0600, entry,
+       debugfs_create_u32("threshold_low", 0400, entry,
                        (u32 *)&perf->perf_cfg->max_bw_low);
-       debugfs_create_u32("threshold_high", 0600, entry,
+       debugfs_create_u32("threshold_high", 0400, entry,
                        (u32 *)&perf->perf_cfg->max_bw_high);
-       debugfs_create_u32("min_core_ib", 0600, entry,
+       debugfs_create_u32("min_core_ib", 0400, entry,
                        (u32 *)&perf->perf_cfg->min_core_ib);
-       debugfs_create_u32("min_llcc_ib", 0600, entry,
+       debugfs_create_u32("min_llcc_ib", 0400, entry,
                        (u32 *)&perf->perf_cfg->min_llcc_ib);
-       debugfs_create_u32("min_dram_ib", 0600, entry,
+       debugfs_create_u32("min_dram_ib", 0400, entry,
                        (u32 *)&perf->perf_cfg->min_dram_ib);
        debugfs_create_file("perf_mode", 0600, entry,
                        (u32 *)perf, &dpu_core_perf_mode_fops);
index 946dd0135dffcf7dcd2b7f6445c62c048a044e8d..6a0a74832fb64d95adc6b0524ba15bd1faaa0bb1 100644 (file)
@@ -525,14 +525,14 @@ int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms,
        int ret;
 
        if (!irq_cb) {
-               DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n",
-                         DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb);
+               DPU_ERROR("IRQ=[%d, %d] NULL callback\n",
+                         DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx));
                return -EINVAL;
        }
 
        if (!dpu_core_irq_is_valid(irq_idx)) {
-               DPU_ERROR("invalid IRQ=[%d, %d]\n",
-                         DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx));
+               DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n",
+                         DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb);
                return -EINVAL;
        }
 
index c4cb82af5c2f2f77ae7c9804f4fd6a12c42d42c0..ffbfde9225898619c11b6fd3d59062ed1a65b719 100644 (file)
@@ -484,7 +484,7 @@ static void dp_display_handle_video_request(struct dp_display_private *dp)
        }
 }
 
-static int dp_display_handle_port_ststus_changed(struct dp_display_private *dp)
+static int dp_display_handle_port_status_changed(struct dp_display_private *dp)
 {
        int rc = 0;
 
@@ -541,7 +541,7 @@ static int dp_display_usbpd_attention_cb(struct device *dev)
                drm_dbg_dp(dp->drm_dev, "hpd_state=%d sink_request=%d\n",
                                        dp->hpd_state, sink_request);
                if (sink_request & DS_PORT_STATUS_CHANGED)
-                       rc = dp_display_handle_port_ststus_changed(dp);
+                       rc = dp_display_handle_port_status_changed(dp);
                else
                        rc = dp_display_handle_irq_hpd(dp);
        }
@@ -588,6 +588,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
        ret = dp_display_usbpd_configure_cb(&pdev->dev);
        if (ret) {      /* link train failed */
                dp->hpd_state = ST_DISCONNECTED;
+               pm_runtime_put_sync(&pdev->dev);
        } else {
                dp->hpd_state = ST_MAINLINK_READY;
        }
@@ -645,6 +646,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
                dp_display_host_phy_exit(dp);
                dp->hpd_state = ST_DISCONNECTED;
                dp_display_notify_disconnect(&dp->dp_display.pdev->dev);
+               pm_runtime_put_sync(&pdev->dev);
                mutex_unlock(&dp->event_mutex);
                return 0;
        }
index e3f61c39df69b4c31ffae28ea7f2ecab500f8863..80166f702a0dbab3a36a489c3c853e35533b4fe2 100644 (file)
@@ -89,7 +89,7 @@ int msm_framebuffer_prepare(struct drm_framebuffer *fb,
 
        for (i = 0; i < n; i++) {
                ret = msm_gem_get_and_pin_iova(fb->obj[i], aspace, &msm_fb->iova[i]);
-               drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)",
+               drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)\n",
                              fb->base.id, i, msm_fb->iova[i], ret);
                if (ret)
                        return ret;
@@ -176,7 +176,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
        const struct msm_format *format;
        int ret, i, n;
 
-       drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)",
+       drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)\n",
                        mode_cmd, mode_cmd->width, mode_cmd->height,
                        (char *)&mode_cmd->pixel_format);
 
@@ -232,7 +232,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
 
        refcount_set(&msm_fb->dirtyfb, 1);
 
-       drm_dbg_state(dev, "create: FB ID: %d (%p)", fb->base.id, fb);
+       drm_dbg_state(dev, "create: FB ID: %d (%p)\n", fb->base.id, fb);
 
        return fb;
 
index 84c21ec2ceeae08d8506688f73acf530ef40012b..af6a6fcb11736f6dc7637805647b9c717e684a09 100644 (file)
@@ -149,7 +149,7 @@ int msm_crtc_enable_vblank(struct drm_crtc *crtc)
        struct msm_kms *kms = priv->kms;
        if (!kms)
                return -ENXIO;
-       drm_dbg_vbl(dev, "crtc=%u", crtc->base.id);
+       drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id);
        return vblank_ctrl_queue_work(priv, crtc, true);
 }
 
@@ -160,7 +160,7 @@ void msm_crtc_disable_vblank(struct drm_crtc *crtc)
        struct msm_kms *kms = priv->kms;
        if (!kms)
                return;
-       drm_dbg_vbl(dev, "crtc=%u", crtc->base.id);
+       drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id);
        vblank_ctrl_queue_work(priv, crtc, false);
 }
 
index 479effcf607e261fac73361958a0a855cf90d315..79cfab53f80e259093b7ae0f04310f6470a3c930 100644 (file)
@@ -23,6 +23,7 @@
  */
 
 #include "nouveau_drv.h"
+#include "nouveau_bios.h"
 #include "nouveau_reg.h"
 #include "dispnv04/hw.h"
 #include "nouveau_encoder.h"
@@ -1677,7 +1678,7 @@ apply_dcb_encoder_quirks(struct drm_device *dev, int idx, u32 *conn, u32 *conf)
         */
        if (nv_match_device(dev, 0x0201, 0x1462, 0x8851)) {
                if (*conn == 0xf2005014 && *conf == 0xffffffff) {
-                       fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, 1);
+                       fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, DCB_OUTPUT_B);
                        return false;
                }
        }
@@ -1763,26 +1764,26 @@ fabricate_dcb_encoder_table(struct drm_device *dev, struct nvbios *bios)
 #ifdef __powerpc__
        /* Apple iMac G4 NV17 */
        if (of_machine_is_compatible("PowerMac4,5")) {
-               fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, 1);
-               fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, 2);
+               fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, DCB_OUTPUT_B);
+               fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, DCB_OUTPUT_C);
                return;
        }
 #endif
 
        /* Make up some sane defaults */
        fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG,
-                            bios->legacy.i2c_indices.crt, 1, 1);
+                            bios->legacy.i2c_indices.crt, 1, DCB_OUTPUT_B);
 
        if (nv04_tv_identify(dev, bios->legacy.i2c_indices.tv) >= 0)
                fabricate_dcb_output(dcb, DCB_OUTPUT_TV,
                                     bios->legacy.i2c_indices.tv,
-                                    all_heads, 0);
+                                    all_heads, DCB_OUTPUT_A);
 
        else if (bios->tmds.output0_script_ptr ||
                 bios->tmds.output1_script_ptr)
                fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS,
                                     bios->legacy.i2c_indices.panel,
-                                    all_heads, 1);
+                                    all_heads, DCB_OUTPUT_B);
 }
 
 static int
index 7de7707ec6a895ee2a914150008425a21041bf9c..a72c45809484ab58023dfa0dc5172f67adcfdc23 100644 (file)
@@ -225,12 +225,18 @@ nouveau_dp_detect(struct nouveau_connector *nv_connector,
        u8 *dpcd = nv_encoder->dp.dpcd;
        int ret = NOUVEAU_DP_NONE, hpd;
 
-       /* If we've already read the DPCD on an eDP device, we don't need to
-        * reread it as it won't change
+       /* eDP ports don't support hotplugging - so there's no point in probing eDP ports unless we
+        * haven't probed them once before.
         */
-       if (connector->connector_type == DRM_MODE_CONNECTOR_eDP &&
-           dpcd[DP_DPCD_REV] != 0)
-               return NOUVEAU_DP_SST;
+       if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+               if (connector->status == connector_status_connected)
+                       return NOUVEAU_DP_SST;
+               else if (connector->status == connector_status_disconnected)
+                       return NOUVEAU_DP_NONE;
+       }
+
+       // Ensure that the aux bus is enabled for probing
+       drm_dp_dpcd_set_powered(&nv_connector->aux, true);
 
        mutex_lock(&nv_encoder->dp.hpd_irq_lock);
        if (mstm) {
@@ -293,6 +299,13 @@ out:
        if (mstm && !mstm->suspended && ret != NOUVEAU_DP_MST)
                nv50_mstm_remove(mstm);
 
+       /* GSP doesn't like when we try to do aux transactions on a port it considers disconnected,
+        * and since we don't really have a usecase for that anyway - just disable the aux bus here
+        * if we've decided the connector is disconnected
+        */
+       if (ret == NOUVEAU_DP_NONE)
+               drm_dp_dpcd_set_powered(&nv_connector->aux, false);
+
        mutex_unlock(&nv_encoder->dp.hpd_irq_lock);
        return ret;
 }
index 4bf486b57101367708bba2b6fe4bdd1d985f1d19..cb05f7f48a98bb53fc3e03b57166466c675acd7c 100644 (file)
@@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name)
        return ERR_PTR(-EINVAL);
 }
 
+static void of_fini(void *p)
+{
+       kfree(p);
+}
+
 const struct nvbios_source
 nvbios_of = {
        .name = "OpenFirmware",
        .init = of_init,
-       .fini = (void(*)(void *))kfree,
+       .fini = of_fini,
        .read = of_read,
        .size = of_size,
        .rw = false,
index 7bcbc4895ec22196acecfd46d0b29490d2c93ee2..271bfa038f5bc90974acd1ed2709d5cbae51ed94 100644 (file)
@@ -25,6 +25,7 @@
 
 #include <subdev/bios.h>
 #include <subdev/bios/init.h>
+#include <subdev/gsp.h>
 
 void
 gm107_devinit_disable(struct nvkm_devinit *init)
@@ -33,10 +34,13 @@ gm107_devinit_disable(struct nvkm_devinit *init)
        u32 r021c00 = nvkm_rd32(device, 0x021c00);
        u32 r021c04 = nvkm_rd32(device, 0x021c04);
 
-       if (r021c00 & 0x00000001)
-               nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
-       if (r021c00 & 0x00000004)
-               nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
+       /* gsp only wants to enable/disable display */
+       if (!nvkm_gsp_rm(device->gsp)) {
+               if (r021c00 & 0x00000001)
+                       nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
+               if (r021c00 & 0x00000004)
+                       nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
+       }
        if (r021c04 & 0x00000001)
                nvkm_subdev_disable(device, NVKM_ENGINE_DISP, 0);
 }
index 11b4c9c274a1a597cb3592019d873345c241d1cd..666eb93b1742ca5435cf0567e28e1664122bad8b 100644 (file)
@@ -41,6 +41,7 @@ r535_devinit_new(const struct nvkm_devinit_func *hw,
 
        rm->dtor = r535_devinit_dtor;
        rm->post = hw->post;
+       rm->disable = hw->disable;
 
        ret = nv50_devinit_new_(rm, device, type, inst, pdevinit);
        if (ret)
index 9994cbd6f1c40c0c798498687f4f5d7168e883c5..9858c1438aa7feda7d84ff5442f611b23f101b2d 100644 (file)
@@ -1112,7 +1112,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
        rpc->numEntries = NV_GSP_REG_NUM_ENTRIES;
 
        str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]);
-       strings = (char *)&rpc->entries[NV_GSP_REG_NUM_ENTRIES];
+       strings = (char *)rpc + str_offset;
        for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++) {
                int name_len = strlen(r535_registry_entries[i].name) + 1;
 
index a7f3fc342d87e03b031b5008d939c2eb46f49404..dd5b5a17ece0beed225888888d6c01a0afcf67c9 100644 (file)
@@ -222,8 +222,11 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
        void __iomem *map = NULL;
 
        /* Already mapped? */
-       if (refcount_inc_not_zero(&iobj->maps))
+       if (refcount_inc_not_zero(&iobj->maps)) {
+               /* read barrier match the wmb on refcount set */
+               smp_rmb();
                return iobj->map;
+       }
 
        /* Take the lock, and re-check that another thread hasn't
         * already mapped the object in the meantime.
@@ -250,6 +253,8 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
                        iobj->base.memory.ptrs = &nv50_instobj_fast;
                else
                        iobj->base.memory.ptrs = &nv50_instobj_slow;
+               /* barrier to ensure the ptrs are written before refcount is set */
+               smp_wmb();
                refcount_set(&iobj->maps, 1);
        }
 
index cb7406d7446695ebd3566230f3e11fca3b4cc323..c39fe0fc5d69c646915561bc3d4cb5cfc5411ac1 100644 (file)
@@ -614,8 +614,6 @@ static void nt36672e_panel_remove(struct mipi_dsi_device *dsi)
        struct nt36672e_panel *ctx = mipi_dsi_get_drvdata(dsi);
 
        mipi_dsi_detach(ctx->dsi);
-       mipi_dsi_device_unregister(ctx->dsi);
-
        drm_panel_remove(&ctx->panel);
 }
 
index 775144695283f54dcb1c527e58c9604cfd6da207..b15ca56a09a74a06f8bfcd0b4053d554ced9b58d 100644 (file)
@@ -253,8 +253,6 @@ static void visionox_rm69299_remove(struct mipi_dsi_device *dsi)
        struct visionox_rm69299 *ctx = mipi_dsi_get_drvdata(dsi);
 
        mipi_dsi_detach(ctx->dsi);
-       mipi_dsi_device_unregister(ctx->dsi);
-
        drm_panel_remove(&ctx->panel);
 }
 
index f38385fe76bbb45d92bf75cf078faec1f8be52ff..b91019cd5acb191a560b7217ff792cf4222004fa 100644 (file)
@@ -502,11 +502,18 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
        mapping_set_unevictable(mapping);
 
        for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) {
+               /* Can happen if the last fault only partially filled this
+                * section of the pages array before failing. In that case
+                * we skip already filled pages.
+                */
+               if (pages[i])
+                       continue;
+
                pages[i] = shmem_read_mapping_page(mapping, i);
                if (IS_ERR(pages[i])) {
                        ret = PTR_ERR(pages[i]);
                        pages[i] = NULL;
-                       goto err_pages;
+                       goto err_unlock;
                }
        }
 
@@ -514,7 +521,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
        ret = sg_alloc_table_from_pages(sgt, pages + page_offset,
                                        NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL);
        if (ret)
-               goto err_pages;
+               goto err_unlock;
 
        ret = dma_map_sgtable(pfdev->dev, sgt, DMA_BIDIRECTIONAL, 0);
        if (ret)
@@ -537,8 +544,6 @@ out:
 
 err_map:
        sg_free_table(sgt);
-err_pages:
-       drm_gem_shmem_put_pages(&bo->base);
 err_unlock:
        dma_resv_unlock(obj->resv);
 err_bo:
index 368d26da0d6a233467cdc8ef5820ebf4b7ddb964..9febc8b73f09efaaaac9d6fb8d2776f2148aed89 100644 (file)
@@ -58,16 +58,56 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr,
                           signed long timeout)
 {
        struct qxl_device *qdev;
+       struct qxl_release *release;
+       int count = 0, sc = 0;
+       bool have_drawable_releases;
        unsigned long cur, end = jiffies + timeout;
 
        qdev = container_of(fence->lock, struct qxl_device, release_lock);
+       release = container_of(fence, struct qxl_release, base);
+       have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE;
 
-       if (!wait_event_timeout(qdev->release_event,
-                               (dma_fence_is_signaled(fence) ||
-                                (qxl_io_notify_oom(qdev), 0)),
-                               timeout))
-               return 0;
+retry:
+       sc++;
+
+       if (dma_fence_is_signaled(fence))
+               goto signaled;
+
+       qxl_io_notify_oom(qdev);
+
+       for (count = 0; count < 11; count++) {
+               if (!qxl_queue_garbage_collect(qdev, true))
+                       break;
+
+               if (dma_fence_is_signaled(fence))
+                       goto signaled;
+       }
+
+       if (dma_fence_is_signaled(fence))
+               goto signaled;
+
+       if (have_drawable_releases || sc < 4) {
+               if (sc > 2)
+                       /* back off */
+                       usleep_range(500, 1000);
+
+               if (time_after(jiffies, end))
+                       return 0;
+
+               if (have_drawable_releases && sc > 300) {
+                       DMA_FENCE_WARN(fence,
+                                      "failed to wait on release %llu after spincount %d\n",
+                                      fence->context & ~0xf0000000, sc);
+                       goto signaled;
+               }
+               goto retry;
+       }
+       /*
+        * yeah, original sync_obj_wait gave up after 3 spins when
+        * have_drawable_releases is not set.
+        */
 
+signaled:
        cur = jiffies;
        if (time_after(cur, end))
                return 0;
index 94947229888ba7888aa6992116af8ab985219dbe..b7f22597ee95e798bb104894052997e332c298c6 100644 (file)
@@ -424,7 +424,7 @@ typedef struct _ATOM_PPLIB_SUMO_CLOCK_INFO{
 typedef struct _ATOM_PPLIB_STATE_V2
 {
       //number of valid dpm levels in this state; Driver uses it to calculate the whole 
-      //size of the state: sizeof(ATOM_PPLIB_STATE_V2) + (ucNumDPMLevels - 1) * sizeof(UCHAR)
+      //size of the state: struct_size(ATOM_PPLIB_STATE_V2, clockInfoIndex, ucNumDPMLevels)
       UCHAR ucNumDPMLevels;
       
       //a index to the array of nonClockInfos
@@ -432,14 +432,14 @@ typedef struct _ATOM_PPLIB_STATE_V2
       /**
       * Driver will read the first ucNumDPMLevels in this array
       */
-      UCHAR clockInfoIndex[1];
+      UCHAR clockInfoIndex[] __counted_by(ucNumDPMLevels);
 } ATOM_PPLIB_STATE_V2;
 
 typedef struct _StateArray{
     //how many states we have 
     UCHAR ucNumEntries;
     
-    ATOM_PPLIB_STATE_V2 states[1];
+    ATOM_PPLIB_STATE_V2 states[] __counted_by(ucNumEntries);
 }StateArray;
 
 
@@ -450,7 +450,7 @@ typedef struct _ClockInfoArray{
     //sizeof(ATOM_PPLIB_CLOCK_INFO)
     UCHAR ucEntrySize;
     
-    UCHAR clockInfo[1];
+    UCHAR clockInfo[] __counted_by(ucNumEntries);
 }ClockInfoArray;
 
 typedef struct _NonClockInfoArray{
@@ -460,7 +460,7 @@ typedef struct _NonClockInfoArray{
     //sizeof(ATOM_PPLIB_NONCLOCK_INFO)
     UCHAR ucEntrySize;
     
-    ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[1];
+    ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[] __counted_by(ucNumEntries);
 }NonClockInfoArray;
 
 typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record
index bb1f0a3371ab5de484a81ad040347c9a5a8d4e76..10793a433bf58697fcdfce8e850ebfdd55ec7284 100644 (file)
@@ -923,8 +923,12 @@ bool radeon_get_atom_connector_info_from_supported_devices_table(struct
                max_device = ATOM_MAX_SUPPORTED_DEVICE_INFO;
 
        for (i = 0; i < max_device; i++) {
-               ATOM_CONNECTOR_INFO_I2C ci =
-                   supported_devices->info.asConnInfo[i];
+               ATOM_CONNECTOR_INFO_I2C ci;
+
+               if (frev > 1)
+                       ci = supported_devices->info_2d1.asConnInfo[i];
+               else
+                       ci = supported_devices->info.asConnInfo[i];
 
                bios_connectors[i].valid = false;
 
index 112438d965ffbefd4fa2cce5f246cc03a63759f9..6e1fd6985ffcb730eb7057c4509aec971dfa8266 100644 (file)
@@ -288,17 +288,23 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
                                                  enum ttm_caching caching,
                                                  unsigned int order)
 {
-       if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
+       if (pool->use_dma_alloc)
                return &pool->caching[caching].orders[order];
 
 #ifdef CONFIG_X86
        switch (caching) {
        case ttm_write_combined:
+               if (pool->nid != NUMA_NO_NODE)
+                       return &pool->caching[caching].orders[order];
+
                if (pool->use_dma32)
                        return &global_dma32_write_combined[order];
 
                return &global_write_combined[order];
        case ttm_uncached:
+               if (pool->nid != NUMA_NO_NODE)
+                       return &pool->caching[caching].orders[order];
+
                if (pool->use_dma32)
                        return &global_dma32_uncached[order];
 
@@ -566,11 +572,17 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
        pool->use_dma_alloc = use_dma_alloc;
        pool->use_dma32 = use_dma32;
 
-       if (use_dma_alloc || nid != NUMA_NO_NODE) {
-               for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-                       for (j = 0; j < NR_PAGE_ORDERS; ++j)
-                               ttm_pool_type_init(&pool->caching[i].orders[j],
-                                                  pool, i, j);
+       for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+               for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+                       struct ttm_pool_type *pt;
+
+                       /* Initialize only pool types which are actually used */
+                       pt = ttm_pool_select_type(pool, i, j);
+                       if (pt != &pool->caching[i].orders[j])
+                               continue;
+
+                       ttm_pool_type_init(pt, pool, i, j);
+               }
        }
 }
 EXPORT_SYMBOL(ttm_pool_init);
@@ -599,10 +611,16 @@ void ttm_pool_fini(struct ttm_pool *pool)
 {
        unsigned int i, j;
 
-       if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
-               for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-                       for (j = 0; j < NR_PAGE_ORDERS; ++j)
-                               ttm_pool_type_fini(&pool->caching[i].orders[j]);
+       for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+               for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+                       struct ttm_pool_type *pt;
+
+                       pt = ttm_pool_select_type(pool, i, j);
+                       if (pt != &pool->caching[i].orders[j])
+                               continue;
+
+                       ttm_pool_type_fini(pt);
+               }
        }
 
        /* We removed the pool types from the LRU, but we need to also make sure
index 2e04f6cb661e4f42eeaaef3c5d7fcdaee501d457..ce6b2fb341d1f8a85bab6f0ed19fd3ccde39757b 100644 (file)
@@ -105,7 +105,6 @@ v3d_irq(int irq, void *arg)
                struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv;
                u64 runtime = local_clock() - file->start_ns[V3D_BIN];
 
-               file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN];
                file->jobs_sent[V3D_BIN]++;
                v3d->queue[V3D_BIN].jobs_sent++;
 
@@ -126,7 +125,6 @@ v3d_irq(int irq, void *arg)
                struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv;
                u64 runtime = local_clock() - file->start_ns[V3D_RENDER];
 
-               file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER];
                file->jobs_sent[V3D_RENDER]++;
                v3d->queue[V3D_RENDER].jobs_sent++;
 
@@ -147,7 +145,6 @@ v3d_irq(int irq, void *arg)
                struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv;
                u64 runtime = local_clock() - file->start_ns[V3D_CSD];
 
-               file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD];
                file->jobs_sent[V3D_CSD]++;
                v3d->queue[V3D_CSD].jobs_sent++;
 
@@ -195,7 +192,6 @@ v3d_hub_irq(int irq, void *arg)
                struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv;
                u64 runtime = local_clock() - file->start_ns[V3D_TFU];
 
-               file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU];
                file->jobs_sent[V3D_TFU]++;
                v3d->queue[V3D_TFU].jobs_sent++;
 
index c52c7bf1485b1fa95b1e9ca3f1e05135167a8c5c..717d624e9a052298d5d5070551e909cc65ee0cc5 100644 (file)
@@ -456,8 +456,10 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
                .no_wait_gpu = false
        };
        u32 j, initial_line = dst_offset / dst_stride;
-       struct vmw_bo_blit_line_data d;
+       struct vmw_bo_blit_line_data d = {0};
        int ret = 0;
+       struct page **dst_pages = NULL;
+       struct page **src_pages = NULL;
 
        /* Buffer objects need to be either pinned or reserved: */
        if (!(dst->pin_count))
@@ -477,12 +479,35 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
                        return ret;
        }
 
+       if (!src->ttm->pages && src->ttm->sg) {
+               src_pages = kvmalloc_array(src->ttm->num_pages,
+                                          sizeof(struct page *), GFP_KERNEL);
+               if (!src_pages)
+                       return -ENOMEM;
+               ret = drm_prime_sg_to_page_array(src->ttm->sg, src_pages,
+                                                src->ttm->num_pages);
+               if (ret)
+                       goto out;
+       }
+       if (!dst->ttm->pages && dst->ttm->sg) {
+               dst_pages = kvmalloc_array(dst->ttm->num_pages,
+                                          sizeof(struct page *), GFP_KERNEL);
+               if (!dst_pages) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               ret = drm_prime_sg_to_page_array(dst->ttm->sg, dst_pages,
+                                                dst->ttm->num_pages);
+               if (ret)
+                       goto out;
+       }
+
        d.mapped_dst = 0;
        d.mapped_src = 0;
        d.dst_addr = NULL;
        d.src_addr = NULL;
-       d.dst_pages = dst->ttm->pages;
-       d.src_pages = src->ttm->pages;
+       d.dst_pages = dst->ttm->pages ? dst->ttm->pages : dst_pages;
+       d.src_pages = src->ttm->pages ? src->ttm->pages : src_pages;
        d.dst_num_pages = PFN_UP(dst->resource->size);
        d.src_num_pages = PFN_UP(src->resource->size);
        d.dst_prot = ttm_io_prot(dst, dst->resource, PAGE_KERNEL);
@@ -504,6 +529,10 @@ out:
                kunmap_atomic(d.src_addr);
        if (d.dst_addr)
                kunmap_atomic(d.dst_addr);
+       if (src_pages)
+               kvfree(src_pages);
+       if (dst_pages)
+               kvfree(dst_pages);
 
        return ret;
 }
index bfd41ce3c8f4fca1f5a659e4513e08f72ea95966..e5eb21a471a6010aa956c811522956f27b99a096 100644 (file)
@@ -377,7 +377,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv,
 {
        struct ttm_operation_ctx ctx = {
                .interruptible = params->bo_type != ttm_bo_type_kernel,
-               .no_wait_gpu = false
+               .no_wait_gpu = false,
+               .resv = params->resv,
        };
        struct ttm_device *bdev = &dev_priv->bdev;
        struct drm_device *vdev = &dev_priv->drm;
@@ -394,8 +395,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv,
 
        vmw_bo_placement_set(vmw_bo, params->domain, params->busy_domain);
        ret = ttm_bo_init_reserved(bdev, &vmw_bo->tbo, params->bo_type,
-                                  &vmw_bo->placement, 0, &ctx, NULL,
-                                  NULL, destroy);
+                                  &vmw_bo->placement, 0, &ctx,
+                                  params->sg, params->resv, destroy);
        if (unlikely(ret))
                return ret;
 
index 0d496dc9c6af7a352c0432f50f4dd9be37448b5e..f349642e6190d6933031d08ccd7f353231f0f1da 100644 (file)
@@ -55,6 +55,8 @@ struct vmw_bo_params {
        enum ttm_bo_type bo_type;
        size_t size;
        bool pin;
+       struct dma_resv *resv;
+       struct sg_table *sg;
 };
 
 /**
index c7d90f96d16a67beddf9395cf1ad611cb6f1cf34..58fb40c93100a84ec8b1dd769f35ab31c00bd0dc 100644 (file)
@@ -666,11 +666,12 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
                [vmw_dma_map_populate] = "Caching DMA mappings.",
                [vmw_dma_map_bind] = "Giving up DMA mappings early."};
 
-       /* TTM currently doesn't fully support SEV encryption. */
-       if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
-               return -EINVAL;
-
-       if (vmw_force_coherent)
+       /*
+        * When running with SEV we always want dma mappings, because
+        * otherwise ttm tt pool pages will bounce through swiotlb running
+        * out of available space.
+        */
+       if (vmw_force_coherent || cc_platform_has(CC_ATTR_MEM_ENCRYPT))
                dev_priv->map_mode = vmw_dma_alloc_coherent;
        else if (vmw_restrict_iommu)
                dev_priv->map_mode = vmw_dma_map_bind;
@@ -1627,6 +1628,7 @@ static const struct drm_driver driver = {
 
        .prime_fd_to_handle = vmw_prime_fd_to_handle,
        .prime_handle_to_fd = vmw_prime_handle_to_fd,
+       .gem_prime_import_sg_table = vmw_prime_import_sg_table,
 
        .fops = &vmwgfx_driver_fops,
        .name = VMWGFX_DRIVER_NAME,
index 12efecc17df664968056906da40cdd46b5665ddf..b019a1a1787af59e3fca37f560db905d7b2b6ab0 100644 (file)
@@ -1130,6 +1130,9 @@ extern int vmw_prime_handle_to_fd(struct drm_device *dev,
                                  struct drm_file *file_priv,
                                  uint32_t handle, uint32_t flags,
                                  int *prime_fd);
+struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev,
+                                                struct dma_buf_attachment *attach,
+                                                struct sg_table *table);
 
 /*
  * MemoryOBject management -  vmwgfx_mob.c
index 12787bb9c111d10db997b9db650c6bb1069c26ef..d6bcaf078b1f40bbf75bdfb63fd1e00b7901e20f 100644 (file)
@@ -149,6 +149,38 @@ out_no_bo:
        return ret;
 }
 
+struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev,
+                                                struct dma_buf_attachment *attach,
+                                                struct sg_table *table)
+{
+       int ret;
+       struct vmw_private *dev_priv = vmw_priv(dev);
+       struct drm_gem_object *gem = NULL;
+       struct vmw_bo *vbo;
+       struct vmw_bo_params params = {
+               .domain = (dev_priv->has_mob) ? VMW_BO_DOMAIN_SYS : VMW_BO_DOMAIN_VRAM,
+               .busy_domain = VMW_BO_DOMAIN_SYS,
+               .bo_type = ttm_bo_type_sg,
+               .size = attach->dmabuf->size,
+               .pin = false,
+               .resv = attach->dmabuf->resv,
+               .sg = table,
+
+       };
+
+       dma_resv_lock(params.resv, NULL);
+
+       ret = vmw_bo_create(dev_priv, &params, &vbo);
+       if (ret != 0)
+               goto out_no_bo;
+
+       vbo->tbo.base.funcs = &vmw_gem_object_funcs;
+
+       gem = &vbo->tbo.base;
+out_no_bo:
+       dma_resv_unlock(params.resv);
+       return gem;
+}
 
 int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data,
                                struct drm_file *filp)
index cd4925346ed45a1c10ae4e9d9b13c0066c71f168..84ae4e10a2ebec20c52a7eb42ea5455a0d22dfa5 100644 (file)
@@ -933,6 +933,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane,
 int vmw_du_crtc_atomic_check(struct drm_crtc *crtc,
                             struct drm_atomic_state *state)
 {
+       struct vmw_private *vmw = vmw_priv(crtc->dev);
        struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state,
                                                                         crtc);
        struct vmw_display_unit *du = vmw_crtc_to_du(new_state->crtc);
@@ -940,9 +941,13 @@ int vmw_du_crtc_atomic_check(struct drm_crtc *crtc,
        bool has_primary = new_state->plane_mask &
                           drm_plane_mask(crtc->primary);
 
-       /* We always want to have an active plane with an active CRTC */
-       if (has_primary != new_state->enable)
-               return -EINVAL;
+       /*
+        * This is fine in general, but broken userspace might expect
+        * some actual rendering so give a clue as why it's blank.
+        */
+       if (new_state->enable && !has_primary)
+               drm_dbg_driver(&vmw->drm,
+                              "CRTC without a primary plane will be blank.\n");
 
 
        if (new_state->connector_mask != connector_mask &&
index a94947b588e85f2c764aab60e11a84e59dd2a2ea..19a843da87b789b62279ecb9dccb8b2ddb19fe2f 100644 (file)
@@ -243,10 +243,10 @@ struct vmw_framebuffer_bo {
 
 
 static const uint32_t __maybe_unused vmw_primary_plane_formats[] = {
-       DRM_FORMAT_XRGB1555,
-       DRM_FORMAT_RGB565,
        DRM_FORMAT_XRGB8888,
        DRM_FORMAT_ARGB8888,
+       DRM_FORMAT_RGB565,
+       DRM_FORMAT_XRGB1555,
 };
 
 static const uint32_t __maybe_unused vmw_cursor_plane_formats[] = {
index 2d72a5ee7c0c710339d5d25c0a9376745a90f7af..c99cad444991579f6e665453b74f56cb35de2e15 100644 (file)
@@ -75,8 +75,12 @@ int vmw_prime_fd_to_handle(struct drm_device *dev,
                           int fd, u32 *handle)
 {
        struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+       int ret = ttm_prime_fd_to_handle(tfile, fd, handle);
 
-       return ttm_prime_fd_to_handle(tfile, fd, handle);
+       if (ret)
+               ret = drm_gem_prime_fd_to_handle(dev, file_priv, fd, handle);
+
+       return ret;
 }
 
 int vmw_prime_handle_to_fd(struct drm_device *dev,
@@ -85,5 +89,12 @@ int vmw_prime_handle_to_fd(struct drm_device *dev,
                           int *prime_fd)
 {
        struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
-       return ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd);
+       int ret;
+
+       if (handle > VMWGFX_NUM_MOB)
+               ret = ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd);
+       else
+               ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd);
+
+       return ret;
 }
index 4d23d0a70bcb7ef4901e9128b84cd7112ae8913a..621d98b376bbbc4b40cef6b9c6759b975610dd56 100644 (file)
@@ -188,13 +188,18 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
        switch (dev_priv->map_mode) {
        case vmw_dma_map_bind:
        case vmw_dma_map_populate:
-               vsgt->sgt = &vmw_tt->sgt;
-               ret = sg_alloc_table_from_pages_segment(
-                       &vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
-                       (unsigned long)vsgt->num_pages << PAGE_SHIFT,
-                       dma_get_max_seg_size(dev_priv->drm.dev), GFP_KERNEL);
-               if (ret)
-                       goto out_sg_alloc_fail;
+               if (vmw_tt->dma_ttm.page_flags  & TTM_TT_FLAG_EXTERNAL) {
+                       vsgt->sgt = vmw_tt->dma_ttm.sg;
+               } else {
+                       vsgt->sgt = &vmw_tt->sgt;
+                       ret = sg_alloc_table_from_pages_segment(&vmw_tt->sgt,
+                               vsgt->pages, vsgt->num_pages, 0,
+                               (unsigned long)vsgt->num_pages << PAGE_SHIFT,
+                               dma_get_max_seg_size(dev_priv->drm.dev),
+                               GFP_KERNEL);
+                       if (ret)
+                               goto out_sg_alloc_fail;
+               }
 
                ret = vmw_ttm_map_for_dma(vmw_tt);
                if (unlikely(ret != 0))
@@ -209,8 +214,9 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
        return 0;
 
 out_map_fail:
-       sg_free_table(vmw_tt->vsgt.sgt);
-       vmw_tt->vsgt.sgt = NULL;
+       drm_warn(&dev_priv->drm, "VSG table map failed!");
+       sg_free_table(vsgt->sgt);
+       vsgt->sgt = NULL;
 out_sg_alloc_fail:
        return ret;
 }
@@ -356,15 +362,17 @@ static void vmw_ttm_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
 static int vmw_ttm_populate(struct ttm_device *bdev,
                            struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
 {
-       int ret;
+       bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
 
-       /* TODO: maybe completely drop this ? */
        if (ttm_tt_is_populated(ttm))
                return 0;
 
-       ret = ttm_pool_alloc(&bdev->pool, ttm, ctx);
+       if (external && ttm->sg)
+               return  drm_prime_sg_to_dma_addr_array(ttm->sg,
+                                                      ttm->dma_address,
+                                                      ttm->num_pages);
 
-       return ret;
+       return ttm_pool_alloc(&bdev->pool, ttm, ctx);
 }
 
 static void vmw_ttm_unpopulate(struct ttm_device *bdev,
@@ -372,6 +380,10 @@ static void vmw_ttm_unpopulate(struct ttm_device *bdev,
 {
        struct vmw_ttm_tt *vmw_tt = container_of(ttm, struct vmw_ttm_tt,
                                                 dma_ttm);
+       bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
+
+       if (external)
+               return;
 
        vmw_ttm_unbind(bdev, ttm);
 
@@ -390,6 +402,7 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
 {
        struct vmw_ttm_tt *vmw_be;
        int ret;
+       bool external = bo->type == ttm_bo_type_sg;
 
        vmw_be = kzalloc(sizeof(*vmw_be), GFP_KERNEL);
        if (!vmw_be)
@@ -398,7 +411,10 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
        vmw_be->dev_priv = vmw_priv_from_ttm(bo->bdev);
        vmw_be->mob = NULL;
 
-       if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent)
+       if (external)
+               page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+
+       if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent || external)
                ret = ttm_sg_tt_init(&vmw_be->dma_ttm, bo, page_flags,
                                     ttm_cached);
        else
index b21da7b745a5e7cd6b3e34e4fb8d42a45b2b6466..a9c1f9885c6bb4d2727cbce81d5be93cb9458a38 100644 (file)
@@ -31,7 +31,7 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
 
        ret = ttm_bo_reserve(&bo->ttm, true, false, NULL);
        if (ret)
-               return ret;
+               goto err;
 
        if (!(bo->flags & XE_BO_SCANOUT_BIT)) {
                /*
@@ -42,12 +42,16 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
                 */
                if (XE_IOCTL_DBG(i915, !list_empty(&bo->ttm.base.gpuva.list))) {
                        ttm_bo_unreserve(&bo->ttm);
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto err;
                }
                bo->flags |= XE_BO_SCANOUT_BIT;
        }
        ttm_bo_unreserve(&bo->ttm);
+       return 0;
 
+err:
+       xe_bo_put(bo);
        return ret;
 }
 
index e4db069f0db3f1fd27ed80eb84fc4544ea0831df..6ec375c1c4b6c05aed07ba8432214b3de270c56e 100644 (file)
@@ -108,11 +108,6 @@ int xe_display_create(struct xe_device *xe)
        xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
 
        drmm_mutex_init(&xe->drm, &xe->sb_lock);
-       drmm_mutex_init(&xe->drm, &xe->display.backlight.lock);
-       drmm_mutex_init(&xe->drm, &xe->display.audio.mutex);
-       drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex);
-       drmm_mutex_init(&xe->drm, &xe->display.pps.mutex);
-       drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex);
        xe->enabled_irq_mask = ~0;
 
        err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
index 0b1266c88a6af39cba103e3447697c0540c0cc0d..deddc8be48c0af2133969c7452d12cd2e104f291 100644 (file)
 #define RING_EXECLIST_STATUS_LO(base)          XE_REG((base) + 0x234)
 #define RING_EXECLIST_STATUS_HI(base)          XE_REG((base) + 0x234 + 4)
 
-#define RING_CONTEXT_CONTROL(base)             XE_REG((base) + 0x244)
+#define RING_CONTEXT_CONTROL(base)             XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
 #define          CTX_CTRL_INHIBIT_SYN_CTX_SWITCH       REG_BIT(3)
 #define          CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT   REG_BIT(0)
 
index b82233a4160624d2d3dad941327bf7ecff5a3382..9ac7fbe201b3c22fa25959f98af87d453a962a17 100644 (file)
@@ -290,7 +290,7 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a
         * As y can be < 2, we compute tau4 = (4 | x) << y
         * and then add 2 when doing the final right shift to account for units
         */
-       tau4 = ((1 << x_w) | x) << y;
+       tau4 = (u64)((1 << x_w) | x) << y;
 
        /* val in hwmon interface units (millisec) */
        out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
@@ -330,7 +330,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *
        r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
        x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
        y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
-       tau4 = ((1 << x_w) | x) << y;
+       tau4 = (u64)((1 << x_w) | x) << y;
        max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
 
        if (val > max_win)
index 1426febe86eb676305772d7ee444b70af8254848..57066faf575eec7edebf335da434b6c1615d935f 100644 (file)
@@ -525,9 +525,8 @@ static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
 
 static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
 {
-       regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
-                                   _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
-                                   CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+       regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
+                                                      CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
 
        /* TODO: Timestamp */
 }
index ee1bb938c493487415445cd41c8b771080464522..2ba4fb9511f63fa894796dec90c89963a3dae1b0 100644 (file)
@@ -227,7 +227,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
                if (vm->flags & XE_VM_FLAG_64K && level == 1)
                        flags = XE_PDE_64K;
 
-               entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) *
+               entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) *
                                                  XE_PAGE_SIZE, pat_index);
                xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
                          entry | flags);
@@ -235,7 +235,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 
        /* Write PDE's that point to our BO. */
        for (i = 0; i < num_entries - num_level; i++) {
-               entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE,
+               entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE,
                                                  pat_index);
 
                xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
@@ -291,7 +291,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 #define VM_SA_UPDATE_UNIT_SIZE         (XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
 #define NUM_VMUSA_WRITES_PER_UNIT      (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64))
        drm_suballoc_manager_init(&m->vm_update_sa,
-                                 (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
+                                 (size_t)(map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
                                  NUM_VMUSA_UNIT_PER_PAGE, 0);
 
        m->pt_bo = bo;
@@ -490,7 +490,7 @@ static void emit_pte(struct xe_migrate *m,
        struct xe_vm *vm = m->q->vm;
        u16 pat_index;
        u32 ptes;
-       u64 ofs = at_pt * XE_PAGE_SIZE;
+       u64 ofs = (u64)at_pt * XE_PAGE_SIZE;
        u64 cur_ofs;
 
        /* Indirect access needs compression enabled uncached PAT index */
index 62d1ef8867a84351ae7444d63113d8867dfbb0c5..3d4c8f342e215ed39263ba5c4c01079072dfcbbd 100644 (file)
@@ -1577,6 +1577,16 @@ void xe_vm_close_and_put(struct xe_vm *vm)
                xe->usm.num_vm_in_fault_mode--;
        else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
                xe->usm.num_vm_in_non_fault_mode--;
+
+       if (vm->usm.asid) {
+               void *lookup;
+
+               xe_assert(xe, xe->info.has_asid);
+               xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
+
+               lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+               xe_assert(xe, lookup == vm);
+       }
        mutex_unlock(&xe->usm.lock);
 
        for_each_tile(tile, xe, id)
@@ -1592,24 +1602,15 @@ static void vm_destroy_work_func(struct work_struct *w)
        struct xe_device *xe = vm->xe;
        struct xe_tile *tile;
        u8 id;
-       void *lookup;
 
        /* xe_vm_close_and_put was not called? */
        xe_assert(xe, !vm->size);
 
        mutex_destroy(&vm->snap_mutex);
 
-       if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
+       if (!(vm->flags & XE_VM_FLAG_MIGRATION))
                xe_device_mem_access_put(xe);
 
-               if (xe->info.has_asid && vm->usm.asid) {
-                       mutex_lock(&xe->usm.lock);
-                       lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
-                       xe_assert(xe, lookup == vm);
-                       mutex_unlock(&xe->usm.lock);
-               }
-       }
-
        for_each_tile(tile, xe, id)
                XE_WARN_ON(vm->pt_root[id]);
 
index 783975d1384fc4d8e780cb2cdf450b5bab8b55de..7c52757a89db9abde6fb211178b9cedb4b1c7740 100644 (file)
@@ -351,11 +351,6 @@ static int host1x_device_uevent(const struct device *dev,
        return 0;
 }
 
-static int host1x_dma_configure(struct device *dev)
-{
-       return of_dma_configure(dev, dev->of_node, true);
-}
-
 static const struct dev_pm_ops host1x_device_pm_ops = {
        .suspend = pm_generic_suspend,
        .resume = pm_generic_resume,
@@ -369,7 +364,6 @@ const struct bus_type host1x_bus_type = {
        .name = "host1x",
        .match = host1x_device_match,
        .uevent = host1x_device_uevent,
-       .dma_configure = host1x_dma_configure,
        .pm = &host1x_device_pm_ops,
 };
 
@@ -458,8 +452,6 @@ static int host1x_device_add(struct host1x *host1x,
        device->dev.bus = &host1x_bus_type;
        device->dev.parent = host1x->dev;
 
-       of_dma_configure(&device->dev, host1x->dev->of_node, true);
-
        device->dev.dma_parms = &device->dma_parms;
        dma_set_max_seg_size(&device->dev, UINT_MAX);
 
index adbf674355b2b8a472c03bd60092960cb0c742cf..fb8cd8469328ee094619c91eb227a04e24bf66cb 100644 (file)
@@ -153,7 +153,9 @@ void vmbus_free_ring(struct vmbus_channel *channel)
        hv_ringbuffer_cleanup(&channel->inbound);
 
        if (channel->ringbuffer_page) {
-               __free_pages(channel->ringbuffer_page,
+               /* In a CoCo VM leak the memory if it didn't get re-encrypted */
+               if (!channel->ringbuffer_gpadlhandle.decrypted)
+                       __free_pages(channel->ringbuffer_page,
                             get_order(channel->ringbuffer_pagecount
                                       << PAGE_SHIFT));
                channel->ringbuffer_page = NULL;
@@ -436,9 +438,18 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
                (atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
 
        ret = create_gpadl_header(type, kbuffer, size, send_offset, &msginfo);
-       if (ret)
+       if (ret) {
+               gpadl->decrypted = false;
                return ret;
+       }
 
+       /*
+        * Set the "decrypted" flag to true for the set_memory_decrypted()
+        * success case. In the failure case, the encryption state of the
+        * memory is unknown. Leave "decrypted" as true to ensure the
+        * memory will be leaked instead of going back on the free list.
+        */
+       gpadl->decrypted = true;
        ret = set_memory_decrypted((unsigned long)kbuffer,
                                   PFN_UP(size));
        if (ret) {
@@ -527,9 +538,15 @@ cleanup:
 
        kfree(msginfo);
 
-       if (ret)
-               set_memory_encrypted((unsigned long)kbuffer,
-                                    PFN_UP(size));
+       if (ret) {
+               /*
+                * If set_memory_encrypted() fails, the decrypted flag is
+                * left as true so the memory is leaked instead of being
+                * put back on the free list.
+                */
+               if (!set_memory_encrypted((unsigned long)kbuffer, PFN_UP(size)))
+                       gpadl->decrypted = false;
+       }
 
        return ret;
 }
@@ -850,6 +867,8 @@ post_msg_err:
        if (ret)
                pr_warn("Fail to set mem host visibility in GPADL teardown %d.\n", ret);
 
+       gpadl->decrypted = ret;
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
index 3cabeeabb1cacf0627b02110d6f4fc17abc7e4a0..f001ae880e1dbefc6243e6d902e529db43291987 100644 (file)
@@ -237,8 +237,17 @@ int vmbus_connect(void)
                                vmbus_connection.monitor_pages[0], 1);
        ret |= set_memory_decrypted((unsigned long)
                                vmbus_connection.monitor_pages[1], 1);
-       if (ret)
+       if (ret) {
+               /*
+                * If set_memory_decrypted() fails, the encryption state
+                * of the memory is unknown. So leak the memory instead
+                * of risking returning decrypted memory to the free list.
+                * For simplicity, always handle both pages the same.
+                */
+               vmbus_connection.monitor_pages[0] = NULL;
+               vmbus_connection.monitor_pages[1] = NULL;
                goto cleanup;
+       }
 
        /*
         * Set_memory_decrypted() will change the memory contents if
@@ -337,13 +346,19 @@ void vmbus_disconnect(void)
                vmbus_connection.int_page = NULL;
        }
 
-       set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1);
-       set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1);
+       if (vmbus_connection.monitor_pages[0]) {
+               if (!set_memory_encrypted(
+                       (unsigned long)vmbus_connection.monitor_pages[0], 1))
+                       hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
+               vmbus_connection.monitor_pages[0] = NULL;
+       }
 
-       hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
-       hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
-       vmbus_connection.monitor_pages[0] = NULL;
-       vmbus_connection.monitor_pages[1] = NULL;
+       if (vmbus_connection.monitor_pages[1]) {
+               if (!set_memory_encrypted(
+                       (unsigned long)vmbus_connection.monitor_pages[1], 1))
+                       hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
+               vmbus_connection.monitor_pages[1] = NULL;
+       }
 }
 
 /*
index 4cb17603a8289b259e64dc6a5be215cb1e1a8a57..12a707ab73f85cf363e6503346741a85bc9b82df 100644 (file)
@@ -131,7 +131,7 @@ static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
+       return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
 }
 static DEVICE_ATTR_RO(id);
 
@@ -142,7 +142,7 @@ static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n", hv_dev->channel->state);
+       return sysfs_emit(buf, "%d\n", hv_dev->channel->state);
 }
 static DEVICE_ATTR_RO(state);
 
@@ -153,7 +153,7 @@ static ssize_t monitor_id_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
+       return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
 }
 static DEVICE_ATTR_RO(monitor_id);
 
@@ -164,8 +164,8 @@ static ssize_t class_id_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "{%pUl}\n",
-                      &hv_dev->channel->offermsg.offer.if_type);
+       return sysfs_emit(buf, "{%pUl}\n",
+                         &hv_dev->channel->offermsg.offer.if_type);
 }
 static DEVICE_ATTR_RO(class_id);
 
@@ -176,8 +176,8 @@ static ssize_t device_id_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "{%pUl}\n",
-                      &hv_dev->channel->offermsg.offer.if_instance);
+       return sysfs_emit(buf, "{%pUl}\n",
+                         &hv_dev->channel->offermsg.offer.if_instance);
 }
 static DEVICE_ATTR_RO(device_id);
 
@@ -186,7 +186,7 @@ static ssize_t modalias_show(struct device *dev,
 {
        struct hv_device *hv_dev = device_to_hv_device(dev);
 
-       return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
+       return sysfs_emit(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
 }
 static DEVICE_ATTR_RO(modalias);
 
@@ -199,7 +199,7 @@ static ssize_t numa_node_show(struct device *dev,
        if (!hv_dev->channel)
                return -ENODEV;
 
-       return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
+       return sysfs_emit(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
 }
 static DEVICE_ATTR_RO(numa_node);
 #endif
@@ -212,9 +212,8 @@ static ssize_t server_monitor_pending_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n",
-                      channel_pending(hv_dev->channel,
-                                      vmbus_connection.monitor_pages[0]));
+       return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+                         vmbus_connection.monitor_pages[0]));
 }
 static DEVICE_ATTR_RO(server_monitor_pending);
 
@@ -226,9 +225,8 @@ static ssize_t client_monitor_pending_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n",
-                      channel_pending(hv_dev->channel,
-                                      vmbus_connection.monitor_pages[1]));
+       return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+                         vmbus_connection.monitor_pages[1]));
 }
 static DEVICE_ATTR_RO(client_monitor_pending);
 
@@ -240,9 +238,8 @@ static ssize_t server_monitor_latency_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n",
-                      channel_latency(hv_dev->channel,
-                                      vmbus_connection.monitor_pages[0]));
+       return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+                         vmbus_connection.monitor_pages[0]));
 }
 static DEVICE_ATTR_RO(server_monitor_latency);
 
@@ -254,9 +251,8 @@ static ssize_t client_monitor_latency_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n",
-                      channel_latency(hv_dev->channel,
-                                      vmbus_connection.monitor_pages[1]));
+       return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+                         vmbus_connection.monitor_pages[1]));
 }
 static DEVICE_ATTR_RO(client_monitor_latency);
 
@@ -268,9 +264,8 @@ static ssize_t server_monitor_conn_id_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n",
-                      channel_conn_id(hv_dev->channel,
-                                      vmbus_connection.monitor_pages[0]));
+       return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+                         vmbus_connection.monitor_pages[0]));
 }
 static DEVICE_ATTR_RO(server_monitor_conn_id);
 
@@ -282,9 +277,8 @@ static ssize_t client_monitor_conn_id_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n",
-                      channel_conn_id(hv_dev->channel,
-                                      vmbus_connection.monitor_pages[1]));
+       return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+                         vmbus_connection.monitor_pages[1]));
 }
 static DEVICE_ATTR_RO(client_monitor_conn_id);
 
@@ -303,7 +297,7 @@ static ssize_t out_intr_mask_show(struct device *dev,
        if (ret < 0)
                return ret;
 
-       return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
+       return sysfs_emit(buf, "%d\n", outbound.current_interrupt_mask);
 }
 static DEVICE_ATTR_RO(out_intr_mask);
 
@@ -321,7 +315,7 @@ static ssize_t out_read_index_show(struct device *dev,
                                          &outbound);
        if (ret < 0)
                return ret;
-       return sprintf(buf, "%d\n", outbound.current_read_index);
+       return sysfs_emit(buf, "%d\n", outbound.current_read_index);
 }
 static DEVICE_ATTR_RO(out_read_index);
 
@@ -340,7 +334,7 @@ static ssize_t out_write_index_show(struct device *dev,
                                          &outbound);
        if (ret < 0)
                return ret;
-       return sprintf(buf, "%d\n", outbound.current_write_index);
+       return sysfs_emit(buf, "%d\n", outbound.current_write_index);
 }
 static DEVICE_ATTR_RO(out_write_index);
 
@@ -359,7 +353,7 @@ static ssize_t out_read_bytes_avail_show(struct device *dev,
                                          &outbound);
        if (ret < 0)
                return ret;
-       return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
+       return sysfs_emit(buf, "%d\n", outbound.bytes_avail_toread);
 }
 static DEVICE_ATTR_RO(out_read_bytes_avail);
 
@@ -378,7 +372,7 @@ static ssize_t out_write_bytes_avail_show(struct device *dev,
                                          &outbound);
        if (ret < 0)
                return ret;
-       return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
+       return sysfs_emit(buf, "%d\n", outbound.bytes_avail_towrite);
 }
 static DEVICE_ATTR_RO(out_write_bytes_avail);
 
@@ -396,7 +390,7 @@ static ssize_t in_intr_mask_show(struct device *dev,
        if (ret < 0)
                return ret;
 
-       return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
+       return sysfs_emit(buf, "%d\n", inbound.current_interrupt_mask);
 }
 static DEVICE_ATTR_RO(in_intr_mask);
 
@@ -414,7 +408,7 @@ static ssize_t in_read_index_show(struct device *dev,
        if (ret < 0)
                return ret;
 
-       return sprintf(buf, "%d\n", inbound.current_read_index);
+       return sysfs_emit(buf, "%d\n", inbound.current_read_index);
 }
 static DEVICE_ATTR_RO(in_read_index);
 
@@ -432,7 +426,7 @@ static ssize_t in_write_index_show(struct device *dev,
        if (ret < 0)
                return ret;
 
-       return sprintf(buf, "%d\n", inbound.current_write_index);
+       return sysfs_emit(buf, "%d\n", inbound.current_write_index);
 }
 static DEVICE_ATTR_RO(in_write_index);
 
@@ -451,7 +445,7 @@ static ssize_t in_read_bytes_avail_show(struct device *dev,
        if (ret < 0)
                return ret;
 
-       return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
+       return sysfs_emit(buf, "%d\n", inbound.bytes_avail_toread);
 }
 static DEVICE_ATTR_RO(in_read_bytes_avail);
 
@@ -470,7 +464,7 @@ static ssize_t in_write_bytes_avail_show(struct device *dev,
        if (ret < 0)
                return ret;
 
-       return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
+       return sysfs_emit(buf, "%d\n", inbound.bytes_avail_towrite);
 }
 static DEVICE_ATTR_RO(in_write_bytes_avail);
 
@@ -480,7 +474,7 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
 {
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
-       int buf_size = PAGE_SIZE, n_written, tot_written;
+       int n_written;
        struct list_head *cur;
 
        if (!channel)
@@ -488,25 +482,21 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
 
        mutex_lock(&vmbus_connection.channel_mutex);
 
-       tot_written = snprintf(buf, buf_size, "%u:%u\n",
-               channel->offermsg.child_relid, channel->target_cpu);
+       n_written = sysfs_emit(buf, "%u:%u\n",
+                              channel->offermsg.child_relid,
+                              channel->target_cpu);
 
        list_for_each(cur, &channel->sc_list) {
-               if (tot_written >= buf_size - 1)
-                       break;
 
                cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
-               n_written = scnprintf(buf + tot_written,
-                                    buf_size - tot_written,
-                                    "%u:%u\n",
-                                    cur_sc->offermsg.child_relid,
-                                    cur_sc->target_cpu);
-               tot_written += n_written;
+               n_written += sysfs_emit_at(buf, n_written, "%u:%u\n",
+                                         cur_sc->offermsg.child_relid,
+                                         cur_sc->target_cpu);
        }
 
        mutex_unlock(&vmbus_connection.channel_mutex);
 
-       return tot_written;
+       return n_written;
 }
 static DEVICE_ATTR_RO(channel_vp_mapping);
 
@@ -516,7 +506,7 @@ static ssize_t vendor_show(struct device *dev,
 {
        struct hv_device *hv_dev = device_to_hv_device(dev);
 
-       return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
+       return sysfs_emit(buf, "0x%x\n", hv_dev->vendor_id);
 }
 static DEVICE_ATTR_RO(vendor);
 
@@ -526,7 +516,7 @@ static ssize_t device_show(struct device *dev,
 {
        struct hv_device *hv_dev = device_to_hv_device(dev);
 
-       return sprintf(buf, "0x%x\n", hv_dev->device_id);
+       return sysfs_emit(buf, "0x%x\n", hv_dev->device_id);
 }
 static DEVICE_ATTR_RO(device);
 
@@ -551,7 +541,7 @@ static ssize_t driver_override_show(struct device *dev,
        ssize_t len;
 
        device_lock(dev);
-       len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
+       len = sysfs_emit(buf, "%s\n", hv_dev->driver_override);
        device_unlock(dev);
 
        return len;
index 33228c1c8980f32a5e8af323587601a4783b5b7f..ac6754a85f3507ee88bd3847359a53292a14dc9f 100644 (file)
@@ -3232,28 +3232,29 @@ static void iommu_snp_enable(void)
                return;
        /*
         * The SNP support requires that IOMMU must be enabled, and is
-        * not configured in the passthrough mode.
+        * configured with V1 page table (DTE[Mode] = 0 is not supported).
         */
        if (no_iommu || iommu_default_passthrough()) {
-               pr_err("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
-               cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
-               return;
+               pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
+               goto disable_snp;
+       }
+
+       if (amd_iommu_pgtable != AMD_IOMMU_V1) {
+               pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
+               goto disable_snp;
        }
 
        amd_iommu_snp_en = check_feature(FEATURE_SNP);
        if (!amd_iommu_snp_en) {
-               pr_err("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
-               cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
-               return;
+               pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
+               goto disable_snp;
        }
 
        pr_info("IOMMU SNP support enabled.\n");
+       return;
 
-       /* Enforce IOMMU v1 pagetable when SNP is enabled. */
-       if (amd_iommu_pgtable != AMD_IOMMU_V1) {
-               pr_warn("Forcing use of AMD IOMMU v1 page table due to SNP.\n");
-               amd_iommu_pgtable = AMD_IOMMU_V1;
-       }
+disable_snp:
+       cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
 #endif
 }
 
index d35c1b8c8e65ce5a9c6f6ae3aae555d91eb4d3d0..e692217fcb28011478139d7dc146d74dcd9456e8 100644 (file)
@@ -1692,26 +1692,29 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
 
 static u16 domain_id_alloc(void)
 {
+       unsigned long flags;
        int id;
 
-       spin_lock(&pd_bitmap_lock);
+       spin_lock_irqsave(&pd_bitmap_lock, flags);
        id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
        BUG_ON(id == 0);
        if (id > 0 && id < MAX_DOMAIN_ID)
                __set_bit(id, amd_iommu_pd_alloc_bitmap);
        else
                id = 0;
-       spin_unlock(&pd_bitmap_lock);
+       spin_unlock_irqrestore(&pd_bitmap_lock, flags);
 
        return id;
 }
 
 static void domain_id_free(int id)
 {
-       spin_lock(&pd_bitmap_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&pd_bitmap_lock, flags);
        if (id > 0 && id < MAX_DOMAIN_ID)
                __clear_bit(id, amd_iommu_pd_alloc_bitmap);
-       spin_unlock(&pd_bitmap_lock);
+       spin_unlock_irqrestore(&pd_bitmap_lock, flags);
 }
 
 static void free_gcr3_tbl_level1(u64 *tbl)
index 50eb9aed47cc585e1307b3d0f47252b2edcdaeb0..a7ecd90303dc42f9fbe120e75f2053b1390c5445 100644 (file)
@@ -4299,9 +4299,11 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
        }
 
        dev_iommu_priv_set(dev, info);
-       ret = device_rbtree_insert(iommu, info);
-       if (ret)
-               goto free;
+       if (pdev && pci_ats_supported(pdev)) {
+               ret = device_rbtree_insert(iommu, info);
+               if (ret)
+                       goto free;
+       }
 
        if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
                ret = intel_pasid_alloc_table(dev);
@@ -4336,7 +4338,8 @@ static void intel_iommu_release_device(struct device *dev)
        struct intel_iommu *iommu = info->iommu;
 
        mutex_lock(&iommu->iopf_lock);
-       device_rbtree_remove(info);
+       if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)))
+               device_rbtree_remove(info);
        mutex_unlock(&iommu->iopf_lock);
 
        if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
index cf43e798eca49936e79a20ea5397a6b0e9f1cc82..44083d01852dbf997f8cc4001f3b278ea5d7fa07 100644 (file)
@@ -438,7 +438,7 @@ static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
        iommu_pmu_set_filter(domain, event->attr.config1,
                             IOMMU_PMU_FILTER_DOMAIN, idx,
                             event->attr.config1);
-       iommu_pmu_set_filter(pasid, event->attr.config1,
+       iommu_pmu_set_filter(pasid, event->attr.config2,
                             IOMMU_PMU_FILTER_PASID, idx,
                             event->attr.config1);
        iommu_pmu_set_filter(ats, event->attr.config2,
index c1bed89b102614adf6f71070080aa513729f4409..ee3b469e2da1551889ba0e200f386e010bc6f68f 100644 (file)
@@ -66,7 +66,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
        struct page *pages;
        int irq, ret;
 
-       pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
+       pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
        if (!pages) {
                pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
                        iommu->name);
index b8c47f18bc2612407cf58bb80bc041e27967d139..6a2707fe7a78c09d04f84a78d0b498d7a960d73d 100644 (file)
@@ -1790,6 +1790,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = {
        { .compatible = "mediatek,mt8365-m4u", .data = &mt8365_data},
        {}
 };
+MODULE_DEVICE_TABLE(of, mtk_iommu_of_ids);
 
 static struct platform_driver mtk_iommu_driver = {
        .probe  = mtk_iommu_probe,
index a9fa2a54dc9b39a981ccc4e66f72eff5329de49e..d6e4002200bd33d6219ed09f1c90ccac0e3404e4 100644 (file)
@@ -600,6 +600,7 @@ static const struct of_device_id mtk_iommu_v1_of_ids[] = {
        { .compatible = "mediatek,mt2701-m4u", },
        {}
 };
+MODULE_DEVICE_TABLE(of, mtk_iommu_v1_of_ids);
 
 static const struct component_master_ops mtk_iommu_v1_com_ops = {
        .bind           = mtk_iommu_v1_bind,
index fca888b36680df813c952d8d29e1cf74cd81e167..2a537cbfcb077246c0aee43a5b9f1885a3e0b5f2 100644 (file)
@@ -786,6 +786,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
                                           struct its_cmd_block *cmd,
                                           struct its_cmd_desc *desc)
 {
+       struct its_vpe *vpe = valid_vpe(its, desc->its_vmapp_cmd.vpe);
        unsigned long vpt_addr, vconf_addr;
        u64 target;
        bool alloc;
@@ -798,6 +799,11 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
                if (is_v4_1(its)) {
                        alloc = !atomic_dec_return(&desc->its_vmapp_cmd.vpe->vmapp_count);
                        its_encode_alloc(cmd, alloc);
+                       /*
+                        * Unmapping a VPE is self-synchronizing on GICv4.1,
+                        * no need to issue a VSYNC.
+                        */
+                       vpe = NULL;
                }
 
                goto out;
@@ -832,7 +838,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
 out:
        its_fixup_cmd(cmd);
 
-       return valid_vpe(its, desc->its_vmapp_cmd.vpe);
+       return vpe;
 }
 
 static struct its_vpe *its_build_vmapti_cmd(struct its_node *its,
index 2776ca5fc33f39019062b3d9fb8f02547a5e4139..b215b28cad7b76a5764bda8021cece74ec5cd40f 100644 (file)
@@ -401,23 +401,23 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 }
 
 static int data_sock_setsockopt(struct socket *sock, int level, int optname,
-                               sockptr_t optval, unsigned int len)
+                               sockptr_t optval, unsigned int optlen)
 {
        struct sock *sk = sock->sk;
        int err = 0, opt = 0;
 
        if (*debug & DEBUG_SOCKET)
                printk(KERN_DEBUG "%s(%p, %d, %x, optval, %d)\n", __func__, sock,
-                      level, optname, len);
+                      level, optname, optlen);
 
        lock_sock(sk);
 
        switch (optname) {
        case MISDN_TIME_STAMP:
-               if (copy_from_sockptr(&opt, optval, sizeof(int))) {
-                       err = -EFAULT;
+               err = copy_safe_from_sockptr(&opt, sizeof(opt),
+                                            optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt)
                        _pms(sk)->cmask |= MISDN_TIME_STAMP;
index be8ac24f50b6ad651fd107f9af9a448bb1f7780a..7b8a71ca66dde0f4f6f3c2728107cb48cfcaa706 100644 (file)
@@ -1558,7 +1558,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                for (j = 0; j < i; j++)
                        if (r1_bio->bios[j])
                                rdev_dec_pending(conf->mirrors[j].rdev, mddev);
-               free_r1bio(r1_bio);
+               mempool_free(r1_bio, &conf->r1bio_pool);
                allow_barrier(conf, bio->bi_iter.bi_sector);
 
                if (bio->bi_opf & REQ_NOWAIT) {
index 4c34344dc7dcb876e29d66358bcfcc79e1e77705..d7027d600208fc2f7233c5ca01ab7d590ef33042 100644 (file)
@@ -50,12 +50,12 @@ static void mtk_vcodec_vpu_reset_dec_handler(void *priv)
 
        dev_err(&dev->plat_dev->dev, "Watchdog timeout!!");
 
-       mutex_lock(&dev->dev_mutex);
+       mutex_lock(&dev->dev_ctx_lock);
        list_for_each_entry(ctx, &dev->ctx_list, list) {
                ctx->state = MTK_STATE_ABORT;
                mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id);
        }
-       mutex_unlock(&dev->dev_mutex);
+       mutex_unlock(&dev->dev_ctx_lock);
 }
 
 static void mtk_vcodec_vpu_reset_enc_handler(void *priv)
@@ -65,12 +65,12 @@ static void mtk_vcodec_vpu_reset_enc_handler(void *priv)
 
        dev_err(&dev->plat_dev->dev, "Watchdog timeout!!");
 
-       mutex_lock(&dev->dev_mutex);
+       mutex_lock(&dev->dev_ctx_lock);
        list_for_each_entry(ctx, &dev->ctx_list, list) {
                ctx->state = MTK_STATE_ABORT;
                mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id);
        }
-       mutex_unlock(&dev->dev_mutex);
+       mutex_unlock(&dev->dev_ctx_lock);
 }
 
 static const struct mtk_vcodec_fw_ops mtk_vcodec_vpu_msg = {
index f47c98faf068b6250de0c46a45efbca641a0e0ad..2073781ccadb156116b1cbe86c49b3e06b7a93f3 100644 (file)
@@ -268,7 +268,9 @@ static int fops_vcodec_open(struct file *file)
 
        ctx->dev->vdec_pdata->init_vdec_params(ctx);
 
+       mutex_lock(&dev->dev_ctx_lock);
        list_add(&ctx->list, &dev->ctx_list);
+       mutex_unlock(&dev->dev_ctx_lock);
        mtk_vcodec_dbgfs_create(ctx);
 
        mutex_unlock(&dev->dev_mutex);
@@ -311,7 +313,9 @@ static int fops_vcodec_release(struct file *file)
        v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
 
        mtk_vcodec_dbgfs_remove(dev, ctx->id);
+       mutex_lock(&dev->dev_ctx_lock);
        list_del_init(&ctx->list);
+       mutex_unlock(&dev->dev_ctx_lock);
        kfree(ctx);
        mutex_unlock(&dev->dev_mutex);
        return 0;
@@ -404,6 +408,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
        for (i = 0; i < MTK_VDEC_HW_MAX; i++)
                mutex_init(&dev->dec_mutex[i]);
        mutex_init(&dev->dev_mutex);
+       mutex_init(&dev->dev_ctx_lock);
        spin_lock_init(&dev->irqlock);
 
        snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s",
index 849b89dd205c21d686d7fcfc3624df79f99e4449..85b2c0d3d8bcdd3a59027ddccd1efeb4371292c9 100644 (file)
@@ -241,6 +241,7 @@ struct mtk_vcodec_dec_ctx {
  *
  * @dec_mutex: decoder hardware lock
  * @dev_mutex: video_device lock
+ * @dev_ctx_lock: the lock of context list
  * @decode_workqueue: decode work queue
  *
  * @irqlock: protect data access by irq handler and work thread
@@ -282,6 +283,7 @@ struct mtk_vcodec_dec_dev {
        /* decoder hardware mutex lock */
        struct mutex dec_mutex[MTK_VDEC_HW_MAX];
        struct mutex dev_mutex;
+       struct mutex dev_ctx_lock;
        struct workqueue_struct *decode_workqueue;
 
        spinlock_t irqlock;
index 06ed47df693bfd049fe5537abb6b994c1b740b85..21836dd6ef85a36f4bfc7e781f0a5b57f6c1962d 100644 (file)
@@ -869,7 +869,6 @@ static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx)
        inst->vpu.codec_type = ctx->current_codec;
        inst->vpu.capture_type = ctx->capture_fourcc;
 
-       ctx->drv_handle = inst;
        err = vpu_dec_init(&inst->vpu);
        if (err) {
                mtk_vdec_err(ctx, "vdec_hevc init err=%d", err);
@@ -898,6 +897,7 @@ static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx)
        mtk_vdec_debug(ctx, "lat hevc instance >> %p, codec_type = 0x%x",
                       inst, inst->vpu.codec_type);
 
+       ctx->drv_handle = inst;
        return 0;
 error_free_inst:
        kfree(inst);
index 19407f9bc773c34445613ed8311fb86b1b565d38..987b3d71b662ac98495604e535f6ece7b733b8dd 100644 (file)
@@ -449,7 +449,7 @@ static int vdec_vp8_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
                       inst->frm_cnt, y_fb_dma, c_fb_dma, fb);
 
        inst->cur_fb = fb;
-       dec->bs_dma = (unsigned long)bs->dma_addr;
+       dec->bs_dma = (uint64_t)bs->dma_addr;
        dec->bs_sz = bs->size;
        dec->cur_y_fb_dma = y_fb_dma;
        dec->cur_c_fb_dma = c_fb_dma;
index 55355fa7009083cacba971e0e3f0981e09f80300..039082f600c813f8e703fd283843ee1bddbe31c8 100644 (file)
@@ -16,6 +16,7 @@
 #include "../vdec_drv_base.h"
 #include "../vdec_vpu_if.h"
 
+#define VP9_MAX_SUPER_FRAMES_NUM 8
 #define VP9_SUPER_FRAME_BS_SZ 64
 #define MAX_VP9_DPB_SIZE       9
 
@@ -133,11 +134,11 @@ struct vp9_sf_ref_fb {
  */
 struct vdec_vp9_vsi {
        unsigned char sf_bs_buf[VP9_SUPER_FRAME_BS_SZ];
-       struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_FRM_BUF_NUM-1];
+       struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_SUPER_FRAMES_NUM];
        int sf_next_ref_fb_idx;
        unsigned int sf_frm_cnt;
-       unsigned int sf_frm_offset[VP9_MAX_FRM_BUF_NUM-1];
-       unsigned int sf_frm_sz[VP9_MAX_FRM_BUF_NUM-1];
+       unsigned int sf_frm_offset[VP9_MAX_SUPER_FRAMES_NUM];
+       unsigned int sf_frm_sz[VP9_MAX_SUPER_FRAMES_NUM];
        unsigned int sf_frm_idx;
        unsigned int sf_init;
        struct vdec_fb fb;
@@ -526,7 +527,7 @@ static void vp9_swap_frm_bufs(struct vdec_vp9_inst *inst)
        /* if this super frame and it is not last sub-frame, get next fb for
         * sub-frame decode
         */
-       if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt - 1)
+       if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt)
                vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst);
 }
 
@@ -735,7 +736,7 @@ static void get_free_fb(struct vdec_vp9_inst *inst, struct vdec_fb **out_fb)
 
 static int validate_vsi_array_indexes(struct vdec_vp9_inst *inst,
                struct vdec_vp9_vsi *vsi) {
-       if (vsi->sf_frm_idx >= VP9_MAX_FRM_BUF_NUM - 1) {
+       if (vsi->sf_frm_idx > VP9_MAX_SUPER_FRAMES_NUM) {
                mtk_vdec_err(inst->ctx, "Invalid vsi->sf_frm_idx=%u.", vsi->sf_frm_idx);
                return -EIO;
        }
index cf48d09b78d7a156440e1343448af946342d26e9..eea709d93820919d33d13184af7281fe9f0035fc 100644 (file)
@@ -1074,7 +1074,7 @@ static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *inst
        unsigned int mi_row;
        unsigned int mi_col;
        unsigned int offset;
-       unsigned int pa;
+       dma_addr_t pa;
        unsigned int size;
        struct vdec_vp9_slice_tiles *tiles;
        unsigned char *pos;
@@ -1109,7 +1109,7 @@ static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *inst
        pos = va + offset;
        end = va + bs->size;
        /* truncated */
-       pa = (unsigned int)bs->dma_addr + offset;
+       pa = bs->dma_addr + offset;
        tb = instance->tile.va;
        for (i = 0; i < rows; i++) {
                for (j = 0; j < cols; j++) {
index 82e57ae983d55777463b4d7b08ac6fc18f3ec675..da6be556727bb18a458e1e59235615dc9b42c05f 100644 (file)
@@ -77,12 +77,14 @@ static bool vpu_dec_check_ap_inst(struct mtk_vcodec_dec_dev *dec_dev, struct vde
        struct mtk_vcodec_dec_ctx *ctx;
        int ret = false;
 
+       mutex_lock(&dec_dev->dev_ctx_lock);
        list_for_each_entry(ctx, &dec_dev->ctx_list, list) {
                if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) {
                        ret = true;
                        break;
                }
        }
+       mutex_unlock(&dec_dev->dev_ctx_lock);
 
        return ret;
 }
index 6319f24bc714b5eb3a7018f1e612afcf2dadf25e..3cb8a16222220e2d5480b48b48879112a68fc11f 100644 (file)
@@ -177,7 +177,9 @@ static int fops_vcodec_open(struct file *file)
        mtk_v4l2_venc_dbg(2, ctx, "Create instance [%d]@%p m2m_ctx=%p ",
                          ctx->id, ctx, ctx->m2m_ctx);
 
+       mutex_lock(&dev->dev_ctx_lock);
        list_add(&ctx->list, &dev->ctx_list);
+       mutex_unlock(&dev->dev_ctx_lock);
 
        mutex_unlock(&dev->dev_mutex);
        mtk_v4l2_venc_dbg(0, ctx, "%s encoder [%d]", dev_name(&dev->plat_dev->dev),
@@ -212,7 +214,9 @@ static int fops_vcodec_release(struct file *file)
        v4l2_fh_exit(&ctx->fh);
        v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
 
+       mutex_lock(&dev->dev_ctx_lock);
        list_del_init(&ctx->list);
+       mutex_unlock(&dev->dev_ctx_lock);
        kfree(ctx);
        mutex_unlock(&dev->dev_mutex);
        return 0;
@@ -294,6 +298,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
 
        mutex_init(&dev->enc_mutex);
        mutex_init(&dev->dev_mutex);
+       mutex_init(&dev->dev_ctx_lock);
        spin_lock_init(&dev->irqlock);
 
        snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s",
index a042f607ed8d1645a9dc3cf199b89e4280bc8337..0bd85d0fb379acbba3ac07c01e780cf57bef0305 100644 (file)
@@ -178,6 +178,7 @@ struct mtk_vcodec_enc_ctx {
  *
  * @enc_mutex: encoder hardware lock.
  * @dev_mutex: video_device lock
+ * @dev_ctx_lock: the lock of context list
  * @encode_workqueue: encode work queue
  *
  * @enc_irq: h264 encoder irq resource
@@ -205,6 +206,7 @@ struct mtk_vcodec_enc_dev {
        /* encoder hardware mutex lock */
        struct mutex enc_mutex;
        struct mutex dev_mutex;
+       struct mutex dev_ctx_lock;
        struct workqueue_struct *encode_workqueue;
 
        int enc_irq;
index 84ad1cc6ad171ef2ea2767653d60e6d779e5604e..51bb7ee141b9e58ac98f940f5e419d9ef4df37ca 100644 (file)
@@ -47,12 +47,14 @@ static bool vpu_enc_check_ap_inst(struct mtk_vcodec_enc_dev *enc_dev, struct ven
        struct mtk_vcodec_enc_ctx *ctx;
        int ret = false;
 
+       mutex_lock(&enc_dev->dev_ctx_lock);
        list_for_each_entry(ctx, &enc_dev->ctx_list, list) {
                if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) {
                        ret = true;
                        break;
                }
        }
+       mutex_unlock(&enc_dev->dev_ctx_lock);
 
        return ret;
 }
index 088f8ed4fdc4640d706a98d317e79668a6942748..a8ee0df471482393214c379169b3c7a340282296 100644 (file)
@@ -1114,10 +1114,25 @@ static void mmc_omap_set_power(struct mmc_omap_slot *slot, int power_on,
 
        host = slot->host;
 
-       if (slot->vsd)
-               gpiod_set_value(slot->vsd, power_on);
-       if (slot->vio)
-               gpiod_set_value(slot->vio, power_on);
+       if (power_on) {
+               if (slot->vsd) {
+                       gpiod_set_value(slot->vsd, power_on);
+                       msleep(1);
+               }
+               if (slot->vio) {
+                       gpiod_set_value(slot->vio, power_on);
+                       msleep(1);
+               }
+       } else {
+               if (slot->vio) {
+                       gpiod_set_value(slot->vio, power_on);
+                       msleep(50);
+               }
+               if (slot->vsd) {
+                       gpiod_set_value(slot->vsd, power_on);
+                       msleep(50);
+               }
+       }
 
        if (slot->pdata->set_power != NULL)
                slot->pdata->set_power(mmc_dev(slot->mmc), slot->id, power_on,
@@ -1254,18 +1269,18 @@ static int mmc_omap_new_slot(struct mmc_omap_host *host, int id)
        slot->pdata = &host->pdata->slots[id];
 
        /* Check for some optional GPIO controls */
-       slot->vsd = gpiod_get_index_optional(host->dev, "vsd",
-                                            id, GPIOD_OUT_LOW);
+       slot->vsd = devm_gpiod_get_index_optional(host->dev, "vsd",
+                                                 id, GPIOD_OUT_LOW);
        if (IS_ERR(slot->vsd))
                return dev_err_probe(host->dev, PTR_ERR(slot->vsd),
                                     "error looking up VSD GPIO\n");
-       slot->vio = gpiod_get_index_optional(host->dev, "vio",
-                                            id, GPIOD_OUT_LOW);
+       slot->vio = devm_gpiod_get_index_optional(host->dev, "vio",
+                                                 id, GPIOD_OUT_LOW);
        if (IS_ERR(slot->vio))
                return dev_err_probe(host->dev, PTR_ERR(slot->vio),
                                     "error looking up VIO GPIO\n");
-       slot->cover = gpiod_get_index_optional(host->dev, "cover",
-                                               id, GPIOD_IN);
+       slot->cover = devm_gpiod_get_index_optional(host->dev, "cover",
+                                                   id, GPIOD_IN);
        if (IS_ERR(slot->cover))
                return dev_err_probe(host->dev, PTR_ERR(slot->cover),
                                     "error looking up cover switch GPIO\n");
@@ -1379,13 +1394,6 @@ static int mmc_omap_probe(struct platform_device *pdev)
        if (IS_ERR(host->virt_base))
                return PTR_ERR(host->virt_base);
 
-       host->slot_switch = gpiod_get_optional(host->dev, "switch",
-                                              GPIOD_OUT_LOW);
-       if (IS_ERR(host->slot_switch))
-               return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
-                                    "error looking up slot switch GPIO\n");
-
-
        INIT_WORK(&host->slot_release_work, mmc_omap_slot_release_work);
        INIT_WORK(&host->send_stop_work, mmc_omap_send_stop_work);
 
@@ -1404,6 +1412,12 @@ static int mmc_omap_probe(struct platform_device *pdev)
        host->dev = &pdev->dev;
        platform_set_drvdata(pdev, host);
 
+       host->slot_switch = devm_gpiod_get_optional(host->dev, "switch",
+                                                   GPIOD_OUT_LOW);
+       if (IS_ERR(host->slot_switch))
+               return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
+                                    "error looking up slot switch GPIO\n");
+
        host->id = pdev->id;
        host->irq = irq;
        host->phys_base = res->start;
index 1035820c2377af7d73d401824734e949b7679c8d..8090390edaf9dbb6832c6e30c25bb0ad1068e6cc 100644 (file)
@@ -950,20 +950,173 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface)
        mutex_unlock(&priv->reg_mutex);
 }
 
-/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std
- * 802.1Qâ„¢-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA
- * must only be propagated to C-VLAN and MAC Bridge components. That means
- * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports,
- * these frames are supposed to be processed by the CPU (software). So we make
- * the switch only forward them to the CPU port. And if received from a CPU
- * port, forward to a single port. The software is responsible of making the
- * switch conform to the latter by setting a single port as destination port on
- * the special tag.
+/* In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL)
+ * of the Open Systems Interconnection basic reference model (OSI/RM) are
+ * described; the medium access control (MAC) and logical link control (LLC)
+ * sublayers. The MAC sublayer is the one facing the physical layer.
  *
- * This switch intellectual property cannot conform to this part of the standard
- * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC
- * DAs, it also includes :22-FF which the scope of propagation is not supposed
- * to be restricted for these MAC DAs.
+ * In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A
+ * Bridge component comprises a MAC Relay Entity for interconnecting the Ports
+ * of the Bridge, at least two Ports, and higher layer entities with at least a
+ * Spanning Tree Protocol Entity included.
+ *
+ * Each Bridge Port also functions as an end station and shall provide the MAC
+ * Service to an LLC Entity. Each instance of the MAC Service is provided to a
+ * distinct LLC Entity that supports protocol identification, multiplexing, and
+ * demultiplexing, for protocol data unit (PDU) transmission and reception by
+ * one or more higher layer entities.
+ *
+ * It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC
+ * Entity associated with each Bridge Port is modeled as being directly
+ * connected to the attached Local Area Network (LAN).
+ *
+ * On the switch with CPU port architecture, CPU port functions as Management
+ * Port, and the Management Port functionality is provided by software which
+ * functions as an end station. Software is connected to an IEEE 802 LAN that is
+ * wholly contained within the system that incorporates the Bridge. Software
+ * provides access to the LLC Entity associated with each Bridge Port by the
+ * value of the source port field on the special tag on the frame received by
+ * software.
+ *
+ * We call frames that carry control information to determine the active
+ * topology and current extent of each Virtual Local Area Network (VLAN), i.e.,
+ * spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration
+ * Protocol Data Units (MVRPDUs), and frames from other link constrained
+ * protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and
+ * Link Layer Discovery Protocol (LLDP), link-local frames. They are not
+ * forwarded by a Bridge. Permanently configured entries in the filtering
+ * database (FDB) ensure that such frames are discarded by the Forwarding
+ * Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail:
+ *
+ * Each of the reserved MAC addresses specified in Table 8-1
+ * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be
+ * permanently configured in the FDB in C-VLAN components and ERs.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-2
+ * (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently
+ * configured in the FDB in S-VLAN components.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-3
+ * (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in
+ * TPMR components.
+ *
+ * The FDB entries for reserved MAC addresses shall specify filtering for all
+ * Bridge Ports and all VIDs. Management shall not provide the capability to
+ * modify or remove entries for reserved MAC addresses.
+ *
+ * The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of
+ * propagation of PDUs within a Bridged Network, as follows:
+ *
+ *   The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no
+ *   conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN)
+ *   component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward.
+ *   PDUs transmitted using this destination address, or any other addresses
+ *   that appear in Table 8-1, Table 8-2, and Table 8-3
+ *   (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can
+ *   therefore travel no further than those stations that can be reached via a
+ *   single individual LAN from the originating station.
+ *
+ *   The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an
+ *   address that no conformant S-VLAN component, C-VLAN component, or MAC
+ *   Bridge can forward; however, this address is relayed by a TPMR component.
+ *   PDUs using this destination address, or any of the other addresses that
+ *   appear in both Table 8-1 and Table 8-2 but not in Table 8-3
+ *   (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by
+ *   any TPMRs but will propagate no further than the nearest S-VLAN component,
+ *   C-VLAN component, or MAC Bridge.
+ *
+ *   The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address
+ *   that no conformant C-VLAN component, MAC Bridge can forward; however, it is
+ *   relayed by TPMR components and S-VLAN components. PDUs using this
+ *   destination address, or any of the other addresses that appear in Table 8-1
+ *   but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]),
+ *   will be relayed by TPMR components and S-VLAN components but will propagate
+ *   no further than the nearest C-VLAN component or MAC Bridge.
+ *
+ * Because the LLC Entity associated with each Bridge Port is provided via CPU
+ * port, we must not filter these frames but forward them to CPU port.
+ *
+ * In a Bridge, the transmission Port is majorly decided by ingress and egress
+ * rules, FDB, and spanning tree Port State functions of the Forwarding Process.
+ * For link-local frames, only CPU port should be designated as destination port
+ * in the FDB, and the other functions of the Forwarding Process must not
+ * interfere with the decision of the transmission Port. We call this process
+ * trapping frames to CPU port.
+ *
+ * Therefore, on the switch with CPU port architecture, link-local frames must
+ * be trapped to CPU port, and certain link-local frames received by a Port of a
+ * Bridge comprising a TPMR component or an S-VLAN component must be excluded
+ * from it.
+ *
+ * A Bridge of the switch with CPU port architecture cannot comprise a Two-Port
+ * MAC Relay (TPMR) component as a TPMR component supports only a subset of the
+ * functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port
+ * doesn't count) of this architecture will either function as a standard MAC
+ * Bridge or a standard VLAN Bridge.
+ *
+ * Therefore, a Bridge of this architecture can only comprise S-VLAN components,
+ * C-VLAN components, or MAC Bridge components. Since there's no TPMR component,
+ * we don't need to relay PDUs using the destination addresses specified on the
+ * Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge
+ * section where they must be relayed by TPMR components.
+ *
+ * One option to trap link-local frames to CPU port is to add static FDB entries
+ * with CPU port designated as destination port. However, because that
+ * Independent VLAN Learning (IVL) is being used on every VID, each entry only
+ * applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC
+ * Bridge component or a C-VLAN component, there would have to be 16 times 4096
+ * entries. This switch intellectual property can only hold a maximum of 2048
+ * entries. Using this option, there also isn't a mechanism to prevent
+ * link-local frames from being discarded when the spanning tree Port State of
+ * the reception Port is discarding.
+ *
+ * The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4
+ * registers. Whilst this applies to every VID, it doesn't contain all of the
+ * reserved MAC addresses without affecting the remaining Standard Group MAC
+ * Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the
+ * remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination
+ * addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF
+ * destination addresses which may be relayed by MAC Bridges or VLAN Bridges.
+ * The latter option provides better but not complete conformance.
+ *
+ * This switch intellectual property also does not provide a mechanism to trap
+ * link-local frames with specific destination addresses to CPU port by Bridge,
+ * to conform to the filtering rules for the distinct Bridge components.
+ *
+ * Therefore, regardless of the type of the Bridge component, link-local frames
+ * with these destination addresses will be trapped to CPU port:
+ *
+ * 01-80-C2-00-00-[00,01,02,03,0E]
+ *
+ * In a Bridge comprising a MAC Bridge component or a C-VLAN component:
+ *
+ *   Link-local frames with these destination addresses won't be trapped to CPU
+ *   port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ *   01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F]
+ *
+ * In a Bridge comprising an S-VLAN component:
+ *
+ *   Link-local frames with these destination addresses will be trapped to CPU
+ *   port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ *   01-80-C2-00-00-00
+ *
+ *   Link-local frames with these destination addresses won't be trapped to CPU
+ *   port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ *   01-80-C2-00-00-[04,05,06,07,08,09,0A]
+ *
+ * To trap link-local frames to CPU port as conformant as this switch
+ * intellectual property can allow, link-local frames are made to be regarded as
+ * Bridge Protocol Data Units (BPDUs). This is because this switch intellectual
+ * property only lets the frames regarded as BPDUs bypass the spanning tree Port
+ * State function of the Forwarding Process.
+ *
+ * The only remaining interference is the ingress rules. When the reception Port
+ * has no PVID assigned on software, VLAN-untagged frames won't be allowed in.
+ * There doesn't seem to be a mechanism on the switch intellectual property to
+ * have link-local frames bypass this function of the Forwarding Process.
  */
 static void
 mt753x_trap_frames(struct mt7530_priv *priv)
@@ -971,35 +1124,43 @@ mt753x_trap_frames(struct mt7530_priv *priv)
        /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them
         * VLAN-untagged.
         */
-       mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK |
-                  MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
-                  MT753X_BPDU_PORT_FW_MASK,
-                  MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
-                  MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
-                  MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
-                  MT753X_BPDU_CPU_ONLY);
+       mt7530_rmw(priv, MT753X_BPC,
+                  MT753X_PAE_BPDU_FR | MT753X_PAE_EG_TAG_MASK |
+                          MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
+                          MT753X_BPDU_PORT_FW_MASK,
+                  MT753X_PAE_BPDU_FR |
+                          MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+                          MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+                          MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+                          MT753X_BPDU_CPU_ONLY);
 
        /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress
         * them VLAN-untagged.
         */
-       mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK |
-                  MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK |
-                  MT753X_R01_PORT_FW_MASK,
-                  MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
-                  MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
-                  MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
-                  MT753X_BPDU_CPU_ONLY);
+       mt7530_rmw(priv, MT753X_RGAC1,
+                  MT753X_R02_BPDU_FR | MT753X_R02_EG_TAG_MASK |
+                          MT753X_R02_PORT_FW_MASK | MT753X_R01_BPDU_FR |
+                          MT753X_R01_EG_TAG_MASK | MT753X_R01_PORT_FW_MASK,
+                  MT753X_R02_BPDU_FR |
+                          MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+                          MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+                          MT753X_R01_BPDU_FR |
+                          MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+                          MT753X_BPDU_CPU_ONLY);
 
        /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress
         * them VLAN-untagged.
         */
-       mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK |
-                  MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK |
-                  MT753X_R03_PORT_FW_MASK,
-                  MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
-                  MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
-                  MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
-                  MT753X_BPDU_CPU_ONLY);
+       mt7530_rmw(priv, MT753X_RGAC2,
+                  MT753X_R0E_BPDU_FR | MT753X_R0E_EG_TAG_MASK |
+                          MT753X_R0E_PORT_FW_MASK | MT753X_R03_BPDU_FR |
+                          MT753X_R03_EG_TAG_MASK | MT753X_R03_PORT_FW_MASK,
+                  MT753X_R0E_BPDU_FR |
+                          MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+                          MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+                          MT753X_R03_BPDU_FR |
+                          MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+                          MT753X_BPDU_CPU_ONLY);
 }
 
 static void
@@ -1722,14 +1883,16 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
 
 static int mt753x_mirror_port_get(unsigned int id, u32 val)
 {
-       return (id == ID_MT7531) ? MT7531_MIRROR_PORT_GET(val) :
-                                  MIRROR_PORT(val);
+       return (id == ID_MT7531 || id == ID_MT7988) ?
+                      MT7531_MIRROR_PORT_GET(val) :
+                      MIRROR_PORT(val);
 }
 
 static int mt753x_mirror_port_set(unsigned int id, u32 val)
 {
-       return (id == ID_MT7531) ? MT7531_MIRROR_PORT_SET(val) :
-                                  MIRROR_PORT(val);
+       return (id == ID_MT7531 || id == ID_MT7988) ?
+                      MT7531_MIRROR_PORT_SET(val) :
+                      MIRROR_PORT(val);
 }
 
 static int mt753x_port_mirror_add(struct dsa_switch *ds, int port,
@@ -2319,6 +2482,9 @@ mt7530_setup(struct dsa_switch *ds)
                           PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
        }
 
+       /* Allow mirroring frames received on the local port (monitor port). */
+       mt7530_set(priv, MT753X_AGC, LOCAL_EN);
+
        /* Setup VLAN ID 0 for VLAN-unaware bridges */
        ret = mt7530_setup_vlan0(priv);
        if (ret)
@@ -2430,6 +2596,9 @@ mt7531_setup_common(struct dsa_switch *ds)
                           PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
        }
 
+       /* Allow mirroring frames received on the local port (monitor port). */
+       mt7530_set(priv, MT753X_AGC, LOCAL_EN);
+
        /* Flush the FDB table */
        ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL);
        if (ret < 0)
@@ -2505,18 +2674,25 @@ mt7531_setup(struct dsa_switch *ds)
        mt7530_rmw(priv, MT7531_GPIO_MODE0, MT7531_GPIO0_MASK,
                   MT7531_GPIO0_INTERRUPT);
 
-       /* Enable PHY core PLL, since phy_device has not yet been created
-        * provided for phy_[read,write]_mmd_indirect is called, we provide
-        * our own mt7531_ind_mmd_phy_[read,write] to complete this
-        * function.
+       /* Enable Energy-Efficient Ethernet (EEE) and PHY core PLL, since
+        * phy_device has not yet been created provided for
+        * phy_[read,write]_mmd_indirect is called, we provide our own
+        * mt7531_ind_mmd_phy_[read,write] to complete this function.
         */
        val = mt7531_ind_c45_phy_read(priv, MT753X_CTRL_PHY_ADDR,
                                      MDIO_MMD_VEND2, CORE_PLL_GROUP4);
-       val |= MT7531_PHY_PLL_BYPASS_MODE;
+       val |= MT7531_RG_SYSPLL_DMY2 | MT7531_PHY_PLL_BYPASS_MODE;
        val &= ~MT7531_PHY_PLL_OFF;
        mt7531_ind_c45_phy_write(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2,
                                 CORE_PLL_GROUP4, val);
 
+       /* Disable EEE advertisement on the switch PHYs. */
+       for (i = MT753X_CTRL_PHY_ADDR;
+            i < MT753X_CTRL_PHY_ADDR + MT7530_NUM_PHYS; i++) {
+               mt7531_ind_c45_phy_write(priv, i, MDIO_MMD_AN, MDIO_AN_EEE_ADV,
+                                        0);
+       }
+
        mt7531_setup_common(ds);
 
        /* Setup VLAN ID 0 for VLAN-unaware bridges */
index d17b318e6ee4882ed8b6f6668eaa57a99a38d184..a08053390b285e3f27d0bc08a3acba3effd17007 100644 (file)
@@ -32,6 +32,10 @@ enum mt753x_id {
 #define SYSC_REG_RSTCTRL               0x34
 #define  RESET_MCM                     BIT(2)
 
+/* Register for ARL global control */
+#define MT753X_AGC                     0xc
+#define  LOCAL_EN                      BIT(7)
+
 /* Registers to mac forward control for unknown frames */
 #define MT7530_MFC                     0x10
 #define  BC_FFP(x)                     (((x) & 0xff) << 24)
@@ -65,6 +69,7 @@ enum mt753x_id {
 
 /* Registers for BPDU and PAE frame control*/
 #define MT753X_BPC                     0x24
+#define  MT753X_PAE_BPDU_FR            BIT(25)
 #define  MT753X_PAE_EG_TAG_MASK                GENMASK(24, 22)
 #define  MT753X_PAE_EG_TAG(x)          FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x)
 #define  MT753X_PAE_PORT_FW_MASK       GENMASK(18, 16)
@@ -75,20 +80,24 @@ enum mt753x_id {
 
 /* Register for :01 and :02 MAC DA frame control */
 #define MT753X_RGAC1                   0x28
+#define  MT753X_R02_BPDU_FR            BIT(25)
 #define  MT753X_R02_EG_TAG_MASK                GENMASK(24, 22)
 #define  MT753X_R02_EG_TAG(x)          FIELD_PREP(MT753X_R02_EG_TAG_MASK, x)
 #define  MT753X_R02_PORT_FW_MASK       GENMASK(18, 16)
 #define  MT753X_R02_PORT_FW(x)         FIELD_PREP(MT753X_R02_PORT_FW_MASK, x)
+#define  MT753X_R01_BPDU_FR            BIT(9)
 #define  MT753X_R01_EG_TAG_MASK                GENMASK(8, 6)
 #define  MT753X_R01_EG_TAG(x)          FIELD_PREP(MT753X_R01_EG_TAG_MASK, x)
 #define  MT753X_R01_PORT_FW_MASK       GENMASK(2, 0)
 
 /* Register for :03 and :0E MAC DA frame control */
 #define MT753X_RGAC2                   0x2c
+#define  MT753X_R0E_BPDU_FR            BIT(25)
 #define  MT753X_R0E_EG_TAG_MASK                GENMASK(24, 22)
 #define  MT753X_R0E_EG_TAG(x)          FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x)
 #define  MT753X_R0E_PORT_FW_MASK       GENMASK(18, 16)
 #define  MT753X_R0E_PORT_FW(x)         FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x)
+#define  MT753X_R03_BPDU_FR            BIT(9)
 #define  MT753X_R03_EG_TAG_MASK                GENMASK(8, 6)
 #define  MT753X_R03_EG_TAG(x)          FIELD_PREP(MT753X_R03_EG_TAG_MASK, x)
 #define  MT753X_R03_PORT_FW_MASK       GENMASK(2, 0)
@@ -616,6 +625,7 @@ enum mt7531_clk_skew {
 #define  RG_SYSPLL_DDSFBK_EN           BIT(12)
 #define  RG_SYSPLL_BIAS_EN             BIT(11)
 #define  RG_SYSPLL_BIAS_LPF_EN         BIT(10)
+#define  MT7531_RG_SYSPLL_DMY2         BIT(6)
 #define  MT7531_PHY_PLL_OFF            BIT(5)
 #define  MT7531_PHY_PLL_BYPASS_MODE    BIT(4)
 
index 9e9e4a03f1a8c9bd4c8d68cb20340157c4547e80..2d8a66ea82fab7f0a023ab469ccc33321d0b4ba3 100644 (file)
@@ -351,7 +351,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
                        ENA_COM_BOUNCE_BUFFER_CNTRL_CNT;
                io_sq->bounce_buf_ctrl.next_to_use = 0;
 
-               size = io_sq->bounce_buf_ctrl.buffer_size *
+               size = (size_t)io_sq->bounce_buf_ctrl.buffer_size *
                        io_sq->bounce_buf_ctrl.buffers_num;
 
                dev_node = dev_to_node(ena_dev->dmadev);
index 09e7da1a69c9f0c8141e03be445c2589e9d3a999..be5acfa41ee0ce4d80605e0bcdc6dc743c421f42 100644 (file)
@@ -718,8 +718,11 @@ void ena_unmap_tx_buff(struct ena_ring *tx_ring,
 static void ena_free_tx_bufs(struct ena_ring *tx_ring)
 {
        bool print_once = true;
+       bool is_xdp_ring;
        u32 i;
 
+       is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid);
+
        for (i = 0; i < tx_ring->ring_size; i++) {
                struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
 
@@ -739,10 +742,15 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring)
 
                ena_unmap_tx_buff(tx_ring, tx_info);
 
-               dev_kfree_skb_any(tx_info->skb);
+               if (is_xdp_ring)
+                       xdp_return_frame(tx_info->xdpf);
+               else
+                       dev_kfree_skb_any(tx_info->skb);
        }
-       netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
-                                                 tx_ring->qid));
+
+       if (!is_xdp_ring)
+               netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
+                                                         tx_ring->qid));
 }
 
 static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
@@ -3481,10 +3489,11 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
 {
        struct ena_ring *tx_ring;
        struct ena_ring *rx_ring;
-       int i, budget, rc;
+       int qid, budget, rc;
        int io_queue_count;
 
        io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
+
        /* Make sure the driver doesn't turn the device in other process */
        smp_rmb();
 
@@ -3497,27 +3506,29 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
        if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
                return;
 
-       budget = ENA_MONITORED_TX_QUEUES;
+       budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES);
 
-       for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
-               tx_ring = &adapter->tx_ring[i];
-               rx_ring = &adapter->rx_ring[i];
+       qid = adapter->last_monitored_tx_qid;
+
+       while (budget) {
+               qid = (qid + 1) % io_queue_count;
+
+               tx_ring = &adapter->tx_ring[qid];
+               rx_ring = &adapter->rx_ring[qid];
 
                rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
                if (unlikely(rc))
                        return;
 
-               rc =  !ENA_IS_XDP_INDEX(adapter, i) ?
+               rc =  !ENA_IS_XDP_INDEX(adapter, qid) ?
                        check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
                if (unlikely(rc))
                        return;
 
                budget--;
-               if (!budget)
-                       break;
        }
 
-       adapter->last_monitored_tx_qid = i % io_queue_count;
+       adapter->last_monitored_tx_qid = qid;
 }
 
 /* trigger napi schedule after 2 consecutive detections */
index 337c435d3ce998b1b8f69a86f8be7997e1ff99c8..5b175e7e92a10ba19917b9c5e63d89bc1f2a8dd5 100644 (file)
@@ -89,7 +89,7 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
 
        rc = ena_xdp_tx_map_frame(tx_ring, tx_info, xdpf, &ena_tx_ctx);
        if (unlikely(rc))
-               return rc;
+               goto err;
 
        ena_tx_ctx.req_id = req_id;
 
@@ -112,7 +112,9 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
 
 error_unmap_dma:
        ena_unmap_tx_buff(tx_ring, tx_info);
+err:
        tx_info->xdpf = NULL;
+
        return rc;
 }
 
index 9662ee72814c0c64fab3ca08db99e888faa51124..536635e5772799e17ef31857c655735c5ef88865 100644 (file)
@@ -593,6 +593,16 @@ err_out:
        pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
 }
 
+void pdsc_pci_reset_thread(struct work_struct *work)
+{
+       struct pdsc *pdsc = container_of(work, struct pdsc, pci_reset_work);
+       struct pci_dev *pdev = pdsc->pdev;
+
+       pci_dev_get(pdev);
+       pci_reset_function(pdev);
+       pci_dev_put(pdev);
+}
+
 static void pdsc_check_pci_health(struct pdsc *pdsc)
 {
        u8 fw_status;
@@ -607,7 +617,8 @@ static void pdsc_check_pci_health(struct pdsc *pdsc)
        if (fw_status != PDS_RC_BAD_PCI)
                return;
 
-       pci_reset_function(pdsc->pdev);
+       /* prevent deadlock between pdsc_reset_prepare and pdsc_health_thread */
+       queue_work(pdsc->wq, &pdsc->pci_reset_work);
 }
 
 void pdsc_health_thread(struct work_struct *work)
index 92d7657dd6147e7770b7d72f8ee25deb303370b9..a3e17a0c187a6a4a5eaab7207f7089b2172b8e0e 100644 (file)
@@ -197,6 +197,7 @@ struct pdsc {
        struct pdsc_qcq notifyqcq;
        u64 last_eid;
        struct pdsc_viftype *viftype_status;
+       struct work_struct pci_reset_work;
 };
 
 /** enum pds_core_dbell_bits - bitwise composition of dbell values.
@@ -313,5 +314,6 @@ int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
 
 void pdsc_fw_down(struct pdsc *pdsc);
 void pdsc_fw_up(struct pdsc *pdsc);
+void pdsc_pci_reset_thread(struct work_struct *work);
 
 #endif /* _PDSC_H_ */
index e494e1298dc9a36c175a6b86623450c0da55ad7b..495ef4ef8c103d6fcacd8b155dbc09e42d68345c 100644 (file)
@@ -229,6 +229,9 @@ int pdsc_devcmd_reset(struct pdsc *pdsc)
                .reset.opcode = PDS_CORE_CMD_RESET,
        };
 
+       if (!pdsc_is_fw_running(pdsc))
+               return 0;
+
        return pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
 }
 
index ab6133e7db422d3579291e3476efb48e5e0be06c..660268ff95623fbe9c86ee2078913c85a3579a5a 100644 (file)
@@ -239,6 +239,7 @@ static int pdsc_init_pf(struct pdsc *pdsc)
        snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->uid);
        pdsc->wq = create_singlethread_workqueue(wq_name);
        INIT_WORK(&pdsc->health_work, pdsc_health_thread);
+       INIT_WORK(&pdsc->pci_reset_work, pdsc_pci_reset_thread);
        timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
        pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;
 
index 493b724848c8f44122abd1b1fb340e97abc30a09..57e61f9631678edf31a2ff237fe0301254a396e5 100644 (file)
@@ -11758,6 +11758,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
        /* VF-reps may need to be re-opened after the PF is re-opened */
        if (BNXT_PF(bp))
                bnxt_vf_reps_open(bp);
+       if (bp->ptp_cfg)
+               atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS);
        bnxt_ptp_init_rtc(bp, true);
        bnxt_ptp_cfg_tstamp_filters(bp);
        bnxt_cfg_usr_fltrs(bp);
index 93f9bd55020f277f02fb6ed959bd5e82ec35fd93..195c02dc0683054e03680abff45f3f42d9605192 100644 (file)
@@ -210,6 +210,9 @@ void bnxt_ulp_start(struct bnxt *bp, int err)
        if (err)
                return;
 
+       if (edev->ulp_tbl->msix_requested)
+               bnxt_fill_msix_vecs(bp, edev->msix_entries);
+
        if (aux_priv) {
                struct auxiliary_device *adev;
 
@@ -392,12 +395,13 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp)
        if (!edev)
                goto aux_dev_uninit;
 
+       aux_priv->edev = edev;
+
        ulp = kzalloc(sizeof(*ulp), GFP_KERNEL);
        if (!ulp)
                goto aux_dev_uninit;
 
        edev->ulp_tbl = ulp;
-       aux_priv->edev = edev;
        bp->edev = edev;
        bnxt_set_edev_info(edev, bp);
 
index b890410a2bc0bacd27eab5acd6f87cd8b0110939..688ccb0615ab9f87e7caf9e6fa522444613b2bf3 100644 (file)
@@ -28,6 +28,8 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers,
         * - ICE_TC_FLWR_FIELD_VLAN_TPID (present if specified)
         * - Tunnel flag (present if tunnel)
         */
+       if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS)
+               lkups_cnt++;
 
        if (flags & ICE_TC_FLWR_FIELD_TENANT_ID)
                lkups_cnt++;
@@ -363,6 +365,11 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
        /* Always add direction metadata */
        ice_rule_add_direction_metadata(&list[ICE_TC_METADATA_LKUP_IDX]);
 
+       if (tc_fltr->direction == ICE_ESWITCH_FLTR_EGRESS) {
+               ice_rule_add_src_vsi_metadata(&list[i]);
+               i++;
+       }
+
        rule_info->tun_type = ice_sw_type_from_tunnel(tc_fltr->tunnel_type);
        if (tc_fltr->tunnel_type != TNL_LAST) {
                i = ice_tc_fill_tunnel_outer(flags, tc_fltr, list, i);
@@ -772,7 +779,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
        int ret;
        int i;
 
-       if (!flags || (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) {
+       if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT) {
                NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)");
                return -EOPNOTSUPP;
        }
@@ -820,6 +827,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
 
        /* specify the cookie as filter_rule_id */
        rule_info.fltr_rule_id = fltr->cookie;
+       rule_info.src_vsi = vsi->idx;
 
        ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added);
        if (ret == -EEXIST) {
@@ -1481,7 +1489,10 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
                  (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
                   BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
                   BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
-                  BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
+                  BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
+                  BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
+                  BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) |
+                  BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL))) {
                NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel key used, but device isn't a tunnel");
                return -EOPNOTSUPP;
        } else {
index d39001cdc707ee3f72e80a9be503b438e8ff9eca..00af8888e3291a061e547fe4824df04005276302 100644 (file)
@@ -4819,18 +4819,18 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
                 */
                rvu_write64(rvu, blkaddr, NIX_AF_CFG,
                            rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x40ULL);
+       }
 
-               /* Set chan/link to backpressure TL3 instead of TL2 */
-               rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
+       /* Set chan/link to backpressure TL3 instead of TL2 */
+       rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
 
-               /* Disable SQ manager's sticky mode operation (set TM6 = 0)
-                * This sticky mode is known to cause SQ stalls when multiple
-                * SQs are mapped to same SMQ and transmitting pkts at a time.
-                */
-               cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
-               cfg &= ~BIT_ULL(15);
-               rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
-       }
+       /* Disable SQ manager's sticky mode operation (set TM6 = 0)
+        * This sticky mode is known to cause SQ stalls when multiple
+        * SQs are mapped to same SMQ and transmitting pkts at a time.
+        */
+       cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
+       cfg &= ~BIT_ULL(15);
+       rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
 
        ltdefs = rvu->kpu.lt_def;
        /* Calibrate X2P bus to check if CGX/LBK links are fine */
index 87bdb93cb066e9afba84e5a93556c1efa0d780d9..f4655a8c0705d70b3a4aff580ccb1e437069ca64 100644 (file)
@@ -689,6 +689,7 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
 
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
                struct flow_match_control match;
+               u32 val;
 
                flow_rule_match_control(rule, &match);
                if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
@@ -697,12 +698,14 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
                }
 
                if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
+                       val = match.key->flags & FLOW_DIS_IS_FRAGMENT;
                        if (ntohs(flow_spec->etype) == ETH_P_IP) {
-                               flow_spec->ip_flag = IPV4_FLAG_MORE;
+                               flow_spec->ip_flag = val ? IPV4_FLAG_MORE : 0;
                                flow_mask->ip_flag = IPV4_FLAG_MORE;
                                req->features |= BIT_ULL(NPC_IPFRAG_IPV4);
                        } else if (ntohs(flow_spec->etype) == ETH_P_IPV6) {
-                               flow_spec->next_header = IPPROTO_FRAGMENT;
+                               flow_spec->next_header = val ?
+                                                        IPPROTO_FRAGMENT : 0;
                                flow_mask->next_header = 0xff;
                                req->features |= BIT_ULL(NPC_IPFRAG_IPV6);
                        } else {
index 1e77bbf5d22a1a193602b199c90d205e219595b0..1723e9912ae07ca8c58bfed17e959ab4e0eb4fe5 100644 (file)
@@ -382,6 +382,7 @@ static void otx2_qos_read_txschq_cfg_tl(struct otx2_qos_node *parent,
                otx2_qos_read_txschq_cfg_tl(node, cfg);
                cnt = cfg->static_node_pos[node->level];
                cfg->schq_contig_list[node->level][cnt] = node->schq;
+               cfg->schq_index_used[node->level][cnt] = true;
                cfg->schq_contig[node->level]++;
                cfg->static_node_pos[node->level]++;
                otx2_qos_read_txschq_cfg_schq(node, cfg);
index c895e265ae0ebcde930acf3785ba9ab1b63b65e5..61334a71058c7594a61ca768ce041f92ab238d24 100644 (file)
@@ -1074,13 +1074,13 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev)
 static void
 mtk_wed_stop(struct mtk_wed_device *dev)
 {
+       mtk_wed_dma_disable(dev);
        mtk_wed_set_ext_int(dev, false);
 
        wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0);
        wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0);
        wdma_w32(dev, MTK_WDMA_INT_MASK, 0);
        wdma_w32(dev, MTK_WDMA_INT_GRP2, 0);
-       wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0);
 
        if (!mtk_wed_get_rx_capa(dev))
                return;
@@ -1093,7 +1093,6 @@ static void
 mtk_wed_deinit(struct mtk_wed_device *dev)
 {
        mtk_wed_stop(dev);
-       mtk_wed_dma_disable(dev);
 
        wed_clr(dev, MTK_WED_CTRL,
                MTK_WED_CTRL_WDMA_INT_AGENT_EN |
@@ -2605,9 +2604,6 @@ mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask)
 static void
 mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask)
 {
-       if (!dev->running)
-               return;
-
        mtk_wed_set_ext_int(dev, !!mask);
        wed_w32(dev, MTK_WED_INT_MASK, mask);
 }
index 86f1854698b4e80816b1b55e1d4f4d31fdf0737f..883c044852f1df39852b50b50a80ab31c7bfb091 100644 (file)
@@ -95,9 +95,15 @@ static inline void mlx5e_ptp_metadata_fifo_push(struct mlx5e_ptp_metadata_fifo *
 }
 
 static inline u8
+mlx5e_ptp_metadata_fifo_peek(struct mlx5e_ptp_metadata_fifo *fifo)
+{
+       return fifo->data[fifo->mask & fifo->cc];
+}
+
+static inline void
 mlx5e_ptp_metadata_fifo_pop(struct mlx5e_ptp_metadata_fifo *fifo)
 {
-       return fifo->data[fifo->mask & fifo->cc++];
+       fifo->cc++;
 }
 
 static inline void
index e87e26f2c669c2e39f59a9f656e643fce2b48aae..6743806b8480602a8d0a3d02cddcf2d2c1c82199 100644 (file)
@@ -83,24 +83,25 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
 
        txq_ix = mlx5e_qid_from_qos(chs, node_qid);
 
-       WARN_ON(node_qid > priv->htb_max_qos_sqs);
-       if (node_qid == priv->htb_max_qos_sqs) {
-               struct mlx5e_sq_stats *stats, **stats_list = NULL;
-
-               if (priv->htb_max_qos_sqs == 0) {
-                       stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
-                                             sizeof(*stats_list),
-                                             GFP_KERNEL);
-                       if (!stats_list)
-                               return -ENOMEM;
-               }
+       WARN_ON(node_qid >= mlx5e_htb_cur_leaf_nodes(priv->htb));
+       if (!priv->htb_qos_sq_stats) {
+               struct mlx5e_sq_stats **stats_list;
+
+               stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
+                                     sizeof(*stats_list), GFP_KERNEL);
+               if (!stats_list)
+                       return -ENOMEM;
+
+               WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+       }
+
+       if (!priv->htb_qos_sq_stats[node_qid]) {
+               struct mlx5e_sq_stats *stats;
+
                stats = kzalloc(sizeof(*stats), GFP_KERNEL);
-               if (!stats) {
-                       kvfree(stats_list);
+               if (!stats)
                        return -ENOMEM;
-               }
-               if (stats_list)
-                       WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+
                WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats);
                /* Order htb_max_qos_sqs increment after writing the array pointer.
                 * Pairs with smp_load_acquire in en_stats.c.
index 0ab9db319530258fab6c7e6ad5648e13550069f8..22918b2ef7f128849be838063819ed12509abb45 100644 (file)
@@ -108,7 +108,10 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
        mlx5e_reset_txqsq_cc_pc(sq);
        sq->stats->recover++;
        clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+       rtnl_lock();
        mlx5e_activate_txqsq(sq);
+       rtnl_unlock();
+
        if (sq->channel)
                mlx5e_trigger_napi_icosq(sq->channel);
        else
@@ -179,12 +182,16 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx)
        carrier_ok = netif_carrier_ok(netdev);
        netif_carrier_off(netdev);
 
+       rtnl_lock();
        mlx5e_deactivate_priv_channels(priv);
+       rtnl_unlock();
 
        mlx5e_ptp_close(chs->ptp);
        err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);
 
+       rtnl_lock();
        mlx5e_activate_priv_channels(priv);
+       rtnl_unlock();
 
        /* return carrier back if needed */
        if (carrier_ok)
index bcafb4bf94154ff01969fc851852d4234f5b0c04..8d9a3b5ec973b39aaa1addc8f6c5e3a568a7ab1a 100644 (file)
@@ -179,6 +179,13 @@ u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels)
        return min_t(u32, rqt_size, max_cap_rqt_size);
 }
 
+#define MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH 256
+
+unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void)
+{
+       return MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH / MLX5E_UNIFORM_SPREAD_RQT_FACTOR;
+}
+
 void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt)
 {
        mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn);
index e0bc30308c77000038d151a7caa206c516a6fe9a..2f9e04a8418f143fbf3d01423721d85b2d5b5a2a 100644 (file)
@@ -38,6 +38,7 @@ static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt)
 }
 
 u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels);
+unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void);
 int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn, u32 *vhca_id);
 int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, u32 *vhca_ids,
                             unsigned int num_rqns,
index f675b1926340f9ca4218aa47febac7c5139ab0e9..f66bbc8464645efabc08ebf923fada5e1f79c5fe 100644 (file)
@@ -57,6 +57,7 @@ int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)
 
 void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
 {
+       mutex_lock(selq->state_lock);
        WARN_ON_ONCE(selq->is_prepared);
 
        kvfree(selq->standby);
@@ -67,6 +68,7 @@ void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
 
        kvfree(selq->standby);
        selq->standby = NULL;
+       mutex_unlock(selq->state_lock);
 }
 
 void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params)
index c7f542d0b8f08c635a6fad868a364a8f5f91ba8c..93cf23278d93c2629977f38ee7a39e7cd6c0aaa6 100644 (file)
@@ -46,6 +46,10 @@ struct arfs_table {
        struct hlist_head        rules_hash[ARFS_HASH_SIZE];
 };
 
+enum {
+       MLX5E_ARFS_STATE_ENABLED,
+};
+
 enum arfs_type {
        ARFS_IPV4_TCP,
        ARFS_IPV6_TCP,
@@ -60,6 +64,7 @@ struct mlx5e_arfs_tables {
        spinlock_t                     arfs_lock;
        int                            last_filter_id;
        struct workqueue_struct        *wq;
+       unsigned long                  state;
 };
 
 struct arfs_tuple {
@@ -170,6 +175,8 @@ int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
                        return err;
                }
        }
+       set_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state);
+
        return 0;
 }
 
@@ -455,6 +462,8 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs)
        int i;
        int j;
 
+       clear_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state);
+
        spin_lock_bh(&arfs->arfs_lock);
        mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) {
                hlist_del_init(&rule->hlist);
@@ -627,17 +636,8 @@ static void arfs_handle_work(struct work_struct *work)
        struct mlx5_flow_handle *rule;
 
        arfs = mlx5e_fs_get_arfs(priv->fs);
-       mutex_lock(&priv->state_lock);
-       if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-               spin_lock_bh(&arfs->arfs_lock);
-               hlist_del(&arfs_rule->hlist);
-               spin_unlock_bh(&arfs->arfs_lock);
-
-               mutex_unlock(&priv->state_lock);
-               kfree(arfs_rule);
-               goto out;
-       }
-       mutex_unlock(&priv->state_lock);
+       if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state))
+               return;
 
        if (!arfs_rule->rule) {
                rule = arfs_add_rule(priv, arfs_rule);
@@ -753,6 +753,11 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
                return -EPROTONOSUPPORT;
 
        spin_lock_bh(&arfs->arfs_lock);
+       if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) {
+               spin_unlock_bh(&arfs->arfs_lock);
+               return -EPERM;
+       }
+
        arfs_rule = arfs_find_rule(arfs_t, &fk);
        if (arfs_rule) {
                if (arfs_rule->rxq == rxq_index || work_busy(&arfs_rule->arfs_work)) {
index cc51ce16df14abe530910e063b9072c9e23ff49c..67a29826bb5702b8fd5e81e8673da5b6291bf7f3 100644 (file)
@@ -451,6 +451,34 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
 
        mutex_lock(&priv->state_lock);
 
+       if (mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc == ETH_RSS_HASH_XOR) {
+               unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8();
+
+               if (count > xor8_max_channels) {
+                       err = -EINVAL;
+                       netdev_err(priv->netdev, "%s: Requested number of channels (%d) exceeds the maximum allowed by the XOR8 RSS hfunc (%d)\n",
+                                  __func__, count, xor8_max_channels);
+                       goto out;
+               }
+       }
+
+       /* If RXFH is configured, changing the channels number is allowed only if
+        * it does not require resizing the RSS table. This is because the previous
+        * configuration may no longer be compatible with the new RSS table.
+        */
+       if (netif_is_rxfh_configured(priv->netdev)) {
+               int cur_rqt_size = mlx5e_rqt_size(priv->mdev, cur_params->num_channels);
+               int new_rqt_size = mlx5e_rqt_size(priv->mdev, count);
+
+               if (new_rqt_size != cur_rqt_size) {
+                       err = -EINVAL;
+                       netdev_err(priv->netdev,
+                                  "%s: RXFH is configured, block changing channels number that affects RSS table size (new: %d, current: %d)\n",
+                                  __func__, new_rqt_size, cur_rqt_size);
+                       goto out;
+               }
+       }
+
        /* Don't allow changing the number of channels if HTB offload is active,
         * because the numeration of the QoS SQs will change, while per-queue
         * qdiscs are attached.
@@ -561,12 +589,12 @@ static int mlx5e_get_coalesce(struct net_device *netdev,
 static void
 mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
 {
-       struct mlx5_core_dev *mdev = priv->mdev;
        int tc;
        int i;
 
        for (i = 0; i < priv->channels.num; ++i) {
                struct mlx5e_channel *c = priv->channels.c[i];
+               struct mlx5_core_dev *mdev = c->mdev;
 
                for (tc = 0; tc < c->num_tc; tc++) {
                        mlx5_core_modify_cq_moderation(mdev,
@@ -580,11 +608,11 @@ mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coal
 static void
 mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
 {
-       struct mlx5_core_dev *mdev = priv->mdev;
        int i;
 
        for (i = 0; i < priv->channels.num; ++i) {
                struct mlx5e_channel *c = priv->channels.c[i];
+               struct mlx5_core_dev *mdev = c->mdev;
 
                mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq,
                                               coal->rx_coalesce_usecs,
@@ -1281,17 +1309,30 @@ int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh,
        struct mlx5e_priv *priv = netdev_priv(dev);
        u32 *rss_context = &rxfh->rss_context;
        u8 hfunc = rxfh->hfunc;
+       unsigned int count;
        int err;
 
        mutex_lock(&priv->state_lock);
+
+       count = priv->channels.params.num_channels;
+
+       if (hfunc == ETH_RSS_HASH_XOR) {
+               unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8();
+
+               if (count > xor8_max_channels) {
+                       err = -EINVAL;
+                       netdev_err(priv->netdev, "%s: Cannot set RSS hash function to XOR, current number of channels (%d) exceeds the maximum allowed for XOR8 RSS hfunc (%d)\n",
+                                  __func__, count, xor8_max_channels);
+                       goto unlock;
+               }
+       }
+
        if (*rss_context && rxfh->rss_delete) {
                err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context);
                goto unlock;
        }
 
        if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
-               unsigned int count = priv->channels.params.num_channels;
-
                err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count);
                if (err)
                        goto unlock;
index 91848eae45655fd57d7dcc3365f8ad5094f61f3d..319930c04093ba2d15d498006ad9b3d060a883b7 100644 (file)
@@ -209,8 +209,8 @@ static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data)
                                                      *data,
                                                      mlx5e_devcom_event_mpv,
                                                      priv);
-       if (IS_ERR_OR_NULL(priv->devcom))
-               return -EOPNOTSUPP;
+       if (IS_ERR(priv->devcom))
+               return PTR_ERR(priv->devcom);
 
        if (mlx5_core_is_mp_master(priv->mdev)) {
                mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_UP,
@@ -5726,9 +5726,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
        kfree(priv->tx_rates);
        kfree(priv->txq2sq);
        destroy_workqueue(priv->wq);
-       mutex_lock(&priv->state_lock);
        mlx5e_selq_cleanup(&priv->selq);
-       mutex_unlock(&priv->state_lock);
        free_cpumask_var(priv->scratchpad.cpumask);
 
        for (i = 0; i < priv->htb_max_qos_sqs; i++)
index 2fa076b23fbead06bceb6697e0ebb0238bb5be7e..e21a3b4128ce880478795b023e1ff314e9336dd0 100644 (file)
@@ -398,6 +398,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                     (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) {
                u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata);
 
+               mlx5e_ptp_metadata_fifo_pop(&sq->ptpsq->metadata_freelist);
+
                mlx5e_skb_cb_hwtstamp_init(skb);
                mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb,
                                           metadata_index);
@@ -496,9 +498,6 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 
 err_drop:
        stats->dropped++;
-       if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
-               mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist,
-                                            be32_to_cpu(eseg->flow_table_metadata));
        dev_kfree_skb_any(skb);
        mlx5e_tx_flush(sq);
 }
@@ -657,7 +656,7 @@ static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb,
 {
        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
                eseg->flow_table_metadata =
-                       cpu_to_be32(mlx5e_ptp_metadata_fifo_pop(&ptpsq->metadata_freelist));
+                       cpu_to_be32(mlx5e_ptp_metadata_fifo_peek(&ptpsq->metadata_freelist));
 }
 
 static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
index 3047d7015c5256726338904432ce56845c59c39c..1789800faaeb62841387ed69b0a82aab3283bf46 100644 (file)
@@ -1868,6 +1868,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
        if (err)
                goto abort;
 
+       dev->priv.eswitch = esw;
        err = esw_offloads_init(esw);
        if (err)
                goto reps_err;
@@ -1892,11 +1893,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
                esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
        else
                esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
-       if (MLX5_ESWITCH_MANAGER(dev) &&
-           mlx5_esw_vport_match_metadata_supported(esw))
-               esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
-
-       dev->priv.eswitch = esw;
        BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
 
        esw_info(dev,
@@ -1908,6 +1904,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 
 reps_err:
        mlx5_esw_vports_cleanup(esw);
+       dev->priv.eswitch = NULL;
 abort:
        if (esw->work_queue)
                destroy_workqueue(esw->work_queue);
@@ -1926,7 +1923,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 
        esw_info(esw->dev, "cleanup\n");
 
-       esw->dev->priv.eswitch = NULL;
        destroy_workqueue(esw->work_queue);
        WARN_ON(refcount_read(&esw->qos.refcnt));
        mutex_destroy(&esw->state_lock);
@@ -1937,6 +1933,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
        mutex_destroy(&esw->offloads.encap_tbl_lock);
        mutex_destroy(&esw->offloads.decap_tbl_lock);
        esw_offloads_cleanup(esw);
+       esw->dev->priv.eswitch = NULL;
        mlx5_esw_vports_cleanup(esw);
        debugfs_remove_recursive(esw->debugfs_root);
        devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params,
index baaae628b0a0f6510e2c350cbab0b6309b32da52..844d3e3a65ddf04c6e326127b1b1c05ed351b3a7 100644 (file)
@@ -43,6 +43,7 @@
 #include "rdma.h"
 #include "en.h"
 #include "fs_core.h"
+#include "lib/mlx5.h"
 #include "lib/devcom.h"
 #include "lib/eq.h"
 #include "lib/fs_chains.h"
@@ -2476,6 +2477,10 @@ int esw_offloads_init(struct mlx5_eswitch *esw)
        if (err)
                return err;
 
+       if (MLX5_ESWITCH_MANAGER(esw->dev) &&
+           mlx5_esw_vport_match_metadata_supported(esw))
+               esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
        err = devl_params_register(priv_to_devlink(esw->dev),
                                   esw_devlink_params,
                                   ARRAY_SIZE(esw_devlink_params));
@@ -3055,7 +3060,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key)
                                                     key,
                                                     mlx5_esw_offloads_devcom_event,
                                                     esw);
-       if (IS_ERR_OR_NULL(esw->devcom))
+       if (IS_ERR(esw->devcom))
                return;
 
        mlx5_devcom_send_event(esw->devcom,
@@ -3707,6 +3712,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
        if (esw_mode_from_devlink(mode, &mlx5_mode))
                return -EINVAL;
 
+       if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && mlx5_get_sd(esw->dev)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Can't change E-Switch mode to switchdev when multi-PF netdev (Socket Direct) is configured.");
+               return -EPERM;
+       }
+
        mlx5_lag_disable_change(esw->dev);
        err = mlx5_esw_try_lock(esw);
        if (err < 0) {
index e6bfa7e4f146caf5b05506beaa6c9aabc6c4f74d..cf085a478e3e4c69ffdd4ee9bb24f0036e27c66d 100644 (file)
@@ -1664,6 +1664,16 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
        return err;
 }
 
+static bool mlx5_pkt_reformat_cmp(struct mlx5_pkt_reformat *p1,
+                                 struct mlx5_pkt_reformat *p2)
+{
+       return p1->owner == p2->owner &&
+               (p1->owner == MLX5_FLOW_RESOURCE_OWNER_FW ?
+                p1->id == p2->id :
+                mlx5_fs_dr_action_get_pkt_reformat_id(p1) ==
+                mlx5_fs_dr_action_get_pkt_reformat_id(p2));
+}
+
 static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
                                struct mlx5_flow_destination *d2)
 {
@@ -1675,8 +1685,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
                     ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
                      (d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
                     ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
-                     (d1->vport.pkt_reformat->id ==
-                      d2->vport.pkt_reformat->id) : true)) ||
+                     mlx5_pkt_reformat_cmp(d1->vport.pkt_reformat,
+                                           d2->vport.pkt_reformat) : true)) ||
                    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
                     d1->ft == d2->ft) ||
                    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
@@ -1808,8 +1818,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
        }
        trace_mlx5_fs_set_fte(fte, false);
 
+       /* Link newly added rules into the tree. */
        for (i = 0; i < handle->num_rules; i++) {
-               if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
+               if (!handle->rule[i]->node.parent) {
                        tree_add_node(&handle->rule[i]->node, &fte->node);
                        trace_mlx5_fs_add_rule(handle->rule[i]);
                }
index d14459e5c04fc515ad682e11ee322aa3891e382f..69d482f7c5a29916688ac0d79d324df5f2596586 100644 (file)
@@ -703,8 +703,10 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev)
                return err;
        }
 
-       if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+       if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
                mlx5_lag_port_sel_destroy(ldev);
+               ldev->buckets = 1;
+       }
        if (mlx5_lag_has_drop_rule(ldev))
                mlx5_lag_drop_rule_cleanup(ldev);
 
index e7d59cfa8708e1617f78b28974977a9588026d1f..7b0766c89f4cf0aac5560e9eb041e564d6531e65 100644 (file)
@@ -220,7 +220,7 @@ mlx5_devcom_register_component(struct mlx5_devcom_dev *devc,
        struct mlx5_devcom_comp *comp;
 
        if (IS_ERR_OR_NULL(devc))
-               return NULL;
+               return ERR_PTR(-EINVAL);
 
        mutex_lock(&comp_list_lock);
        comp = devcom_component_get(devc, id, key, handler);
index 5b28084e8a03c77936e180a06246f9ef0a8dc4bd..dd5d186dc6148f065b986ee5d2363940314816db 100644 (file)
@@ -213,8 +213,8 @@ static int sd_register(struct mlx5_core_dev *dev)
        sd = mlx5_get_sd(dev);
        devcom = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_SD_GROUP,
                                                sd->group_id, NULL, dev);
-       if (!devcom)
-               return -ENOMEM;
+       if (IS_ERR(devcom))
+               return PTR_ERR(devcom);
 
        sd->devcom = devcom;
 
index c2593625c09ad6a9150e03baeda0ae41a1a010be..331ce47f51a17a386213d88db9aa7b3cb15d6b41 100644 (file)
@@ -956,7 +956,7 @@ static void mlx5_register_hca_devcom_comp(struct mlx5_core_dev *dev)
                mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_HCA_PORTS,
                                               mlx5_query_nic_system_image_guid(dev),
                                               NULL, dev);
-       if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp))
+       if (IS_ERR(dev->priv.hca_devcom_comp))
                mlx5_core_err(dev, "Failed to register devcom HCA component\n");
 }
 
@@ -1480,6 +1480,14 @@ int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev)
        if (err)
                goto err_register;
 
+       err = mlx5_crdump_enable(dev);
+       if (err)
+               mlx5_core_err(dev, "mlx5_crdump_enable failed with error code %d\n", err);
+
+       err = mlx5_hwmon_dev_register(dev);
+       if (err)
+               mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
+
        mutex_unlock(&dev->intf_state_mutex);
        return 0;
 
@@ -1505,7 +1513,10 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
        int err;
 
        devl_lock(devlink);
+       devl_register(devlink);
        err = mlx5_init_one_devl_locked(dev);
+       if (err)
+               devl_unregister(devlink);
        devl_unlock(devlink);
        return err;
 }
@@ -1517,6 +1528,8 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
        devl_lock(devlink);
        mutex_lock(&dev->intf_state_mutex);
 
+       mlx5_hwmon_dev_unregister(dev);
+       mlx5_crdump_disable(dev);
        mlx5_unregister_device(dev);
 
        if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
@@ -1534,6 +1547,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
        mlx5_function_teardown(dev, true);
 out:
        mutex_unlock(&dev->intf_state_mutex);
+       devl_unregister(devlink);
        devl_unlock(devlink);
 }
 
@@ -1680,16 +1694,23 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev)
        }
 
        devl_lock(devlink);
+       devl_register(devlink);
+
        err = mlx5_devlink_params_register(priv_to_devlink(dev));
-       devl_unlock(devlink);
        if (err) {
                mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err);
-               goto query_hca_caps_err;
+               goto params_reg_err;
        }
 
+       devl_unlock(devlink);
        return 0;
 
+params_reg_err:
+       devl_unregister(devlink);
+       devl_unlock(devlink);
 query_hca_caps_err:
+       devl_unregister(devlink);
+       devl_unlock(devlink);
        mlx5_function_disable(dev, true);
 out:
        dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
@@ -1702,6 +1723,7 @@ void mlx5_uninit_one_light(struct mlx5_core_dev *dev)
 
        devl_lock(devlink);
        mlx5_devlink_params_unregister(priv_to_devlink(dev));
+       devl_unregister(devlink);
        devl_unlock(devlink);
        if (dev->state != MLX5_DEVICE_STATE_UP)
                return;
@@ -1943,16 +1965,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
                goto err_init_one;
        }
 
-       err = mlx5_crdump_enable(dev);
-       if (err)
-               dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
-
-       err = mlx5_hwmon_dev_register(dev);
-       if (err)
-               mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
-
        pci_save_state(pdev);
-       devlink_register(devlink);
        return 0;
 
 err_init_one:
@@ -1973,16 +1986,9 @@ static void remove_one(struct pci_dev *pdev)
        struct devlink *devlink = priv_to_devlink(dev);
 
        set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
-       /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using
-        * devlink notify APIs.
-        * Hence, we must drain them before unregistering the devlink.
-        */
        mlx5_drain_fw_reset(dev);
        mlx5_drain_health_wq(dev);
-       devlink_unregister(devlink);
        mlx5_sriov_disable(pdev, false);
-       mlx5_hwmon_dev_unregister(dev);
-       mlx5_crdump_disable(dev);
        mlx5_uninit_one(dev);
        mlx5_pci_close(dev);
        mlx5_mdev_uninit(dev);
index 4dcf995cb1a2042c39938ee2f166a6c3d3e6ef24..6bac8ad70ba60bf9982a110f7e115183858e0497 100644 (file)
@@ -19,6 +19,7 @@
 #define MLX5_IRQ_CTRL_SF_MAX 8
 /* min num of vectors for SFs to be enabled */
 #define MLX5_IRQ_VEC_COMP_BASE_SF 2
+#define MLX5_IRQ_VEC_COMP_BASE 1
 
 #define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
 #define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
@@ -246,6 +247,7 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
                return;
        }
 
+       vecidx -= MLX5_IRQ_VEC_COMP_BASE;
        snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
 }
 
@@ -585,7 +587,7 @@ struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
        struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
        struct mlx5_irq_pool *pool = table->pcif_pool;
        struct irq_affinity_desc af_desc;
-       int offset = 1;
+       int offset = MLX5_IRQ_VEC_COMP_BASE;
 
        if (!pool->xa_num_irqs.max)
                offset = 0;
index bc863e1f062e6bd316f6b54f87850e11123bbfea..7ebe712808275a7a1db290040d86c2cd5983c9d7 100644 (file)
@@ -75,7 +75,6 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
                goto peer_devlink_set_err;
        }
 
-       devlink_register(devlink);
        return 0;
 
 peer_devlink_set_err:
@@ -101,7 +100,6 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
        devlink = priv_to_devlink(mdev);
        set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
        mlx5_drain_health_wq(mdev);
-       devlink_unregister(devlink);
        if (mlx5_dev_is_lightweight(mdev))
                mlx5_uninit_one_light(mdev);
        else
index 64f4cc284aea41715abecb1167439efe401951f8..030a5776c937406540645462b5950cd209c37974 100644 (file)
@@ -205,12 +205,11 @@ dr_dump_hex_print(char hex[DR_HEX_SIZE], char *src, u32 size)
 }
 
 static int
-dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
+dr_dump_rule_action_mem(struct seq_file *file, char *buff, const u64 rule_id,
                        struct mlx5dr_rule_action_member *action_mem)
 {
        struct mlx5dr_action *action = action_mem->action;
        const u64 action_id = DR_DBG_PTR_TO_ID(action);
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        u64 hit_tbl_ptr, miss_tbl_ptr;
        u32 hit_tbl_id, miss_tbl_id;
        int ret;
@@ -488,10 +487,9 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
 }
 
 static int
-dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
+dr_dump_rule_mem(struct seq_file *file, char *buff, struct mlx5dr_ste *ste,
                 bool is_rx, const u64 rule_id, u8 format_ver)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        char hw_ste_dump[DR_HEX_SIZE];
        u32 mem_rec_type;
        int ret;
@@ -522,7 +520,8 @@ dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
 }
 
 static int
-dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
+dr_dump_rule_rx_tx(struct seq_file *file, char *buff,
+                  struct mlx5dr_rule_rx_tx *rule_rx_tx,
                   bool is_rx, const u64 rule_id, u8 format_ver)
 {
        struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES];
@@ -533,7 +532,7 @@ dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
                return 0;
 
        while (i--) {
-               ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id,
+               ret = dr_dump_rule_mem(file, buff, ste_arr[i], is_rx, rule_id,
                                       format_ver);
                if (ret < 0)
                        return ret;
@@ -542,7 +541,8 @@ dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
        return 0;
 }
 
-static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
+static noinline_for_stack int
+dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
 {
        struct mlx5dr_rule_action_member *action_mem;
        const u64 rule_id = DR_DBG_PTR_TO_ID(rule);
@@ -565,19 +565,19 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
                return ret;
 
        if (rx->nic_matcher) {
-               ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver);
+               ret = dr_dump_rule_rx_tx(file, buff, rx, true, rule_id, format_ver);
                if (ret < 0)
                        return ret;
        }
 
        if (tx->nic_matcher) {
-               ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver);
+               ret = dr_dump_rule_rx_tx(file, buff, tx, false, rule_id, format_ver);
                if (ret < 0)
                        return ret;
        }
 
        list_for_each_entry(action_mem, &rule->rule_actions_list, list) {
-               ret = dr_dump_rule_action_mem(file, rule_id, action_mem);
+               ret = dr_dump_rule_action_mem(file, buff, rule_id, action_mem);
                if (ret < 0)
                        return ret;
        }
@@ -586,10 +586,10 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
 }
 
 static int
-dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
+dr_dump_matcher_mask(struct seq_file *file, char *buff,
+                    struct mlx5dr_match_param *mask,
                     u8 criteria, const u64 matcher_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        char dump[DR_HEX_SIZE];
        int ret;
 
@@ -681,10 +681,10 @@ dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
 }
 
 static int
-dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
+dr_dump_matcher_builder(struct seq_file *file, char *buff,
+                       struct mlx5dr_ste_build *builder,
                        u32 index, bool is_rx, const u64 matcher_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        int ret;
 
        ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -702,11 +702,10 @@ dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
 }
 
 static int
-dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
+dr_dump_matcher_rx_tx(struct seq_file *file, char *buff, bool is_rx,
                      struct mlx5dr_matcher_rx_tx *matcher_rx_tx,
                      const u64 matcher_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        enum dr_dump_rec_type rec_type;
        u64 s_icm_addr, e_icm_addr;
        int i, ret;
@@ -731,7 +730,7 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
                return ret;
 
        for (i = 0; i < matcher_rx_tx->num_of_builders; i++) {
-               ret = dr_dump_matcher_builder(file,
+               ret = dr_dump_matcher_builder(file, buff,
                                              &matcher_rx_tx->ste_builder[i],
                                              i, is_rx, matcher_id);
                if (ret < 0)
@@ -741,7 +740,7 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
        return 0;
 }
 
-static int
+static noinline_for_stack int
 dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
 {
        struct mlx5dr_matcher_rx_tx *rx = &matcher->rx;
@@ -763,19 +762,19 @@ dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
        if (ret)
                return ret;
 
-       ret = dr_dump_matcher_mask(file, &matcher->mask,
+       ret = dr_dump_matcher_mask(file, buff, &matcher->mask,
                                   matcher->match_criteria, matcher_id);
        if (ret < 0)
                return ret;
 
        if (rx->nic_tbl) {
-               ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id);
+               ret = dr_dump_matcher_rx_tx(file, buff, true, rx, matcher_id);
                if (ret < 0)
                        return ret;
        }
 
        if (tx->nic_tbl) {
-               ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id);
+               ret = dr_dump_matcher_rx_tx(file, buff, false, tx, matcher_id);
                if (ret < 0)
                        return ret;
        }
@@ -803,11 +802,10 @@ dr_dump_matcher_all(struct seq_file *file, struct mlx5dr_matcher *matcher)
 }
 
 static int
-dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
+dr_dump_table_rx_tx(struct seq_file *file, char *buff, bool is_rx,
                    struct mlx5dr_table_rx_tx *table_rx_tx,
                    const u64 table_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        enum dr_dump_rec_type rec_type;
        u64 s_icm_addr;
        int ret;
@@ -829,7 +827,8 @@ dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
        return 0;
 }
 
-static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
+static noinline_for_stack int
+dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
 {
        struct mlx5dr_table_rx_tx *rx = &table->rx;
        struct mlx5dr_table_rx_tx *tx = &table->tx;
@@ -848,14 +847,14 @@ static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
                return ret;
 
        if (rx->nic_dmn) {
-               ret = dr_dump_table_rx_tx(file, true, rx,
+               ret = dr_dump_table_rx_tx(file, buff, true, rx,
                                          DR_DBG_PTR_TO_ID(table));
                if (ret < 0)
                        return ret;
        }
 
        if (tx->nic_dmn) {
-               ret = dr_dump_table_rx_tx(file, false, tx,
+               ret = dr_dump_table_rx_tx(file, buff, false, tx,
                                          DR_DBG_PTR_TO_ID(table));
                if (ret < 0)
                        return ret;
@@ -881,10 +880,10 @@ static int dr_dump_table_all(struct seq_file *file, struct mlx5dr_table *tbl)
 }
 
 static int
-dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
+dr_dump_send_ring(struct seq_file *file, char *buff,
+                 struct mlx5dr_send_ring *ring,
                  const u64 domain_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        int ret;
 
        ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -902,13 +901,13 @@ dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
        return 0;
 }
 
-static noinline_for_stack int
+static int
 dr_dump_domain_info_flex_parser(struct seq_file *file,
+                               char *buff,
                                const char *flex_parser_name,
                                const u8 flex_parser_value,
                                const u64 domain_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        int ret;
 
        ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -925,11 +924,11 @@ dr_dump_domain_info_flex_parser(struct seq_file *file,
        return 0;
 }
 
-static noinline_for_stack int
-dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
+static int
+dr_dump_domain_info_caps(struct seq_file *file, char *buff,
+                        struct mlx5dr_cmd_caps *caps,
                         const u64 domain_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        struct mlx5dr_cmd_vport_cap *vport_caps;
        unsigned long i, vports_num;
        int ret;
@@ -969,34 +968,35 @@ dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
 }
 
 static int
-dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info,
+dr_dump_domain_info(struct seq_file *file, char *buff,
+                   struct mlx5dr_domain_info *info,
                    const u64 domain_id)
 {
        int ret;
 
-       ret = dr_dump_domain_info_caps(file, &info->caps, domain_id);
+       ret = dr_dump_domain_info_caps(file, buff, &info->caps, domain_id);
        if (ret < 0)
                return ret;
 
-       ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0",
+       ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw0",
                                              info->caps.flex_parser_id_icmp_dw0,
                                              domain_id);
        if (ret < 0)
                return ret;
 
-       ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1",
+       ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw1",
                                              info->caps.flex_parser_id_icmp_dw1,
                                              domain_id);
        if (ret < 0)
                return ret;
 
-       ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0",
+       ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw0",
                                              info->caps.flex_parser_id_icmpv6_dw0,
                                              domain_id);
        if (ret < 0)
                return ret;
 
-       ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1",
+       ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw1",
                                              info->caps.flex_parser_id_icmpv6_dw1,
                                              domain_id);
        if (ret < 0)
@@ -1032,12 +1032,12 @@ dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn)
        if (ret)
                return ret;
 
-       ret = dr_dump_domain_info(file, &dmn->info, domain_id);
+       ret = dr_dump_domain_info(file, buff, &dmn->info, domain_id);
        if (ret < 0)
                return ret;
 
        if (dmn->info.supp_sw_steering) {
-               ret = dr_dump_send_ring(file, dmn->send_ring, domain_id);
+               ret = dr_dump_send_ring(file, buff, dmn->send_ring, domain_id);
                if (ret < 0)
                        return ret;
        }
index e5ec0a363aff84d44470b8cf2374ef36a98c6b06..31f75b4a67fd79eb1c7c08096a5a11a163a96b73 100644 (file)
@@ -368,7 +368,6 @@ union ks8851_tx_hdr {
  * @rdfifo: FIFO read callback
  * @wrfifo: FIFO write callback
  * @start_xmit: start_xmit() implementation callback
- * @rx_skb: rx_skb() implementation callback
  * @flush_tx_work: flush_tx_work() implementation callback
  *
  * The @statelock is used to protect information in the structure which may
@@ -423,8 +422,6 @@ struct ks8851_net {
                                          struct sk_buff *txp, bool irq);
        netdev_tx_t             (*start_xmit)(struct sk_buff *skb,
                                              struct net_device *dev);
-       void                    (*rx_skb)(struct ks8851_net *ks,
-                                         struct sk_buff *skb);
        void                    (*flush_tx_work)(struct ks8851_net *ks);
 };
 
index 0bf13b38b8f5b907b464649422331421337d8411..d4cdf3d4f55257ad5ace878f87abf5d96f67b001 100644 (file)
@@ -231,16 +231,6 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt)
                   rxpkt[12], rxpkt[13], rxpkt[14], rxpkt[15]);
 }
 
-/**
- * ks8851_rx_skb - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb)
-{
-       ks->rx_skb(ks, skb);
-}
-
 /**
  * ks8851_rx_pkts - receive packets from the host
  * @ks: The device information.
@@ -309,7 +299,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
                                        ks8851_dbg_dumpkkt(ks, rxpkt);
 
                                skb->protocol = eth_type_trans(skb, ks->netdev);
-                               ks8851_rx_skb(ks, skb);
+                               __netif_rx(skb);
 
                                ks->netdev->stats.rx_packets++;
                                ks->netdev->stats.rx_bytes += rxlen;
@@ -340,6 +330,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
        unsigned long flags;
        unsigned int status;
 
+       local_bh_disable();
+
        ks8851_lock(ks, &flags);
 
        status = ks8851_rdreg16(ks, KS_ISR);
@@ -416,6 +408,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
        if (status & IRQ_LCI)
                mii_check_link(&ks->mii);
 
+       local_bh_enable();
+
        return IRQ_HANDLED;
 }
 
index 2a7f29854267030d3503a477fc5aef361761cdd8..381b9cd285ebd0bd3f5cf1e568e9b23cf0ba9c85 100644 (file)
@@ -210,16 +210,6 @@ static void ks8851_wrfifo_par(struct ks8851_net *ks, struct sk_buff *txp,
        iowrite16_rep(ksp->hw_addr, txp->data, len / 2);
 }
 
-/**
- * ks8851_rx_skb_par - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_par(struct ks8851_net *ks, struct sk_buff *skb)
-{
-       netif_rx(skb);
-}
-
 static unsigned int ks8851_rdreg16_par_txqcr(struct ks8851_net *ks)
 {
        return ks8851_rdreg16_par(ks, KS_TXQCR);
@@ -298,7 +288,6 @@ static int ks8851_probe_par(struct platform_device *pdev)
        ks->rdfifo = ks8851_rdfifo_par;
        ks->wrfifo = ks8851_wrfifo_par;
        ks->start_xmit = ks8851_start_xmit_par;
-       ks->rx_skb = ks8851_rx_skb_par;
 
 #define STD_IRQ (IRQ_LCI |     /* Link Change */       \
                 IRQ_RXI |      /* RX done */           \
index 2f803377c9f9dd916153fbac269423006959e239..670c1de966db88030332a3da1d99416284a757a2 100644 (file)
@@ -298,16 +298,6 @@ static unsigned int calc_txlen(unsigned int len)
        return ALIGN(len + 4, 4);
 }
 
-/**
- * ks8851_rx_skb_spi - receive skbuff
- * @ks: The device state
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb)
-{
-       netif_rx(skb);
-}
-
 /**
  * ks8851_tx_work - process tx packet(s)
  * @work: The work strucutre what was scheduled.
@@ -435,7 +425,6 @@ static int ks8851_probe_spi(struct spi_device *spi)
        ks->rdfifo = ks8851_rdfifo_spi;
        ks->wrfifo = ks8851_wrfifo_spi;
        ks->start_xmit = ks8851_start_xmit_spi;
-       ks->rx_skb = ks8851_rx_skb_spi;
        ks->flush_tx_work = ks8851_flush_tx_work_spi;
 
 #define STD_IRQ (IRQ_LCI |     /* Link Change */       \
index 3a1b1a1f5a1951069f9c3e5ee5e3a10c1be55eb6..60dd2fd603a8554f02f5d649e8d290dc074b5d72 100644 (file)
@@ -731,7 +731,7 @@ static int sparx5_port_pcs_low_set(struct sparx5 *sparx5,
        bool sgmii = false, inband_aneg = false;
        int err;
 
-       if (port->conf.inband) {
+       if (conf->inband) {
                if (conf->portmode == PHY_INTERFACE_MODE_SGMII ||
                    conf->portmode == PHY_INTERFACE_MODE_QSGMII)
                        inband_aneg = true; /* Cisco-SGMII in-band-aneg */
@@ -948,7 +948,7 @@ int sparx5_port_pcs_set(struct sparx5 *sparx5,
        if (err)
                return -EINVAL;
 
-       if (port->conf.inband) {
+       if (conf->inband) {
                /* Enable/disable 1G counters in ASM */
                spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev),
                         ASM_PORT_CFG_CSC_STAT_DIS,
index 523e0c470894f7fdcf8a995fb821ff146f08fcd9..55f255a3c9db69b92d5743bd42c34cbaba46a0a8 100644 (file)
@@ -36,6 +36,27 @@ struct sparx5_tc_flower_template {
        u16 l3_proto; /* protocol specified in the template */
 };
 
+/* SparX-5 VCAP fragment types:
+ * 0 = no fragment, 1 = initial fragment,
+ * 2 = suspicious fragment, 3 = valid follow-up fragment
+ */
+enum {                   /* key / mask */
+       FRAG_NOT   = 0x03, /* 0 / 3 */
+       FRAG_SOME  = 0x11, /* 1 / 1 */
+       FRAG_FIRST = 0x13, /* 1 / 3 */
+       FRAG_LATER = 0x33, /* 3 / 3 */
+       FRAG_INVAL = 0xff, /* invalid */
+};
+
+/* Flower fragment flag to VCAP fragment type mapping */
+static const u8 sparx5_vcap_frag_map[4][4] = {           /* is_frag */
+       { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_FIRST }, /* 0/0 */
+       { FRAG_NOT,   FRAG_NOT,   FRAG_INVAL, FRAG_INVAL }, /* 0/1 */
+       { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_INVAL }, /* 1/0 */
+       { FRAG_SOME,  FRAG_LATER, FRAG_INVAL, FRAG_FIRST }  /* 1/1 */
+       /* 0/0        0/1         1/0         1/1 <-- first_frag */
+};
+
 static int
 sparx5_tc_flower_es0_tpid(struct vcap_tc_flower_parse_usage *st)
 {
@@ -145,29 +166,27 @@ sparx5_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st)
        flow_rule_match_control(st->frule, &mt);
 
        if (mt.mask->flags) {
-               if (mt.mask->flags & FLOW_DIS_FIRST_FRAG) {
-                       if (mt.key->flags & FLOW_DIS_FIRST_FRAG) {
-                               value = 1; /* initial fragment */
-                               mask = 0x3;
-                       } else {
-                               if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) {
-                                       value = 3; /* follow up fragment */
-                                       mask = 0x3;
-                               } else {
-                                       value = 0; /* no fragment */
-                                       mask = 0x3;
-                               }
-                       }
-               } else {
-                       if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) {
-                               value = 3; /* follow up fragment */
-                               mask = 0x3;
-                       } else {
-                               value = 0; /* no fragment */
-                               mask = 0x3;
-                       }
+               u8 is_frag_key = !!(mt.key->flags & FLOW_DIS_IS_FRAGMENT);
+               u8 is_frag_mask = !!(mt.mask->flags & FLOW_DIS_IS_FRAGMENT);
+               u8 is_frag_idx = (is_frag_key << 1) | is_frag_mask;
+
+               u8 first_frag_key = !!(mt.key->flags & FLOW_DIS_FIRST_FRAG);
+               u8 first_frag_mask = !!(mt.mask->flags & FLOW_DIS_FIRST_FRAG);
+               u8 first_frag_idx = (first_frag_key << 1) | first_frag_mask;
+
+               /* Lookup verdict based on the 2 + 2 input bits */
+               u8 vdt = sparx5_vcap_frag_map[is_frag_idx][first_frag_idx];
+
+               if (vdt == FRAG_INVAL) {
+                       NL_SET_ERR_MSG_MOD(st->fco->common.extack,
+                                          "Match on invalid fragment flag combination");
+                       return -EINVAL;
                }
 
+               /* Extract VCAP fragment key and mask from verdict */
+               value = (vdt >> 4) & 0x3;
+               mask = vdt & 0x3;
+
                err = vcap_rule_add_key_u32(st->vrule,
                                            VCAP_KF_L3_FRAGMENT_TYPE,
                                            value, mask);
index 4c043052198d470ce0a2f82dbe1f3be67b35c827..00882ffc7a029ef63c1c8605541f86339999e724 100644 (file)
@@ -73,6 +73,7 @@ enum mac_version {
 };
 
 struct rtl8169_private;
+struct r8169_led_classdev;
 
 void r8169_apply_firmware(struct rtl8169_private *tp);
 u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp);
@@ -84,7 +85,8 @@ void r8169_get_led_name(struct rtl8169_private *tp, int idx,
                        char *buf, int buf_len);
 int rtl8168_get_led_mode(struct rtl8169_private *tp);
 int rtl8168_led_mod_ctrl(struct rtl8169_private *tp, u16 mask, u16 val);
-void rtl8168_init_leds(struct net_device *ndev);
+struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev);
 int rtl8125_get_led_mode(struct rtl8169_private *tp, int index);
 int rtl8125_set_led_mode(struct rtl8169_private *tp, int index, u16 mode);
-void rtl8125_init_leds(struct net_device *ndev);
+struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev);
+void r8169_remove_leds(struct r8169_led_classdev *leds);
index 7c5dc9d0df855ef57592b7a25d4357232279a60f..e10bee706bc691b8c32ec2410baa8d4279de69a0 100644 (file)
@@ -146,22 +146,22 @@ static void rtl8168_setup_ldev(struct r8169_led_classdev *ldev,
        led_cdev->hw_control_get_device = r8169_led_hw_control_get_device;
 
        /* ignore errors */
-       devm_led_classdev_register(&ndev->dev, led_cdev);
+       led_classdev_register(&ndev->dev, led_cdev);
 }
 
-void rtl8168_init_leds(struct net_device *ndev)
+struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev)
 {
-       /* bind resource mgmt to netdev */
-       struct device *dev = &ndev->dev;
        struct r8169_led_classdev *leds;
        int i;
 
-       leds = devm_kcalloc(dev, RTL8168_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+       leds = kcalloc(RTL8168_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL);
        if (!leds)
-               return;
+               return NULL;
 
        for (i = 0; i < RTL8168_NUM_LEDS; i++)
                rtl8168_setup_ldev(leds + i, ndev, i);
+
+       return leds;
 }
 
 static int rtl8125_led_hw_control_is_supported(struct led_classdev *led_cdev,
@@ -245,20 +245,31 @@ static void rtl8125_setup_led_ldev(struct r8169_led_classdev *ldev,
        led_cdev->hw_control_get_device = r8169_led_hw_control_get_device;
 
        /* ignore errors */
-       devm_led_classdev_register(&ndev->dev, led_cdev);
+       led_classdev_register(&ndev->dev, led_cdev);
 }
 
-void rtl8125_init_leds(struct net_device *ndev)
+struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev)
 {
-       /* bind resource mgmt to netdev */
-       struct device *dev = &ndev->dev;
        struct r8169_led_classdev *leds;
        int i;
 
-       leds = devm_kcalloc(dev, RTL8125_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+       leds = kcalloc(RTL8125_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL);
        if (!leds)
-               return;
+               return NULL;
 
        for (i = 0; i < RTL8125_NUM_LEDS; i++)
                rtl8125_setup_led_ldev(leds + i, ndev, i);
+
+       return leds;
+}
+
+void r8169_remove_leds(struct r8169_led_classdev *leds)
+{
+       if (!leds)
+               return;
+
+       for (struct r8169_led_classdev *l = leds; l->ndev; l++)
+               led_classdev_unregister(&l->led);
+
+       kfree(leds);
 }
index 6f1e6f386b7ba7277bc211765b8ef49d9fcd6750..0fc5fe564ae50be28bc6568f90d339d840a4b8d1 100644 (file)
@@ -647,6 +647,8 @@ struct rtl8169_private {
        const char *fw_name;
        struct rtl_fw *rtl_fw;
 
+       struct r8169_led_classdev *leds;
+
        u32 ocp_base;
 };
 
@@ -5044,6 +5046,9 @@ static void rtl_remove_one(struct pci_dev *pdev)
 
        cancel_work_sync(&tp->wk.work);
 
+       if (IS_ENABLED(CONFIG_R8169_LEDS))
+               r8169_remove_leds(tp->leds);
+
        unregister_netdev(tp->dev);
 
        if (tp->dash_type != RTL_DASH_NONE)
@@ -5501,9 +5506,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        if (IS_ENABLED(CONFIG_R8169_LEDS)) {
                if (rtl_is_8125(tp))
-                       rtl8125_init_leds(dev);
+                       tp->leds = rtl8125_init_leds(dev);
                else if (tp->mac_version > RTL_GIGA_MAC_VER_06)
-                       rtl8168_init_leds(dev);
+                       tp->leds = rtl8168_init_leds(dev);
        }
 
        netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n",
index ba01c8cc3c906d5ea9a02029dc76fabc243b277c..fcb756d77681cbaf2a17d3e21ddffa5543bc9a84 100644 (file)
@@ -769,25 +769,28 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
        dma_addr_t dma_addr;
        int rx_packets = 0;
        u8  desc_status;
-       u16 pkt_len;
+       u16 desc_len;
        u8  die_dt;
        int entry;
        int limit;
        int i;
 
-       entry = priv->cur_rx[q] % priv->num_rx_ring[q];
        limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
        stats = &priv->stats[q];
 
-       desc = &priv->rx_ring[q].desc[entry];
-       for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) {
+       for (i = 0; i < limit; i++, priv->cur_rx[q]++) {
+               entry = priv->cur_rx[q] % priv->num_rx_ring[q];
+               desc = &priv->rx_ring[q].desc[entry];
+               if (rx_packets == *quota || desc->die_dt == DT_FEMPTY)
+                       break;
+
                /* Descriptor type must be checked before all other reads */
                dma_rmb();
                desc_status = desc->msc;
-               pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS;
+               desc_len = le16_to_cpu(desc->ds_cc) & RX_DS;
 
                /* We use 0-byte descriptors to mark the DMA mapping errors */
-               if (!pkt_len)
+               if (!desc_len)
                        continue;
 
                if (desc_status & MSC_MC)
@@ -808,25 +811,25 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
                        switch (die_dt) {
                        case DT_FSINGLE:
                                skb = ravb_get_skb_gbeth(ndev, entry, desc);
-                               skb_put(skb, pkt_len);
+                               skb_put(skb, desc_len);
                                skb->protocol = eth_type_trans(skb, ndev);
                                if (ndev->features & NETIF_F_RXCSUM)
                                        ravb_rx_csum_gbeth(skb);
                                napi_gro_receive(&priv->napi[q], skb);
                                rx_packets++;
-                               stats->rx_bytes += pkt_len;
+                               stats->rx_bytes += desc_len;
                                break;
                        case DT_FSTART:
                                priv->rx_1st_skb = ravb_get_skb_gbeth(ndev, entry, desc);
-                               skb_put(priv->rx_1st_skb, pkt_len);
+                               skb_put(priv->rx_1st_skb, desc_len);
                                break;
                        case DT_FMID:
                                skb = ravb_get_skb_gbeth(ndev, entry, desc);
                                skb_copy_to_linear_data_offset(priv->rx_1st_skb,
                                                               priv->rx_1st_skb->len,
                                                               skb->data,
-                                                              pkt_len);
-                               skb_put(priv->rx_1st_skb, pkt_len);
+                                                              desc_len);
+                               skb_put(priv->rx_1st_skb, desc_len);
                                dev_kfree_skb(skb);
                                break;
                        case DT_FEND:
@@ -834,23 +837,20 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
                                skb_copy_to_linear_data_offset(priv->rx_1st_skb,
                                                               priv->rx_1st_skb->len,
                                                               skb->data,
-                                                              pkt_len);
-                               skb_put(priv->rx_1st_skb, pkt_len);
+                                                              desc_len);
+                               skb_put(priv->rx_1st_skb, desc_len);
                                dev_kfree_skb(skb);
                                priv->rx_1st_skb->protocol =
                                        eth_type_trans(priv->rx_1st_skb, ndev);
                                if (ndev->features & NETIF_F_RXCSUM)
-                                       ravb_rx_csum_gbeth(skb);
+                                       ravb_rx_csum_gbeth(priv->rx_1st_skb);
+                               stats->rx_bytes += priv->rx_1st_skb->len;
                                napi_gro_receive(&priv->napi[q],
                                                 priv->rx_1st_skb);
                                rx_packets++;
-                               stats->rx_bytes += pkt_len;
                                break;
                        }
                }
-
-               entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q];
-               desc = &priv->rx_ring[q].desc[entry];
        }
 
        /* Refill the RX ring buffers. */
@@ -891,30 +891,29 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
 {
        struct ravb_private *priv = netdev_priv(ndev);
        const struct ravb_hw_info *info = priv->info;
-       int entry = priv->cur_rx[q] % priv->num_rx_ring[q];
-       int boguscnt = (priv->dirty_rx[q] + priv->num_rx_ring[q]) -
-                       priv->cur_rx[q];
        struct net_device_stats *stats = &priv->stats[q];
        struct ravb_ex_rx_desc *desc;
+       unsigned int limit, i;
        struct sk_buff *skb;
        dma_addr_t dma_addr;
        struct timespec64 ts;
+       int rx_packets = 0;
        u8  desc_status;
        u16 pkt_len;
-       int limit;
+       int entry;
+
+       limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
+       for (i = 0; i < limit; i++, priv->cur_rx[q]++) {
+               entry = priv->cur_rx[q] % priv->num_rx_ring[q];
+               desc = &priv->rx_ring[q].ex_desc[entry];
+               if (rx_packets == *quota || desc->die_dt == DT_FEMPTY)
+                       break;
 
-       boguscnt = min(boguscnt, *quota);
-       limit = boguscnt;
-       desc = &priv->rx_ring[q].ex_desc[entry];
-       while (desc->die_dt != DT_FEMPTY) {
                /* Descriptor type must be checked before all other reads */
                dma_rmb();
                desc_status = desc->msc;
                pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS;
 
-               if (--boguscnt < 0)
-                       break;
-
                /* We use 0-byte descriptors to mark the DMA mapping errors */
                if (!pkt_len)
                        continue;
@@ -960,12 +959,9 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
                        if (ndev->features & NETIF_F_RXCSUM)
                                ravb_rx_csum(skb);
                        napi_gro_receive(&priv->napi[q], skb);
-                       stats->rx_packets++;
+                       rx_packets++;
                        stats->rx_bytes += pkt_len;
                }
-
-               entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q];
-               desc = &priv->rx_ring[q].ex_desc[entry];
        }
 
        /* Refill the RX ring buffers. */
@@ -995,9 +991,9 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
                desc->die_dt = DT_FEMPTY;
        }
 
-       *quota -= limit - (++boguscnt);
-
-       return boguscnt <= 0;
+       stats->rx_packets += rx_packets;
+       *quota -= rx_packets;
+       return *quota == 0;
 }
 
 /* Packet receive function for Ethernet AVB */
index a6fefe675ef1520566ccdcafaac705f0ee159e42..3b7d4ac1e7be07cb2a0fc796f73b671ed535f01d 100644 (file)
@@ -553,6 +553,7 @@ extern const struct stmmac_hwtimestamp stmmac_ptp;
 extern const struct stmmac_mode_ops dwmac4_ring_mode_ops;
 
 struct mac_link {
+       u32 caps;
        u32 speed_mask;
        u32 speed10;
        u32 speed100;
index b21d99faa2d04c985427af61724dd073e3a2fe79..e1b761dcfa1dd56f2e5218312933eb1ea6bc06b1 100644 (file)
@@ -1096,6 +1096,8 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv)
 
        priv->dev->priv_flags |= IFF_UNICAST_FLT;
 
+       mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+                        MAC_10 | MAC_100 | MAC_1000;
        /* The loopback bit seems to be re-set when link change
         * Simply mask it each time
         * Speed 10/100/1000 are set in BIT(2)/BIT(3)
index 3927609abc44110be97903aee12e25084473b80c..8555299443f4edf2475b95c1785544a1c3b73251 100644 (file)
@@ -539,6 +539,8 @@ int dwmac1000_setup(struct stmmac_priv *priv)
        if (mac->multicast_filter_bins)
                mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
 
+       mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+                        MAC_10 | MAC_100 | MAC_1000;
        mac->link.duplex = GMAC_CONTROL_DM;
        mac->link.speed10 = GMAC_CONTROL_PS;
        mac->link.speed100 = GMAC_CONTROL_PS | GMAC_CONTROL_FES;
index a6e8d7bd95886fc277c7e22c896ddf618e0fca97..7667d103cd0ebd9670a42360a095cfd322c8ebac 100644 (file)
@@ -175,6 +175,8 @@ int dwmac100_setup(struct stmmac_priv *priv)
        dev_info(priv->device, "\tDWMAC100\n");
 
        mac->pcsr = priv->ioaddr;
+       mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+                        MAC_10 | MAC_100;
        mac->link.duplex = MAC_CONTROL_F;
        mac->link.speed10 = 0;
        mac->link.speed100 = 0;
index cef25efbdff99fdc07a313ab678869e83c85f79e..a38226d7cc6a99e45c39f62c81c56d8dc87a921a 100644 (file)
@@ -70,7 +70,10 @@ static void dwmac4_core_init(struct mac_device_info *hw,
 
 static void dwmac4_phylink_get_caps(struct stmmac_priv *priv)
 {
-       priv->phylink_config.mac_capabilities |= MAC_2500FD;
+       if (priv->plat->tx_queues_to_use > 1)
+               priv->hw->link.caps &= ~(MAC_10HD | MAC_100HD | MAC_1000HD);
+       else
+               priv->hw->link.caps |= (MAC_10HD | MAC_100HD | MAC_1000HD);
 }
 
 static void dwmac4_rx_queue_enable(struct mac_device_info *hw,
@@ -1378,6 +1381,8 @@ int dwmac4_setup(struct stmmac_priv *priv)
        if (mac->multicast_filter_bins)
                mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
 
+       mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+                        MAC_10 | MAC_100 | MAC_1000 | MAC_2500FD;
        mac->link.duplex = GMAC_CONFIG_DM;
        mac->link.speed10 = GMAC_CONFIG_PS;
        mac->link.speed100 = GMAC_CONFIG_FES | GMAC_CONFIG_PS;
index e841e312077ef0604c5b17a5473069cc4affadff..f8e7775bb63364c589da99cb4c954a38f4411567 100644 (file)
@@ -47,14 +47,6 @@ static void dwxgmac2_core_init(struct mac_device_info *hw,
        writel(XGMAC_INT_DEFAULT_EN, ioaddr + XGMAC_INT_EN);
 }
 
-static void xgmac_phylink_get_caps(struct stmmac_priv *priv)
-{
-       priv->phylink_config.mac_capabilities |= MAC_2500FD | MAC_5000FD |
-                                                MAC_10000FD | MAC_25000FD |
-                                                MAC_40000FD | MAC_50000FD |
-                                                MAC_100000FD;
-}
-
 static void dwxgmac2_set_mac(void __iomem *ioaddr, bool enable)
 {
        u32 tx = readl(ioaddr + XGMAC_TX_CONFIG);
@@ -1540,7 +1532,6 @@ static void dwxgmac3_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *
 
 const struct stmmac_ops dwxgmac210_ops = {
        .core_init = dwxgmac2_core_init,
-       .phylink_get_caps = xgmac_phylink_get_caps,
        .set_mac = dwxgmac2_set_mac,
        .rx_ipc = dwxgmac2_rx_ipc,
        .rx_queue_enable = dwxgmac2_rx_queue_enable,
@@ -1601,7 +1592,6 @@ static void dwxlgmac2_rx_queue_enable(struct mac_device_info *hw, u8 mode,
 
 const struct stmmac_ops dwxlgmac2_ops = {
        .core_init = dwxgmac2_core_init,
-       .phylink_get_caps = xgmac_phylink_get_caps,
        .set_mac = dwxgmac2_set_mac,
        .rx_ipc = dwxgmac2_rx_ipc,
        .rx_queue_enable = dwxlgmac2_rx_queue_enable,
@@ -1661,6 +1651,9 @@ int dwxgmac2_setup(struct stmmac_priv *priv)
        if (mac->multicast_filter_bins)
                mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
 
+       mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+                        MAC_1000FD | MAC_2500FD | MAC_5000FD |
+                        MAC_10000FD;
        mac->link.duplex = 0;
        mac->link.speed10 = XGMAC_CONFIG_SS_10_MII;
        mac->link.speed100 = XGMAC_CONFIG_SS_100_MII;
@@ -1698,6 +1691,11 @@ int dwxlgmac2_setup(struct stmmac_priv *priv)
        if (mac->multicast_filter_bins)
                mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
 
+       mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+                        MAC_1000FD | MAC_2500FD | MAC_5000FD |
+                        MAC_10000FD | MAC_25000FD |
+                        MAC_40000FD | MAC_50000FD |
+                        MAC_100000FD;
        mac->link.duplex = 0;
        mac->link.speed1000 = XLGMAC_CONFIG_SS_1000;
        mac->link.speed2500 = XLGMAC_CONFIG_SS_2500;
index dff02d75d519713e61ffa5db651ece91242aece0..5d1ea3e07459a390f2d561b4147c7c0c3f33e4ce 100644 (file)
@@ -52,6 +52,7 @@ struct stmmac_counters {
        unsigned int mmc_tx_excessdef;
        unsigned int mmc_tx_pause_frame;
        unsigned int mmc_tx_vlan_frame_g;
+       unsigned int mmc_tx_oversize_g;
        unsigned int mmc_tx_lpi_usec;
        unsigned int mmc_tx_lpi_tran;
 
@@ -80,6 +81,7 @@ struct stmmac_counters {
        unsigned int mmc_rx_fifo_overflow;
        unsigned int mmc_rx_vlan_frames_gb;
        unsigned int mmc_rx_watchdog_error;
+       unsigned int mmc_rx_error;
        unsigned int mmc_rx_lpi_usec;
        unsigned int mmc_rx_lpi_tran;
        unsigned int mmc_rx_discard_frames_gb;
index 7eb477faa75a3853e7698e5a4aab3376d5d162e9..0fab842902a850022a3be368d4972e4f4e9bcdc9 100644 (file)
@@ -53,6 +53,7 @@
 #define MMC_TX_EXCESSDEF               0x6c
 #define MMC_TX_PAUSE_FRAME             0x70
 #define MMC_TX_VLAN_FRAME_G            0x74
+#define MMC_TX_OVERSIZE_G              0x78
 
 /* MMC RX counter registers */
 #define MMC_RX_FRAMECOUNT_GB           0x80
 #define MMC_RX_FIFO_OVERFLOW           0xd4
 #define MMC_RX_VLAN_FRAMES_GB          0xd8
 #define MMC_RX_WATCHDOG_ERROR          0xdc
+#define MMC_RX_ERROR                   0xe0
+
+#define MMC_TX_LPI_USEC                        0xec
+#define MMC_TX_LPI_TRAN                        0xf0
+#define MMC_RX_LPI_USEC                        0xf4
+#define MMC_RX_LPI_TRAN                        0xf8
+
 /* IPC*/
 #define MMC_RX_IPC_INTR_MASK           0x100
 #define MMC_RX_IPC_INTR                        0x108
@@ -283,6 +291,9 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
        mmc->mmc_tx_excessdef += readl(mmcaddr + MMC_TX_EXCESSDEF);
        mmc->mmc_tx_pause_frame += readl(mmcaddr + MMC_TX_PAUSE_FRAME);
        mmc->mmc_tx_vlan_frame_g += readl(mmcaddr + MMC_TX_VLAN_FRAME_G);
+       mmc->mmc_tx_oversize_g   += readl(mmcaddr + MMC_TX_OVERSIZE_G);
+       mmc->mmc_tx_lpi_usec += readl(mmcaddr + MMC_TX_LPI_USEC);
+       mmc->mmc_tx_lpi_tran += readl(mmcaddr + MMC_TX_LPI_TRAN);
 
        /* MMC RX counter registers */
        mmc->mmc_rx_framecount_gb += readl(mmcaddr + MMC_RX_FRAMECOUNT_GB);
@@ -316,6 +327,10 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
        mmc->mmc_rx_fifo_overflow += readl(mmcaddr + MMC_RX_FIFO_OVERFLOW);
        mmc->mmc_rx_vlan_frames_gb += readl(mmcaddr + MMC_RX_VLAN_FRAMES_GB);
        mmc->mmc_rx_watchdog_error += readl(mmcaddr + MMC_RX_WATCHDOG_ERROR);
+       mmc->mmc_rx_error += readl(mmcaddr + MMC_RX_ERROR);
+       mmc->mmc_rx_lpi_usec += readl(mmcaddr + MMC_RX_LPI_USEC);
+       mmc->mmc_rx_lpi_tran += readl(mmcaddr + MMC_RX_LPI_TRAN);
+
        /* IPv4 */
        mmc->mmc_rx_ipv4_gd += readl(mmcaddr + MMC_RX_IPV4_GD);
        mmc->mmc_rx_ipv4_hderr += readl(mmcaddr + MMC_RX_IPV4_HDERR);
index e1537a57815f387082acdb954ee6b40d397990c0..542e2633a6f52223bf15ef31fb17d2377fb4583c 100644 (file)
@@ -212,6 +212,7 @@ static const struct stmmac_stats stmmac_mmc[] = {
        STMMAC_MMC_STAT(mmc_tx_excessdef),
        STMMAC_MMC_STAT(mmc_tx_pause_frame),
        STMMAC_MMC_STAT(mmc_tx_vlan_frame_g),
+       STMMAC_MMC_STAT(mmc_tx_oversize_g),
        STMMAC_MMC_STAT(mmc_tx_lpi_usec),
        STMMAC_MMC_STAT(mmc_tx_lpi_tran),
        STMMAC_MMC_STAT(mmc_rx_framecount_gb),
@@ -238,6 +239,7 @@ static const struct stmmac_stats stmmac_mmc[] = {
        STMMAC_MMC_STAT(mmc_rx_fifo_overflow),
        STMMAC_MMC_STAT(mmc_rx_vlan_frames_gb),
        STMMAC_MMC_STAT(mmc_rx_watchdog_error),
+       STMMAC_MMC_STAT(mmc_rx_error),
        STMMAC_MMC_STAT(mmc_rx_lpi_usec),
        STMMAC_MMC_STAT(mmc_rx_lpi_tran),
        STMMAC_MMC_STAT(mmc_rx_discard_frames_gb),
index 24cd80490d19cf86c2cd566b81f13b137109d784..7c6fb14b555508e4461980f99843ac461b323239 100644 (file)
@@ -1198,17 +1198,6 @@ static int stmmac_init_phy(struct net_device *dev)
        return ret;
 }
 
-static void stmmac_set_half_duplex(struct stmmac_priv *priv)
-{
-       /* Half-Duplex can only work with single tx queue */
-       if (priv->plat->tx_queues_to_use > 1)
-               priv->phylink_config.mac_capabilities &=
-                       ~(MAC_10HD | MAC_100HD | MAC_1000HD);
-       else
-               priv->phylink_config.mac_capabilities |=
-                       (MAC_10HD | MAC_100HD | MAC_1000HD);
-}
-
 static int stmmac_phy_setup(struct stmmac_priv *priv)
 {
        struct stmmac_mdio_bus_data *mdio_bus_data;
@@ -1236,15 +1225,11 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
                xpcs_get_interfaces(priv->hw->xpcs,
                                    priv->phylink_config.supported_interfaces);
 
-       priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
-                                               MAC_10FD | MAC_100FD |
-                                               MAC_1000FD;
-
-       stmmac_set_half_duplex(priv);
-
        /* Get the MAC specific capabilities */
        stmmac_mac_phylink_get_caps(priv);
 
+       priv->phylink_config.mac_capabilities = priv->hw->link.caps;
+
        max_speed = priv->plat->max_speed;
        if (max_speed)
                phylink_limit_mac_speed(&priv->phylink_config, max_speed);
@@ -7342,6 +7327,7 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
        int ret = 0, i;
+       int max_speed;
 
        if (netif_running(dev))
                stmmac_release(dev);
@@ -7355,7 +7341,14 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
                        priv->rss.table[i] = ethtool_rxfh_indir_default(i,
                                                                        rx_cnt);
 
-       stmmac_set_half_duplex(priv);
+       stmmac_mac_phylink_get_caps(priv);
+
+       priv->phylink_config.mac_capabilities = priv->hw->link.caps;
+
+       max_speed = priv->plat->max_speed;
+       if (max_speed)
+               phylink_limit_mac_speed(&priv->phylink_config, max_speed);
+
        stmmac_napi_add(dev);
 
        if (netif_running(dev))
index 2939a21ca74f3cf0f627981df74a949e9c61011e..1d00e21808c1c36dde2fcd4e6a864ca1ecf72a0b 100644 (file)
@@ -2793,6 +2793,8 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common)
 
 static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
 {
+       struct am65_cpsw_rx_chn *rx_chan = &common->rx_chns;
+       struct am65_cpsw_tx_chn *tx_chan = common->tx_chns;
        struct device *dev = common->dev;
        struct am65_cpsw_port *port;
        int ret = 0, i;
@@ -2805,6 +2807,22 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
        if (ret)
                return ret;
 
+       /* The DMA Channels are not guaranteed to be in a clean state.
+        * Reset and disable them to ensure that they are back to the
+        * clean state and ready to be used.
+        */
+       for (i = 0; i < common->tx_ch_num; i++) {
+               k3_udma_glue_reset_tx_chn(tx_chan[i].tx_chn, &tx_chan[i],
+                                         am65_cpsw_nuss_tx_cleanup);
+               k3_udma_glue_disable_tx_chn(tx_chan[i].tx_chn);
+       }
+
+       for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++)
+               k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan,
+                                         am65_cpsw_nuss_rx_cleanup, !!i);
+
+       k3_udma_glue_disable_rx_chn(rx_chan->rx_chn);
+
        ret = am65_cpsw_nuss_register_devlink(common);
        if (ret)
                return ret;
index 2f6739fe78af2e8e90c0a3b474c2e99c83e02994..6c2835086b57eacbcddb44a3c507e26d5a944427 100644 (file)
@@ -822,7 +822,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
        __be16 sport;
        int err;
 
-       if (!pskb_inet_may_pull(skb))
+       if (!skb_vlan_inet_prepare(skb))
                return -EINVAL;
 
        if (!gs4)
@@ -929,7 +929,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
        __be16 sport;
        int err;
 
-       if (!pskb_inet_may_pull(skb))
+       if (!skb_vlan_inet_prepare(skb))
                return -EINVAL;
 
        if (!gs6)
index a6fcbda64ecc60e5beccf20f2043ab00870cbd5d..2b6ec979a62f2160a7187e024a4b0dc6bf9e08da 100644 (file)
@@ -154,8 +154,11 @@ static void free_netvsc_device(struct rcu_head *head)
        int i;
 
        kfree(nvdev->extension);
-       vfree(nvdev->recv_buf);
-       vfree(nvdev->send_buf);
+
+       if (!nvdev->recv_buf_gpadl_handle.decrypted)
+               vfree(nvdev->recv_buf);
+       if (!nvdev->send_buf_gpadl_handle.decrypted)
+               vfree(nvdev->send_buf);
        bitmap_free(nvdev->send_section_map);
 
        for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
index 0b3f21cba552f27221a2c7fbe8147feb34e69724..92da8c03d960c9beca5b425a1e3d366f37fa21fc 100644 (file)
@@ -2125,14 +2125,16 @@ static ssize_t tun_put_user(struct tun_struct *tun,
                                            tun_is_little_endian(tun), true,
                                            vlan_hlen)) {
                        struct skb_shared_info *sinfo = skb_shinfo(skb);
-                       pr_err("unexpected GSO type: "
-                              "0x%x, gso_size %d, hdr_len %d\n",
-                              sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
-                              tun16_to_cpu(tun, gso.hdr_len));
-                       print_hex_dump(KERN_ERR, "tun: ",
-                                      DUMP_PREFIX_NONE,
-                                      16, 1, skb->head,
-                                      min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
+
+                       if (net_ratelimit()) {
+                               netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
+                                          sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
+                                          tun16_to_cpu(tun, gso.hdr_len));
+                               print_hex_dump(KERN_ERR, "tun: ",
+                                              DUMP_PREFIX_NONE,
+                                              16, 1, skb->head,
+                                              min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
+                       }
                        WARN_ON_ONCE(1);
                        return -EINVAL;
                }
index a9c418890a1cacc584c9265f4716fa2e18fe0e4b..752f821a19901f313a1aca51fe332539ce82385b 100644 (file)
@@ -1317,6 +1317,8 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
 
        netif_set_tso_max_size(dev->net, 16384);
 
+       ax88179_reset(dev);
+
        return 0;
 }
 
@@ -1695,7 +1697,6 @@ static const struct driver_info ax88179_info = {
        .unbind = ax88179_unbind,
        .status = ax88179_status,
        .link_reset = ax88179_link_reset,
-       .reset = ax88179_reset,
        .stop = ax88179_stop,
        .flags = FLAG_ETHER | FLAG_FRAMING_AX,
        .rx_fixup = ax88179_rx_fixup,
@@ -1708,7 +1709,6 @@ static const struct driver_info ax88178a_info = {
        .unbind = ax88179_unbind,
        .status = ax88179_status,
        .link_reset = ax88179_link_reset,
-       .reset = ax88179_reset,
        .stop = ax88179_stop,
        .flags = FLAG_ETHER | FLAG_FRAMING_AX,
        .rx_fixup = ax88179_rx_fixup,
index e2e181378f4124c64b1d02bbe910f6209b57a356..edc34402e787f9ff84a345ecb892cc7b720ef312 100644 (file)
@@ -1431,6 +1431,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x2692, 0x9025, 4)},    /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */
        {QMI_QUIRK_SET_DTR(0x1546, 0x1312, 4)}, /* u-blox LARA-R6 01B */
        {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */
+       {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */
 
        /* 4. Gobi 1000 devices */
        {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)},    /* Acer Gobi Modem Device */
index c22d1118a13333702c41d0b11148eb067700965b..115c3c5414f2a7aa4f2d8bae0aae96ab574f3235 100644 (file)
@@ -3807,6 +3807,7 @@ static int virtnet_set_rxfh(struct net_device *dev,
                            struct netlink_ext_ack *extack)
 {
        struct virtnet_info *vi = netdev_priv(dev);
+       bool update = false;
        int i;
 
        if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
@@ -3814,13 +3815,28 @@ static int virtnet_set_rxfh(struct net_device *dev,
                return -EOPNOTSUPP;
 
        if (rxfh->indir) {
+               if (!vi->has_rss)
+                       return -EOPNOTSUPP;
+
                for (i = 0; i < vi->rss_indir_table_size; ++i)
                        vi->ctrl->rss.indirection_table[i] = rxfh->indir[i];
+               update = true;
        }
-       if (rxfh->key)
+
+       if (rxfh->key) {
+               /* If either _F_HASH_REPORT or _F_RSS are negotiated, the
+                * device provides hash calculation capabilities, that is,
+                * hash_key is configured.
+                */
+               if (!vi->has_rss && !vi->has_rss_hash_report)
+                       return -EOPNOTSUPP;
+
                memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size);
+               update = true;
+       }
 
-       virtnet_commit_rss_command(vi);
+       if (update)
+               virtnet_commit_rss_command(vi);
 
        return 0;
 }
@@ -4729,13 +4745,15 @@ static int virtnet_probe(struct virtio_device *vdev)
        if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
                vi->has_rss_hash_report = true;
 
-       if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
+       if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
                vi->has_rss = true;
 
-       if (vi->has_rss || vi->has_rss_hash_report) {
                vi->rss_indir_table_size =
                        virtio_cread16(vdev, offsetof(struct virtio_net_config,
                                rss_max_indirection_table_length));
+       }
+
+       if (vi->has_rss || vi->has_rss_hash_report) {
                vi->rss_key_size =
                        virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
 
index bf4833221816d492d4adca02d508d33a74879a92..eff7f5df08e27fb25909999a89c6742785038b75 100644 (file)
@@ -3765,14 +3765,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003e, quirk_no_bus_reset);
  */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CAVIUM, 0xa100, quirk_no_bus_reset);
 
-/*
- * Apparently the LSI / Agere FW643 can't recover after a Secondary Bus
- * Reset and requires a power-off or suspend/resume and rescan.  Prevent
- * use of that reset.
- */
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5900, quirk_no_bus_reset);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5901, quirk_no_bus_reset);
-
 /*
  * Some TI KeyStone C667X devices do not support bus/hot reset.  The PCIESS
  * automatically disables LTSSM when Secondary Bus Reset is received and
index 8ea867c2a01a371a64e1eb10327931861d306dc8..62bc24f6dcc7a82cb11361b133d65bab77d90152 100644 (file)
@@ -263,12 +263,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
        if (!ec_dev)
                return -ENOMEM;
 
-       ret = devm_serdev_device_open(dev, serdev);
-       if (ret) {
-               dev_err(dev, "Unable to open UART device");
-               return ret;
-       }
-
        serdev_device_set_drvdata(serdev, ec_dev);
        init_waitqueue_head(&ec_uart->response.wait_queue);
 
@@ -280,14 +274,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
                return ret;
        }
 
-       ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
-       if (ret < 0) {
-               dev_err(dev, "Failed to set up host baud rate (%d)", ret);
-               return ret;
-       }
-
-       serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
-
        /* Initialize ec_dev for cros_ec  */
        ec_dev->phys_name = dev_name(dev);
        ec_dev->dev = dev;
@@ -301,6 +287,20 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
 
        serdev_device_set_client_ops(serdev, &cros_ec_uart_client_ops);
 
+       ret = devm_serdev_device_open(dev, serdev);
+       if (ret) {
+               dev_err(dev, "Unable to open UART device");
+               return ret;
+       }
+
+       ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
+       if (ret < 0) {
+               dev_err(dev, "Failed to set up host baud rate (%d)", ret);
+               return ret;
+       }
+
+       serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
+
        return cros_ec_register(ec_dev);
 }
 
index ee2e164f86b9c2973e317bfbfe3297571a8cab48..38c932df6446ac5714d225ac0545ff16345a6e27 100644 (file)
@@ -597,6 +597,15 @@ static const struct dmi_system_id acer_quirks[] __initconst = {
                },
                .driver_data = &quirk_acer_predator_v4,
        },
+       {
+               .callback = dmi_matched,
+               .ident = "Acer Predator PH18-71",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Predator PH18-71"),
+               },
+               .driver_data = &quirk_acer_predator_v4,
+       },
        {
                .callback = set_force_caps,
                .ident = "Acer Aspire Switch 10E SW3-016",
index b456370166b6bb2158ca0916e0eb9e106f9fd9d7..b4f49720c87f62aa6e8349af12797382f740c2b7 100644 (file)
@@ -208,6 +208,15 @@ static const struct dmi_system_id fwbug_list[] = {
                        DMI_MATCH(DMI_BIOS_VERSION, "03.03"),
                }
        },
+       {
+               .ident = "Framework Laptop 13 (Phoenix)",
+               .driver_data = &quirk_spurious_8042,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Framework"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Laptop 13 (AMD Ryzen 7040Series)"),
+                       DMI_MATCH(DMI_BIOS_VERSION, "03.05"),
+               }
+       },
        {}
 };
 
index 6b26e48ce8ad2a5f4de6e78751ffec8941610336..7d6079b02589cbacbb203bdc42cfac4e42dd601c 100644 (file)
@@ -7,4 +7,4 @@
 obj-$(CONFIG_AMD_PMF) += amd-pmf.o
 amd-pmf-objs := core.o acpi.o sps.o \
                auto-mode.o cnqf.o \
-               tee-if.o spc.o
+               tee-if.o spc.o pmf-quirks.o
index d0cf46e2fc8e8a073149c61c52b27e9cc9051da6..1157ec148880b54ec145a7ed9353a656e36f0b33 100644 (file)
@@ -343,7 +343,10 @@ static int apmf_if_verify_interface(struct amd_pmf_dev *pdev)
        if (err)
                return err;
 
-       pdev->supported_func = output.supported_functions;
+       /* only set if not already set by a quirk */
+       if (!pdev->supported_func)
+               pdev->supported_func = output.supported_functions;
+
        dev_dbg(pdev->dev, "supported functions:0x%x notifications:0x%x version:%u\n",
                output.supported_functions, output.notification_mask, output.version);
 
@@ -437,7 +440,7 @@ int apmf_check_smart_pc(struct amd_pmf_dev *pmf_dev)
 
        status = acpi_walk_resources(ahandle, METHOD_NAME__CRS, apmf_walk_resources, pmf_dev);
        if (ACPI_FAILURE(status)) {
-               dev_err(pmf_dev->dev, "acpi_walk_resources failed :%d\n", status);
+               dev_dbg(pmf_dev->dev, "acpi_walk_resources failed :%d\n", status);
                return -EINVAL;
        }
 
index 5d4f80698a8b8824bdb59b4e5632ca5f05982c48..64e6e34a2a9acd954f4ce9a916f77673193aba06 100644 (file)
@@ -445,6 +445,7 @@ static int amd_pmf_probe(struct platform_device *pdev)
        mutex_init(&dev->lock);
        mutex_init(&dev->update_mutex);
 
+       amd_pmf_quirks_init(dev);
        apmf_acpi_init(dev);
        platform_set_drvdata(pdev, dev);
        amd_pmf_dbgfs_register(dev);
diff --git a/drivers/platform/x86/amd/pmf/pmf-quirks.c b/drivers/platform/x86/amd/pmf/pmf-quirks.c
new file mode 100644 (file)
index 0000000..0b2eb0a
--- /dev/null
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Platform Management Framework Driver Quirks
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Mario Limonciello <mario.limonciello@amd.com>
+ */
+
+#include <linux/dmi.h>
+
+#include "pmf.h"
+
+struct quirk_entry {
+       u32 supported_func;
+};
+
+static struct quirk_entry quirk_no_sps_bug = {
+       .supported_func = 0x4003,
+};
+
+static const struct dmi_system_id fwbug_list[] = {
+       {
+               .ident = "ROG Zephyrus G14",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "GA403UV"),
+               },
+               .driver_data = &quirk_no_sps_bug,
+       },
+       {}
+};
+
+void amd_pmf_quirks_init(struct amd_pmf_dev *dev)
+{
+       const struct dmi_system_id *dmi_id;
+       struct quirk_entry *quirks;
+
+       dmi_id = dmi_first_match(fwbug_list);
+       if (!dmi_id)
+               return;
+
+       quirks = dmi_id->driver_data;
+       if (quirks->supported_func) {
+               dev->supported_func = quirks->supported_func;
+               pr_info("Using supported funcs quirk to avoid %s platform firmware bug\n",
+                       dmi_id->ident);
+       }
+}
+
index 8c4df5753f40d48fefc05c6373a64d0a00469149..eeedd0c0395a89704ce360a6aff9f827566b17b2 100644 (file)
@@ -720,4 +720,7 @@ int apmf_check_smart_pc(struct amd_pmf_dev *pmf_dev);
 void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in);
 void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in);
 
+/* Quirk infrastructure */
+void amd_pmf_quirks_init(struct amd_pmf_dev *dev);
+
 #endif /* PMF_H */
index 7457ca2b27a60b7adadcebb251dba45a0e675e97..c7a8276458640adc888f99fee23fcc10b5ddf2e0 100644 (file)
@@ -49,6 +49,8 @@ static const struct acpi_device_id intel_hid_ids[] = {
        {"INTC1076", 0},
        {"INTC1077", 0},
        {"INTC1078", 0},
+       {"INTC107B", 0},
+       {"INTC10CB", 0},
        {"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, intel_hid_ids);
@@ -504,6 +506,7 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
        struct platform_device *device = context;
        struct intel_hid_priv *priv = dev_get_drvdata(&device->dev);
        unsigned long long ev_index;
+       struct key_entry *ke;
        int err;
 
        /*
@@ -545,11 +548,15 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
                if (event == 0xc0 || !priv->array)
                        return;
 
-               if (!sparse_keymap_entry_from_scancode(priv->array, event)) {
+               ke = sparse_keymap_entry_from_scancode(priv->array, event);
+               if (!ke) {
                        dev_info(&device->dev, "unknown event 0x%x\n", event);
                        return;
                }
 
+               if (ke->type == KE_IGNORE)
+                       return;
+
 wakeup:
                pm_wakeup_hard_event(&device->dev);
 
index 08df9494603c5e2acf152aacfe13fce81a18dc2c..30951f7131cd98bfdaffb70b2aa30ee3ceb7dbdd 100644 (file)
@@ -719,6 +719,7 @@ static struct miscdevice isst_if_char_driver = {
 };
 
 static const struct x86_cpu_id hpm_cpu_ids[] = {
+       X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D,     NULL),
        X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X,     NULL),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X,    NULL),
        {}
index bd75d61ff8a66196d620b5ca2824d8bb16332237..ef730200a04bd94682c781be092a43f15f88190e 100644 (file)
@@ -29,7 +29,7 @@
 #include "uncore-frequency-common.h"
 
 #define        UNCORE_MAJOR_VERSION            0
-#define        UNCORE_MINOR_VERSION            1
+#define        UNCORE_MINOR_VERSION            2
 #define UNCORE_HEADER_INDEX            0
 #define UNCORE_FABRIC_CLUSTER_OFFSET   8
 
@@ -329,7 +329,7 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_
                        goto remove_clusters;
                }
 
-               if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) != UNCORE_MINOR_VERSION)
+               if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) > UNCORE_MINOR_VERSION)
                        dev_info(&auxdev->dev, "Uncore: Ignore: Unsupported minor version:%lx\n",
                                 TPMI_MINOR_VERSION(pd_info->ufs_header_ver));
 
index 084c355c86f5fa9050ccb881a7efa6682b538773..79bb2c801daa972a74b96596e7129583c7abb39c 100644 (file)
@@ -136,8 +136,6 @@ static int intel_vbtn_input_setup(struct platform_device *device)
        priv->switches_dev->id.bustype = BUS_HOST;
 
        if (priv->has_switches) {
-               detect_tablet_mode(&device->dev);
-
                ret = input_register_device(priv->switches_dev);
                if (ret)
                        return ret;
@@ -258,9 +256,6 @@ static const struct dmi_system_id dmi_switches_allow_list[] = {
 
 static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel)
 {
-       unsigned long long vgbs;
-       acpi_status status;
-
        /* See dual_accel_detect.h for more info */
        if (dual_accel)
                return false;
@@ -268,8 +263,7 @@ static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel)
        if (!dmi_check_system(dmi_switches_allow_list))
                return false;
 
-       status = acpi_evaluate_integer(handle, "VGBS", NULL, &vgbs);
-       return ACPI_SUCCESS(status);
+       return acpi_has_method(handle, "VGBS");
 }
 
 static int intel_vbtn_probe(struct platform_device *device)
@@ -316,6 +310,9 @@ static int intel_vbtn_probe(struct platform_device *device)
                if (ACPI_FAILURE(status))
                        dev_err(&device->dev, "Error VBDL failed with ACPI status %d\n", status);
        }
+       // Check switches after buttons since VBDL may have side effects.
+       if (has_switches)
+               detect_tablet_mode(&device->dev);
 
        device_init_wakeup(&device->dev, true);
        /*
index ad3c39e9e9f586d301abd572c83e76d554a5c382..e714ee6298dda8a66637aa918e33c861508ce15e 100644 (file)
@@ -736,7 +736,7 @@ static int acpi_add(struct acpi_device *device)
                default:
                        year = 2019;
                }
-       pr_info("product: %s  year: %d\n", product, year);
+       pr_info("product: %s  year: %d\n", product ?: "unknown", year);
 
        if (year >= 2019)
                battery_limit_use_wmbb = 1;
index 291f14ef67024a35befa2ab2418e69b8c94c8302..77244c9aa60d233dd35316d764158ab6dcc378ae 100644 (file)
@@ -264,6 +264,7 @@ static const struct key_entry toshiba_acpi_keymap[] = {
        { KE_KEY, 0xb32, { KEY_NEXTSONG } },
        { KE_KEY, 0xb33, { KEY_PLAYPAUSE } },
        { KE_KEY, 0xb5a, { KEY_MEDIA } },
+       { KE_IGNORE, 0x0e00, { KEY_RESERVED } }, /* Wake from sleep */
        { KE_IGNORE, 0x1430, { KEY_RESERVED } }, /* Wake from sleep */
        { KE_IGNORE, 0x1501, { KEY_RESERVED } }, /* Output changed */
        { KE_IGNORE, 0x1502, { KEY_RESERVED } }, /* HDMI plugged/unplugged */
@@ -3523,9 +3524,10 @@ static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event)
                                        (dev->kbd_mode == SCI_KBD_MODE_ON) ?
                                        LED_FULL : LED_OFF);
                break;
+       case 0x8e: /* Power button pressed */
+               break;
        case 0x85: /* Unknown */
        case 0x8d: /* Unknown */
-       case 0x8e: /* Unknown */
        case 0x94: /* Unknown */
        case 0x95: /* Unknown */
        default:
index 043736972cb9216c59a7cb3bc6682e056cdb2373..c8425493b95d855a7562406501b7c803ef481b22 100644 (file)
@@ -172,7 +172,6 @@ struct pwm_chip *dwc_pwm_alloc(struct device *dev)
        dwc->clk_ns = 10;
        chip->ops = &dwc_pwm_ops;
 
-       dev_set_drvdata(dev, chip);
        return chip;
 }
 EXPORT_SYMBOL_GPL(dwc_pwm_alloc);
index 676eaf8d7a53f76672527c1871a306cbcdb9b7ba..fb3eadf6fbc464773b17c30235c51f5a4ff6917f 100644 (file)
@@ -31,26 +31,34 @@ static const struct dwc_pwm_info ehl_pwm_info = {
        .size = 0x1000,
 };
 
-static int dwc_pwm_init_one(struct device *dev, void __iomem *base, unsigned int offset)
+static int dwc_pwm_init_one(struct device *dev, struct dwc_pwm_drvdata *ddata, unsigned int idx)
 {
        struct pwm_chip *chip;
        struct dwc_pwm *dwc;
+       int ret;
 
        chip = dwc_pwm_alloc(dev);
        if (IS_ERR(chip))
                return PTR_ERR(chip);
 
        dwc = to_dwc_pwm(chip);
-       dwc->base = base + offset;
+       dwc->base = ddata->io_base + (ddata->info->size * idx);
 
-       return devm_pwmchip_add(dev, chip);
+       ret = devm_pwmchip_add(dev, chip);
+       if (ret)
+               return ret;
+
+       ddata->chips[idx] = chip;
+       return 0;
 }
 
 static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
 {
        const struct dwc_pwm_info *info;
        struct device *dev = &pci->dev;
-       int i, ret;
+       struct dwc_pwm_drvdata *ddata;
+       unsigned int idx;
+       int ret;
 
        ret = pcim_enable_device(pci);
        if (ret)
@@ -63,17 +71,25 @@ static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
                return dev_err_probe(dev, ret, "Failed to iomap PCI BAR\n");
 
        info = (const struct dwc_pwm_info *)id->driver_data;
-
-       for (i = 0; i < info->nr; i++) {
-               /*
-                * No need to check for pcim_iomap_table() failure,
-                * pcim_iomap_regions() already does it for us.
-                */
-               ret = dwc_pwm_init_one(dev, pcim_iomap_table(pci)[0], i * info->size);
+       ddata = devm_kzalloc(dev, struct_size(ddata, chips, info->nr), GFP_KERNEL);
+       if (!ddata)
+               return -ENOMEM;
+
+       /*
+        * No need to check for pcim_iomap_table() failure,
+        * pcim_iomap_regions() already does it for us.
+        */
+       ddata->io_base = pcim_iomap_table(pci)[0];
+       ddata->info = info;
+
+       for (idx = 0; idx < ddata->info->nr; idx++) {
+               ret = dwc_pwm_init_one(dev, ddata, idx);
                if (ret)
                        return ret;
        }
 
+       dev_set_drvdata(dev, ddata);
+
        pm_runtime_put(dev);
        pm_runtime_allow(dev);
 
@@ -88,19 +104,24 @@ static void dwc_pwm_remove(struct pci_dev *pci)
 
 static int dwc_pwm_suspend(struct device *dev)
 {
-       struct pwm_chip *chip = dev_get_drvdata(dev);
-       struct dwc_pwm *dwc = to_dwc_pwm(chip);
-       int i;
-
-       for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
-               if (chip->pwms[i].state.enabled) {
-                       dev_err(dev, "PWM %u in use by consumer (%s)\n",
-                               i, chip->pwms[i].label);
-                       return -EBUSY;
+       struct dwc_pwm_drvdata *ddata = dev_get_drvdata(dev);
+       unsigned int idx;
+
+       for (idx = 0; idx < ddata->info->nr; idx++) {
+               struct pwm_chip *chip = ddata->chips[idx];
+               struct dwc_pwm *dwc = to_dwc_pwm(chip);
+               unsigned int i;
+
+               for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
+                       if (chip->pwms[i].state.enabled) {
+                               dev_err(dev, "PWM %u in use by consumer (%s)\n",
+                                       i, chip->pwms[i].label);
+                               return -EBUSY;
+                       }
+                       dwc->ctx[i].cnt = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(i));
+                       dwc->ctx[i].cnt2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(i));
+                       dwc->ctx[i].ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(i));
                }
-               dwc->ctx[i].cnt = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(i));
-               dwc->ctx[i].cnt2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(i));
-               dwc->ctx[i].ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(i));
        }
 
        return 0;
@@ -108,14 +129,19 @@ static int dwc_pwm_suspend(struct device *dev)
 
 static int dwc_pwm_resume(struct device *dev)
 {
-       struct pwm_chip *chip = dev_get_drvdata(dev);
-       struct dwc_pwm *dwc = to_dwc_pwm(chip);
-       int i;
-
-       for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
-               dwc_pwm_writel(dwc, dwc->ctx[i].cnt, DWC_TIM_LD_CNT(i));
-               dwc_pwm_writel(dwc, dwc->ctx[i].cnt2, DWC_TIM_LD_CNT2(i));
-               dwc_pwm_writel(dwc, dwc->ctx[i].ctrl, DWC_TIM_CTRL(i));
+       struct dwc_pwm_drvdata *ddata = dev_get_drvdata(dev);
+       unsigned int idx;
+
+       for (idx = 0; idx < ddata->info->nr; idx++) {
+               struct pwm_chip *chip = ddata->chips[idx];
+               struct dwc_pwm *dwc = to_dwc_pwm(chip);
+               unsigned int i;
+
+               for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
+                       dwc_pwm_writel(dwc, dwc->ctx[i].cnt, DWC_TIM_LD_CNT(i));
+                       dwc_pwm_writel(dwc, dwc->ctx[i].cnt2, DWC_TIM_LD_CNT2(i));
+                       dwc_pwm_writel(dwc, dwc->ctx[i].ctrl, DWC_TIM_CTRL(i));
+               }
        }
 
        return 0;
index a8b074841ae8054a5a3737127442a1d0e9979e02..c6e2df5a61227131c50fc3c6351326217371c3a3 100644 (file)
@@ -38,6 +38,12 @@ struct dwc_pwm_info {
        unsigned int size;
 };
 
+struct dwc_pwm_drvdata {
+       const struct dwc_pwm_info *info;
+       void __iomem *io_base;
+       struct pwm_chip *chips[];
+};
+
 struct dwc_pwm_ctx {
        u32 cnt;
        u32 cnt2;
index f95d12345d98a6dbfd1efa850829586852d409ab..920f550bc313bf6c036ed11c539d4319852cb2dd 100644 (file)
@@ -363,10 +363,8 @@ int ccw_device_set_online(struct ccw_device *cdev)
 
        spin_lock_irq(cdev->ccwlock);
        ret = ccw_device_online(cdev);
-       spin_unlock_irq(cdev->ccwlock);
-       if (ret == 0)
-               wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev));
-       else {
+       if (ret) {
+               spin_unlock_irq(cdev->ccwlock);
                CIO_MSG_EVENT(0, "ccw_device_online returned %d, "
                              "device 0.%x.%04x\n",
                              ret, cdev->private->dev_id.ssid,
@@ -375,7 +373,12 @@ int ccw_device_set_online(struct ccw_device *cdev)
                put_device(&cdev->dev);
                return ret;
        }
-       spin_lock_irq(cdev->ccwlock);
+       /* Wait until a final state is reached */
+       while (!dev_fsm_final_state(cdev)) {
+               spin_unlock_irq(cdev->ccwlock);
+               wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev));
+               spin_lock_irq(cdev->ccwlock);
+       }
        /* Check if online processing was successful */
        if ((cdev->private->state != DEV_STATE_ONLINE) &&
            (cdev->private->state != DEV_STATE_W4SENSE)) {
index 65d8b2cfd6262d53a03bf10295d5d6c59f518c5d..42791fa0b80e26d4246dabc18e82f2130edb9bb9 100644 (file)
@@ -504,6 +504,11 @@ callback:
                ccw_device_done(cdev, DEV_STATE_ONLINE);
                /* Deliver fake irb to device driver, if needed. */
                if (cdev->private->flags.fake_irb) {
+                       CIO_MSG_EVENT(2, "fakeirb: deliver device 0.%x.%04x intparm %lx type=%d\n",
+                                     cdev->private->dev_id.ssid,
+                                     cdev->private->dev_id.devno,
+                                     cdev->private->intparm,
+                                     cdev->private->flags.fake_irb);
                        create_fake_irb(&cdev->private->dma_area->irb,
                                        cdev->private->flags.fake_irb);
                        cdev->private->flags.fake_irb = 0;
index 40c97f8730751f2afa862ace30b16f1ade38ea80..acd6790dba4dd1311d78e05afa0f41ad5b38aa3a 100644 (file)
@@ -208,6 +208,10 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
                if (!cdev->private->flags.fake_irb) {
                        cdev->private->flags.fake_irb = FAKE_CMD_IRB;
                        cdev->private->intparm = intparm;
+                       CIO_MSG_EVENT(2, "fakeirb: queue device 0.%x.%04x intparm %lx type=%d\n",
+                                     cdev->private->dev_id.ssid,
+                                     cdev->private->dev_id.devno, intparm,
+                                     cdev->private->flags.fake_irb);
                        return 0;
                } else
                        /* There's already a fake I/O around. */
@@ -551,6 +555,10 @@ int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw,
                if (!cdev->private->flags.fake_irb) {
                        cdev->private->flags.fake_irb = FAKE_TM_IRB;
                        cdev->private->intparm = intparm;
+                       CIO_MSG_EVENT(2, "fakeirb: queue device 0.%x.%04x intparm %lx type=%d\n",
+                                     cdev->private->dev_id.ssid,
+                                     cdev->private->dev_id.devno, intparm,
+                                     cdev->private->flags.fake_irb);
                        return 0;
                } else
                        /* There's already a fake I/O around. */
index 3d9f0834c78bf1e6a55c17a21218b29bacbd1dce..a1cb39f4b7a27939dcf8ac247a30ed965957bd5e 100644 (file)
@@ -722,8 +722,8 @@ static void qdio_handle_activate_check(struct qdio_irq *irq_ptr,
        lgr_info_log();
 }
 
-static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat,
-                                     int dstat)
+static int qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat,
+                                    int dstat, int dcc)
 {
        DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qest irq");
 
@@ -731,15 +731,18 @@ static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat,
                goto error;
        if (dstat & ~(DEV_STAT_DEV_END | DEV_STAT_CHN_END))
                goto error;
+       if (dcc == 1)
+               return -EAGAIN;
        if (!(dstat & DEV_STAT_DEV_END))
                goto error;
        qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ESTABLISHED);
-       return;
+       return 0;
 
 error:
        DBF_ERROR("%4x EQ:error", irq_ptr->schid.sch_no);
        DBF_ERROR("ds: %2x cs:%2x", dstat, cstat);
        qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
+       return -EIO;
 }
 
 /* qdio interrupt handler */
@@ -748,7 +751,7 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
 {
        struct qdio_irq *irq_ptr = cdev->private->qdio_data;
        struct subchannel_id schid;
-       int cstat, dstat;
+       int cstat, dstat, rc, dcc;
 
        if (!intparm || !irq_ptr) {
                ccw_device_get_schid(cdev, &schid);
@@ -768,10 +771,12 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
        qdio_irq_check_sense(irq_ptr, irb);
        cstat = irb->scsw.cmd.cstat;
        dstat = irb->scsw.cmd.dstat;
+       dcc   = scsw_cmd_is_valid_cc(&irb->scsw) ? irb->scsw.cmd.cc : 0;
+       rc    = 0;
 
        switch (irq_ptr->state) {
        case QDIO_IRQ_STATE_INACTIVE:
-               qdio_establish_handle_irq(irq_ptr, cstat, dstat);
+               rc = qdio_establish_handle_irq(irq_ptr, cstat, dstat, dcc);
                break;
        case QDIO_IRQ_STATE_CLEANUP:
                qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
@@ -785,12 +790,25 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
                if (cstat || dstat)
                        qdio_handle_activate_check(irq_ptr, intparm, cstat,
                                                   dstat);
+               else if (dcc == 1)
+                       rc = -EAGAIN;
                break;
        case QDIO_IRQ_STATE_STOPPED:
                break;
        default:
                WARN_ON_ONCE(1);
        }
+
+       if (rc == -EAGAIN) {
+               DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qint retry");
+               rc = ccw_device_start(cdev, irq_ptr->ccw, intparm, 0, 0);
+               if (!rc)
+                       return;
+               DBF_ERROR("%4x RETRY ERR", irq_ptr->schid.sch_no);
+               DBF_ERROR("rc:%4x", rc);
+               qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
+       }
+
        wake_up(&cdev->private->wait_q);
 }
 
index 2c8e964425dc38ca80fa5009b17b4e9dc29bbf10..43778b088ffac54c4a8911f3e41e187f0ae3f364 100644 (file)
@@ -292,13 +292,16 @@ out:
 static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
 {
        clear_bit(dmb->sba_idx, ism->sba_bitmap);
-       dma_free_coherent(&ism->pdev->dev, dmb->dmb_len,
-                         dmb->cpu_addr, dmb->dma_addr);
+       dma_unmap_page(&ism->pdev->dev, dmb->dma_addr, dmb->dmb_len,
+                      DMA_FROM_DEVICE);
+       folio_put(virt_to_folio(dmb->cpu_addr));
 }
 
 static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
 {
+       struct folio *folio;
        unsigned long bit;
+       int rc;
 
        if (PAGE_ALIGN(dmb->dmb_len) > dma_get_max_seg_size(&ism->pdev->dev))
                return -EINVAL;
@@ -315,14 +318,30 @@ static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
            test_and_set_bit(dmb->sba_idx, ism->sba_bitmap))
                return -EINVAL;
 
-       dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len,
-                                          &dmb->dma_addr,
-                                          GFP_KERNEL | __GFP_NOWARN |
-                                          __GFP_NOMEMALLOC | __GFP_NORETRY);
-       if (!dmb->cpu_addr)
-               clear_bit(dmb->sba_idx, ism->sba_bitmap);
+       folio = folio_alloc(GFP_KERNEL | __GFP_NOWARN | __GFP_NOMEMALLOC |
+                           __GFP_NORETRY, get_order(dmb->dmb_len));
 
-       return dmb->cpu_addr ? 0 : -ENOMEM;
+       if (!folio) {
+               rc = -ENOMEM;
+               goto out_bit;
+       }
+
+       dmb->cpu_addr = folio_address(folio);
+       dmb->dma_addr = dma_map_page(&ism->pdev->dev,
+                                    virt_to_page(dmb->cpu_addr), 0,
+                                    dmb->dmb_len, DMA_FROM_DEVICE);
+       if (dma_mapping_error(&ism->pdev->dev, dmb->dma_addr)) {
+               rc = -ENOMEM;
+               goto out_free;
+       }
+
+       return 0;
+
+out_free:
+       kfree(dmb->cpu_addr);
+out_bit:
+       clear_bit(dmb->sba_idx, ism->sba_bitmap);
+       return rc;
 }
 
 int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb,
index 097dfe4b620dce85736b8a0d5cf7f4b3c4842e9b..35f8e00850d6cb3e45063c2229c4f7532a9eae40 100644 (file)
@@ -1797,7 +1797,7 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
        if (dev_is_sata(device)) {
                struct ata_link *link = &device->sata_dev.ap->link;
 
-               rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT,
+               rc = ata_wait_after_reset(link, jiffies + HISI_SAS_WAIT_PHYUP_TIMEOUT,
                                          smp_ata_check_ready_type);
        } else {
                msleep(2000);
index 7d2a33514538c2cd8083733d8303f4dc5934de7d..34f96cc35342bcb4ad2e4208d69b19073e6a9bb2 100644 (file)
@@ -2244,7 +2244,15 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
        case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
                if ((dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) &&
                    (sipc_rx_err_type & RX_FIS_STATUS_ERR_MSK)) {
-                       ts->stat = SAS_PROTO_RESPONSE;
+                       if (task->ata_task.use_ncq) {
+                               struct domain_device *device = task->dev;
+                               struct hisi_sas_device *sas_dev = device->lldd_dev;
+
+                               sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR;
+                               slot->abort = 1;
+                       } else {
+                               ts->stat = SAS_PROTO_RESPONSE;
+                       }
                } else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
                        ts->residual = trans_tx_fail_type;
                        ts->stat = SAS_DATA_UNDERRUN;
index 26e6b3e3af4317ca088941bc5bab37c15aebfd32..dcde55c8ee5deadd421b108087605c5c822c3b4c 100644 (file)
@@ -1100,7 +1100,7 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
 
                list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
                        if (fcport->edif.enable) {
-                               if (pcnt > app_req.num_ports)
+                               if (pcnt >= app_req.num_ports)
                                        break;
 
                                app_reply->elem[pcnt].rekey_count =
index 2e28e2360c85740d0b3ebb391785ee111c78d47b..5b3230ef51fe61bce58ba1cc83bff7cb0a6ddbc1 100644 (file)
@@ -635,10 +635,9 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
        if (blk_queue_add_random(q))
                add_disk_randomness(req->q->disk);
 
-       if (!blk_rq_is_passthrough(req)) {
-               WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED));
-               cmd->flags &= ~SCMD_INITIALIZED;
-       }
+       WARN_ON_ONCE(!blk_rq_is_passthrough(req) &&
+                    !(cmd->flags & SCMD_INITIALIZED));
+       cmd->flags = 0;
 
        /*
         * Calling rcu_barrier() is not necessary here because the
index 386981c6976a53d668632457a47fcf1db609f5fd..baf870a03ecf6c6516f90e599188c659dc986bae 100644 (file)
@@ -285,6 +285,7 @@ sg_open(struct inode *inode, struct file *filp)
        int dev = iminor(inode);
        int flags = filp->f_flags;
        struct request_queue *q;
+       struct scsi_device *device;
        Sg_device *sdp;
        Sg_fd *sfp;
        int retval;
@@ -301,11 +302,12 @@ sg_open(struct inode *inode, struct file *filp)
 
        /* This driver's module count bumped by fops_get in <linux/fs.h> */
        /* Prevent the device driver from vanishing while we sleep */
-       retval = scsi_device_get(sdp->device);
+       device = sdp->device;
+       retval = scsi_device_get(device);
        if (retval)
                goto sg_put;
 
-       retval = scsi_autopm_get_device(sdp->device);
+       retval = scsi_autopm_get_device(device);
        if (retval)
                goto sdp_put;
 
@@ -313,7 +315,7 @@ sg_open(struct inode *inode, struct file *filp)
         * check if O_NONBLOCK. Permits SCSI commands to be issued
         * during error recovery. Tread carefully. */
        if (!((flags & O_NONBLOCK) ||
-             scsi_block_when_processing_errors(sdp->device))) {
+             scsi_block_when_processing_errors(device))) {
                retval = -ENXIO;
                /* we are in error recovery for this device */
                goto error_out;
@@ -344,7 +346,7 @@ sg_open(struct inode *inode, struct file *filp)
 
        if (sdp->open_cnt < 1) {  /* no existing opens */
                sdp->sgdebug = 0;
-               q = sdp->device->request_queue;
+               q = device->request_queue;
                sdp->sg_tablesize = queue_max_segments(q);
        }
        sfp = sg_add_sfp(sdp);
@@ -370,10 +372,11 @@ out_undo:
 error_mutex_locked:
        mutex_unlock(&sdp->open_rel_lock);
 error_out:
-       scsi_autopm_put_device(sdp->device);
+       scsi_autopm_put_device(device);
 sdp_put:
-       scsi_device_put(sdp->device);
-       goto sg_put;
+       kref_put(&sdp->d_ref, sg_device_destroy);
+       scsi_device_put(device);
+       return retval;
 }
 
 /* Release resources associated with a successful sg_open()
@@ -2233,7 +2236,6 @@ sg_remove_sfp_usercontext(struct work_struct *work)
                        "sg_remove_sfp: sfp=0x%p\n", sfp));
        kfree(sfp);
 
-       WARN_ON_ONCE(kref_read(&sdp->d_ref) != 1);
        kref_put(&sdp->d_ref, sg_device_destroy);
        scsi_device_put(device);
        module_put(THIS_MODULE);
index c1fbcdd1618264f0cd09f5e4078ac600ad6dc22a..c40217f44b1bc53d149e8d5ea12c0e5297373800 100644 (file)
@@ -3672,6 +3672,8 @@ static int __init target_core_init_configfs(void)
 {
        struct configfs_subsystem *subsys = &target_core_fabrics;
        struct t10_alua_lu_gp *lu_gp;
+       struct cred *kern_cred;
+       const struct cred *old_cred;
        int ret;
 
        pr_debug("TARGET_CORE[0]: Loading Generic Kernel Storage"
@@ -3748,11 +3750,21 @@ static int __init target_core_init_configfs(void)
        if (ret < 0)
                goto out;
 
+       /* We use the kernel credentials to access the target directory */
+       kern_cred = prepare_kernel_cred(&init_task);
+       if (!kern_cred) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       old_cred = override_creds(kern_cred);
        target_init_dbroot();
+       revert_creds(old_cred);
+       put_cred(kern_cred);
 
        return 0;
 
 out:
+       target_xcopy_release_pt();
        configfs_unregister_subsystem(subsys);
        core_dev_release_virtual_lun0();
        rd_module_exit();
index c617e8b9f0ddfe18bcb34155e156af47e4006837..d78d54ae2605e8ab3050dd7a1e68fb13688a78c5 100644 (file)
@@ -616,6 +616,7 @@ void thermal_debug_tz_trip_up(struct thermal_zone_device *tz,
        tze->trip_stats[trip_id].timestamp = now;
        tze->trip_stats[trip_id].max = max(tze->trip_stats[trip_id].max, temperature);
        tze->trip_stats[trip_id].min = min(tze->trip_stats[trip_id].min, temperature);
+       tze->trip_stats[trip_id].count++;
        tze->trip_stats[trip_id].avg = tze->trip_stats[trip_id].avg +
                (temperature - tze->trip_stats[trip_id].avg) /
                tze->trip_stats[trip_id].count;
index 06859e17b67b7777a08d7e5e33b1bfb972cdd6c3..7a00004bfd0361799f1a43be4cb8e9c35e414d9e 100644 (file)
@@ -47,7 +47,7 @@ enum {
        TSTBUS_MAX,
 };
 
-#define QCOM_UFS_MAX_GEAR 4
+#define QCOM_UFS_MAX_GEAR 5
 #define QCOM_UFS_MAX_LANE 2
 
 enum {
@@ -67,26 +67,32 @@ static const struct __ufs_qcom_bw_table {
        [MODE_PWM][UFS_PWM_G2][UFS_LANE_1] = { 1844,            1000 },
        [MODE_PWM][UFS_PWM_G3][UFS_LANE_1] = { 3688,            1000 },
        [MODE_PWM][UFS_PWM_G4][UFS_LANE_1] = { 7376,            1000 },
+       [MODE_PWM][UFS_PWM_G5][UFS_LANE_1] = { 14752,           1000 },
        [MODE_PWM][UFS_PWM_G1][UFS_LANE_2] = { 1844,            1000 },
        [MODE_PWM][UFS_PWM_G2][UFS_LANE_2] = { 3688,            1000 },
        [MODE_PWM][UFS_PWM_G3][UFS_LANE_2] = { 7376,            1000 },
        [MODE_PWM][UFS_PWM_G4][UFS_LANE_2] = { 14752,           1000 },
+       [MODE_PWM][UFS_PWM_G5][UFS_LANE_2] = { 29504,           1000 },
        [MODE_HS_RA][UFS_HS_G1][UFS_LANE_1] = { 127796,         1000 },
        [MODE_HS_RA][UFS_HS_G2][UFS_LANE_1] = { 255591,         1000 },
        [MODE_HS_RA][UFS_HS_G3][UFS_LANE_1] = { 1492582,        102400 },
        [MODE_HS_RA][UFS_HS_G4][UFS_LANE_1] = { 2915200,        204800 },
+       [MODE_HS_RA][UFS_HS_G5][UFS_LANE_1] = { 5836800,        409600 },
        [MODE_HS_RA][UFS_HS_G1][UFS_LANE_2] = { 255591,         1000 },
        [MODE_HS_RA][UFS_HS_G2][UFS_LANE_2] = { 511181,         1000 },
        [MODE_HS_RA][UFS_HS_G3][UFS_LANE_2] = { 1492582,        204800 },
        [MODE_HS_RA][UFS_HS_G4][UFS_LANE_2] = { 2915200,        409600 },
+       [MODE_HS_RA][UFS_HS_G5][UFS_LANE_2] = { 5836800,        819200 },
        [MODE_HS_RB][UFS_HS_G1][UFS_LANE_1] = { 149422,         1000 },
        [MODE_HS_RB][UFS_HS_G2][UFS_LANE_1] = { 298189,         1000 },
        [MODE_HS_RB][UFS_HS_G3][UFS_LANE_1] = { 1492582,        102400 },
        [MODE_HS_RB][UFS_HS_G4][UFS_LANE_1] = { 2915200,        204800 },
+       [MODE_HS_RB][UFS_HS_G5][UFS_LANE_1] = { 5836800,        409600 },
        [MODE_HS_RB][UFS_HS_G1][UFS_LANE_2] = { 298189,         1000 },
        [MODE_HS_RB][UFS_HS_G2][UFS_LANE_2] = { 596378,         1000 },
        [MODE_HS_RB][UFS_HS_G3][UFS_LANE_2] = { 1492582,        204800 },
        [MODE_HS_RB][UFS_HS_G4][UFS_LANE_2] = { 2915200,        409600 },
+       [MODE_HS_RB][UFS_HS_G5][UFS_LANE_2] = { 5836800,        819200 },
        [MODE_MAX][0][0]                    = { 7643136,        307200 },
 };
 
index 20d9762331bd767aa88c7b04d3f4c2e84ff72648..6be3462b109ff29c0d5448a7d3b8f31e068f6adb 100644 (file)
@@ -181,12 +181,14 @@ hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata)
 {
        if (pdata->send_gpadl.gpadl_handle) {
                vmbus_teardown_gpadl(dev->channel, &pdata->send_gpadl);
-               vfree(pdata->send_buf);
+               if (!pdata->send_gpadl.decrypted)
+                       vfree(pdata->send_buf);
        }
 
        if (pdata->recv_gpadl.gpadl_handle) {
                vmbus_teardown_gpadl(dev->channel, &pdata->recv_gpadl);
-               vfree(pdata->recv_buf);
+               if (!pdata->recv_gpadl.decrypted)
+                       vfree(pdata->recv_buf);
        }
 }
 
@@ -295,7 +297,8 @@ hv_uio_probe(struct hv_device *dev,
        ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
                                    RECV_BUFFER_SIZE, &pdata->recv_gpadl);
        if (ret) {
-               vfree(pdata->recv_buf);
+               if (!pdata->recv_gpadl.decrypted)
+                       vfree(pdata->recv_buf);
                goto fail_close;
        }
 
@@ -317,7 +320,8 @@ hv_uio_probe(struct hv_device *dev,
        ret = vmbus_establish_gpadl(channel, pdata->send_buf,
                                    SEND_BUFFER_SIZE, &pdata->send_gpadl);
        if (ret) {
-               vfree(pdata->send_buf);
+               if (!pdata->send_gpadl.decrypted)
+                       vfree(pdata->send_buf);
                goto fail_close;
        }
 
index 045f666b4f12a2a6416c93dafc2189af03662668..8995730ce0bfc82d193bd7128e51817fba43de76 100644 (file)
@@ -2515,7 +2515,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
                vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
 
                if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
-                       vq_err(vq, "Guest moved used index from %u to %u",
+                       vq_err(vq, "Guest moved avail index from %u to %u",
                                last_avail_idx, vq->avail_idx);
                        return -EFAULT;
                }
@@ -2799,9 +2799,19 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
        r = vhost_get_avail_idx(vq, &avail_idx);
        if (unlikely(r))
                return false;
+
        vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+       if (vq->avail_idx != vq->last_avail_idx) {
+               /* Since we have updated avail_idx, the following
+                * call to vhost_get_vq_desc() will read available
+                * ring entries. Make sure that read happens after
+                * the avail_idx read.
+                */
+               smp_rmb();
+               return false;
+       }
 
-       return vq->avail_idx == vq->last_avail_idx;
+       return true;
 }
 EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
 
@@ -2838,9 +2848,19 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
                       &vq->avail->idx, r);
                return false;
        }
+
        vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+       if (vq->avail_idx != vq->last_avail_idx) {
+               /* Since we have updated avail_idx, the following
+                * call to vhost_get_vq_desc() will read available
+                * ring entries. Make sure that read happens after
+                * the avail_idx read.
+                */
+               smp_rmb();
+               return true;
+       }
 
-       return vq->avail_idx != vq->last_avail_idx;
+       return false;
 }
 EXPORT_SYMBOL_GPL(vhost_enable_notify);
 
index b67a28da47026d0299b8a1f8c22a40fc36b1c4a2..a1c467a0e9f719665fc02fa559d5c94545e5725f 100644 (file)
@@ -68,7 +68,6 @@ out:
 static void vmgenid_notify(struct acpi_device *device, u32 event)
 {
        struct vmgenid_state *state = acpi_driver_data(device);
-       char *envp[] = { "NEW_VMGENID=1", NULL };
        u8 old_id[VMGENID_SIZE];
 
        memcpy(old_id, state->this_id, sizeof(old_id));
@@ -76,7 +75,6 @@ static void vmgenid_notify(struct acpi_device *device, u32 event)
        if (!memcmp(old_id, state->this_id, sizeof(old_id)))
                return;
        add_vmfork_randomness(state->this_id, sizeof(state->this_id));
-       kobject_uevent_env(&device->dev.kobj, KOBJ_CHANGE, envp);
 }
 
 static const struct acpi_device_id vmgenid_ids[] = {
index f173587893cb34cadbfb4c6e548c158522c7749d..9510c551dce864d1e7df97f47a0c24fbcb8b8478 100644 (file)
@@ -362,14 +362,16 @@ static const struct bus_type virtio_bus = {
        .remove = virtio_dev_remove,
 };
 
-int register_virtio_driver(struct virtio_driver *driver)
+int __register_virtio_driver(struct virtio_driver *driver, struct module *owner)
 {
        /* Catch this early. */
        BUG_ON(driver->feature_table_size && !driver->feature_table);
        driver->driver.bus = &virtio_bus;
+       driver->driver.owner = owner;
+
        return driver_register(&driver->driver);
 }
-EXPORT_SYMBOL_GPL(register_virtio_driver);
+EXPORT_SYMBOL_GPL(__register_virtio_driver);
 
 void unregister_virtio_driver(struct virtio_driver *driver)
 {
index 3640f417cce118b06e43ae4c8b38bb275b0097fc..5c180fdc3efbdf09791c7941f3f1522cd6d6f9dc 100644 (file)
@@ -281,7 +281,6 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
        struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
        struct btree_trans *trans = bch2_trans_get(c);
        struct btree_iter iter = { NULL };
-       struct bkey_s_c_xattr xattr;
        struct posix_acl *acl = NULL;
        struct bkey_s_c k;
        int ret;
@@ -290,28 +289,27 @@ retry:
 
        ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
                        &hash, inode_inum(inode), &search, 0);
-       if (ret) {
-               if (!bch2_err_matches(ret, ENOENT))
-                       acl = ERR_PTR(ret);
-               goto out;
-       }
+       if (ret)
+               goto err;
 
        k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
-       if (ret) {
-               acl = ERR_PTR(ret);
-               goto out;
-       }
+       if (ret)
+               goto err;
 
-       xattr = bkey_s_c_to_xattr(k);
+       struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
        acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
-                       le16_to_cpu(xattr.v->x_val_len));
+                                le16_to_cpu(xattr.v->x_val_len));
+       ret = PTR_ERR_OR_ZERO(acl);
+err:
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               goto retry;
 
-       if (!IS_ERR(acl))
+       if (ret)
+               acl = !bch2_err_matches(ret, ENOENT) ? ERR_PTR(ret) : NULL;
+
+       if (!IS_ERR_OR_NULL(acl))
                set_cached_acl(&inode->v, type, acl);
-out:
-       if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart))
-               goto retry;
 
        bch2_trans_iter_exit(trans, &iter);
        bch2_trans_put(trans);
index 114328acde7202ed201fc8e776ed9cd73176d765..fadb1078903d291ce6ce3c7928d79c71c1eb18f8 100644 (file)
@@ -49,13 +49,15 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
        if (!bch2_dev_exists2(c, bp.k->p.inode))
                return 0;
 
+       struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode);
        struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
        int ret = 0;
 
-       bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
+       bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
+                        !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
                         c, err,
-                        backpointer_pos_wrong,
-                        "backpointer at wrong pos");
+                        backpointer_bucket_offset_wrong,
+                        "backpointer bucket_offset wrong");
 fsck_err:
        return ret;
 }
index da012ca7daee5501fe04be48bc875c918abbb33a..85949b9fd880ce2fcce508ba4018350a5dfac9ca 100644 (file)
@@ -53,14 +53,11 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
                                           u64 bucket_offset)
 {
        struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
-       struct bpos ret;
-
-       ret = POS(bucket.inode,
-                 (bucket_to_sector(ca, bucket.offset) <<
-                  MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+       struct bpos ret = POS(bucket.inode,
+                             (bucket_to_sector(ca, bucket.offset) <<
+                              MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
 
        EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
-
        return ret;
 }
 
index a31a5f706929eb2006e4867a38123b9526639cee..91c3c1fef233d118fb083dae3a60a5e779e3cdaf 100644 (file)
@@ -709,6 +709,8 @@ struct btree_trans_buf {
        x(stripe_delete)                                                \
        x(reflink)                                                      \
        x(fallocate)                                                    \
+       x(fsync)                                                        \
+       x(dio_write)                                                    \
        x(discard)                                                      \
        x(discard_fast)                                                 \
        x(invalidate)                                                   \
index 63102992d9556d1b33b445a3116df61964a6ca01..085987435a5ea3cfc7354db7ee5392ce62241f05 100644 (file)
@@ -578,7 +578,8 @@ struct bch_member {
        __le64                  nbuckets;       /* device size */
        __le16                  first_bucket;   /* index of first bucket used */
        __le16                  bucket_size;    /* sectors */
-       __le32                  pad;
+       __u8                    btree_bitmap_shift;
+       __u8                    pad[3];
        __le64                  last_mount;     /* time_t */
 
        __le64                  flags;
@@ -587,6 +588,7 @@ struct bch_member {
        __le64                  errors_at_reset[BCH_MEMBER_ERROR_NR];
        __le64                  errors_reset_time;
        __le64                  seq;
+       __le64                  btree_allocated_bitmap;
 };
 
 #define BCH_MEMBER_V1_BYTES    56
@@ -876,7 +878,8 @@ struct bch_sb_field_downgrade {
        x(rebalance_work,               BCH_VERSION(1,  3))             \
        x(member_seq,                   BCH_VERSION(1,  4))             \
        x(subvolume_fs_parent,          BCH_VERSION(1,  5))             \
-       x(btree_subvolume_children,     BCH_VERSION(1,  6))
+       x(btree_subvolume_children,     BCH_VERSION(1,  6))             \
+       x(mi_btree_bitmap,              BCH_VERSION(1,  7))
 
 enum bcachefs_metadata_version {
        bcachefs_metadata_version_min = 9,
@@ -1314,7 +1317,7 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
        x(write_buffer_keys,    11)             \
        x(datetime,             12)
 
-enum {
+enum bch_jset_entry_type {
 #define x(f, nr)       BCH_JSET_ENTRY_##f      = nr,
        BCH_JSET_ENTRY_TYPES()
 #undef x
@@ -1360,7 +1363,7 @@ struct jset_entry_blacklist_v2 {
        x(inodes,               1)              \
        x(key_version,          2)
 
-enum {
+enum bch_fs_usage_type {
 #define x(f, nr)       BCH_FS_USAGE_##f        = nr,
        BCH_FS_USAGE_TYPES()
 #undef x
@@ -1535,6 +1538,20 @@ enum btree_id {
        BTREE_ID_NR
 };
 
+static inline bool btree_id_is_alloc(enum btree_id id)
+{
+       switch (id) {
+       case BTREE_ID_alloc:
+       case BTREE_ID_backpointers:
+       case BTREE_ID_need_discard:
+       case BTREE_ID_freespace:
+       case BTREE_ID_bucket_gens:
+               return true;
+       default:
+               return false;
+       }
+}
+
 #define BTREE_MAX_DEPTH                4U
 
 /* Btree nodes */
index cf23ff47bed8be588593a7fb193ee21ca8298c65..3a45d128f608db86d060d43573219d60762e3038 100644 (file)
@@ -314,6 +314,12 @@ static inline unsigned bkeyp_key_u64s(const struct bkey_format *format,
        return bkey_packed(k) ? format->key_u64s : BKEY_U64s;
 }
 
+static inline bool bkeyp_u64s_valid(const struct bkey_format *f,
+                                   const struct bkey_packed *k)
+{
+       return ((unsigned) k->u64s - bkeyp_key_u64s(f, k) <= U8_MAX - BKEY_U64s);
+}
+
 static inline unsigned bkeyp_key_bytes(const struct bkey_format *format,
                                       const struct bkey_packed *k)
 {
index 5e52684764eb14de4d8433abd5954a829648440b..db336a43fc083a79615e81ce9da37ff4877005f9 100644 (file)
@@ -171,11 +171,15 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
        if (type >= BKEY_TYPE_NR)
                return 0;
 
-       bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) &&
+       bkey_fsck_err_on((type == BKEY_TYPE_btree ||
+                         (flags & BKEY_INVALID_COMMIT)) &&
                         !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err,
                         bkey_invalid_type_for_btree,
                         "invalid key type for btree %s (%s)",
-                        bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]);
+                        bch2_btree_node_type_str(type),
+                        k.k->type < KEY_TYPE_MAX
+                        ? bch2_bkey_types[k.k->type]
+                        : "(unknown)");
 
        if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
                bkey_fsck_err_on(k.k->size == 0, c, err,
index 84474324dba9b508141f0e886bafbd8a95d47537..02c70e813face0ce975f1f700e55a34743d286ea 100644 (file)
@@ -709,9 +709,31 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
-       u32 seq;
 
-       BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
+       if (unlikely(level >= BTREE_MAX_DEPTH)) {
+               int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u",
+                                                level, BTREE_MAX_DEPTH);
+               return ERR_PTR(ret);
+       }
+
+       if (unlikely(!bkey_is_btree_ptr(&k->k))) {
+               struct printbuf buf = PRINTBUF;
+               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+               int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf);
+               printbuf_exit(&buf);
+               return ERR_PTR(ret);
+       }
+
+       if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) {
+               struct printbuf buf = PRINTBUF;
+               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+               int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf);
+               printbuf_exit(&buf);
+               return ERR_PTR(ret);
+       }
+
        /*
         * Parent node must be locked, else we could read in a btree node that's
         * been freed:
@@ -752,34 +774,26 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
        }
 
        set_btree_node_read_in_flight(b);
-
        six_unlock_write(&b->c.lock);
-       seq = six_lock_seq(&b->c.lock);
-       six_unlock_intent(&b->c.lock);
 
-       /* Unlock before doing IO: */
-       if (path && sync)
-               bch2_trans_unlock_noassert(trans);
-
-       bch2_btree_node_read(trans, b, sync);
+       if (path) {
+               u32 seq = six_lock_seq(&b->c.lock);
 
-       if (!sync)
-               return NULL;
+               /* Unlock before doing IO: */
+               six_unlock_intent(&b->c.lock);
+               bch2_trans_unlock_noassert(trans);
 
-       if (path) {
-               int ret = bch2_trans_relock(trans) ?:
-                       bch2_btree_path_relock_intent(trans, path);
-               if (ret) {
-                       BUG_ON(!trans->restarted);
-                       return ERR_PTR(ret);
-               }
-       }
+               bch2_btree_node_read(trans, b, sync);
 
-       if (!six_relock_type(&b->c.lock, lock_type, seq)) {
-               BUG_ON(!path);
+               if (!sync)
+                       return NULL;
 
-               trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
-               return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
+               if (!six_relock_type(&b->c.lock, lock_type, seq))
+                       b = NULL;
+       } else {
+               bch2_btree_node_read(trans, b, sync);
+               if (lock_type == SIX_LOCK_read)
+                       six_lock_downgrade(&b->c.lock);
        }
 
        return b;
@@ -1112,18 +1126,19 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct btree_cache *bc = &c->btree_cache;
-       struct btree *b;
 
        BUG_ON(path && !btree_node_locked(path, level + 1));
        BUG_ON(level >= BTREE_MAX_DEPTH);
 
-       b = btree_cache_find(bc, k);
+       struct btree *b = btree_cache_find(bc, k);
        if (b)
                return 0;
 
        b = bch2_btree_node_fill(trans, path, k, btree_id,
                                 level, SIX_LOCK_read, false);
-       return PTR_ERR_OR_ZERO(b);
+       if (!IS_ERR_OR_NULL(b))
+               six_unlock_read(&b->c.lock);
+       return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
 }
 
 void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
@@ -1148,6 +1163,8 @@ wait_on_io:
 
        btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
        btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
+       if (unlikely(b->hash_val != btree_ptr_hash_val(k)))
+               goto out;
 
        if (btree_node_dirty(b)) {
                __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
@@ -1162,7 +1179,7 @@ wait_on_io:
        btree_node_data_free(c, b);
        bch2_btree_node_hash_remove(bc, b);
        mutex_unlock(&bc->lock);
-
+out:
        six_unlock_write(&b->c.lock);
        six_unlock_intent(&b->c.lock);
 }
index 6280da1244b55032beaf60c4e2b29df0ff2c3152..ecbd9598f69fd00e86efbe7537a134d6d4c4db06 100644 (file)
@@ -368,11 +368,16 @@ again:
                                buf.buf)) {
                        bch2_btree_node_evict(trans, cur_k.k);
                        cur = NULL;
-                       ret =   bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?:
-                               bch2_journal_key_delete(c, b->c.btree_id,
-                                                       b->c.level, cur_k.k->k.p);
+                       ret = bch2_journal_key_delete(c, b->c.btree_id,
+                                                     b->c.level, cur_k.k->k.p);
                        if (ret)
                                break;
+
+                       if (!btree_id_is_alloc(b->c.btree_id)) {
+                               ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+                               if (ret)
+                                       break;
+                       }
                        continue;
                }
 
@@ -544,12 +549,12 @@ reconstruct_root:
                                bch2_btree_root_alloc_fake(c, i, 0);
                        } else {
                                bch2_btree_root_alloc_fake(c, i, 1);
+                               bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
                                ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
                                if (ret)
                                        break;
                        }
 
-                       bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
                        reconstructed_root = true;
                }
 
@@ -823,6 +828,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
        struct bch_fs *c = trans->c;
        struct bkey deleted = KEY(0, 0, 0);
        struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
+       struct printbuf buf = PRINTBUF;
        int ret = 0;
 
        deleted.p = k->k->p;
@@ -843,11 +849,23 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
        if (ret)
                goto err;
 
+       if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, *k),
+                               c, btree_bitmap_not_marked,
+                               "btree ptr not marked in member info btree allocated bitmap\n  %s",
+                               (bch2_bkey_val_to_text(&buf, c, *k),
+                                buf.buf))) {
+               mutex_lock(&c->sb_lock);
+               bch2_dev_btree_bitmap_mark(c, *k);
+               bch2_write_super(c);
+               mutex_unlock(&c->sb_lock);
+       }
+
        ret = commit_do(trans, NULL, NULL, 0,
                        bch2_key_trigger(trans, btree_id, level, old,
                                         unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
 fsck_err:
 err:
+       printbuf_exit(&buf);
        bch_err_fn(c, ret);
        return ret;
 }
index d7de82ac389354f9a0d5eef0a66c8694f1752b94..9678b2375bedde868e7a168435c9a17fc74eb26a 100644 (file)
@@ -831,7 +831,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b,
                (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0);
 }
 
-static bool __bkey_valid(struct bch_fs *c, struct btree *b,
+static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,
                         struct bset *i, struct bkey_packed *k)
 {
        if (bkey_p_next(k) > vstruct_last(i))
@@ -840,7 +840,7 @@ static bool __bkey_valid(struct bch_fs *c, struct btree *b,
        if (k->format > KEY_FORMAT_CURRENT)
                return false;
 
-       if (k->u64s < bkeyp_key_u64s(&b->format, k))
+       if (!bkeyp_u64s_valid(&b->format, k))
                return false;
 
        struct printbuf buf = PRINTBUF;
@@ -884,11 +884,13 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
                                 "invalid bkey format %u", k->format))
                        goto drop_this_key;
 
-               if (btree_err_on(k->u64s < bkeyp_key_u64s(&b->format, k),
+               if (btree_err_on(!bkeyp_u64s_valid(&b->format, k),
                                 -BCH_ERR_btree_node_read_err_fixable,
                                 c, NULL, b, i,
                                 btree_node_bkey_bad_u64s,
-                                "k->u64s too small (%u < %u)", k->u64s, bkeyp_key_u64s(&b->format, k)))
+                                "bad k->u64s %u (min %u max %lu)", k->u64s,
+                                bkeyp_key_u64s(&b->format, k),
+                                U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k)))
                        goto drop_this_key;
 
                if (!write)
@@ -947,13 +949,12 @@ drop_this_key:
                         * do
                         */
 
-                       if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
+                       if (!bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
                                for (next_good_key = 1;
                                     next_good_key < (u64 *) vstruct_last(i) - (u64 *) k;
                                     next_good_key++)
-                                       if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
+                                       if (bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
                                                goto got_good_key;
-
                        }
 
                        /*
@@ -1339,7 +1340,9 @@ start:
                               rb->start_time);
        bio_put(&rb->bio);
 
-       if (saw_error && !btree_node_read_error(b)) {
+       if (saw_error &&
+           !btree_node_read_error(b) &&
+           c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
                printbuf_reset(&buf);
                bch2_bpos_to_text(&buf, b->key.k.p);
                bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
index 24772538e4cc74ada59851bd7847dd5ece5ea122..1c70836dd7cce4988ef8cf166ee0797fd8f8269e 100644 (file)
@@ -498,8 +498,13 @@ static inline void set_btree_iter_dontneed(struct btree_iter *iter)
 {
        struct btree_trans *trans = iter->trans;
 
-       if (!trans->restarted)
-               btree_iter_path(trans, iter)->preserve = false;
+       if (!iter->path || trans->restarted)
+               return;
+
+       struct btree_path *path = btree_iter_path(trans, iter);
+       path->preserve          = false;
+       if (path->ref == 1)
+               path->should_be_locked  = false;
 }
 
 void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
@@ -642,7 +647,7 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *);
 
 static inline int btree_trans_too_many_iters(struct btree_trans *trans)
 {
-       if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_INITIAL - 8)
+       if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_NORMAL_LIMIT - 8)
                return __bch2_btree_trans_too_many_iters(trans);
 
        return 0;
index 5cbcbfe85235b8de3777ae82b120d4627f99c8d7..1e8cf49a69353198774a0e5b798c2f1f135041fa 100644 (file)
@@ -130,12 +130,30 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree
        return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx);
 }
 
+static void journal_iter_verify(struct journal_iter *iter)
+{
+       struct journal_keys *keys = iter->keys;
+       size_t gap_size = keys->size - keys->nr;
+
+       BUG_ON(iter->idx >= keys->gap &&
+              iter->idx <  keys->gap + gap_size);
+
+       if (iter->idx < keys->size) {
+               struct journal_key *k = keys->data + iter->idx;
+
+               int cmp = cmp_int(k->btree_id,  iter->btree_id) ?:
+                         cmp_int(k->level,     iter->level);
+               BUG_ON(cmp < 0);
+       }
+}
+
 static void journal_iters_fix(struct bch_fs *c)
 {
        struct journal_keys *keys = &c->journal_keys;
        /* The key we just inserted is immediately before the gap: */
        size_t gap_end = keys->gap + (keys->size - keys->nr);
-       struct btree_and_journal_iter *iter;
+       struct journal_key *new_key = &keys->data[keys->gap - 1];
+       struct journal_iter *iter;
 
        /*
         * If an iterator points one after the key we just inserted, decrement
@@ -143,9 +161,14 @@ static void journal_iters_fix(struct bch_fs *c)
         * decrement was unnecessary, bch2_btree_and_journal_iter_peek() will
         * handle that:
         */
-       list_for_each_entry(iter, &c->journal_iters, journal.list)
-               if (iter->journal.idx == gap_end)
-                       iter->journal.idx = keys->gap - 1;
+       list_for_each_entry(iter, &c->journal_iters, list) {
+               journal_iter_verify(iter);
+               if (iter->idx           == gap_end &&
+                   new_key->btree_id   == iter->btree_id &&
+                   new_key->level      == iter->level)
+                       iter->idx = keys->gap - 1;
+               journal_iter_verify(iter);
+       }
 }
 
 static void journal_iters_move_gap(struct bch_fs *c, size_t old_gap, size_t new_gap)
@@ -192,7 +215,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
        if (idx > keys->gap)
                idx -= keys->size - keys->nr;
 
+       size_t old_gap = keys->gap;
+
        if (keys->nr == keys->size) {
+               journal_iters_move_gap(c, old_gap, keys->size);
+               old_gap = keys->size;
+
                struct journal_keys new_keys = {
                        .nr                     = keys->nr,
                        .size                   = max_t(size_t, keys->size, 8) * 2,
@@ -216,7 +244,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
                keys->gap       = keys->nr;
        }
 
-       journal_iters_move_gap(c, keys->gap, idx);
+       journal_iters_move_gap(c, old_gap, idx);
 
        move_gap(keys, idx);
 
@@ -301,16 +329,21 @@ static void bch2_journal_iter_advance(struct journal_iter *iter)
 
 static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
 {
-       struct journal_key *k = iter->keys->data + iter->idx;
+       journal_iter_verify(iter);
+
+       while (iter->idx < iter->keys->size) {
+               struct journal_key *k = iter->keys->data + iter->idx;
+
+               int cmp = cmp_int(k->btree_id,  iter->btree_id) ?:
+                         cmp_int(k->level,     iter->level);
+               if (cmp > 0)
+                       break;
+               BUG_ON(cmp);
 
-       while (k < iter->keys->data + iter->keys->size &&
-              k->btree_id      == iter->btree_id &&
-              k->level         == iter->level) {
                if (!k->overwritten)
                        return bkey_i_to_s_c(k->k);
 
                bch2_journal_iter_advance(iter);
-               k = iter->keys->data + iter->idx;
        }
 
        return bkey_s_c_null;
@@ -330,6 +363,8 @@ static void bch2_journal_iter_init(struct bch_fs *c,
        iter->level     = level;
        iter->keys      = &c->journal_keys;
        iter->idx       = bch2_journal_key_search(&c->journal_keys, id, level, pos);
+
+       journal_iter_verify(iter);
 }
 
 static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
@@ -434,10 +469,15 @@ void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
        iter->trans = trans;
        iter->b = b;
        iter->node_iter = node_iter;
-       bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
-       INIT_LIST_HEAD(&iter->journal.list);
        iter->pos = b->data->min_key;
        iter->at_end = false;
+       INIT_LIST_HEAD(&iter->journal.list);
+
+       if (trans->journal_replay_not_finished) {
+               bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
+               if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags))
+                       list_add(&iter->journal.list, &trans->c->journal_iters);
+       }
 }
 
 /*
@@ -452,9 +492,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
 
        bch2_btree_node_iter_init_from_start(&node_iter, b);
        __bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key);
-       if (trans->journal_replay_not_finished &&
-           !test_bit(BCH_FS_may_go_rw, &trans->c->flags))
-               list_add(&iter->journal.list, &trans->c->journal_iters);
 }
 
 /* sort and dedup all keys in the journal: */
index 581edcb0911bfa39e9ec6242686bd213c47f352c..88a3582a32757e34a28eb37143f9ff78a88a4085 100644 (file)
@@ -169,6 +169,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
        } else {
                mutex_lock(&bc->lock);
                list_move_tail(&ck->list, &bc->freed_pcpu);
+               bc->nr_freed_pcpu++;
                mutex_unlock(&bc->lock);
        }
 }
@@ -245,6 +246,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
                if (!list_empty(&bc->freed_pcpu)) {
                        ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list);
                        list_del_init(&ck->list);
+                       bc->nr_freed_pcpu--;
                }
                mutex_unlock(&bc->lock);
        }
@@ -659,7 +661,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
                commit_flags |= BCH_WATERMARK_reclaim;
 
        if (ck->journal.seq != journal_last_seq(j) ||
-           j->watermark == BCH_WATERMARK_stripe)
+           !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags))
                commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
 
        ret   = bch2_btree_iter_traverse(&b_iter) ?:
index b9b151e693ed60ecc3dc9147cc34902643cfc7aa..f2caf491957efc2345c082323516e58fe2a35302 100644 (file)
@@ -440,33 +440,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
                                       struct btree_path *path,
                                       struct btree_bkey_cached_common *b)
 {
-       struct btree_path *linked;
-       unsigned i, iter;
-       int ret;
-
-       /*
-        * XXX BIG FAT NOTICE
-        *
-        * Drop all read locks before taking a write lock:
-        *
-        * This is a hack, because bch2_btree_node_lock_write_nofail() is a
-        * hack - but by dropping read locks first, this should never fail, and
-        * we only use this in code paths where whatever read locks we've
-        * already taken are no longer needed:
-        */
-
-       trans_for_each_path(trans, linked, iter) {
-               if (!linked->nodes_locked)
-                       continue;
-
-               for (i = 0; i < BTREE_MAX_DEPTH; i++)
-                       if (btree_node_read_locked(linked, i)) {
-                               btree_node_unlock(trans, linked, i);
-                               btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
-                       }
-       }
-
-       ret = __btree_node_lock_write(trans, path, b, true);
+       int ret = __btree_node_lock_write(trans, path, b, true);
        BUG_ON(ret);
 }
 
index 3f33be7e5e5c26d9be6d0f1431ee04c3e545b825..866bd278439f8bb72a0b1e31e672953ff9b3f839 100644 (file)
@@ -133,6 +133,19 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
        if (le64_to_cpu(bn->magic) != bset_magic(c))
                return;
 
+       if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
+               struct nonce nonce = btree_nonce(&bn->keys, 0);
+               unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
+
+               bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes);
+       }
+
+       if (btree_id_is_alloc(BTREE_NODE_ID(bn)))
+               return;
+
+       if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH)
+               return;
+
        rcu_read_lock();
        struct found_btree_node n = {
                .btree_id       = BTREE_NODE_ID(bn),
@@ -192,8 +205,13 @@ static int read_btree_nodes_worker(void *p)
                                last_print = jiffies;
                        }
 
-                       try_read_btree_node(w->f, ca, bio, buf,
-                                           bucket * ca->mi.bucket_size + bucket_offset);
+                       u64 sector = bucket * ca->mi.bucket_size + bucket_offset;
+
+                       if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap &&
+                           !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
+                               continue;
+
+                       try_read_btree_node(w->f, ca, bio, buf, sector);
                }
 err:
        bio_put(bio);
@@ -213,6 +231,9 @@ static int read_btree_nodes(struct find_btree_nodes *f)
        closure_init_stack(&cl);
 
        for_each_online_member(c, ca) {
+               if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree)))
+                       continue;
+
                struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
                struct task_struct *t;
 
@@ -290,7 +311,7 @@ again:
                        found_btree_node_to_text(&buf, c, n);
                        bch_err(c, "%s", buf.buf);
                        printbuf_exit(&buf);
-                       return -1;
+                       return -BCH_ERR_fsck_repair_unimplemented;
                }
        }
 
@@ -436,6 +457,9 @@ bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
 int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
                           unsigned level, struct bpos node_min, struct bpos node_max)
 {
+       if (btree_id_is_alloc(btree))
+               return 0;
+
        struct find_btree_nodes *f = &c->found_btree_nodes;
 
        int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
index aa9da49707404015a558c9c6e9339b733d0c98c3..bbec91e8e6506fa32611b340dc1a3a4a104aeed6 100644 (file)
@@ -397,12 +397,13 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
        struct bkey_cached *ck = (void *) path->l[0].b;
        unsigned new_u64s;
        struct bkey_i *new_k;
+       unsigned watermark = flags & BCH_WATERMARK_MASK;
 
        EBUG_ON(path->level);
 
-       if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
-           bch2_btree_key_cache_must_wait(c) &&
-           !(flags & BCH_TRANS_COMMIT_journal_reclaim))
+       if (watermark < BCH_WATERMARK_reclaim &&
+           !test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
+           bch2_btree_key_cache_must_wait(c))
                return -BCH_ERR_btree_insert_need_journal_reclaim;
 
        /*
@@ -499,9 +500,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
 }
 
 static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
-                             struct btree_insert_entry *btree_id_start)
+                             unsigned btree_id_start)
 {
-       struct btree_insert_entry *i;
        bool trans_trigger_run;
        int ret, overwrite;
 
@@ -514,13 +514,13 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
                do {
                        trans_trigger_run = false;
 
-                       for (i = btree_id_start;
-                            i < trans->updates + trans->nr_updates && i->btree_id <= btree_id;
+                       for (unsigned i = btree_id_start;
+                            i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
                             i++) {
-                               if (i->btree_id != btree_id)
+                               if (trans->updates[i].btree_id != btree_id)
                                        continue;
 
-                               ret = run_one_trans_trigger(trans, i, overwrite);
+                               ret = run_one_trans_trigger(trans, trans->updates + i, overwrite);
                                if (ret < 0)
                                        return ret;
                                if (ret)
@@ -534,8 +534,7 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
 
 static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
 {
-       struct btree_insert_entry *btree_id_start = trans->updates;
-       unsigned btree_id = 0;
+       unsigned btree_id = 0, btree_id_start = 0;
        int ret = 0;
 
        /*
@@ -549,8 +548,8 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
                if (btree_id == BTREE_ID_alloc)
                        continue;
 
-               while (btree_id_start < trans->updates + trans->nr_updates &&
-                      btree_id_start->btree_id < btree_id)
+               while (btree_id_start < trans->nr_updates &&
+                      trans->updates[btree_id_start].btree_id < btree_id)
                        btree_id_start++;
 
                ret = run_btree_triggers(trans, btree_id, btree_id_start);
@@ -558,11 +557,13 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
                        return ret;
        }
 
-       trans_for_each_update(trans, i) {
+       for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
+               struct btree_insert_entry *i = trans->updates + idx;
+
                if (i->btree_id > BTREE_ID_alloc)
                        break;
                if (i->btree_id == BTREE_ID_alloc) {
-                       ret = run_btree_triggers(trans, BTREE_ID_alloc, i);
+                       ret = run_btree_triggers(trans, BTREE_ID_alloc, idx);
                        if (ret)
                                return ret;
                        break;
@@ -826,7 +827,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
        struct bch_fs *c = trans->c;
        int ret = 0, u64s_delta = 0;
 
-       trans_for_each_update(trans, i) {
+       for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
+               struct btree_insert_entry *i = trans->updates + idx;
                if (i->cached)
                        continue;
 
index 9404d96c38f3b368726a6603b601b241b5106100..e0c982a4195c764ab8a415b5f7f80cbff88c1935 100644 (file)
@@ -364,7 +364,21 @@ struct btree_insert_entry {
        unsigned long           ip_allocated;
 };
 
+/* Number of btree paths we preallocate, usually enough */
 #define BTREE_ITER_INITIAL             64
+/*
+ * Lmiit for btree_trans_too_many_iters(); this is enough that almost all code
+ * paths should run inside this limit, and if they don't it usually indicates a
+ * bug (leaking/duplicated btree paths).
+ *
+ * exception: some fsck paths
+ *
+ * bugs with excessive path usage seem to have possibly been eliminated now, so
+ * we might consider eliminating this (and btree_trans_too_many_iter()) at some
+ * point.
+ */
+#define BTREE_ITER_NORMAL_LIMIT                256
+/* never exceed limit */
 #define BTREE_ITER_MAX                 (1U << 10)
 
 struct btree_trans_commit_hook;
index 32397b99752fd2ec3cfd553724c97c7f217ca56e..6030c396754f6f494c3c137abd313f6bf80c2ffb 100644 (file)
 #include "keylist.h"
 #include "recovery_passes.h"
 #include "replicas.h"
+#include "sb-members.h"
 #include "super-io.h"
 #include "trace.h"
 
 #include <linux/random.h>
 
-const char * const bch2_btree_update_modes[] = {
+static const char * const bch2_btree_update_modes[] = {
 #define x(t) #t,
-       BCH_WATERMARKS()
+       BTREE_UPDATE_MODES()
 #undef x
        NULL
 };
@@ -605,6 +606,26 @@ static void btree_update_add_key(struct btree_update *as,
        bch2_keylist_push(keys);
 }
 
+static bool btree_update_new_nodes_marked_sb(struct btree_update *as)
+{
+       for_each_keylist_key(&as->new_keys, k)
+               if (!bch2_dev_btree_bitmap_marked(as->c, bkey_i_to_s_c(k)))
+                       return false;
+       return true;
+}
+
+static void btree_update_new_nodes_mark_sb(struct btree_update *as)
+{
+       struct bch_fs *c = as->c;
+
+       mutex_lock(&c->sb_lock);
+       for_each_keylist_key(&as->new_keys, k)
+               bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(k));
+
+       bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+}
+
 /*
  * The transactional part of an interior btree node update, where we journal the
  * update we did to the interior node and update alloc info:
@@ -662,6 +683,9 @@ static void btree_update_nodes_written(struct btree_update *as)
        if (ret)
                goto err;
 
+       if (!btree_update_new_nodes_marked_sb(as))
+               btree_update_new_nodes_mark_sb(as);
+
        /*
         * Wait for any in flight writes to finish before we free the old nodes
         * on disk:
@@ -704,9 +728,13 @@ static void btree_update_nodes_written(struct btree_update *as)
        bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
                             "%s", bch2_err_str(ret));
 err:
-       if (as->b) {
-
-               b = as->b;
+       /*
+        * We have to be careful because another thread might be getting ready
+        * to free as->b and calling btree_update_reparent() on us - we'll
+        * recheck under btree_update_lock below:
+        */
+       b = READ_ONCE(as->b);
+       if (b) {
                btree_path_idx_t path_idx = get_unlocked_mut_path(trans,
                                                as->btree_id, b->c.level, b->key.k.p);
                struct btree_path *path = trans->paths + path_idx;
@@ -850,15 +878,17 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
 {
        struct bch_fs *c = as->c;
 
-       mutex_lock(&c->btree_interior_update_lock);
-       list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
-
        BUG_ON(as->mode != BTREE_UPDATE_none);
+       BUG_ON(as->update_level_end < b->c.level);
        BUG_ON(!btree_node_dirty(b));
        BUG_ON(!b->c.level);
 
+       mutex_lock(&c->btree_interior_update_lock);
+       list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
+
        as->mode        = BTREE_UPDATE_node;
        as->b           = b;
+       as->update_level_end = b->c.level;
 
        set_btree_node_write_blocked(b);
        list_add(&as->write_blocked_list, &b->write_blocked);
@@ -1100,7 +1130,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
 
 static struct btree_update *
 bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
-                       unsigned level, bool split, unsigned flags)
+                       unsigned level_start, bool split, unsigned flags)
 {
        struct bch_fs *c = trans->c;
        struct btree_update *as;
@@ -1108,7 +1138,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        int disk_res_flags = (flags & BCH_TRANS_COMMIT_no_enospc)
                ? BCH_DISK_RESERVATION_NOFAIL : 0;
        unsigned nr_nodes[2] = { 0, 0 };
-       unsigned update_level = level;
+       unsigned level_end = level_start;
        enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
        int ret = 0;
        u32 restart_count = trans->restart_count;
@@ -1123,34 +1153,30 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        flags &= ~BCH_WATERMARK_MASK;
        flags |= watermark;
 
-       if (watermark < c->journal.watermark) {
-               struct journal_res res = { 0 };
-               unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
+       if (watermark < BCH_WATERMARK_reclaim &&
+           test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) {
+               if (flags & BCH_TRANS_COMMIT_journal_reclaim)
+                       return ERR_PTR(-BCH_ERR_journal_reclaim_would_deadlock);
 
-               if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
-                   watermark < BCH_WATERMARK_reclaim)
-                       journal_flags |= JOURNAL_RES_GET_NONBLOCK;
-
-               ret = drop_locks_do(trans,
-                       bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
-               if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
-                       ret = -BCH_ERR_journal_reclaim_would_deadlock;
+               bch2_trans_unlock(trans);
+               wait_event(c->journal.wait, !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags));
+               ret = bch2_trans_relock(trans);
                if (ret)
                        return ERR_PTR(ret);
        }
 
        while (1) {
-               nr_nodes[!!update_level] += 1 + split;
-               update_level++;
+               nr_nodes[!!level_end] += 1 + split;
+               level_end++;
 
-               ret = bch2_btree_path_upgrade(trans, path, update_level + 1);
+               ret = bch2_btree_path_upgrade(trans, path, level_end + 1);
                if (ret)
                        return ERR_PTR(ret);
 
-               if (!btree_path_node(path, update_level)) {
+               if (!btree_path_node(path, level_end)) {
                        /* Allocating new root? */
                        nr_nodes[1] += split;
-                       update_level = BTREE_MAX_DEPTH;
+                       level_end = BTREE_MAX_DEPTH;
                        break;
                }
 
@@ -1158,11 +1184,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
                 * Always check for space for two keys, even if we won't have to
                 * split at prior level - it might have been a merge instead:
                 */
-               if (bch2_btree_node_insert_fits(path->l[update_level].b,
+               if (bch2_btree_node_insert_fits(path->l[level_end].b,
                                                BKEY_BTREE_PTR_U64s_MAX * 2))
                        break;
 
-               split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
+               split = path->l[level_end].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
        }
 
        if (!down_read_trylock(&c->gc_lock)) {
@@ -1176,14 +1202,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS);
        memset(as, 0, sizeof(*as));
        closure_init(&as->cl, NULL);
-       as->c           = c;
-       as->start_time  = start_time;
-       as->ip_started  = _RET_IP_;
-       as->mode        = BTREE_UPDATE_none;
-       as->watermark   = watermark;
-       as->took_gc_lock = true;
-       as->btree_id    = path->btree_id;
-       as->update_level = update_level;
+       as->c                   = c;
+       as->start_time          = start_time;
+       as->ip_started          = _RET_IP_;
+       as->mode                = BTREE_UPDATE_none;
+       as->watermark           = watermark;
+       as->took_gc_lock        = true;
+       as->btree_id            = path->btree_id;
+       as->update_level_start  = level_start;
+       as->update_level_end    = level_end;
        INIT_LIST_HEAD(&as->list);
        INIT_LIST_HEAD(&as->unwritten_list);
        INIT_LIST_HEAD(&as->write_blocked_list);
@@ -1277,23 +1304,29 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
        bch2_recalc_btree_reserve(c);
 }
 
-static void bch2_btree_set_root(struct btree_update *as,
-                               struct btree_trans *trans,
-                               struct btree_path *path,
-                               struct btree *b)
+static int bch2_btree_set_root(struct btree_update *as,
+                              struct btree_trans *trans,
+                              struct btree_path *path,
+                              struct btree *b,
+                              bool nofail)
 {
        struct bch_fs *c = as->c;
-       struct btree *old;
 
        trace_and_count(c, btree_node_set_root, trans, b);
 
-       old = btree_node_root(c, b);
+       struct btree *old = btree_node_root(c, b);
 
        /*
         * Ensure no one is using the old root while we switch to the
         * new root:
         */
-       bch2_btree_node_lock_write_nofail(trans, path, &old->c);
+       if (nofail) {
+               bch2_btree_node_lock_write_nofail(trans, path, &old->c);
+       } else {
+               int ret = bch2_btree_node_lock_write(trans, path, &old->c);
+               if (ret)
+                       return ret;
+       }
 
        bch2_btree_set_root_inmem(c, b);
 
@@ -1307,6 +1340,7 @@ static void bch2_btree_set_root(struct btree_update *as,
         * depend on the new root would have to update the new root.
         */
        bch2_btree_node_unlock_write(trans, path, old);
+       return 0;
 }
 
 /* Interior node updates: */
@@ -1373,12 +1407,12 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
 }
 
 static void
-__bch2_btree_insert_keys_interior(struct btree_update *as,
-                                 struct btree_trans *trans,
-                                 struct btree_path *path,
-                                 struct btree *b,
-                                 struct btree_node_iter node_iter,
-                                 struct keylist *keys)
+bch2_btree_insert_keys_interior(struct btree_update *as,
+                               struct btree_trans *trans,
+                               struct btree_path *path,
+                               struct btree *b,
+                               struct btree_node_iter node_iter,
+                               struct keylist *keys)
 {
        struct bkey_i *insert = bch2_keylist_front(keys);
        struct bkey_packed *k;
@@ -1534,7 +1568,7 @@ static void btree_split_insert_keys(struct btree_update *as,
 
                bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p);
 
-               __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
+               bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
 
                BUG_ON(bch2_btree_node_check_topology(trans, b));
        }
@@ -1649,15 +1683,16 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
        if (parent) {
                /* Split a non root node */
                ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys);
-               if (ret)
-                       goto err;
        } else if (n3) {
-               bch2_btree_set_root(as, trans, trans->paths + path, n3);
+               ret = bch2_btree_set_root(as, trans, trans->paths + path, n3, false);
        } else {
                /* Root filled up but didn't need to be split */
-               bch2_btree_set_root(as, trans, trans->paths + path, n1);
+               ret = bch2_btree_set_root(as, trans, trans->paths + path, n1, false);
        }
 
+       if (ret)
+               goto err;
+
        if (n3) {
                bch2_btree_update_get_open_buckets(as, n3);
                bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
@@ -1714,27 +1749,6 @@ err:
        goto out;
 }
 
-static void
-bch2_btree_insert_keys_interior(struct btree_update *as,
-                               struct btree_trans *trans,
-                               struct btree_path *path,
-                               struct btree *b,
-                               struct keylist *keys)
-{
-       struct btree_path *linked;
-       unsigned i;
-
-       __bch2_btree_insert_keys_interior(as, trans, path, b,
-                                         path->l[b->c.level].iter, keys);
-
-       btree_update_updated_node(as, b);
-
-       trans_for_each_path_with_node(trans, b, linked, i)
-               bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
-
-       bch2_trans_verify_paths(trans);
-}
-
 /**
  * bch2_btree_insert_node - insert bkeys into a given btree node
  *
@@ -1755,7 +1769,8 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
                                  struct keylist *keys)
 {
        struct bch_fs *c = as->c;
-       struct btree_path *path = trans->paths + path_idx;
+       struct btree_path *path = trans->paths + path_idx, *linked;
+       unsigned i;
        int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
        int old_live_u64s = b->nr.live_u64s;
        int live_u64s_added, u64s_added;
@@ -1784,7 +1799,13 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
                return ret;
        }
 
-       bch2_btree_insert_keys_interior(as, trans, path, b, keys);
+       bch2_btree_insert_keys_interior(as, trans, path, b,
+                                       path->l[b->c.level].iter, keys);
+
+       trans_for_each_path_with_node(trans, b, linked, i)
+               bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
+
+       bch2_trans_verify_paths(trans);
 
        live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
        u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;
@@ -1798,6 +1819,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
            bch2_maybe_compact_whiteouts(c, b))
                bch2_trans_node_reinit_iter(trans, b);
 
+       btree_update_updated_node(as, b);
        bch2_btree_node_unlock_write(trans, path, b);
 
        BUG_ON(bch2_btree_node_check_topology(trans, b));
@@ -1807,7 +1829,7 @@ split:
         * We could attempt to avoid the transaction restart, by calling
         * bch2_btree_path_upgrade() and allocating more nodes:
         */
-       if (b->c.level >= as->update_level) {
+       if (b->c.level >= as->update_level_end) {
                trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b);
                return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
        }
@@ -1873,7 +1895,9 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans *
        bch2_keylist_add(&as->parent_keys, &b->key);
        btree_split_insert_keys(as, trans, path_idx, n, &as->parent_keys);
 
-       bch2_btree_set_root(as, trans, path, n);
+       int ret = bch2_btree_set_root(as, trans, path, n, true);
+       BUG_ON(ret);
+
        bch2_btree_update_get_open_buckets(as, n);
        bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
        bch2_trans_node_add(trans, path, n);
@@ -1926,6 +1950,18 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
        BUG_ON(!trans->paths[path].should_be_locked);
        BUG_ON(!btree_node_locked(&trans->paths[path], level));
 
+       /*
+        * Work around a deadlock caused by the btree write buffer not doing
+        * merges and leaving tons of merges for us to do - we really don't need
+        * to be doing merges at all from the interior update path, and if the
+        * interior update path is generating too many new interior updates we
+        * deadlock:
+        */
+       if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates)
+               return 0;
+
+       flags &= ~BCH_WATERMARK_MASK;
+
        b = trans->paths[path].l[level].b;
 
        if ((sib == btree_prev_sib && bpos_eq(b->data->min_key, POS_MIN)) ||
@@ -2071,6 +2107,10 @@ err:
                bch2_path_put(trans, new_path, true);
        bch2_path_put(trans, sib_path, true);
        bch2_trans_verify_locks(trans);
+       if (ret == -BCH_ERR_journal_reclaim_would_deadlock)
+               ret = 0;
+       if (!ret)
+               ret = bch2_trans_relock(trans);
        return ret;
 err_free_update:
        bch2_btree_node_free_never_used(as, trans, n);
@@ -2116,12 +2156,13 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
        if (parent) {
                bch2_keylist_add(&as->parent_keys, &n->key);
                ret = bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys);
-               if (ret)
-                       goto err;
        } else {
-               bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n);
+               ret = bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n, false);
        }
 
+       if (ret)
+               goto err;
+
        bch2_btree_update_get_open_buckets(as, n);
        bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
 
@@ -2519,9 +2560,11 @@ void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned lev
 
 static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as)
 {
-       prt_printf(out, "%ps: btree=%s watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
+       prt_printf(out, "%ps: btree=%s l=%u-%u watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
                   (void *) as->ip_started,
                   bch2_btree_id_str(as->btree_id),
+                  as->update_level_start,
+                  as->update_level_end,
                   bch2_watermarks[as->watermark],
                   bch2_btree_update_modes[as->mode],
                   as->nodes_written,
index 88dcf5a22a3bd628aaa22065f3cdd70ca3770d90..c1a479ebaad12120813f95a4af50b32cd542023d 100644 (file)
@@ -57,7 +57,8 @@ struct btree_update {
        unsigned                        took_gc_lock:1;
 
        enum btree_id                   btree_id;
-       unsigned                        update_level;
+       unsigned                        update_level_start;
+       unsigned                        update_level_end;
 
        struct disk_reservation         disk_res;
 
index baf63e2fddb64cd8f4c745d0cc80c864c86ffaa6..36a6f42aba5e6fc5a36418c1d7565e07e8f90420 100644 (file)
@@ -316,6 +316,16 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
                            bpos_gt(k->k.k.p, path->l[0].b->key.k.p)) {
                                bch2_btree_node_unlock_write(trans, path, path->l[0].b);
                                write_locked = false;
+
+                               ret = lockrestart_do(trans,
+                                       bch2_btree_iter_traverse(&iter) ?:
+                                       bch2_foreground_maybe_merge(trans, iter.path, 0,
+                                                       BCH_WATERMARK_reclaim|
+                                                       BCH_TRANS_COMMIT_journal_reclaim|
+                                                       BCH_TRANS_COMMIT_no_check_rw|
+                                                       BCH_TRANS_COMMIT_no_enospc));
+                               if (ret)
+                                       goto err;
                        }
                }
 
@@ -382,10 +392,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 
                        ret = commit_do(trans, NULL, NULL,
                                        BCH_WATERMARK_reclaim|
+                                       BCH_TRANS_COMMIT_journal_reclaim|
                                        BCH_TRANS_COMMIT_no_check_rw|
                                        BCH_TRANS_COMMIT_no_enospc|
-                                       BCH_TRANS_COMMIT_no_journal_res|
-                                       BCH_TRANS_COMMIT_journal_reclaim,
+                                       BCH_TRANS_COMMIT_no_journal_res ,
                                        btree_write_buffered_insert(trans, i));
                        if (ret)
                                goto err;
index 00aaf4bb513974a6b9c0353ea9445f92671c32eb..f9af5adabe83638eea7ffd15ea2f730085f81cc1 100644 (file)
@@ -395,14 +395,6 @@ static inline const char *bch2_data_type_str(enum bch_data_type type)
                : "(invalid data type)";
 }
 
-static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type)
-{
-       if (type < BCH_DATA_NR)
-               prt_str(out, __bch2_data_types[type]);
-       else
-               prt_printf(out, "(invalid data type %u)", type);
-}
-
 /* disk reservations: */
 
 static inline void bch2_disk_reservation_put(struct bch_fs *c,
index cbfa6459bdbceec6a953f91a385fc5e4fe76691d..72781aad6ba70ccc774b688c6a9d50b2dc21f133 100644 (file)
@@ -134,42 +134,38 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg
 struct fsck_thread {
        struct thread_with_stdio thr;
        struct bch_fs           *c;
-       char                    **devs;
-       size_t                  nr_devs;
        struct bch_opts         opts;
 };
 
 static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
 {
        struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr);
-       if (thr->devs)
-               for (size_t i = 0; i < thr->nr_devs; i++)
-                       kfree(thr->devs[i]);
-       kfree(thr->devs);
        kfree(thr);
 }
 
 static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio)
 {
        struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
-       struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
-
-       if (IS_ERR(c))
-               return PTR_ERR(c);
+       struct bch_fs *c = thr->c;
 
-       int ret = 0;
-       if (test_bit(BCH_FS_errors_fixed, &c->flags))
-               ret |= 1;
-       if (test_bit(BCH_FS_error, &c->flags))
-               ret |= 4;
+       int ret = PTR_ERR_OR_ZERO(c);
+       if (ret)
+               return ret;
 
-       bch2_fs_stop(c);
+       ret = bch2_fs_start(thr->c);
+       if (ret)
+               goto err;
 
-       if (ret & 1)
+       if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
                bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name);
-       if (ret & 4)
+               ret |= 1;
+       }
+       if (test_bit(BCH_FS_error, &c->flags)) {
                bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name);
-
+               ret |= 4;
+       }
+err:
+       bch2_fs_stop(c);
        return ret;
 }
 
@@ -182,7 +178,7 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
 {
        struct bch_ioctl_fsck_offline arg;
        struct fsck_thread *thr = NULL;
-       u64 *devs = NULL;
+       darray_str(devs) = {};
        long ret = 0;
 
        if (copy_from_user(&arg, user_arg, sizeof(arg)))
@@ -194,29 +190,32 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) ||
-           !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) ||
-           !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) {
-               ret = -ENOMEM;
-               goto err;
-       }
+       for (size_t i = 0; i < arg.nr_devs; i++) {
+               u64 dev_u64;
+               ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64));
+               if (ret)
+                       goto err;
 
-       thr->opts = bch2_opts_empty();
-       thr->nr_devs = arg.nr_devs;
+               char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX);
+               ret = PTR_ERR_OR_ZERO(dev_str);
+               if (ret)
+                       goto err;
 
-       if (copy_from_user(devs, &user_arg->devs[0],
-                          array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) {
-               ret = -EINVAL;
-               goto err;
+               ret = darray_push(&devs, dev_str);
+               if (ret) {
+                       kfree(dev_str);
+                       goto err;
+               }
        }
 
-       for (size_t i = 0; i < arg.nr_devs; i++) {
-               thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX);
-               ret = PTR_ERR_OR_ZERO(thr->devs[i]);
-               if (ret)
-                       goto err;
+       thr = kzalloc(sizeof(*thr), GFP_KERNEL);
+       if (!thr) {
+               ret = -ENOMEM;
+               goto err;
        }
 
+       thr->opts = bch2_opts_empty();
+
        if (arg.opts) {
                char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
 
@@ -230,15 +229,26 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
 
        opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
 
+       /* We need request_key() to be called before we punt to kthread: */
+       opt_set(thr->opts, nostart, true);
+
+       thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts);
+
+       if (!IS_ERR(thr->c) &&
+           thr->c->opts.errors == BCH_ON_ERROR_panic)
+               thr->c->opts.errors = BCH_ON_ERROR_ro;
+
        ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops);
-err:
-       if (ret < 0) {
-               if (thr)
-                       bch2_fsck_thread_exit(&thr->thr);
-               pr_err("ret %s", bch2_err_str(ret));
-       }
-       kfree(devs);
+out:
+       darray_for_each(devs, i)
+               kfree(*i);
+       darray_exit(&devs);
        return ret;
+err:
+       if (thr)
+               bch2_fsck_thread_exit(&thr->thr);
+       pr_err("ret %s", bch2_err_str(ret));
+       goto out;
 }
 
 static long bch2_global_ioctl(unsigned cmd, void __user *arg)
index 4701457f6381ca820e17a12707009c272ed5b4ac..7ed779b411f61e4e3f05a703ce9e091474237939 100644 (file)
@@ -429,15 +429,20 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
                                extent_nonce(version, crc_old), bio);
 
        if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) {
-               bch_err(c, "checksum error in %s() (memory corruption or bug?)\n"
-                       "expected %0llx:%0llx got %0llx:%0llx (old type %s new type %s)",
-                       __func__,
-                       crc_old.csum.hi,
-                       crc_old.csum.lo,
-                       merged.hi,
-                       merged.lo,
-                       bch2_csum_types[crc_old.csum_type],
-                       bch2_csum_types[new_csum_type]);
+               struct printbuf buf = PRINTBUF;
+               prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n"
+                          "expected %0llx:%0llx got %0llx:%0llx (old type ",
+                          __func__,
+                          crc_old.csum.hi,
+                          crc_old.csum.lo,
+                          merged.hi,
+                          merged.lo);
+               bch2_prt_csum_type(&buf, crc_old.csum_type);
+               prt_str(&buf, " new type ");
+               bch2_prt_csum_type(&buf, new_csum_type);
+               prt_str(&buf, ")");
+               bch_err(c, "%s", buf.buf);
+               printbuf_exit(&buf);
                return -EIO;
        }
 
index 1b8c2c1016dc6347ce12ef3161d4723835dfa56e..e40499fde9a4019fc75d62f825e9e5583caf803b 100644 (file)
@@ -61,11 +61,12 @@ static inline void bch2_csum_err_msg(struct printbuf *out,
                                     struct bch_csum expected,
                                     struct bch_csum got)
 {
-       prt_printf(out, "checksum error: got ");
+       prt_str(out, "checksum error, type ");
+       bch2_prt_csum_type(out, type);
+       prt_str(out, ": got ");
        bch2_csum_to_text(out, type, got);
        prt_str(out, " should be ");
        bch2_csum_to_text(out, type, expected);
-       prt_printf(out, " type %s", bch2_csum_types[type]);
 }
 
 int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
index 58c2eb45570ff022764720f9beb10ecfa2926367..607fd5e232c902dbb39f3dac84ea2e214e6b106c 100644 (file)
@@ -47,14 +47,6 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
        return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
 }
 
-static inline void bch2_prt_compression_type(struct printbuf *out, enum bch_compression_type type)
-{
-       if (type < BCH_COMPRESSION_TYPE_NR)
-               prt_str(out, __bch2_compression_types[type]);
-       else
-               prt_printf(out, "(invalid compression type %u)", type);
-}
-
 int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
                                struct bch_extent_crc_unpacked *);
 int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
index 34731ee0217f62f6e43fb691e76083c46026b127..0022b51ce3c09cc9eafaab2f0639c944078d8c54 100644 (file)
@@ -598,6 +598,8 @@ int bch2_data_update_init(struct btree_trans *trans,
                i++;
        }
 
+       unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have));
+
        /*
         * If current extent durability is less than io_opts.data_replicas,
         * we're not trying to rereplicate the extent up to data_replicas here -
@@ -607,7 +609,7 @@ int bch2_data_update_init(struct btree_trans *trans,
         * rereplicate, currently, so that users don't get an unexpected -ENOSPC
         */
        if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) &&
-           durability_have >= io_opts.data_replicas) {
+           !durability_required) {
                m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
                m->data_opts.rewrite_ptrs = 0;
                /* if iter == NULL, it's just a promote */
@@ -616,11 +618,18 @@ int bch2_data_update_init(struct btree_trans *trans,
                goto done;
        }
 
-       m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
+       m->op.nr_replicas = min(durability_removing, durability_required) +
                m->data_opts.extra_replicas;
-       m->op.nr_replicas_required = m->op.nr_replicas;
 
-       BUG_ON(!m->op.nr_replicas);
+       /*
+        * If device(s) were set to durability=0 after data was written to them
+        * we can end up with a duribilty=0 extent, and the normal algorithm
+        * that tries not to increase durability doesn't work:
+        */
+       if (!(durability_have + durability_removing))
+               m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
+
+       m->op.nr_replicas_required = m->op.nr_replicas;
 
        if (reserve_sectors) {
                ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
index 208ce6f0fc4317d561582bae51785da2c016a1cd..cd99b739941447f4c54037c8dc87bffd5f5e0d25 100644 (file)
@@ -13,6 +13,7 @@
 #include "btree_iter.h"
 #include "btree_locking.h"
 #include "btree_update.h"
+#include "btree_update_interior.h"
 #include "buckets.h"
 #include "debug.h"
 #include "error.h"
@@ -668,7 +669,7 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
        i->size = size;
        i->ret  = 0;
 
-       do {
+       while (1) {
                err = flush_buf(i);
                if (err)
                        return err;
@@ -676,9 +677,12 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
                if (!i->size)
                        break;
 
+               if (done)
+                       break;
+
                done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter);
                i->iter++;
-       } while (!done);
+       }
 
        if (i->buf.allocation_failure)
                return -ENOMEM;
@@ -693,13 +697,45 @@ static const struct file_operations journal_pins_ops = {
        .read           = bch2_journal_pins_read,
 };
 
+static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf,
+                                      size_t size, loff_t *ppos)
+{
+       struct dump_iter *i = file->private_data;
+       struct bch_fs *c = i->c;
+       int err;
+
+       i->ubuf = buf;
+       i->size = size;
+       i->ret  = 0;
+
+       if (!i->iter) {
+               bch2_btree_updates_to_text(&i->buf, c);
+               i->iter++;
+       }
+
+       err = flush_buf(i);
+       if (err)
+               return err;
+
+       if (i->buf.allocation_failure)
+               return -ENOMEM;
+
+       return i->ret;
+}
+
+static const struct file_operations btree_updates_ops = {
+       .owner          = THIS_MODULE,
+       .open           = bch2_dump_open,
+       .release        = bch2_dump_release,
+       .read           = bch2_btree_updates_read,
+};
+
 static int btree_transaction_stats_open(struct inode *inode, struct file *file)
 {
        struct bch_fs *c = inode->i_private;
        struct dump_iter *i;
 
        i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
-
        if (!i)
                return -ENOMEM;
 
@@ -866,6 +902,20 @@ void bch2_fs_debug_exit(struct bch_fs *c)
                debugfs_remove_recursive(c->fs_debug_dir);
 }
 
+static void bch2_fs_debug_btree_init(struct bch_fs *c, struct btree_debug *bd)
+{
+       struct dentry *d;
+
+       d = debugfs_create_dir(bch2_btree_id_str(bd->id), c->btree_debug_dir);
+
+       debugfs_create_file("keys", 0400, d, bd, &btree_debug_ops);
+
+       debugfs_create_file("formats", 0400, d, bd, &btree_format_debug_ops);
+
+       debugfs_create_file("bfloat-failed", 0400, d, bd,
+                           &bfloat_failed_debug_ops);
+}
+
 void bch2_fs_debug_init(struct bch_fs *c)
 {
        struct btree_debug *bd;
@@ -888,6 +938,9 @@ void bch2_fs_debug_init(struct bch_fs *c)
        debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
                            c->btree_debug, &journal_pins_ops);
 
+       debugfs_create_file("btree_updates", 0400, c->fs_debug_dir,
+                           c->btree_debug, &btree_updates_ops);
+
        debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
                            c, &btree_transaction_stats_op);
 
@@ -902,21 +955,7 @@ void bch2_fs_debug_init(struct bch_fs *c)
             bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
             bd++) {
                bd->id = bd - c->btree_debug;
-               debugfs_create_file(bch2_btree_id_str(bd->id),
-                                   0400, c->btree_debug_dir, bd,
-                                   &btree_debug_ops);
-
-               snprintf(name, sizeof(name), "%s-formats",
-                        bch2_btree_id_str(bd->id));
-
-               debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
-                                   &btree_format_debug_ops);
-
-               snprintf(name, sizeof(name), "%s-bfloat-failed",
-                        bch2_btree_id_str(bd->id));
-
-               debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
-                                   &bfloat_failed_debug_ops);
+               bch2_fs_debug_btree_init(c, bd);
        }
 }
 
index 082075244e16aedc824249b239ecec6efb1a07fa..556a217108d32ef35890da0463751afc688186f3 100644 (file)
@@ -131,29 +131,33 @@ fsck_err:
 void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
                         struct bkey_s_c k)
 {
-       const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
-       unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
+       const struct bch_stripe *sp = bkey_s_c_to_stripe(k).v;
+       struct bch_stripe s = {};
+
+       memcpy(&s, sp, min(sizeof(s), bkey_val_bytes(k.k)));
+
+       unsigned nr_data = s.nr_blocks - s.nr_redundant;
+
+       prt_printf(out, "algo %u sectors %u blocks %u:%u csum ",
+                  s.algorithm,
+                  le16_to_cpu(s.sectors),
+                  nr_data,
+                  s.nr_redundant);
+       bch2_prt_csum_type(out, s.csum_type);
+       prt_printf(out, " gran %u", 1U << s.csum_granularity_bits);
+
+       for (unsigned i = 0; i < s.nr_blocks; i++) {
+               const struct bch_extent_ptr *ptr = sp->ptrs + i;
+
+               if ((void *) ptr >= bkey_val_end(k))
+                       break;
+
+               bch2_extent_ptr_to_text(out, c, ptr);
 
-       prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
-              s->algorithm,
-              le16_to_cpu(s->sectors),
-              nr_data,
-              s->nr_redundant,
-              s->csum_type,
-              1U << s->csum_granularity_bits);
-
-       for (i = 0; i < s->nr_blocks; i++) {
-               const struct bch_extent_ptr *ptr = s->ptrs + i;
-               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-               u32 offset;
-               u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
-
-               prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset);
-               if (i < nr_data)
-                       prt_printf(out, "#%u", stripe_blockcount_get(s, i));
-               prt_printf(out, " gen %u", ptr->gen);
-               if (ptr_stale(ca, ptr))
-                       prt_printf(out, " stale");
+               if (s.csum_type < BCH_CSUM_NR &&
+                   i < nr_data &&
+                   stripe_blockcount_offset(&s, i) < bkey_val_bytes(k.k))
+                       prt_printf(out,  "#%u", stripe_blockcount_get(sp, i));
        }
 }
 
@@ -607,10 +611,8 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
                                struct printbuf err = PRINTBUF;
                                struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev);
 
-                               prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n",
-                                          want.hi, want.lo,
-                                          got.hi, got.lo,
-                                          bch2_csum_types[v->csum_type]);
+                               prt_str(&err, "stripe ");
+                               bch2_csum_err_msg(&err, v->csum_type, want, got);
                                prt_printf(&err, "  for %ps at %u of\n  ", (void *) _RET_IP_, i);
                                bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key));
                                bch_err_ratelimited(ca, "%s", err.buf);
index f4369b02e805f0a24572a8cf87d18867c3d3301a..f042616888b0a1d47d7797e987c912c58d0945b3 100644 (file)
@@ -32,6 +32,8 @@ static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)
 static inline unsigned stripe_csum_offset(const struct bch_stripe *s,
                                          unsigned dev, unsigned csum_idx)
 {
+       EBUG_ON(s->csum_type >= BCH_CSUM_NR);
+
        unsigned csum_bytes = bch_crc_bytes[s->csum_type];
 
        return sizeof(struct bch_stripe) +
index 0e3ca99fbd2de1522c5e8dea8ac313232f60f7f3..1a331e539204852d4db9e7620df0282abe262f1e 100644 (file)
@@ -998,7 +998,9 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc
                        prt_str(out, " cached");
                if (ptr->unwritten)
                        prt_str(out, " unwritten");
-               if (ca && ptr_stale(ca, ptr))
+               if (b >= ca->mi.first_bucket &&
+                   b <  ca->mi.nbuckets &&
+                   ptr_stale(ca, ptr))
                        prt_printf(out, " stale");
        }
 }
@@ -1028,11 +1030,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
                        struct bch_extent_crc_unpacked crc =
                                bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
 
-                       prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress ",
+                       prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ",
                               crc.compressed_size,
                               crc.uncompressed_size,
-                              crc.offset, crc.nonce,
-                              bch2_csum_types[crc.csum_type]);
+                              crc.offset, crc.nonce);
+                       bch2_prt_csum_type(out, crc.csum_type);
+                       prt_str(out, " compress ");
                        bch2_prt_compression_type(out, crc.compression_type);
                        break;
                }
index 4ce5e957a6e9162307d98b5b74b02338087f7e1e..0f955c3c76a7bcdce86556e4d09ba0e5cf4e7f9a 100644 (file)
@@ -115,7 +115,7 @@ static void swap_bytes(void *a, void *b, size_t n)
 
 struct wrapper {
        cmp_func_t cmp;
-       swap_func_t swap;
+       swap_func_t swap_func;
 };
 
 /*
@@ -125,7 +125,7 @@ struct wrapper {
 static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
 {
        if (swap_func == SWAP_WRAPPER) {
-               ((const struct wrapper *)priv)->swap(a, b, (int)size);
+               ((const struct wrapper *)priv)->swap_func(a, b, (int)size);
                return;
        }
 
@@ -174,7 +174,7 @@ void eytzinger0_sort_r(void *base, size_t n, size_t size,
        int i, c, r;
 
        /* called from 'sort' without swap function, let's pick the default */
-       if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap)
+       if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
                swap_func = NULL;
 
        if (!swap_func) {
@@ -227,7 +227,7 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
 {
        struct wrapper w = {
                .cmp  = cmp_func,
-               .swap = swap_func,
+               .swap_func = swap_func,
        };
 
        return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
index ee0e2df33322d2dccb60e1ed90257863769ead0d..24840aee335c0ffeabd3ad69c79665cc005e28d8 100644 (file)
@@ -242,8 +242,8 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
             (_i) = eytzinger0_next((_i), (_size)))
 
 /* return greatest node <= @search, or -1 if not found */
-static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
-                                        cmp_func_t cmp, const void *search)
+static inline int eytzinger0_find_le(void *base, size_t nr, size_t size,
+                                    cmp_func_t cmp, const void *search)
 {
        unsigned i, n = 0;
 
@@ -256,18 +256,32 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
        } while (n < nr);
 
        if (n & 1) {
-               /* @i was greater than @search, return previous node: */
+               /*
+                * @i was greater than @search, return previous node:
+                *
+                * if @i was leftmost/smallest element,
+                * eytzinger0_prev(eytzinger0_first())) returns -1, as expected
+                */
                return eytzinger0_prev(i, nr);
        } else {
                return i;
        }
 }
 
-static inline ssize_t eytzinger0_find_gt(void *base, size_t nr, size_t size,
-                                        cmp_func_t cmp, const void *search)
+static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size,
+                                    cmp_func_t cmp, const void *search)
 {
        ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
-       return eytzinger0_next(idx, size);
+
+       /*
+        * if eytitzinger0_find_le() returned -1 - no element was <= search - we
+        * want to return the first element; next/prev identities mean this work
+        * as expected
+        *
+        * similarly if find_le() returns last element, we should return -1;
+        * identities mean this all works out:
+        */
+       return eytzinger0_next(idx, nr);
 }
 
 #define eytzinger0_find(base, nr, size, _cmp, search)                  \
index f49e6c0f0f6835968202ab2f1fa194933945554a..b889370a5088113a2417787bdbb4b98a16597063 100644 (file)
@@ -387,6 +387,8 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
        ret = dio->op.error ?: ((long) dio->written << 9);
        bio_put(&dio->op.wbio.bio);
 
+       bch2_write_ref_put(dio->op.c, BCH_WRITE_REF_dio_write);
+
        /* inode->i_dio_count is our ref on inode and thus bch_fs */
        inode_dio_end(&inode->v);
 
@@ -590,22 +592,25 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
        prefetch(&inode->ei_inode);
        prefetch((void *) &inode->ei_inode + 64);
 
+       if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_dio_write))
+               return -EROFS;
+
        inode_lock(&inode->v);
 
        ret = generic_write_checks(req, iter);
        if (unlikely(ret <= 0))
-               goto err;
+               goto err_put_write_ref;
 
        ret = file_remove_privs(file);
        if (unlikely(ret))
-               goto err;
+               goto err_put_write_ref;
 
        ret = file_update_time(file);
        if (unlikely(ret))
-               goto err;
+               goto err_put_write_ref;
 
        if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1)))
-               goto err;
+               goto err_put_write_ref;
 
        inode_dio_begin(&inode->v);
        bch2_pagecache_block_get(inode);
@@ -645,7 +650,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
        }
 
        ret = bch2_dio_write_loop(dio);
-err:
+out:
        if (locked)
                inode_unlock(&inode->v);
        return ret;
@@ -653,7 +658,9 @@ err_put_bio:
        bch2_pagecache_block_put(inode);
        bio_put(bio);
        inode_dio_end(&inode->v);
-       goto err;
+err_put_write_ref:
+       bch2_write_ref_put(c, BCH_WRITE_REF_dio_write);
+       goto out;
 }
 
 void bch2_fs_fs_io_direct_exit(struct bch_fs *c)
index 8c70123b6a0c809b6d50040593281c2e9c115828..20b40477425f49449499b11d63930d92e10ed3ba 100644 (file)
@@ -174,18 +174,18 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
 static int bch2_flush_inode(struct bch_fs *c,
                            struct bch_inode_info *inode)
 {
-       struct bch_inode_unpacked u;
-       int ret;
-
        if (c->opts.journal_flush_disabled)
                return 0;
 
-       ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u);
-       if (ret)
-               return ret;
+       if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync))
+               return -EROFS;
 
-       return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
-               bch2_inode_flush_nocow_writes(c, inode);
+       struct bch_inode_unpacked u;
+       int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?:
+                 bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
+                 bch2_inode_flush_nocow_writes(c, inode);
+       bch2_write_ref_put(c, BCH_WRITE_REF_fsync);
+       return ret;
 }
 
 int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
index 725fcf46f6312c267c2a7c05f1eaa6aed5fb83e7..9aa28b52ab926c567f49e0bb68b9c6791fb326e5 100644 (file)
@@ -247,7 +247,7 @@ static void journal_entry_err_msg(struct printbuf *out,
 
        if (entry) {
                prt_str(out, " type=");
-               prt_str(out, bch2_jset_entry_types[entry->type]);
+               bch2_prt_jset_entry_type(out, entry->type);
        }
 
        if (!jset) {
@@ -403,7 +403,8 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs
        jset_entry_for_each_key(entry, k) {
                if (!first) {
                        prt_newline(out);
-                       prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+                       bch2_prt_jset_entry_type(out, entry->type);
+                       prt_str(out, ": ");
                }
                prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level);
                bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k));
@@ -563,9 +564,9 @@ static void journal_entry_usage_to_text(struct printbuf *out, struct bch_fs *c,
        struct jset_entry_usage *u =
                container_of(entry, struct jset_entry_usage, entry);
 
-       prt_printf(out, "type=%s v=%llu",
-              bch2_fs_usage_types[u->entry.btree_id],
-              le64_to_cpu(u->v));
+       prt_str(out, "type=");
+       bch2_prt_fs_usage_type(out, u->entry.btree_id);
+       prt_printf(out, " v=%llu", le64_to_cpu(u->v));
 }
 
 static int journal_entry_data_usage_validate(struct bch_fs *c,
@@ -827,11 +828,11 @@ int bch2_journal_entry_validate(struct bch_fs *c,
 void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c,
                                struct jset_entry *entry)
 {
+       bch2_prt_jset_entry_type(out, entry->type);
+
        if (entry->type < BCH_JSET_ENTRY_NR) {
-               prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+               prt_str(out, ": ");
                bch2_jset_entry_ops[entry->type].to_text(out, c, entry);
-       } else {
-               prt_printf(out, "(unknown type %u)", entry->type);
        }
 }
 
index ab811c0dad26accfb4924eaef4cccb3ab957087c..04a577848b015cd900a1a040ec0565ffb2f69811 100644 (file)
@@ -67,6 +67,8 @@ void bch2_journal_set_watermark(struct journal *j)
            track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb))
                trace_and_count(c, journal_full, c);
 
+       mod_bit(JOURNAL_SPACE_LOW, &j->flags, low_on_space || low_on_pin);
+
        swap(watermark, j->watermark);
        if (watermark > j->watermark)
                journal_wake(j);
index 8c053cb64ca5ee25b9a5b2613f2fcd9e03d517d3..b5161b5d76a00874ed9ed88a0969927f2cfc9dbe 100644 (file)
@@ -134,6 +134,7 @@ enum journal_flags {
        JOURNAL_STARTED,
        JOURNAL_MAY_SKIP_FLUSH,
        JOURNAL_NEED_FLUSH_WRITE,
+       JOURNAL_SPACE_LOW,
 };
 
 /* Reasons we may fail to get a journal reservation: */
index e1800c4119b5fbaf8ebbfcdaef996e1dd9c35ca8..bb068fd724656cf8307d14022ca537f918b65747 100644 (file)
@@ -43,7 +43,7 @@ const char * const __bch2_btree_ids[] = {
        NULL
 };
 
-const char * const bch2_csum_types[] = {
+static const char * const __bch2_csum_types[] = {
        BCH_CSUM_TYPES()
        NULL
 };
@@ -53,7 +53,7 @@ const char * const bch2_csum_opts[] = {
        NULL
 };
 
-const char * const __bch2_compression_types[] = {
+static const char * const __bch2_compression_types[] = {
        BCH_COMPRESSION_TYPES()
        NULL
 };
@@ -83,18 +83,39 @@ const char * const bch2_member_states[] = {
        NULL
 };
 
-const char * const bch2_jset_entry_types[] = {
+static const char * const __bch2_jset_entry_types[] = {
        BCH_JSET_ENTRY_TYPES()
        NULL
 };
 
-const char * const bch2_fs_usage_types[] = {
+static const char * const __bch2_fs_usage_types[] = {
        BCH_FS_USAGE_TYPES()
        NULL
 };
 
 #undef x
 
+static void prt_str_opt_boundscheck(struct printbuf *out, const char * const opts[],
+                                   unsigned nr, const char *type, unsigned idx)
+{
+       if (idx < nr)
+               prt_str(out, opts[idx]);
+       else
+               prt_printf(out, "(unknown %s %u)", type, idx);
+}
+
+#define PRT_STR_OPT_BOUNDSCHECKED(name, type)                                  \
+void bch2_prt_##name(struct printbuf *out, type t)                             \
+{                                                                              \
+       prt_str_opt_boundscheck(out, __bch2_##name##s, ARRAY_SIZE(__bch2_##name##s) - 1, #name, t);\
+}
+
+PRT_STR_OPT_BOUNDSCHECKED(jset_entry_type,     enum bch_jset_entry_type);
+PRT_STR_OPT_BOUNDSCHECKED(fs_usage_type,       enum bch_fs_usage_type);
+PRT_STR_OPT_BOUNDSCHECKED(data_type,           enum bch_data_type);
+PRT_STR_OPT_BOUNDSCHECKED(csum_type,           enum bch_csum_type);
+PRT_STR_OPT_BOUNDSCHECKED(compression_type,    enum bch_compression_type);
+
 static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res,
                                     struct printbuf *err)
 {
index 1ac4135cca1c3dccc71a75a0d062ee30df33111c..84e452835a17d84d36c4d0f3906501578bf702d3 100644 (file)
@@ -16,18 +16,20 @@ extern const char * const bch2_version_upgrade_opts[];
 extern const char * const bch2_sb_features[];
 extern const char * const bch2_sb_compat[];
 extern const char * const __bch2_btree_ids[];
-extern const char * const bch2_csum_types[];
 extern const char * const bch2_csum_opts[];
-extern const char * const __bch2_compression_types[];
 extern const char * const bch2_compression_opts[];
 extern const char * const bch2_str_hash_types[];
 extern const char * const bch2_str_hash_opts[];
 extern const char * const __bch2_data_types[];
 extern const char * const bch2_member_states[];
-extern const char * const bch2_jset_entry_types[];
-extern const char * const bch2_fs_usage_types[];
 extern const char * const bch2_d_types[];
 
+void bch2_prt_jset_entry_type(struct printbuf *,       enum bch_jset_entry_type);
+void bch2_prt_fs_usage_type(struct printbuf *,         enum bch_fs_usage_type);
+void bch2_prt_data_type(struct printbuf *,             enum bch_data_type);
+void bch2_prt_csum_type(struct printbuf *,             enum bch_csum_type);
+void bch2_prt_compression_type(struct printbuf *,      enum bch_compression_type);
+
 static inline const char *bch2_d_type_str(unsigned d_type)
 {
        return (d_type < BCH_DT_MAX ? bch2_d_types[d_type] : NULL) ?: "(bad d_type)";
index b76c16152579c6d3e5a51dbf54c839392c0ce0b2..0f328aba9760ba0e89fd015ee757239b6d8bd8c4 100644 (file)
@@ -47,20 +47,6 @@ void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
        }
 }
 
-static bool btree_id_is_alloc(enum btree_id id)
-{
-       switch (id) {
-       case BTREE_ID_alloc:
-       case BTREE_ID_backpointers:
-       case BTREE_ID_need_discard:
-       case BTREE_ID_freespace:
-       case BTREE_ID_bucket_gens:
-               return true;
-       default:
-               return false;
-       }
-}
-
 /* for -o reconstruct_alloc: */
 static void bch2_reconstruct_alloc(struct bch_fs *c)
 {
index cb501460d6152b31a4ae57d9dea6db0792c47c0f..0cec0f7d9703520a3cf24bcc2ca2ce7f86285ebc 100644 (file)
@@ -44,7 +44,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
 
        set_bit(BCH_FS_may_go_rw, &c->flags);
 
-       if (keys->nr || c->opts.fsck || !c->sb.clean)
+       if (keys->nr || c->opts.fsck || !c->sb.clean || c->recovery_passes_explicit)
                return bch2_fs_read_write_early(c);
        return 0;
 }
index d6f81179c3a29b6e884f92c94628d512626c6b45..a98ef940b7a3280bd0da0474ef4f387fdcd0cc18 100644 (file)
          BCH_FSCK_ERR_subvol_fs_path_parent_wrong)             \
        x(btree_subvolume_children,                             \
          BIT_ULL(BCH_RECOVERY_PASS_check_subvols),             \
-         BCH_FSCK_ERR_subvol_children_not_set)
+         BCH_FSCK_ERR_subvol_children_not_set)                 \
+       x(mi_btree_bitmap,                                      \
+         BIT_ULL(BCH_RECOVERY_PASS_check_allocations),         \
+         BCH_FSCK_ERR_btree_bitmap_not_marked)
 
 #define DOWNGRADE_TABLE()
 
index d7d609131030a817c5fa2867fc3cee5796fb898c..4ca6e7b0d8aaed2c4b95fff82c2ed964c6a102ad 100644 (file)
        x(bucket_gens_nonzero_for_invalid_buckets,              122)    \
        x(need_discard_freespace_key_to_invalid_dev_bucket,     123)    \
        x(need_discard_freespace_key_bad,                       124)    \
-       x(backpointer_pos_wrong,                                125)    \
+       x(backpointer_bucket_offset_wrong,                      125)    \
        x(backpointer_to_missing_device,                        126)    \
        x(backpointer_to_missing_alloc,                         127)    \
        x(backpointer_to_missing_ptr,                           128)    \
        x(btree_ptr_v2_min_key_bad,                             262)    \
        x(btree_root_unreadable_and_scan_found_nothing,         263)    \
        x(snapshot_node_missing,                                264)    \
-       x(dup_backpointer_to_bad_csum_extent,                   265)
+       x(dup_backpointer_to_bad_csum_extent,                   265)    \
+       x(btree_bitmap_not_marked,                              266)
 
 enum bch_sb_error_id {
 #define x(t, n) BCH_FSCK_ERR_##t = n,
index eff5ce18c69c0600047c1fef688a5980af33c678..522a969345e5289ac87cf53b5f5a735e3b5f8d67 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "btree_cache.h"
 #include "disk_groups.h"
 #include "opts.h"
 #include "replicas.h"
@@ -426,3 +427,55 @@ void bch2_dev_errors_reset(struct bch_dev *ca)
        bch2_write_super(c);
        mutex_unlock(&c->sb_lock);
 }
+
+/*
+ * Per member "range has btree nodes" bitmap:
+ *
+ * This is so that if we ever have to run the btree node scan to repair we don't
+ * have to scan full devices:
+ */
+
+bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
+{
+       bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
+               if (!bch2_dev_btree_bitmap_marked_sectors(bch_dev_bkey_exists(c, ptr->dev),
+                                                         ptr->offset, btree_sectors(c)))
+                       return false;
+       return true;
+}
+
+static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
+                               u64 start, unsigned sectors)
+{
+       struct bch_member *m = __bch2_members_v2_get_mut(mi, dev);
+       u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap);
+
+       u64 end = start + sectors;
+
+       int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6);
+       if (resize > 0) {
+               u64 new_bitmap = 0;
+
+               for (unsigned i = 0; i < 64; i++)
+                       if (bitmap & BIT_ULL(i))
+                               new_bitmap |= BIT_ULL(i >> resize);
+               bitmap = new_bitmap;
+               m->btree_bitmap_shift += resize;
+       }
+
+       for (unsigned bit = sectors >> m->btree_bitmap_shift;
+            bit << m->btree_bitmap_shift < end;
+            bit++)
+               bitmap |= BIT_ULL(bit);
+
+       m->btree_allocated_bitmap = cpu_to_le64(bitmap);
+}
+
+void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
+{
+       lockdep_assert_held(&c->sb_lock);
+
+       struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
+       bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
+               __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
+}
index be0a941832715a32634b8c3dea60bbf1685a672f..b27c3e4467cf288d67587143e5343d57d5aa41c9 100644 (file)
@@ -3,6 +3,7 @@
 #define _BCACHEFS_SB_MEMBERS_H
 
 #include "darray.h"
+#include "bkey_types.h"
 
 extern char * const bch2_member_error_strs[];
 
@@ -220,6 +221,8 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
                        : 1,
                .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi),
                .valid          = bch2_member_exists(mi),
+               .btree_bitmap_shift     = mi->btree_bitmap_shift,
+               .btree_allocated_bitmap = le64_to_cpu(mi->btree_allocated_bitmap),
        };
 }
 
@@ -228,4 +231,22 @@ void bch2_sb_members_from_cpu(struct bch_fs *);
 void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *);
 void bch2_dev_errors_reset(struct bch_dev *);
 
+static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start, unsigned sectors)
+{
+       u64 end = start + sectors;
+
+       if (end > 64 << ca->mi.btree_bitmap_shift)
+               return false;
+
+       for (unsigned bit = sectors >> ca->mi.btree_bitmap_shift;
+            bit << ca->mi.btree_bitmap_shift < end;
+            bit++)
+               if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit)))
+                       return false;
+       return true;
+}
+
+bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
+void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
+
 #endif /* _BCACHEFS_SB_MEMBERS_H */
index 0e806f04f3d7c5117ade3d612b1c851da243aead..544322d5c2517070143d367fa15d4ff353642556 100644 (file)
@@ -125,6 +125,15 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ances
        return s->parent;
 }
 
+static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor)
+{
+       const struct snapshot_t *s = __snapshot_t(t, id);
+       if (!s)
+               return false;
+
+       return test_bit(ancestor - id - 1, s->is_ancestor);
+}
+
 bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
 {
        bool ret;
@@ -140,13 +149,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
        while (id && id < ancestor - IS_ANCESTOR_BITMAP)
                id = get_ancestor_below(t, id, ancestor);
 
-       if (id && id < ancestor) {
-               ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
+       ret = id && id < ancestor
+               ? test_ancestor_bitmap(t, id, ancestor)
+               : id == ancestor;
 
-               EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
-       } else {
-               ret = id == ancestor;
-       }
+       EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
 out:
        rcu_read_unlock();
 
index 5eee055ee2721a3967fb31ca38adcbc5672521d6..08ea3dbbbe97ce11833fe79baa5fd87935919339 100644 (file)
@@ -700,8 +700,11 @@ retry:
                return -ENOMEM;
 
        sb->sb_name = kstrdup(path, GFP_KERNEL);
-       if (!sb->sb_name)
-               return -ENOMEM;
+       if (!sb->sb_name) {
+               ret = -ENOMEM;
+               prt_printf(&err, "error allocating memory for sb_name");
+               goto err;
+       }
 
 #ifndef __KERNEL__
        if (opt_get(*opts, direct_io) == false)
index ed63018f21bef58b2aa854f9c3f05ad1b3f26202..8daf80a38d60c6e4fa97b97345d3d4ecb80e7e88 100644 (file)
@@ -288,8 +288,13 @@ static void __bch2_fs_read_only(struct bch_fs *c)
        if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
            !test_bit(BCH_FS_emergency_ro, &c->flags))
                set_bit(BCH_FS_clean_shutdown, &c->flags);
+
        bch2_fs_journal_stop(&c->journal);
 
+       bch_info(c, "%sshutdown complete, journal seq %llu",
+                test_bit(BCH_FS_clean_shutdown, &c->flags) ? "" : "un",
+                c->journal.seq_ondisk);
+
        /*
         * After stopping journal:
         */
index ec784d975f6655a378207692644975e53271ddca..11bcef170c2c22644108e9fbec9b24eaf478059c 100644 (file)
@@ -37,6 +37,8 @@ struct bch_member_cpu {
        u8                      durability;
        u8                      freespace_initialized;
        u8                      valid;
+       u8                      btree_bitmap_shift;
+       u64                     btree_allocated_bitmap;
 };
 
 #endif /* _BCACHEFS_SUPER_TYPES_H */
index c86a93a8d8fc81bbe373efcbec74f3e2563e6da5..5be92fe3f4ea4e115512f0b7a31482919406a507 100644 (file)
@@ -17,7 +17,6 @@
 #include "btree_iter.h"
 #include "btree_key_cache.h"
 #include "btree_update.h"
-#include "btree_update_interior.h"
 #include "btree_gc.h"
 #include "buckets.h"
 #include "clock.h"
@@ -26,6 +25,7 @@
 #include "ec.h"
 #include "inode.h"
 #include "journal.h"
+#include "journal_reclaim.h"
 #include "keylist.h"
 #include "move.h"
 #include "movinggc.h"
@@ -139,6 +139,7 @@ do {                                                                        \
 write_attribute(trigger_gc);
 write_attribute(trigger_discards);
 write_attribute(trigger_invalidates);
+write_attribute(trigger_journal_flush);
 write_attribute(prune_cache);
 write_attribute(btree_wakeup);
 rw_attribute(btree_gc_periodic);
@@ -166,7 +167,6 @@ read_attribute(btree_write_stats);
 read_attribute(btree_cache_size);
 read_attribute(compression_stats);
 read_attribute(journal_debug);
-read_attribute(btree_updates);
 read_attribute(btree_cache);
 read_attribute(btree_key_cache);
 read_attribute(stripes_heap);
@@ -415,9 +415,6 @@ SHOW(bch2_fs)
        if (attr == &sysfs_journal_debug)
                bch2_journal_debug_to_text(out, &c->journal);
 
-       if (attr == &sysfs_btree_updates)
-               bch2_btree_updates_to_text(out, c);
-
        if (attr == &sysfs_btree_cache)
                bch2_btree_cache_to_text(out, c);
 
@@ -505,7 +502,7 @@ STORE(bch2_fs)
 
        /* Debugging: */
 
-       if (!test_bit(BCH_FS_rw, &c->flags))
+       if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))
                return -EROFS;
 
        if (attr == &sysfs_prune_cache) {
@@ -538,6 +535,11 @@ STORE(bch2_fs)
        if (attr == &sysfs_trigger_invalidates)
                bch2_do_invalidates(c);
 
+       if (attr == &sysfs_trigger_journal_flush) {
+               bch2_journal_flush_all_pins(&c->journal);
+               bch2_journal_meta(&c->journal);
+       }
+
 #ifdef CONFIG_BCACHEFS_TESTS
        if (attr == &sysfs_perf_test) {
                char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
@@ -558,6 +560,7 @@ STORE(bch2_fs)
                        size = ret;
        }
 #endif
+       bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
        return size;
 }
 SYSFS_OPS(bch2_fs);
@@ -639,7 +642,6 @@ SYSFS_OPS(bch2_fs_internal);
 struct attribute *bch2_fs_internal_files[] = {
        &sysfs_flags,
        &sysfs_journal_debug,
-       &sysfs_btree_updates,
        &sysfs_btree_cache,
        &sysfs_btree_key_cache,
        &sysfs_new_stripes,
@@ -657,6 +659,7 @@ struct attribute *bch2_fs_internal_files[] = {
        &sysfs_trigger_gc,
        &sysfs_trigger_discards,
        &sysfs_trigger_invalidates,
+       &sysfs_trigger_journal_flush,
        &sysfs_prune_cache,
        &sysfs_btree_wakeup,
 
index b3fe9fc577470ff14659df531959c9e7aa6c324b..bfec656f94c0758ee081ea7d36fe1e272baca810 100644 (file)
@@ -672,7 +672,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
 
        bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
                             BTREE_ITER_INTENT);
-       k = bch2_btree_iter_peek(&iter);
+       k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX));
        ret = bkey_err(k);
        if (ret)
                goto err;
index b7e7c29278fc052a90fe7c029e8fd0626c48ddc5..5cf885b09986ac95effa15f7a37fc78bd56323cb 100644 (file)
@@ -788,6 +788,14 @@ static inline int copy_from_user_errcode(void *to, const void __user *from, unsi
 
 #endif
 
+static inline void mod_bit(long nr, volatile unsigned long *addr, bool v)
+{
+       if (v)
+               set_bit(nr, addr);
+       else
+               clear_bit(nr, addr);
+}
+
 static inline void __set_bit_le64(size_t bit, __le64 *addr)
 {
        addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64));
@@ -795,7 +803,7 @@ static inline void __set_bit_le64(size_t bit, __le64 *addr)
 
 static inline void __clear_bit_le64(size_t bit, __le64 *addr)
 {
-       addr[bit / 64] &= !cpu_to_le64(BIT_ULL(bit % 64));
+       addr[bit / 64] &= ~cpu_to_le64(BIT_ULL(bit % 64));
 }
 
 static inline bool test_bit_le64(size_t bit, __le64 *addr)
index dd6f566a383f00e83c9f36125ee4ecd3fc0e3541..121ab890bd0557e4779bd25d00dc422ba8fb1b3f 100644 (file)
@@ -1133,6 +1133,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
        if (ret)
                return ret;
 
+       ret = btrfs_record_root_in_trans(trans, node->root);
+       if (ret)
+               return ret;
        ret = btrfs_update_delayed_inode(trans, node->root, path, node);
        return ret;
 }
index beedd6ed64d39bd7f53ad814c22df36b80b96235..257d044bca9158c95e205ff22ff0d662d0d7f074 100644 (file)
@@ -3464,6 +3464,14 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
        if (root_id != BTRFS_TREE_LOG_OBJECTID) {
                struct btrfs_ref generic_ref = { 0 };
 
+               /*
+                * Assert that the extent buffer is not cleared due to
+                * EXTENT_BUFFER_ZONED_ZEROOUT. Please refer
+                * btrfs_clear_buffer_dirty() and btree_csum_one_bio() for
+                * detail.
+                */
+               ASSERT(btrfs_header_bytenr(buf) != 0);
+
                btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
                                       buf->start, buf->len, parent,
                                       btrfs_header_owner(buf));
index 61594eaf1f8969fc3ba04604e3470a8932450767..2776112dbdf8d471a7cb4d515fdd443e6fadbac5 100644 (file)
@@ -681,31 +681,21 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
 int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
                           gfp_t extra_gfp)
 {
+       const gfp_t gfp = GFP_NOFS | extra_gfp;
        unsigned int allocated;
 
        for (allocated = 0; allocated < nr_pages;) {
                unsigned int last = allocated;
 
-               allocated = alloc_pages_bulk_array(GFP_NOFS | extra_gfp,
-                                                  nr_pages, page_array);
-
-               if (allocated == nr_pages)
-                       return 0;
-
-               /*
-                * During this iteration, no page could be allocated, even
-                * though alloc_pages_bulk_array() falls back to alloc_page()
-                * if  it could not bulk-allocate. So we must be out of memory.
-                */
-               if (allocated == last) {
+               allocated = alloc_pages_bulk_array(gfp, nr_pages, page_array);
+               if (unlikely(allocated == last)) {
+                       /* No progress, fail and do cleanup. */
                        for (int i = 0; i < allocated; i++) {
                                __free_page(page_array[i]);
                                page_array[i] = NULL;
                        }
                        return -ENOMEM;
                }
-
-               memalloc_retry_wait(GFP_NOFS);
        }
        return 0;
 }
@@ -4154,7 +4144,7 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
         * The actual zeroout of the buffer will happen later in
         * btree_csum_one_bio.
         */
-       if (btrfs_is_zoned(fs_info)) {
+       if (btrfs_is_zoned(fs_info) && test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
                set_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags);
                return;
        }
@@ -4193,6 +4183,7 @@ void set_extent_buffer_dirty(struct extent_buffer *eb)
        num_folios = num_extent_folios(eb);
        WARN_ON(atomic_read(&eb->refs) == 0);
        WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
+       WARN_ON(test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags));
 
        if (!was_dirty) {
                bool subpage = eb->fs_info->nodesize < PAGE_SIZE;
index 37701531eeb1ba486cd8117f104794083dff8816..c65fe5de40220d3b51003bb73b3e6414eaefba08 100644 (file)
@@ -2533,7 +2533,7 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
                 */
                if (bits & EXTENT_CLEAR_META_RESV &&
                    root != fs_info->tree_root)
-                       btrfs_delalloc_release_metadata(inode, len, false);
+                       btrfs_delalloc_release_metadata(inode, len, true);
 
                /* For sanity tests. */
                if (btrfs_is_testing(fs_info))
@@ -4503,6 +4503,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
        struct btrfs_trans_handle *trans;
        struct btrfs_block_rsv block_rsv;
        u64 root_flags;
+       u64 qgroup_reserved = 0;
        int ret;
 
        down_write(&fs_info->subvol_sem);
@@ -4547,12 +4548,20 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
        ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
        if (ret)
                goto out_undead;
+       qgroup_reserved = block_rsv.qgroup_rsv_reserved;
 
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
                goto out_release;
        }
+       ret = btrfs_record_root_in_trans(trans, root);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto out_end_trans;
+       }
+       btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+       qgroup_reserved = 0;
        trans->block_rsv = &block_rsv;
        trans->bytes_reserved = block_rsv.size;
 
@@ -4611,7 +4620,9 @@ out_end_trans:
        ret = btrfs_end_transaction(trans);
        inode->i_flags |= S_DEAD;
 out_release:
-       btrfs_subvolume_release_metadata(root, &block_rsv);
+       btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+       if (qgroup_reserved)
+               btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
 out_undead:
        if (ret) {
                spin_lock(&dest->root_item_lock);
index 294e31edec9d3bbe566e9234c8ef76d73612adbc..55f3ba6a831ca194e2d8405dbf7caa60fbd81dfc 100644 (file)
@@ -613,6 +613,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
        int ret;
        dev_t anon_dev;
        u64 objectid;
+       u64 qgroup_reserved = 0;
 
        root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
        if (!root_item)
@@ -650,13 +651,18 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
                                               trans_num_items, false);
        if (ret)
                goto out_new_inode_args;
+       qgroup_reserved = block_rsv.qgroup_rsv_reserved;
 
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               btrfs_subvolume_release_metadata(root, &block_rsv);
-               goto out_new_inode_args;
+               goto out_release_rsv;
        }
+       ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+       if (ret)
+               goto out;
+       btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+       qgroup_reserved = 0;
        trans->block_rsv = &block_rsv;
        trans->bytes_reserved = block_rsv.size;
        /* Tree log can't currently deal with an inode which is a new root. */
@@ -767,9 +773,11 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
 out:
        trans->block_rsv = NULL;
        trans->bytes_reserved = 0;
-       btrfs_subvolume_release_metadata(root, &block_rsv);
-
        btrfs_end_transaction(trans);
+out_release_rsv:
+       btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+       if (qgroup_reserved)
+               btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
 out_new_inode_args:
        btrfs_new_inode_args_destroy(&new_inode_args);
 out_inode:
@@ -791,6 +799,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
        struct btrfs_pending_snapshot *pending_snapshot;
        unsigned int trans_num_items;
        struct btrfs_trans_handle *trans;
+       struct btrfs_block_rsv *block_rsv;
+       u64 qgroup_reserved = 0;
        int ret;
 
        /* We do not support snapshotting right now. */
@@ -827,19 +837,19 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
                goto free_pending;
        }
 
-       btrfs_init_block_rsv(&pending_snapshot->block_rsv,
-                            BTRFS_BLOCK_RSV_TEMP);
+       block_rsv = &pending_snapshot->block_rsv;
+       btrfs_init_block_rsv(block_rsv, BTRFS_BLOCK_RSV_TEMP);
        /*
         * 1 to add dir item
         * 1 to add dir index
         * 1 to update parent inode item
         */
        trans_num_items = create_subvol_num_items(inherit) + 3;
-       ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
-                                              &pending_snapshot->block_rsv,
+       ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, block_rsv,
                                               trans_num_items, false);
        if (ret)
                goto free_pending;
+       qgroup_reserved = block_rsv->qgroup_rsv_reserved;
 
        pending_snapshot->dentry = dentry;
        pending_snapshot->root = root;
@@ -852,6 +862,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
                ret = PTR_ERR(trans);
                goto fail;
        }
+       ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+       if (ret) {
+               btrfs_end_transaction(trans);
+               goto fail;
+       }
+       btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+       qgroup_reserved = 0;
 
        trans->pending_snapshot = pending_snapshot;
 
@@ -881,7 +898,9 @@ fail:
        if (ret && pending_snapshot->snap)
                pending_snapshot->snap->anon_dev = 0;
        btrfs_put_root(pending_snapshot->snap);
-       btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv);
+       btrfs_block_rsv_release(fs_info, block_rsv, (u64)-1, NULL);
+       if (qgroup_reserved)
+               btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
 free_pending:
        if (pending_snapshot->anon_dev)
                free_anon_bdev(pending_snapshot->anon_dev);
index 5f90f0605b12f7126e93d69e7fbec42720301fad..cf8820ce7aa2979920c6daafc1071c26571ecee6 100644 (file)
@@ -4495,6 +4495,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
                                      BTRFS_QGROUP_RSV_META_PREALLOC);
        trace_qgroup_meta_convert(root, num_bytes);
        qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes);
+       if (!sb_rdonly(fs_info->sb))
+               add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS);
 }
 
 /*
index 4bb538a372ce56404de84d6ddbca7fb951715949..7007f9e0c97282bc5f415f56d14e02e79895aafc 100644 (file)
@@ -548,13 +548,3 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
        }
        return ret;
 }
-
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
-                                     struct btrfs_block_rsv *rsv)
-{
-       struct btrfs_fs_info *fs_info = root->fs_info;
-       u64 qgroup_to_release;
-
-       btrfs_block_rsv_release(fs_info, rsv, (u64)-1, &qgroup_to_release);
-       btrfs_qgroup_convert_reserved_meta(root, qgroup_to_release);
-}
index 6f929cf3bd4967560964659ee9f631e6766a07ab..8f5739e732b9b6c9cc1d47ee34e20d50a403d90c 100644 (file)
@@ -18,8 +18,6 @@ struct btrfs_trans_handle;
 int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
                                     struct btrfs_block_rsv *rsv,
                                     int nitems, bool use_global_rsv);
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
-                                     struct btrfs_block_rsv *rsv);
 int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
                       u64 ref_id, u64 dirid, u64 sequence,
                       const struct fscrypt_str *name);
index 46e8426adf4f15768507303430b38c9e6be56c7d..85f359e0e0a7f2ea078157c85a1f78b0ea2bcadd 100644 (file)
@@ -745,14 +745,6 @@ again:
                h->reloc_reserved = reloc_reserved;
        }
 
-       /*
-        * Now that we have found a transaction to be a part of, convert the
-        * qgroup reservation from prealloc to pertrans. A different transaction
-        * can't race in and free our pertrans out from under us.
-        */
-       if (qgroup_reserved)
-               btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
-
 got_it:
        if (!current->journal_info)
                current->journal_info = h;
@@ -786,8 +778,15 @@ got_it:
                 * not just freed.
                 */
                btrfs_end_transaction(h);
-               return ERR_PTR(ret);
+               goto reserve_fail;
        }
+       /*
+        * Now that we have found a transaction to be a part of, convert the
+        * qgroup reservation from prealloc to pertrans. A different transaction
+        * can't race in and free our pertrans out from under us.
+        */
+       if (qgroup_reserved)
+               btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
 
        return h;
 
@@ -1495,6 +1494,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
                        radix_tree_tag_clear(&fs_info->fs_roots_radix,
                                        (unsigned long)root->root_key.objectid,
                                        BTRFS_ROOT_TRANS_TAG);
+                       btrfs_qgroup_free_meta_all_pertrans(root);
                        spin_unlock(&fs_info->fs_roots_radix_lock);
 
                        btrfs_free_log(trans, root);
@@ -1519,7 +1519,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
                        if (ret2)
                                return ret2;
                        spin_lock(&fs_info->fs_roots_radix_lock);
-                       btrfs_qgroup_free_meta_all_pertrans(root);
                }
        }
        spin_unlock(&fs_info->fs_roots_radix_lock);
index 1340d77124ae4db09c3b96548acdf1cd8a6c3fb0..ee9caf7916fb95931e08e41467cc97ddba950c0b 100644 (file)
@@ -795,8 +795,10 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
        ihold(inode);
 
        if (wbc->sync_mode == WB_SYNC_NONE &&
-           ceph_inode_to_fs_client(inode)->write_congested)
+           ceph_inode_to_fs_client(inode)->write_congested) {
+               redirty_page_for_writepage(wbc, page);
                return AOP_WRITEPAGE_ACTIVATE;
+       }
 
        wait_on_page_fscache(page);
 
index 55051ad09c19197e9b12d5d17068d20b04d6d3e6..c4941ba245ac3d0d3ae4e0f2598838b4ceb69ca9 100644 (file)
@@ -4783,13 +4783,13 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
 
                        doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
                              ceph_vinop(inode));
-                       spin_lock(&mdsc->cap_unlink_delay_lock);
+                       spin_lock(&mdsc->cap_delay_lock);
                        ci->i_ceph_flags |= CEPH_I_FLUSH;
                        if (!list_empty(&ci->i_cap_delay_list))
                                list_del_init(&ci->i_cap_delay_list);
                        list_add_tail(&ci->i_cap_delay_list,
                                      &mdsc->cap_unlink_delay_list);
-                       spin_unlock(&mdsc->cap_unlink_delay_lock);
+                       spin_unlock(&mdsc->cap_delay_lock);
 
                        /*
                         * Fire the work immediately, because the MDS maybe
index 3ab9c268a8bb398b779cc93d3da98f3d13df8fe3..360b686c3c67cfd1f256c656642957f6ca278427 100644 (file)
@@ -2504,7 +2504,7 @@ static void ceph_cap_unlink_work(struct work_struct *work)
        struct ceph_client *cl = mdsc->fsc->client;
 
        doutc(cl, "begin\n");
-       spin_lock(&mdsc->cap_unlink_delay_lock);
+       spin_lock(&mdsc->cap_delay_lock);
        while (!list_empty(&mdsc->cap_unlink_delay_list)) {
                struct ceph_inode_info *ci;
                struct inode *inode;
@@ -2516,15 +2516,15 @@ static void ceph_cap_unlink_work(struct work_struct *work)
 
                inode = igrab(&ci->netfs.inode);
                if (inode) {
-                       spin_unlock(&mdsc->cap_unlink_delay_lock);
+                       spin_unlock(&mdsc->cap_delay_lock);
                        doutc(cl, "on %p %llx.%llx\n", inode,
                              ceph_vinop(inode));
                        ceph_check_caps(ci, CHECK_CAPS_FLUSH);
                        iput(inode);
-                       spin_lock(&mdsc->cap_unlink_delay_lock);
+                       spin_lock(&mdsc->cap_delay_lock);
                }
        }
-       spin_unlock(&mdsc->cap_unlink_delay_lock);
+       spin_unlock(&mdsc->cap_delay_lock);
        doutc(cl, "done\n");
 }
 
@@ -5404,7 +5404,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
        INIT_LIST_HEAD(&mdsc->cap_wait_list);
        spin_lock_init(&mdsc->cap_delay_lock);
        INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
-       spin_lock_init(&mdsc->cap_unlink_delay_lock);
        INIT_LIST_HEAD(&mdsc->snap_flush_list);
        spin_lock_init(&mdsc->snap_flush_lock);
        mdsc->last_cap_flush_tid = 1;
index 03f8ff00874f727adff8b88cc8d538fc989692d8..b88e804152241281e5d1cd5ca90057d9deff9240 100644 (file)
@@ -461,9 +461,8 @@ struct ceph_mds_client {
        struct delayed_work    delayed_work;  /* delayed work */
        unsigned long    last_renew_caps;  /* last time we renewed our caps */
        struct list_head cap_delay_list;   /* caps with delayed release */
-       spinlock_t       cap_delay_lock;   /* protects cap_delay_list */
        struct list_head cap_unlink_delay_list;  /* caps with delayed release for unlink */
-       spinlock_t       cap_unlink_delay_lock;  /* protects cap_unlink_delay_list */
+       spinlock_t       cap_delay_lock;   /* protects cap_delay_list and cap_unlink_delay_list */
        struct list_head snap_flush_list;  /* cap_snaps ready to flush */
        spinlock_t       snap_flush_lock;
 
index e9df2f87072c687073abe9625e66886934497a02..8502ef68459b9842d090a4ac338591778d1b3b24 100644 (file)
@@ -636,11 +636,18 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
         * each file a separate locking class.  Let's differentiate on
         * whether the file has mmap or not for now.
         *
-        * Both paths of the branch look the same.  They're supposed to
+        * For similar reasons, writable and readonly files are given different
+        * lockdep key, because the writable file /sys/power/resume may call vfs
+        * lookup helpers for arbitrary paths and readonly files can be read by
+        * overlayfs from vfs helpers when sysfs is a lower layer of overalyfs.
+        *
+        * All three cases look the same.  They're supposed to
         * look that way and give @of->mutex different static lockdep keys.
         */
        if (has_mmap)
                mutex_init(&of->mutex);
+       else if (file->f_mode & FMODE_WRITE)
+               mutex_init(&of->mutex);
        else
                mutex_init(&of->mutex);
 
index fac938f563ad022ce79cdc5f67321bc7f529cc1c..1955481832e03796170ea8f80361bc25cc452ca6 100644 (file)
@@ -3490,11 +3490,13 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
                    struct dentry *dentry, const u32 *bmval,
                    int ignore_crossmnt)
 {
+       DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
        struct nfsd4_fattr_args args;
        struct svc_fh *tempfh = NULL;
        int starting_len = xdr->buf->len;
        __be32 *attrlen_p, status;
        int attrlen_offset;
+       u32 attrmask[3];
        int err;
        struct nfsd4_compoundres *resp = rqstp->rq_resp;
        u32 minorversion = resp->cstate.minorversion;
@@ -3502,10 +3504,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
                .mnt    = exp->ex_path.mnt,
                .dentry = dentry,
        };
-       union {
-               u32             attrmask[3];
-               unsigned long   mask[2];
-       } u;
        unsigned long bit;
        bool file_modified = false;
        u64 size = 0;
@@ -3521,20 +3519,19 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
        /*
         * Make a local copy of the attribute bitmap that can be modified.
         */
-       memset(&u, 0, sizeof(u));
-       u.attrmask[0] = bmval[0];
-       u.attrmask[1] = bmval[1];
-       u.attrmask[2] = bmval[2];
+       attrmask[0] = bmval[0];
+       attrmask[1] = bmval[1];
+       attrmask[2] = bmval[2];
 
        args.rdattr_err = 0;
        if (exp->ex_fslocs.migrated) {
-               status = fattr_handle_absent_fs(&u.attrmask[0], &u.attrmask[1],
-                                               &u.attrmask[2], &args.rdattr_err);
+               status = fattr_handle_absent_fs(&attrmask[0], &attrmask[1],
+                                               &attrmask[2], &args.rdattr_err);
                if (status)
                        goto out;
        }
        args.size = 0;
-       if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+       if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
                status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
                                        &file_modified, &size);
                if (status)
@@ -3553,16 +3550,16 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 
        if (!(args.stat.result_mask & STATX_BTIME))
                /* underlying FS does not offer btime so we can't share it */
-               u.attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
-       if ((u.attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
+               attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
+       if ((attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
                        FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
-           (u.attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
+           (attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
                       FATTR4_WORD1_SPACE_TOTAL))) {
                err = vfs_statfs(&path, &args.statfs);
                if (err)
                        goto out_nfserr;
        }
-       if ((u.attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
+       if ((attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
            !fhp) {
                tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
                status = nfserr_jukebox;
@@ -3577,10 +3574,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
                args.fhp = fhp;
 
        args.acl = NULL;
-       if (u.attrmask[0] & FATTR4_WORD0_ACL) {
+       if (attrmask[0] & FATTR4_WORD0_ACL) {
                err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl);
                if (err == -EOPNOTSUPP)
-                       u.attrmask[0] &= ~FATTR4_WORD0_ACL;
+                       attrmask[0] &= ~FATTR4_WORD0_ACL;
                else if (err == -EINVAL) {
                        status = nfserr_attrnotsupp;
                        goto out;
@@ -3592,17 +3589,17 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
        args.context = NULL;
-       if ((u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
-            u.attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
+       if ((attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
+            attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
                if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
                        err = security_inode_getsecctx(d_inode(dentry),
                                                &args.context, &args.contextlen);
                else
                        err = -EOPNOTSUPP;
                args.contextsupport = (err == 0);
-               if (u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) {
+               if (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) {
                        if (err == -EOPNOTSUPP)
-                               u.attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+                               attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
                        else if (err)
                                goto out_nfserr;
                }
@@ -3610,8 +3607,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
 
        /* attrmask */
-       status = nfsd4_encode_bitmap4(xdr, u.attrmask[0],
-                                     u.attrmask[1], u.attrmask[2]);
+       status = nfsd4_encode_bitmap4(xdr, attrmask[0], attrmask[1],
+                                     attrmask[2]);
        if (status)
                goto out;
 
@@ -3620,7 +3617,9 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
        attrlen_p = xdr_reserve_space(xdr, XDR_UNIT);
        if (!attrlen_p)
                goto out_resource;
-       for_each_set_bit(bit, (const unsigned long *)&u.mask,
+       bitmap_from_arr32(attr_bitmap, attrmask,
+                         ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
+       for_each_set_bit(bit, attr_bitmap,
                         ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)) {
                status = nfsd4_enc_fattr4_encode_ops[bit](xdr, &args);
                if (status != nfs_ok)
index bc846b904b68d43816c48c69c3ae83152cadabf1..aee40db7a036fb9f7d34e2e456fb6d61ae3bbf2d 100644 (file)
@@ -240,7 +240,7 @@ nilfs_filetype_table[NILFS_FT_MAX] = {
 
 #define S_SHIFT 12
 static unsigned char
-nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
+nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
        [S_IFREG >> S_SHIFT]    = NILFS_FT_REG_FILE,
        [S_IFDIR >> S_SHIFT]    = NILFS_FT_DIR,
        [S_IFCHR >> S_SHIFT]    = NILFS_FT_CHRDEV,
index 902b326e1e5607d5537721b51f68c28e602e2b92..87dcaae32ff87b40c3d65a0d2463a6e60cdc0dbc 100644 (file)
@@ -62,12 +62,12 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size)
                                break;
                        dst += ret;
                }
-               if (ret >= 0 && boot_command_line[0]) {
-                       ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n",
-                                      boot_command_line);
-                       if (ret > 0)
-                               dst += ret;
-               }
+       }
+       if (cmdline_has_extra_options() && ret >= 0 && boot_command_line[0]) {
+               ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n",
+                              boot_command_line);
+               if (ret > 0)
+                       dst += ret;
        }
 out:
        kfree(key);
index 13a9d7acf8f8ec151323d18d44a346e060bf0ce2..0ff2491c311d8a669c709fb94eb4a16a54515c68 100644 (file)
@@ -433,8 +433,8 @@ smb2_close_cached_fid(struct kref *ref)
        if (cfid->is_open) {
                rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
                           cfid->fid.volatile_fid);
-               if (rc != -EBUSY && rc != -EAGAIN)
-                       atomic_dec(&cfid->tcon->num_remote_opens);
+               if (rc) /* should we retry on -EBUSY or -EAGAIN? */
+                       cifs_dbg(VFS, "close cached dir rc %d\n", rc);
        }
 
        free_cached_dir(cfid);
index f6a302205f89c456d9fa3adb3dae238deeb97d10..d6669ce4ae87f07415b150eaffcbf429c4fe74bd 100644 (file)
@@ -1077,6 +1077,7 @@ struct cifs_ses {
                                   and after mount option parsing we fill it */
        char *domainName;
        char *password;
+       char *password2; /* When key rotation used, new password may be set before it expires */
        char workstation_name[CIFS_MAX_WORKSTATION_LEN];
        struct session_key auth_key;
        struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
index 85679ae106fd50a4e3289349e2916204ae3f94fc..4e35970681bf052dc343c23935549600f5ce8859 100644 (file)
@@ -2183,6 +2183,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
        }
 
        ++delim;
+       /* BB consider adding support for password2 (Key Rotation) for multiuser in future */
        ctx->password = kstrndup(delim, len, GFP_KERNEL);
        if (!ctx->password) {
                cifs_dbg(FYI, "Unable to allocate %zd bytes for password\n",
@@ -2206,6 +2207,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
                        kfree(ctx->username);
                        ctx->username = NULL;
                        kfree_sensitive(ctx->password);
+                       /* no need to free ctx->password2 since not allocated in this path */
                        ctx->password = NULL;
                        goto out_key_put;
                }
@@ -2317,6 +2319,12 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
                if (!ses->password)
                        goto get_ses_fail;
        }
+       /* ctx->password freed at unmount */
+       if (ctx->password2) {
+               ses->password2 = kstrdup(ctx->password2, GFP_KERNEL);
+               if (!ses->password2)
+                       goto get_ses_fail;
+       }
        if (ctx->domainname) {
                ses->domainName = kstrdup(ctx->domainname, GFP_KERNEL);
                if (!ses->domainName)
index b7bfe705b2c498b83a60131713246bb9d37abf98..6c727d8c31e870ddd0f809db12b21aae76ac80cd 100644 (file)
@@ -162,6 +162,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
        fsparam_string("username", Opt_user),
        fsparam_string("pass", Opt_pass),
        fsparam_string("password", Opt_pass),
+       fsparam_string("password2", Opt_pass2),
        fsparam_string("ip", Opt_ip),
        fsparam_string("addr", Opt_ip),
        fsparam_string("domain", Opt_domain),
@@ -345,6 +346,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
        new_ctx->nodename = NULL;
        new_ctx->username = NULL;
        new_ctx->password = NULL;
+       new_ctx->password2 = NULL;
        new_ctx->server_hostname = NULL;
        new_ctx->domainname = NULL;
        new_ctx->UNC = NULL;
@@ -357,6 +359,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
        DUP_CTX_STR(prepath);
        DUP_CTX_STR(username);
        DUP_CTX_STR(password);
+       DUP_CTX_STR(password2);
        DUP_CTX_STR(server_hostname);
        DUP_CTX_STR(UNC);
        DUP_CTX_STR(source);
@@ -905,6 +908,8 @@ static int smb3_reconfigure(struct fs_context *fc)
        else  {
                kfree_sensitive(ses->password);
                ses->password = kstrdup(ctx->password, GFP_KERNEL);
+               kfree_sensitive(ses->password2);
+               ses->password2 = kstrdup(ctx->password2, GFP_KERNEL);
        }
        STEAL_STRING(cifs_sb, ctx, domainname);
        STEAL_STRING(cifs_sb, ctx, nodename);
@@ -1305,6 +1310,18 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
                        goto cifs_parse_mount_err;
                }
                break;
+       case Opt_pass2:
+               kfree_sensitive(ctx->password2);
+               ctx->password2 = NULL;
+               if (strlen(param->string) == 0)
+                       break;
+
+               ctx->password2 = kstrdup(param->string, GFP_KERNEL);
+               if (ctx->password2 == NULL) {
+                       cifs_errorf(fc, "OOM when copying password2 string\n");
+                       goto cifs_parse_mount_err;
+               }
+               break;
        case Opt_ip:
                if (strlen(param->string) == 0) {
                        ctx->got_ip = false;
@@ -1608,6 +1625,8 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
  cifs_parse_mount_err:
        kfree_sensitive(ctx->password);
        ctx->password = NULL;
+       kfree_sensitive(ctx->password2);
+       ctx->password2 = NULL;
        return -EINVAL;
 }
 
@@ -1713,6 +1732,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx)
        ctx->username = NULL;
        kfree_sensitive(ctx->password);
        ctx->password = NULL;
+       kfree_sensitive(ctx->password2);
+       ctx->password2 = NULL;
        kfree(ctx->server_hostname);
        ctx->server_hostname = NULL;
        kfree(ctx->UNC);
index 8a35645e0b65b244741da59177a2bcb0acea0256..a947bddeba273ea850b3502f07555a19316266a6 100644 (file)
@@ -145,6 +145,7 @@ enum cifs_param {
        Opt_source,
        Opt_user,
        Opt_pass,
+       Opt_pass2,
        Opt_ip,
        Opt_domain,
        Opt_srcaddr,
@@ -177,6 +178,7 @@ struct smb3_fs_context {
 
        char *username;
        char *password;
+       char *password2;
        char *domainname;
        char *source;
        char *server_hostname;
index 91b07ef9e25ca1c195bef21d06c92f5193633022..60afab5c83d410a9c5122d5f4826ade67cb93dee 100644 (file)
@@ -1105,7 +1105,8 @@ static int cifs_get_fattr(struct cifs_open_info_data *data,
                } else {
                        cifs_open_info_to_fattr(fattr, data, sb);
                }
-               if (!rc && fattr->cf_flags & CIFS_FATTR_DELETE_PENDING)
+               if (!rc && *inode &&
+                   (fattr->cf_flags & CIFS_FATTR_DELETE_PENDING))
                        cifs_mark_open_handles_for_deleted_file(*inode, full_path);
                break;
        case -EREMOTE:
index 33ac4f8f5050c416cd2004ee4516756edd3b11d8..7d15a1969b818439515b5188e8662a3b8f1276ce 100644 (file)
@@ -98,6 +98,7 @@ sesInfoFree(struct cifs_ses *buf_to_free)
        kfree(buf_to_free->serverDomain);
        kfree(buf_to_free->serverNOS);
        kfree_sensitive(buf_to_free->password);
+       kfree_sensitive(buf_to_free->password2);
        kfree(buf_to_free->user_name);
        kfree(buf_to_free->domainName);
        kfree_sensitive(buf_to_free->auth_key.response);
index b156eefa75d7cb4b13d1bf402234f08271a558ad..78c94d0350fe9970fab31564aeba6870d71859bd 100644 (file)
@@ -4964,68 +4964,84 @@ static int smb2_next_header(struct TCP_Server_Info *server, char *buf,
        return 0;
 }
 
-int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
-                      struct dentry *dentry, struct cifs_tcon *tcon,
-                      const char *full_path, umode_t mode, dev_t dev)
+static int __cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+                               struct dentry *dentry, struct cifs_tcon *tcon,
+                               const char *full_path, umode_t mode, dev_t dev)
 {
-       struct cifs_open_info_data buf = {};
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;
        struct cifs_io_parms io_parms = {};
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_fid fid;
        unsigned int bytes_written;
-       struct win_dev *pdev;
+       struct win_dev pdev = {};
        struct kvec iov[2];
        __u32 oplock = server->oplocks ? REQ_OPLOCK : 0;
        int rc;
 
-       if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode))
+       switch (mode & S_IFMT) {
+       case S_IFCHR:
+               strscpy(pdev.type, "IntxCHR");
+               pdev.major = cpu_to_le64(MAJOR(dev));
+               pdev.minor = cpu_to_le64(MINOR(dev));
+               break;
+       case S_IFBLK:
+               strscpy(pdev.type, "IntxBLK");
+               pdev.major = cpu_to_le64(MAJOR(dev));
+               pdev.minor = cpu_to_le64(MINOR(dev));
+               break;
+       case S_IFIFO:
+               strscpy(pdev.type, "LnxFIFO");
+               break;
+       default:
                return -EPERM;
+       }
 
-       oparms = (struct cifs_open_parms) {
-               .tcon = tcon,
-               .cifs_sb = cifs_sb,
-               .desired_access = GENERIC_WRITE,
-               .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR |
-                                                     CREATE_OPTION_SPECIAL),
-               .disposition = FILE_CREATE,
-               .path = full_path,
-               .fid = &fid,
-       };
+       oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, GENERIC_WRITE,
+                            FILE_CREATE, CREATE_NOT_DIR |
+                            CREATE_OPTION_SPECIAL, ACL_NO_MODE);
+       oparms.fid = &fid;
 
-       rc = server->ops->open(xid, &oparms, &oplock, &buf);
+       rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc)
                return rc;
 
-       /*
-        * BB Do not bother to decode buf since no local inode yet to put
-        * timestamps in, but we can reuse it safely.
-        */
-       pdev = (struct win_dev *)&buf.fi;
        io_parms.pid = current->tgid;
        io_parms.tcon = tcon;
-       io_parms.length = sizeof(*pdev);
-       iov[1].iov_base = pdev;
-       iov[1].iov_len = sizeof(*pdev);
-       if (S_ISCHR(mode)) {
-               memcpy(pdev->type, "IntxCHR", 8);
-               pdev->major = cpu_to_le64(MAJOR(dev));
-               pdev->minor = cpu_to_le64(MINOR(dev));
-       } else if (S_ISBLK(mode)) {
-               memcpy(pdev->type, "IntxBLK", 8);
-               pdev->major = cpu_to_le64(MAJOR(dev));
-               pdev->minor = cpu_to_le64(MINOR(dev));
-       } else if (S_ISFIFO(mode)) {
-               memcpy(pdev->type, "LnxFIFO", 8);
-       }
+       io_parms.length = sizeof(pdev);
+       iov[1].iov_base = &pdev;
+       iov[1].iov_len = sizeof(pdev);
 
        rc = server->ops->sync_write(xid, &fid, &io_parms,
                                     &bytes_written, iov, 1);
        server->ops->close(xid, tcon, &fid);
-       d_drop(dentry);
-       /* FIXME: add code here to set EAs */
-       cifs_free_open_info(&buf);
+       return rc;
+}
+
+int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+                      struct dentry *dentry, struct cifs_tcon *tcon,
+                      const char *full_path, umode_t mode, dev_t dev)
+{
+       struct inode *new = NULL;
+       int rc;
+
+       rc = __cifs_sfu_make_node(xid, inode, dentry, tcon,
+                                 full_path, mode, dev);
+       if (rc)
+               return rc;
+
+       if (tcon->posix_extensions) {
+               rc = smb311_posix_get_inode_info(&new, full_path, NULL,
+                                                inode->i_sb, xid);
+       } else if (tcon->unix_ext) {
+               rc = cifs_get_inode_info_unix(&new, full_path,
+                                             inode->i_sb, xid);
+       } else {
+               rc = cifs_get_inode_info(&new, full_path, NULL,
+                                        inode->i_sb, xid, NULL);
+       }
+       if (!rc)
+               d_instantiate(dentry, new);
        return rc;
 }
 
index c0c4933af5fc386911922b4e23c7869bdea8098b..86c647a947ccd1065a8edb0712e113351839b96f 100644 (file)
@@ -367,6 +367,17 @@ again:
                }
 
                rc = cifs_setup_session(0, ses, server, nls_codepage);
+               if ((rc == -EACCES) || (rc == -EKEYEXPIRED) || (rc == -EKEYREVOKED)) {
+                       /*
+                        * Try alternate password for next reconnect (key rotation
+                        * could be enabled on the server e.g.) if an alternate
+                        * password is available and the current password is expired,
+                        * but do not swap on non pwd related errors like host down
+                        */
+                       if (ses->password2)
+                               swap(ses->password2, ses->password);
+               }
+
                if ((rc == -EACCES) && !tcon->retry) {
                        mutex_unlock(&ses->session_mutex);
                        rc = -EHOSTDOWN;
index aa3411354e66d00c48db30adf73b34eedb84cb6d..16bd693d0b3aa23ce87af9cc1540e113a4c2a286 100644 (file)
@@ -48,6 +48,10 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
        gid_t i_gid;
        int err;
 
+       inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
+       if (inode->i_ino == 0)
+               return -EINVAL;
+
        err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid);
        if (err)
                return err;
@@ -58,7 +62,6 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
 
        i_uid_write(inode, i_uid);
        i_gid_write(inode, i_gid);
-       inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
        inode_set_mtime(inode, le32_to_cpu(sqsh_ino->mtime), 0);
        inode_set_atime(inode, inode_get_mtime_sec(inode), 0);
        inode_set_ctime(inode, inode_get_mtime_sec(inode), 0);
index dc067eeb638744b72be8487102498e9d010b2945..894c6ca1e5002015b378ad8d6ec598de44c25ebe 100644 (file)
@@ -336,6 +336,7 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode,
 
 /**
  * lookup_file - look up a file in the tracefs filesystem
+ * @parent_ei: Pointer to the eventfs_inode that represents parent of the file
  * @dentry: the dentry to look up
  * @mode: the permission that the file should have.
  * @attr: saved attributes changed by user
@@ -389,6 +390,7 @@ static struct dentry *lookup_file(struct eventfs_inode *parent_ei,
 /**
  * lookup_dir_entry - look up a dir in the tracefs filesystem
  * @dentry: the directory to look up
+ * @pei: Pointer to the parent eventfs_inode if available
  * @ei: the eventfs_inode that represents the directory to create
  *
  * This function will look up a dentry for a directory represented by
@@ -478,16 +480,20 @@ void eventfs_d_release(struct dentry *dentry)
 
 /**
  * lookup_file_dentry - create a dentry for a file of an eventfs_inode
+ * @dentry: The parent dentry under which the new file's dentry will be created
  * @ei: the eventfs_inode that the file will be created under
  * @idx: the index into the entry_attrs[] of the @ei
- * @parent: The parent dentry of the created file.
- * @name: The name of the file to create
  * @mode: The mode of the file.
  * @data: The data to use to set the inode of the file with on open()
  * @fops: The fops of the file to be created.
  *
- * Create a dentry for a file of an eventfs_inode @ei and place it into the
- * address located at @e_dentry.
+ * This function creates a dentry for a file associated with an
+ * eventfs_inode @ei. It uses the entry attributes specified by @idx,
+ * if available. The file will have the specified @mode and its inode will be
+ * set up with @data upon open. The file operations will be set to @fops.
+ *
+ * Return: Returns a pointer to the newly created file's dentry or an error
+ * pointer.
  */
 static struct dentry *
 lookup_file_dentry(struct dentry *dentry,
index c6a124e8d565febb690377ae982f60042ba2383b..964fa7f2400335dc8eb9456c3190aa36f2c0c8ec 100644 (file)
@@ -1048,7 +1048,7 @@ static int zonefs_init_zgroup(struct super_block *sb,
        zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
                    zonefs_zgroup_name(ztype),
                    zgroup->g_nr_zones,
-                   zgroup->g_nr_zones > 1 ? "s" : "");
+                   str_plural(zgroup->g_nr_zones));
 
        return 0;
 }
index 5de954e2b18aaac5c0796466d256f6cd10e1130d..e7796f373d0dac4daa5c322a7ba82983b9a8ac81 100644 (file)
@@ -911,17 +911,19 @@ static inline bool acpi_int_uid_match(struct acpi_device *adev, u64 uid2)
  * acpi_dev_hid_uid_match - Match device by supplied HID and UID
  * @adev: ACPI device to match.
  * @hid2: Hardware ID of the device.
- * @uid2: Unique ID of the device, pass 0 or NULL to not check _UID.
+ * @uid2: Unique ID of the device, pass NULL to not check _UID.
  *
  * Matches HID and UID in @adev with given @hid2 and @uid2. Absence of @uid2
  * will be treated as a match. If user wants to validate @uid2, it should be
  * done before calling this function.
  *
- * Returns: %true if matches or @uid2 is 0 or NULL, %false otherwise.
+ * Returns: %true if matches or @uid2 is NULL, %false otherwise.
  */
 #define acpi_dev_hid_uid_match(adev, hid2, uid2)                       \
        (acpi_dev_hid_match(adev, hid2) &&                              \
-               (!(uid2) || acpi_dev_uid_match(adev, uid2)))
+               /* Distinguish integer 0 from NULL @uid2 */             \
+               (_Generic(uid2, ACPI_STR_TYPES(!(uid2)), default: 0) || \
+               acpi_dev_uid_match(adev, uid2)))
 
 void acpi_dev_clear_dependencies(struct acpi_device *supplier);
 bool acpi_dev_ready_for_enumeration(const struct acpi_device *device);
index 6e794420bd398c7e4848cadebdc107116cfb6af2..b7de3a4eade1c265acc4f92b53d5617d1ae3cb87 100644 (file)
@@ -156,7 +156,10 @@ extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
 
 #else /* !CONFIG_BUG */
 #ifndef HAVE_ARCH_BUG
-#define BUG() do {} while (1)
+#define BUG() do {             \
+       do {} while (1);        \
+       unreachable();          \
+} while (0)
 #endif
 
 #ifndef HAVE_ARCH_BUG_ON
index 87e3d49a4e29bf7af1de43d1da45bfd9ca3791ea..814207e7c37fcf17a65638f68ceda02e400c58fa 100644 (file)
@@ -512,13 +512,9 @@ struct hv_proximity_domain_flags {
        u32 proximity_info_valid : 1;
 } __packed;
 
-/* Not a union in windows but useful for zeroing */
-union hv_proximity_domain_info {
-       struct {
-               u32 domain_id;
-               struct hv_proximity_domain_flags flags;
-       };
-       u64 as_uint64;
+struct hv_proximity_domain_info {
+       u32 domain_id;
+       struct hv_proximity_domain_flags flags;
 } __packed;
 
 struct hv_lp_startup_status {
@@ -532,14 +528,13 @@ struct hv_lp_startup_status {
 } __packed;
 
 /* HvAddLogicalProcessor hypercall */
-struct hv_add_logical_processor_in {
+struct hv_input_add_logical_processor {
        u32 lp_index;
        u32 apic_id;
-       union hv_proximity_domain_info proximity_domain_info;
-       u64 flags;
+       struct hv_proximity_domain_info proximity_domain_info;
 } __packed;
 
-struct hv_add_logical_processor_out {
+struct hv_output_add_logical_processor {
        struct hv_lp_startup_status startup_status;
 } __packed;
 
@@ -560,7 +555,7 @@ struct hv_create_vp {
        u8 padding[3];
        u8 subnode_type;
        u64 subnode_id;
-       union hv_proximity_domain_info proximity_domain_info;
+       struct hv_proximity_domain_info proximity_domain_info;
        u64 flags;
 } __packed;
 
index 99935779682dc29180f556469c4487603b082740..8fe7aaab25990aa2fdebd81463b9ac9dedd36945 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/types.h>
 #include <linux/atomic.h>
 #include <linux/bitops.h>
+#include <acpi/acpi_numa.h>
 #include <linux/cpumask.h>
 #include <linux/nmi.h>
 #include <asm/ptrace.h>
@@ -67,6 +68,19 @@ extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
 bool hv_isolation_type_snp(void);
 bool hv_isolation_type_tdx(void);
 
+static inline struct hv_proximity_domain_info hv_numa_node_to_pxm_info(int node)
+{
+       struct hv_proximity_domain_info pxm_info = {};
+
+       if (node != NUMA_NO_NODE) {
+               pxm_info.domain_id = node_to_pxm(node);
+               pxm_info.flags.proximity_info_valid = 1;
+               pxm_info.flags.proximity_preferred = 1;
+       }
+
+       return pxm_info;
+}
+
 /* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */
 static inline int hv_result(u64 status)
 {
index ca73940e26df83ddd65301b39b77c350b0f4c3c2..3f4b4ac527ca28c66119cf00fc13083633ec80a3 100644 (file)
@@ -10,6 +10,7 @@
 #ifdef __KERNEL__
 #include <linux/kernel.h>
 #include <linux/types.h>
+bool __init cmdline_has_extra_options(void);
 #else /* !__KERNEL__ */
 /*
  * NOTE: This is only for tools/bootconfig, because tools/bootconfig will
@@ -287,7 +288,12 @@ int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos);
 int __init xbc_get_info(int *node_size, size_t *data_size);
 
 /* XBC cleanup data structures */
-void __init xbc_exit(void);
+void __init _xbc_exit(bool early);
+
+static inline void xbc_exit(void)
+{
+       _xbc_exit(false);
+}
 
 /* XBC embedded bootconfig data in kernel */
 #ifdef CONFIG_BOOT_CONFIG_EMBED
index c00cc6c0878a1e173701a6267ac062fa3d41b790..8c252e073bd8103c8b13f39d90a965370b16d37d 100644 (file)
@@ -268,7 +268,7 @@ static inline void *offset_to_ptr(const int *off)
  *   - When one operand is a null pointer constant (i.e. when x is an integer
  *     constant expression) and the other is an object pointer (i.e. our
  *     third operand), the conditional operator returns the type of the
- *     object pointer operand (i.e. "int *). Here, within the sizeof(), we
+ *     object pointer operand (i.e. "int *"). Here, within the sizeof(), we
  *     would then get:
  *       sizeof(*((int *)(...))  == sizeof(int)  == 4
  *   - When one operand is a void pointer (i.e. when x is not an integer
index e06bad467f55ef1befdad569f0a8a37875def383..c3f9bb6602ba2135cae645bda4d730cd703a12a6 100644 (file)
@@ -682,4 +682,11 @@ static inline bool dma_fence_is_container(struct dma_fence *fence)
        return dma_fence_is_array(fence) || dma_fence_is_chain(fence);
 }
 
+#define DMA_FENCE_WARN(f, fmt, args...) \
+       do {                                                            \
+               struct dma_fence *__ff = (f);                           \
+               pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\
+                        ##args);                                       \
+       } while (0)
+
 #endif /* __LINUX_DMA_FENCE_H */
index 868c8fb1bbc1c2dabd708bc2c6485c2e42dee8fe..13becafe41df00f94dddb5e4f0417d3447c6456c 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef __LINUX_GFP_TYPES_H
 #define __LINUX_GFP_TYPES_H
 
+#include <linux/bits.h>
+
 /* The typedef is in types.h but we want the documentation here */
 #if 0
 /**
index 6c75c8bd44a0bb627020ba267c3d0debb2379ba4..1a14e239221f7e9aec06b510d450c9f30b34fc2c 100644 (file)
@@ -2,7 +2,6 @@
 #ifndef __LINUX_GPIO_PROPERTY_H
 #define __LINUX_GPIO_PROPERTY_H
 
-#include <dt-bindings/gpio/gpio.h> /* for GPIO_* flags */
 #include <linux/property.h>
 
 #define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \
index 6ef0557b4bff8ed5d14bc18391d356913136c23c..96ceb4095425eb39aa8145fda63cc1d859fb56f5 100644 (file)
@@ -832,6 +832,7 @@ struct vmbus_gpadl {
        u32 gpadl_handle;
        u32 size;
        void *buffer;
+       bool decrypted;
 };
 
 struct vmbus_channel {
index 05df0e399d7c0b84236198f57e0c61e90412beaa..ac333ea81d319526d5fde59bf9f64b5510f94e41 100644 (file)
@@ -13,7 +13,7 @@ enum {
         * A hint to not wake right away but delay until there are enough of
         * tw's queued to match the number of CQEs the task is waiting for.
         *
-        * Must not be used wirh requests generating more than one CQE.
+        * Must not be used with requests generating more than one CQE.
         * It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
         */
        IOU_F_TWQ_LAZY_WAKE                     = 1,
index 147feebd508cabfa98a1844fb75e2235fb9b56d6..3f003d5fde5341bd789d0d1286109563624090d3 100644 (file)
@@ -114,7 +114,7 @@ do {                                                \
 # define lockdep_softirq_enter()               do { } while (0)
 # define lockdep_softirq_exit()                        do { } while (0)
 # define lockdep_hrtimer_enter(__hrtimer)      false
-# define lockdep_hrtimer_exit(__context)       do { } while (0)
+# define lockdep_hrtimer_exit(__context)       do { (void)(__context); } while (0)
 # define lockdep_posixtimer_enter()            do { } while (0)
 # define lockdep_posixtimer_exit()             do { } while (0)
 # define lockdep_irq_work_enter(__work)                do { } while (0)
index 0436b919f1c7fc535b30400bf95affc7e4534186..7b0ee64225de9cefebaf63e0d759c082684e3ab5 100644 (file)
@@ -2207,11 +2207,6 @@ static inline int arch_make_folio_accessible(struct folio *folio)
  */
 #include <linux/vmstat.h>
 
-static __always_inline void *lowmem_page_address(const struct page *page)
-{
-       return page_to_virt(page);
-}
-
 #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
 #define HASHED_PAGE_VIRTUAL
 #endif
@@ -2234,6 +2229,11 @@ void set_page_address(struct page *page, void *virtual);
 void page_address_init(void);
 #endif
 
+static __always_inline void *lowmem_page_address(const struct page *page)
+{
+       return page_to_virt(page);
+}
+
 #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
 #define page_address(page) lowmem_page_address(page)
 #define set_page_address(page, address)  do { } while(0)
index 5d868505a94e43fe4f6124915e90dc814c269278..6d92b68efbf6c3afe86b9f6c22a8759ce51e7a28 100644 (file)
@@ -80,7 +80,7 @@ DECLARE_PER_CPU(u32, kstack_offset);
        if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
                                &randomize_kstack_offset)) {            \
                u32 offset = raw_cpu_read(kstack_offset);               \
-               offset ^= (rand);                                       \
+               offset = ror32(offset, 5) ^ (rand);                     \
                raw_cpu_write(kstack_offset, offset);                   \
        }                                                               \
 } while (0)
index 29c4e4f243e47d580945626da4a172e2ecff0c5b..f2394a409c9d5e478844b0d6a43011f4447798a0 100644 (file)
@@ -31,9 +31,9 @@ static __always_inline bool rw_base_is_locked(const struct rwbase_rt *rwb)
        return atomic_read(&rwb->readers) != READER_BIAS;
 }
 
-static inline void rw_base_assert_held_write(const struct rwbase_rt *rwb)
+static __always_inline bool rw_base_is_write_locked(const struct rwbase_rt *rwb)
 {
-       WARN_ON(atomic_read(&rwb->readers) != WRITER_BIAS);
+       return atomic_read(&rwb->readers) == WRITER_BIAS;
 }
 
 static __always_inline bool rw_base_is_contended(const struct rwbase_rt *rwb)
index 4f1c18992f768fe67faffa139f259e8213a93f9e..c8b543d428b0a8d4662183f3342e88ec61d10189 100644 (file)
@@ -167,14 +167,14 @@ static __always_inline int rwsem_is_locked(const struct rw_semaphore *sem)
        return rw_base_is_locked(&sem->rwbase);
 }
 
-static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
+static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
 {
        WARN_ON(!rwsem_is_locked(sem));
 }
 
-static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
+static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
 {
-       rw_base_assert_held_write(sem);
+       WARN_ON(!rw_base_is_write_locked(&sem->rwbase));
 }
 
 static __always_inline int rwsem_is_contended(struct rw_semaphore *sem)
index a4c15db2f5e5401373e66a1b1f66ffd5e97ae35e..3fb18f7eb73eafecf8101a6e73a141cc4d46a0f9 100644 (file)
@@ -110,8 +110,17 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
 int shmem_unuse(unsigned int type);
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
                          struct mm_struct *mm, unsigned long vm_flags);
+#else
+static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
+                                         struct mm_struct *mm, unsigned long vm_flags)
+{
+       return false;
+}
+#endif
+
 #ifdef CONFIG_SHMEM
 extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
 #else
index 307961b41541a620023ad40d3178d47b94768126..317200cd3a603e213228e1b5b561b9757f3f3940 100644 (file)
@@ -50,11 +50,36 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src,
        return 0;
 }
 
+/* Deprecated.
+ * This is unsafe, unless caller checked user provided optlen.
+ * Prefer copy_safe_from_sockptr() instead.
+ */
 static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
 {
        return copy_from_sockptr_offset(dst, src, 0, size);
 }
 
+/**
+ * copy_safe_from_sockptr: copy a struct from sockptr
+ * @dst:   Destination address, in kernel space. This buffer must be @ksize
+ *         bytes long.
+ * @ksize: Size of @dst struct.
+ * @optval: Source address. (in user or kernel space)
+ * @optlen: Size of @optval data.
+ *
+ * Returns:
+ *  * -EINVAL: @optlen < @ksize
+ *  * -EFAULT: access to userspace failed.
+ *  * 0 : @ksize bytes were copied
+ */
+static inline int copy_safe_from_sockptr(void *dst, size_t ksize,
+                                        sockptr_t optval, unsigned int optlen)
+{
+       if (optlen < ksize)
+               return -EINVAL;
+       return copy_from_sockptr(dst, optval, ksize);
+}
+
 static inline int copy_struct_from_sockptr(void *dst, size_t ksize,
                sockptr_t src, size_t usize)
 {
index 48b700ba1d188a798209d4de4693173bfc6b98af..a5c560a2f8c25867e8e3e53588d551ea3b356feb 100644 (file)
@@ -390,6 +390,35 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
 }
 #endif /* CONFIG_MIGRATION */
 
+#ifdef CONFIG_MEMORY_FAILURE
+
+/*
+ * Support for hardware poisoned pages
+ */
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+       BUG_ON(!PageLocked(page));
+       return swp_entry(SWP_HWPOISON, page_to_pfn(page));
+}
+
+static inline int is_hwpoison_entry(swp_entry_t entry)
+{
+       return swp_type(entry) == SWP_HWPOISON;
+}
+
+#else
+
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+       return swp_entry(0, 0);
+}
+
+static inline int is_hwpoison_entry(swp_entry_t swp)
+{
+       return 0;
+}
+#endif
+
 typedef unsigned long pte_marker;
 
 #define  PTE_MARKER_UFFD_WP                    BIT(0)
@@ -483,8 +512,9 @@ static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry)
 
 /*
  * A pfn swap entry is a special type of swap entry that always has a pfn stored
- * in the swap offset. They are used to represent unaddressable device memory
- * and to restrict access to a page undergoing migration.
+ * in the swap offset. They can either be used to represent unaddressable device
+ * memory, to restrict access to a page undergoing migration or to represent a
+ * pfn which has been hwpoisoned and unmapped.
  */
 static inline bool is_pfn_swap_entry(swp_entry_t entry)
 {
@@ -492,7 +522,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry)
        BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
 
        return is_migration_entry(entry) || is_device_private_entry(entry) ||
-              is_device_exclusive_entry(entry);
+              is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
 }
 
 struct page_vma_mapped_walk;
@@ -561,35 +591,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
 }
 #endif  /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
 
-#ifdef CONFIG_MEMORY_FAILURE
-
-/*
- * Support for hardware poisoned pages
- */
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
-       BUG_ON(!PageLocked(page));
-       return swp_entry(SWP_HWPOISON, page_to_pfn(page));
-}
-
-static inline int is_hwpoison_entry(swp_entry_t entry)
-{
-       return swp_type(entry) == SWP_HWPOISON;
-}
-
-#else
-
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
-       return swp_entry(0, 0);
-}
-
-static inline int is_hwpoison_entry(swp_entry_t swp)
-{
-       return 0;
-}
-#endif
-
 static inline int non_swap_entry(swp_entry_t entry)
 {
        return swp_type(entry) >= MAX_SWAPFILES;
index ffe48e69b3f3ae136c97df9193e066f302ae40e4..457879938fc198b7104cdd12f7290c439b44e3e2 100644 (file)
@@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p)
        p->v++;
 }
 
-static inline void u64_stats_init(struct u64_stats_sync *syncp)
-{
-       seqcount_init(&syncp->seq);
-}
+#define u64_stats_init(syncp)                          \
+       do {                                            \
+               struct u64_stats_sync *__s = (syncp);   \
+               seqcount_init(&__s->seq);               \
+       } while (0)
 
 static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
 {
index 17539d08966618bfe4c2f0b41f4893c8951b22fe..e398e1dbd2d365c54874a6f368adacf6734e9f03 100644 (file)
@@ -108,7 +108,7 @@ struct udp_sock {
 #define udp_assign_bit(nr, sk, val)            \
        assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val)
 
-#define UDP_MAX_SEGMENTS       (1 << 6UL)
+#define UDP_MAX_SEGMENTS       (1 << 7UL)
 
 #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk)
 
index b0201747a263a9526c5d60c2c2644a8e064a8439..26c4325aa3734eaa32ffbbdd862b151b93e7fdf8 100644 (file)
@@ -170,7 +170,7 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev);
 
 /**
  * struct virtio_driver - operations for a virtio I/O driver
- * @driver: underlying device driver (populate name and owner).
+ * @driver: underlying device driver (populate name).
  * @id_table: the ids serviced by this driver.
  * @feature_table: an array of feature numbers supported by this driver.
  * @feature_table_size: number of entries in the feature table array.
@@ -208,7 +208,10 @@ static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
        return container_of(drv, struct virtio_driver, driver);
 }
 
-int register_virtio_driver(struct virtio_driver *drv);
+/* use a macro to avoid include chaining to get THIS_MODULE */
+#define register_virtio_driver(drv) \
+       __register_virtio_driver(drv, THIS_MODULE)
+int __register_virtio_driver(struct virtio_driver *drv, struct module *owner);
 void unregister_virtio_driver(struct virtio_driver *drv);
 
 /* module_virtio_driver() - Helper macro for drivers that don't do
index 9d06eb945509ecfcf01bec1ffa8481262931c5bd..62a407db1bf5ff6dd3298077e05e636ccc2ff97b 100644 (file)
@@ -438,6 +438,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp)
        refcount_inc(&ifp->refcnt);
 }
 
+static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp)
+{
+       return refcount_inc_not_zero(&ifp->refcnt);
+}
 
 /*
  *     compute link-local solicited-node multicast address
index 9fe95a22abeb7e2fb12d7384a974e5689db61211..eaec5d6caa29d293902f86666c91cceebd6f388c 100644 (file)
@@ -585,6 +585,15 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk,
        return skb;
 }
 
+static inline int bt_copy_from_sockptr(void *dst, size_t dst_size,
+                                      sockptr_t src, size_t src_size)
+{
+       if (dst_size > src_size)
+               return -EINVAL;
+
+       return copy_from_sockptr(dst, src, dst_size);
+}
+
 int bt_to_errno(u16 code);
 __u8 bt_status(int err);
 
index 5cd64bb2104df389250fb3c518ba00a3826c53f7..c286cc2e766ee04a77206b7c326b4283de43933e 100644 (file)
@@ -361,6 +361,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb)
        return pskb_network_may_pull(skb, nhlen);
 }
 
+/* Variant of pskb_inet_may_pull().
+ */
+static inline bool skb_vlan_inet_prepare(struct sk_buff *skb)
+{
+       int nhlen = 0, maclen = ETH_HLEN;
+       __be16 type = skb->protocol;
+
+       /* Essentially this is skb_protocol(skb, true)
+        * And we get MAC len.
+        */
+       if (eth_type_vlan(type))
+               type = __vlan_get_protocol(skb, type, &maclen);
+
+       switch (type) {
+#if IS_ENABLED(CONFIG_IPV6)
+       case htons(ETH_P_IPV6):
+               nhlen = sizeof(struct ipv6hdr);
+               break;
+#endif
+       case htons(ETH_P_IP):
+               nhlen = sizeof(struct iphdr);
+               break;
+       }
+       /* For ETH_P_IPV6/ETH_P_IP we make sure to pull
+        * a base network header in skb->head.
+        */
+       if (!pskb_may_pull(skb, maclen + nhlen))
+               return false;
+
+       skb_set_network_header(skb, maclen);
+       return true;
+}
+
 static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
 {
        const struct ip_tunnel_encap_ops *ops;
index a763dd327c6ea95d6b94fda1ea2efd8f1784335f..9abb7ee40d72fc2e7d2ef0ec86ef18df939ddd9c 100644 (file)
@@ -336,7 +336,7 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
 int nf_flow_table_offload_init(void);
 void nf_flow_table_offload_exit(void);
 
-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
 {
        __be16 proto;
 
@@ -352,6 +352,16 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
        return 0;
 }
 
+static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
+{
+       if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
+               return false;
+
+       *inner_proto = __nf_flow_pppoe_proto(skb);
+
+       return true;
+}
+
 #define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count)
 #define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count)
 #define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count)      \
index e27c28b612e464ca41c9f07e213d48bf84f11bf6..3f1ed467f951f6342d9ee8da6b576cf8c787af2d 100644 (file)
@@ -307,9 +307,23 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
        return (void *)priv;
 }
 
+
+/**
+ * enum nft_iter_type - nftables set iterator type
+ *
+ * @NFT_ITER_READ: read-only iteration over set elements
+ * @NFT_ITER_UPDATE: iteration under mutex to update set element state
+ */
+enum nft_iter_type {
+       NFT_ITER_UNSPEC,
+       NFT_ITER_READ,
+       NFT_ITER_UPDATE,
+};
+
 struct nft_set;
 struct nft_set_iter {
        u8              genmask;
+       enum nft_iter_type type:8;
        unsigned int    count;
        unsigned int    skip;
        int             err;
index cefe0c4bdae34c91868c22731a3b666f8e16e996..41ca14e81d55f926dc4002e820d7e027f4729021 100644 (file)
@@ -117,6 +117,7 @@ struct Qdisc {
        struct qdisc_skb_head   q;
        struct gnet_stats_basic_sync bstats;
        struct gnet_stats_queue qstats;
+       int                     owner;
        unsigned long           state;
        unsigned long           state2; /* must be written under qdisc spinlock */
        struct Qdisc            *next_sched;
index ba2d96a1bc2f94703945c5f79294a66af1fe8fd4..f50fcafc69de20b8b20a53a45f29b23f4259a65e 100644 (file)
@@ -609,7 +609,7 @@ TRACE_EVENT(rpcgss_context,
                __field(unsigned int, timeout)
                __field(u32, window_size)
                __field(int, len)
-               __string(acceptor, data)
+               __string_len(acceptor, data, len)
        ),
 
        TP_fast_assign(
@@ -618,7 +618,7 @@ TRACE_EVENT(rpcgss_context,
                __entry->timeout = timeout;
                __entry->window_size = window_size;
                __entry->len = len;
-               strncpy(__get_str(acceptor), data, len);
+               __assign_str(acceptor, data);
        ),
 
        TP_printk("win_size=%u expiry=%lu now=%lu timeout=%u acceptor=%.*s",
index bea6973906134656d84299958258205932c23e04..b95dd84eef2db2311f985921064e65d96e3e2f4c 100644 (file)
 /* Get the config size */
 #define VHOST_VDPA_GET_CONFIG_SIZE     _IOR(VHOST_VIRTIO, 0x79, __u32)
 
-/* Get the count of all virtqueues */
-#define VHOST_VDPA_GET_VQS_COUNT       _IOR(VHOST_VIRTIO, 0x80, __u32)
-
-/* Get the number of virtqueue groups. */
-#define VHOST_VDPA_GET_GROUP_NUM       _IOR(VHOST_VIRTIO, 0x81, __u32)
-
 /* Get the number of address spaces. */
 #define VHOST_VDPA_GET_AS_NUM          _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
 
 #define VHOST_VDPA_GET_VRING_DESC_GROUP        _IOWR(VHOST_VIRTIO, 0x7F,       \
                                              struct vhost_vring_state)
 
+
+/* Get the count of all virtqueues */
+#define VHOST_VDPA_GET_VQS_COUNT       _IOR(VHOST_VIRTIO, 0x80, __u32)
+
+/* Get the number of virtqueue groups. */
+#define VHOST_VDPA_GET_GROUP_NUM       _IOR(VHOST_VIRTIO, 0x81, __u32)
+
 /* Get the queue size of a specific virtqueue.
  * userspace set the vring index in vhost_vring_state.index
  * kernel set the queue size in vhost_vring_state.num
  */
-#define VHOST_VDPA_GET_VRING_SIZE      _IOWR(VHOST_VIRTIO, 0x80,       \
+#define VHOST_VDPA_GET_VRING_SIZE      _IOWR(VHOST_VIRTIO, 0x82,       \
                                              struct vhost_vring_state)
 #endif
index 2ca52474d0c3032e44ae410add7b2430218f9c41..5dcf5274c09c7ae60232051569bd24651a8379af 100644 (file)
@@ -487,6 +487,11 @@ static int __init warn_bootconfig(char *str)
 
 early_param("bootconfig", warn_bootconfig);
 
+bool __init cmdline_has_extra_options(void)
+{
+       return extra_command_line || extra_init_args;
+}
+
 /* Change NUL term back to "=", to make "param" the whole string. */
 static void __init repair_env_string(char *param, char *val)
 {
@@ -631,6 +636,8 @@ static void __init setup_command_line(char *command_line)
        if (!saved_command_line)
                panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen);
 
+       len = xlen + strlen(command_line) + 1;
+
        static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
        if (!static_command_line)
                panic("%s: Failed to allocate %zu bytes\n", __func__, len);
index 4521c2b66b98db3c3affc55c7aeb4a69b8eec0a7..c170a2b8d2cf21f06d1c5af8bf57edecb94aaa95 100644 (file)
@@ -2602,19 +2602,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
        if (__io_cqring_events_user(ctx) >= min_events)
                return 0;
 
-       if (sig) {
-#ifdef CONFIG_COMPAT
-               if (in_compat_syscall())
-                       ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
-                                                     sigsz);
-               else
-#endif
-                       ret = set_user_sigmask(sig, sigsz);
-
-               if (ret)
-                       return ret;
-       }
-
        init_waitqueue_func_entry(&iowq.wq, io_wake_function);
        iowq.wq.private = current;
        INIT_LIST_HEAD(&iowq.wq.entry);
@@ -2633,6 +2620,19 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                io_napi_adjust_timeout(ctx, &iowq, &ts);
        }
 
+       if (sig) {
+#ifdef CONFIG_COMPAT
+               if (in_compat_syscall())
+                       ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
+                                                     sigsz);
+               else
+#endif
+                       ret = set_user_sigmask(sig, sigsz);
+
+               if (ret)
+                       return ret;
+       }
+
        io_napi_busy_loop(ctx, &iowq);
 
        trace_io_uring_cqring_wait(ctx, min_events);
index 1e7665ff6ef70264b26206f99c34aa5516190129..4afb475d41974b95a86a22bd84771d8c29781c08 100644 (file)
@@ -1276,6 +1276,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
 
        if (req_has_async_data(req)) {
                kmsg = req->async_data;
+               kmsg->msg.msg_control_user = sr->msg_control;
        } else {
                ret = io_sendmsg_copy_hdr(req, &iomsg);
                if (ret)
index 8f6affd051f77564f96ca4682a58d0c131f62c56..07ad53b7f11952080e890ed91f99f3e762bf984d 100644 (file)
@@ -3207,7 +3207,8 @@ enum cpu_mitigations {
 };
 
 static enum cpu_mitigations cpu_mitigations __ro_after_init =
-       CPU_MITIGATIONS_AUTO;
+       IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO :
+                                                    CPU_MITIGATIONS_OFF;
 
 static int __init mitigations_parse_cmdline(char *arg)
 {
index 86fe172b5958232ee29d481bf2f9fe60a51c5881..a5e0dfc44d24e22641e72bb0362511a33b23a1fd 100644 (file)
  * @alloc_size:        Size of the allocated buffer.
  * @list:      The free list describing the number of free entries available
  *             from each index.
+ * @pad_slots: Number of preceding padding slots. Valid only in the first
+ *             allocated non-padding slot.
  */
 struct io_tlb_slot {
        phys_addr_t orig_addr;
        size_t alloc_size;
-       unsigned int list;
+       unsigned short list;
+       unsigned short pad_slots;
 };
 
 static bool swiotlb_force_bounce;
@@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
                                         mem->nslabs - i);
                mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
                mem->slots[i].alloc_size = 0;
+               mem->slots[i].pad_slots = 0;
        }
 
        memset(vaddr, 0, bytes);
@@ -821,12 +825,30 @@ void swiotlb_dev_init(struct device *dev)
 #endif
 }
 
-/*
- * Return the offset into a iotlb slot required to keep the device happy.
+/**
+ * swiotlb_align_offset() - Get required offset into an IO TLB allocation.
+ * @dev:         Owning device.
+ * @align_mask:  Allocation alignment mask.
+ * @addr:        DMA address.
+ *
+ * Return the minimum offset from the start of an IO TLB allocation which is
+ * required for a given buffer address and allocation alignment to keep the
+ * device happy.
+ *
+ * First, the address bits covered by min_align_mask must be identical in the
+ * original address and the bounce buffer address. High bits are preserved by
+ * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra
+ * padding bytes before the bounce buffer.
+ *
+ * Second, @align_mask specifies which bits of the first allocated slot must
+ * be zero. This may require allocating additional padding slots, and then the
+ * offset (in bytes) from the first such padding slot is returned.
  */
-static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+static unsigned int swiotlb_align_offset(struct device *dev,
+                                        unsigned int align_mask, u64 addr)
 {
-       return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+       return addr & dma_get_min_align_mask(dev) &
+               (align_mask | (IO_TLB_SIZE - 1));
 }
 
 /*
@@ -841,27 +863,23 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
        size_t alloc_size = mem->slots[index].alloc_size;
        unsigned long pfn = PFN_DOWN(orig_addr);
        unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
-       unsigned int tlb_offset, orig_addr_offset;
+       int tlb_offset;
 
        if (orig_addr == INVALID_PHYS_ADDR)
                return;
 
-       tlb_offset = tlb_addr & (IO_TLB_SIZE - 1);
-       orig_addr_offset = swiotlb_align_offset(dev, orig_addr);
-       if (tlb_offset < orig_addr_offset) {
-               dev_WARN_ONCE(dev, 1,
-                       "Access before mapping start detected. orig offset %u, requested offset %u.\n",
-                       orig_addr_offset, tlb_offset);
-               return;
-       }
-
-       tlb_offset -= orig_addr_offset;
-       if (tlb_offset > alloc_size) {
-               dev_WARN_ONCE(dev, 1,
-                       "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n",
-                       alloc_size, size, tlb_offset);
-               return;
-       }
+       /*
+        * It's valid for tlb_offset to be negative. This can happen when the
+        * "offset" returned by swiotlb_align_offset() is non-zero, and the
+        * tlb_addr is pointing within the first "offset" bytes of the second
+        * or subsequent slots of the allocated swiotlb area. While it's not
+        * valid for tlb_addr to be pointing within the first "offset" bytes
+        * of the first slot, there's no way to check for such an error since
+        * this function can't distinguish the first slot from the second and
+        * subsequent slots.
+        */
+       tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
+                    swiotlb_align_offset(dev, 0, orig_addr);
 
        orig_addr += tlb_offset;
        alloc_size -= tlb_offset;
@@ -1005,7 +1023,7 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool
        unsigned long max_slots = get_max_slots(boundary_mask);
        unsigned int iotlb_align_mask = dma_get_min_align_mask(dev);
        unsigned int nslots = nr_slots(alloc_size), stride;
-       unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+       unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr);
        unsigned int index, slots_checked, count = 0, i;
        unsigned long flags;
        unsigned int slot_base;
@@ -1328,11 +1346,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
                unsigned long attrs)
 {
        struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
-       unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+       unsigned int offset;
        struct io_tlb_pool *pool;
        unsigned int i;
        int index;
        phys_addr_t tlb_addr;
+       unsigned short pad_slots;
 
        if (!mem || !mem->nslabs) {
                dev_warn_ratelimited(dev,
@@ -1349,6 +1368,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
                return (phys_addr_t)DMA_MAPPING_ERROR;
        }
 
+       offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr);
        index = swiotlb_find_slots(dev, orig_addr,
                                   alloc_size + offset, alloc_align_mask, &pool);
        if (index == -1) {
@@ -1364,6 +1384,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
         * This is needed when we sync the memory.  Then we sync the buffer if
         * needed.
         */
+       pad_slots = offset >> IO_TLB_SHIFT;
+       offset &= (IO_TLB_SIZE - 1);
+       index += pad_slots;
+       pool->slots[index].pad_slots = pad_slots;
        for (i = 0; i < nr_slots(alloc_size + offset); i++)
                pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
        tlb_addr = slot_addr(pool->start, index) + offset;
@@ -1384,13 +1408,17 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 {
        struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
        unsigned long flags;
-       unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
-       int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
-       int nslots = nr_slots(mem->slots[index].alloc_size + offset);
-       int aindex = index / mem->area_nslabs;
-       struct io_tlb_area *area = &mem->areas[aindex];
+       unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr);
+       int index, nslots, aindex;
+       struct io_tlb_area *area;
        int count, i;
 
+       index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
+       index -= mem->slots[index].pad_slots;
+       nslots = nr_slots(mem->slots[index].alloc_size + offset);
+       aindex = index / mem->area_nslabs;
+       area = &mem->areas[aindex];
+
        /*
         * Return the buffer to the free list by setting the corresponding
         * entries to indicate the number of contiguous entries available.
@@ -1413,6 +1441,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
                mem->slots[i].list = ++count;
                mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
                mem->slots[i].alloc_size = 0;
+               mem->slots[i].pad_slots = 0;
        }
 
        /*
@@ -1647,9 +1676,6 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
 static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
                                         const char *dirname)
 {
-       atomic_long_set(&mem->total_used, 0);
-       atomic_long_set(&mem->used_hiwater, 0);
-
        mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
        if (!mem->nslabs)
                return;
@@ -1660,7 +1686,6 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
        debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem,
                        &fops_io_tlb_hiwater);
 #ifdef CONFIG_SWIOTLB_DYNAMIC
-       atomic_long_set(&mem->transient_nslabs, 0);
        debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs,
                            mem, &fops_io_tlb_transient_used);
 #endif
index 39a5046c2f0bf49e1bcade15c4c3c5574742b09d..aebb3e6c96dc62e5818ce16e31779868ea669067 100644 (file)
@@ -714,6 +714,23 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                } else if (anon_vma_fork(tmp, mpnt))
                        goto fail_nomem_anon_vma_fork;
                vm_flags_clear(tmp, VM_LOCKED_MASK);
+               /*
+                * Copy/update hugetlb private vma information.
+                */
+               if (is_vm_hugetlb_page(tmp))
+                       hugetlb_dup_vma_private(tmp);
+
+               /*
+                * Link the vma into the MT. After using __mt_dup(), memory
+                * allocation is not necessary here, so it cannot fail.
+                */
+               vma_iter_bulk_store(&vmi, tmp);
+
+               mm->map_count++;
+
+               if (tmp->vm_ops && tmp->vm_ops->open)
+                       tmp->vm_ops->open(tmp);
+
                file = tmp->vm_file;
                if (file) {
                        struct address_space *mapping = file->f_mapping;
@@ -730,25 +747,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                        i_mmap_unlock_write(mapping);
                }
 
-               /*
-                * Copy/update hugetlb private vma information.
-                */
-               if (is_vm_hugetlb_page(tmp))
-                       hugetlb_dup_vma_private(tmp);
-
-               /*
-                * Link the vma into the MT. After using __mt_dup(), memory
-                * allocation is not necessary here, so it cannot fail.
-                */
-               vma_iter_bulk_store(&vmi, tmp);
-
-               mm->map_count++;
                if (!(tmp->vm_flags & VM_WIPEONFORK))
                        retval = copy_page_range(tmp, mpnt);
 
-               if (tmp->vm_ops && tmp->vm_ops->open)
-                       tmp->vm_ops->open(tmp);
-
                if (retval) {
                        mpnt = vma_next(&vmi);
                        goto loop_out;
index 9d9095e817928658d2c6d54d5da6f4826ff7c6be..65adc815fc6e63027e1b7f0b23c597475a3fea1e 100644 (file)
@@ -1567,10 +1567,17 @@ static int check_kprobe_address_safe(struct kprobe *p,
        jump_label_lock();
        preempt_disable();
 
-       /* Ensure it is not in reserved area nor out of text */
-       if (!(core_kernel_text((unsigned long) p->addr) ||
-           is_module_text_address((unsigned long) p->addr)) ||
-           in_gate_area_no_mm((unsigned long) p->addr) ||
+       /* Ensure the address is in a text area, and find a module if exists. */
+       *probed_mod = NULL;
+       if (!core_kernel_text((unsigned long) p->addr)) {
+               *probed_mod = __module_text_address((unsigned long) p->addr);
+               if (!(*probed_mod)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+       }
+       /* Ensure it is not in reserved area. */
+       if (in_gate_area_no_mm((unsigned long) p->addr) ||
            within_kprobe_blacklist((unsigned long) p->addr) ||
            jump_label_text_reserved(p->addr, p->addr) ||
            static_call_text_reserved(p->addr, p->addr) ||
@@ -1580,8 +1587,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
                goto out;
        }
 
-       /* Check if 'p' is probing a module. */
-       *probed_mod = __module_text_address((unsigned long) p->addr);
+       /* Get module refcount and reject __init functions for loaded modules. */
        if (*probed_mod) {
                /*
                 * We must hold a refcount of the probed module while updating
index e3ae93bbcb9b50d487727bee16f63f67d66442ac..09f8397bae15fb9c895d060d7708c9bca5ef62f7 100644 (file)
@@ -106,6 +106,12 @@ static void s2idle_enter(void)
        swait_event_exclusive(s2idle_wait_head,
                    s2idle_state == S2IDLE_STATE_WAKE);
 
+       /*
+        * Kick all CPUs to ensure that they resume their timers and restore
+        * consistent system state.
+        */
+       wake_up_all_idle_cpus();
+
        cpus_read_unlock();
 
        raw_spin_lock_irq(&s2idle_lock);
index fb0fdec8719a13ed5fd5eb66d13027e184dce5de..d88b13076b7944e54fefb2802d914f8f7fe1abf5 100644 (file)
@@ -7,6 +7,7 @@
  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
  * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
  */
+#include <linux/compiler.h>
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/hrtimer.h>
@@ -84,7 +85,7 @@ int tick_is_oneshot_available(void)
  */
 static void tick_periodic(int cpu)
 {
-       if (tick_do_timer_cpu == cpu) {
+       if (READ_ONCE(tick_do_timer_cpu) == cpu) {
                raw_spin_lock(&jiffies_lock);
                write_seqcount_begin(&jiffies_seq);
 
@@ -215,8 +216,8 @@ static void tick_setup_device(struct tick_device *td,
                 * If no cpu took the do_timer update, assign it to
                 * this cpu:
                 */
-               if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
-                       tick_do_timer_cpu = cpu;
+               if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
+                       WRITE_ONCE(tick_do_timer_cpu, cpu);
                        tick_next_period = ktime_get();
 #ifdef CONFIG_NO_HZ_FULL
                        /*
@@ -232,7 +233,7 @@ static void tick_setup_device(struct tick_device *td,
                                                !tick_nohz_full_cpu(cpu)) {
                        tick_take_do_timer_from_boot();
                        tick_do_timer_boot_cpu = -1;
-                       WARN_ON(tick_do_timer_cpu != cpu);
+                       WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
 #endif
                }
 
@@ -406,10 +407,10 @@ void tick_assert_timekeeping_handover(void)
 int tick_cpu_dying(unsigned int dying_cpu)
 {
        /*
-        * If the current CPU is the timekeeper, it's the only one that
-        * can safely hand over its duty. Also all online CPUs are in
-        * stop machine, guaranteed not to be idle, therefore it's safe
-        * to pick any online successor.
+        * If the current CPU is the timekeeper, it's the only one that can
+        * safely hand over its duty. Also all online CPUs are in stop
+        * machine, guaranteed not to be idle, therefore there is no
+        * concurrency and it's safe to pick any online successor.
         */
        if (tick_do_timer_cpu == dying_cpu)
                tick_do_timer_cpu = cpumask_first(cpu_online_mask);
index 1331216a9cae749cce5e13b7ff4adcf9ba5fefaa..71a792cd893620eebe73eb1a0fc0c4ff5d454344 100644 (file)
@@ -8,6 +8,7 @@
  *
  *  Started by: Thomas Gleixner and Ingo Molnar
  */
+#include <linux/compiler.h>
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/hrtimer.h>
@@ -204,7 +205,7 @@ static inline void tick_sched_flag_clear(struct tick_sched *ts,
 
 static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 {
-       int cpu = smp_processor_id();
+       int tick_cpu, cpu = smp_processor_id();
 
        /*
         * Check if the do_timer duty was dropped. We don't care about
@@ -216,16 +217,18 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
         * If nohz_full is enabled, this should not happen because the
         * 'tick_do_timer_cpu' CPU never relinquishes.
         */
-       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
-           unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+       tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
+       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) {
 #ifdef CONFIG_NO_HZ_FULL
                WARN_ON_ONCE(tick_nohz_full_running);
 #endif
-               tick_do_timer_cpu = cpu;
+               WRITE_ONCE(tick_do_timer_cpu, cpu);
+               tick_cpu = cpu;
        }
 
        /* Check if jiffies need an update */
-       if (tick_do_timer_cpu == cpu)
+       if (tick_cpu == cpu)
                tick_do_update_jiffies64(now);
 
        /*
@@ -610,7 +613,7 @@ bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
         * timers, workqueues, timekeeping, ...) on behalf of full dynticks
         * CPUs. It must remain online when nohz full is enabled.
         */
-       if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
+       if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
                return false;
        return true;
 }
@@ -891,6 +894,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
        u64 basemono, next_tick, delta, expires;
        unsigned long basejiff;
+       int tick_cpu;
 
        basemono = get_jiffies_update(&basejiff);
        ts->last_jiffies = basejiff;
@@ -947,9 +951,9 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
         * Otherwise we can sleep as long as we want.
         */
        delta = timekeeping_max_deferment();
-       if (cpu != tick_do_timer_cpu &&
-           (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
-            !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
+       tick_cpu = READ_ONCE(tick_do_timer_cpu);
+       if (tick_cpu != cpu &&
+           (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
                delta = KTIME_MAX;
 
        /* Calculate the next expiry time */
@@ -970,6 +974,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
        unsigned long basejiff = ts->last_jiffies;
        u64 basemono = ts->timer_expires_base;
        bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
+       int tick_cpu;
        u64 expires;
 
        /* Make sure we won't be trying to stop it twice in a row. */
@@ -1007,10 +1012,11 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
         * do_timer() never gets invoked. Keep track of the fact that it
         * was the one which had the do_timer() duty last.
         */
-       if (cpu == tick_do_timer_cpu) {
-               tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+       tick_cpu = READ_ONCE(tick_do_timer_cpu);
+       if (tick_cpu == cpu) {
+               WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
                tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
-       } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+       } else if (tick_cpu != TICK_DO_TIMER_NONE) {
                tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
        }
 
@@ -1173,15 +1179,17 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
                return false;
 
        if (tick_nohz_full_enabled()) {
+               int tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
                /*
                 * Keep the tick alive to guarantee timekeeping progression
                 * if there are full dynticks CPUs around
                 */
-               if (tick_do_timer_cpu == cpu)
+               if (tick_cpu == cpu)
                        return false;
 
                /* Should not happen for nohz-full */
-               if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+               if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE))
                        return false;
        }
 
index 61c541c36596d9cdb532d876b56a273f44731928..47345bf1d4a9f7e850db213999c62ecb02f8fea0 100644 (file)
@@ -965,7 +965,7 @@ config FTRACE_RECORD_RECURSION
 
 config FTRACE_RECORD_RECURSION_SIZE
        int "Max number of recursed functions to record"
-       default 128
+       default 128
        depends on FTRACE_RECORD_RECURSION
        help
          This defines the limit of number of functions that can be
index 25476ead681b8411f41d713a77603cdf0653b4ad..6511dc3a00da841bc79554973636056b51c600ff 100644 (file)
@@ -1393,7 +1393,6 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
        old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
        old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
 
-       local_inc(&cpu_buffer->pages_touched);
        /*
         * Just make sure we have seen our old_write and synchronize
         * with any interrupts that come in.
@@ -1430,8 +1429,9 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
                 */
                local_set(&next_page->page->commit, 0);
 
-               /* Again, either we update tail_page or an interrupt does */
-               (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
+               /* Either we update tail_page or an interrupt does */
+               if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page))
+                       local_inc(&cpu_buffer->pages_touched);
        }
 }
 
index 7c364b87352eed92e0f76137091882231f187028..52f75c36bbca4922bec786815bb70ff409f62a61 100644 (file)
@@ -1670,6 +1670,7 @@ static int trace_format_open(struct inode *inode, struct file *file)
        return 0;
 }
 
+#ifdef CONFIG_PERF_EVENTS
 static ssize_t
 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 {
@@ -1684,6 +1685,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
 }
+#endif
 
 static ssize_t
 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
@@ -2152,10 +2154,12 @@ static const struct file_operations ftrace_event_format_fops = {
        .release = seq_release,
 };
 
+#ifdef CONFIG_PERF_EVENTS
 static const struct file_operations ftrace_event_id_fops = {
        .read = event_id_read,
        .llseek = default_llseek,
 };
+#endif
 
 static const struct file_operations ftrace_event_filter_fops = {
        .open = tracing_open_file_tr,
index c59d26068a6401990343e26c03002fcc4022ef98..97f8911ea339e69cc23228ef7d63b4381e883b34 100644 (file)
@@ -61,9 +61,12 @@ static inline void * __init xbc_alloc_mem(size_t size)
        return memblock_alloc(size, SMP_CACHE_BYTES);
 }
 
-static inline void __init xbc_free_mem(void *addr, size_t size)
+static inline void __init xbc_free_mem(void *addr, size_t size, bool early)
 {
-       memblock_free(addr, size);
+       if (early)
+               memblock_free(addr, size);
+       else if (addr)
+               memblock_free_late(__pa(addr), size);
 }
 
 #else /* !__KERNEL__ */
@@ -73,7 +76,7 @@ static inline void *xbc_alloc_mem(size_t size)
        return malloc(size);
 }
 
-static inline void xbc_free_mem(void *addr, size_t size)
+static inline void xbc_free_mem(void *addr, size_t size, bool early)
 {
        free(addr);
 }
@@ -898,19 +901,20 @@ static int __init xbc_parse_tree(void)
 }
 
 /**
- * xbc_exit() - Clean up all parsed bootconfig
+ * _xbc_exit() - Clean up all parsed bootconfig
+ * @early: Set true if this is called before the buddy system is initialized.
  *
  * This clears all data structures of parsed bootconfig on memory.
  * If you need to reuse xbc_init() with new boot config, you can
  * use this.
  */
-void __init xbc_exit(void)
+void __init _xbc_exit(bool early)
 {
-       xbc_free_mem(xbc_data, xbc_data_size);
+       xbc_free_mem(xbc_data, xbc_data_size, early);
        xbc_data = NULL;
        xbc_data_size = 0;
        xbc_node_num = 0;
-       xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX);
+       xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX, early);
        xbc_nodes = NULL;
        brace_index = 0;
 }
@@ -963,7 +967,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
        if (!xbc_nodes) {
                if (emsg)
                        *emsg = "Failed to allocate bootconfig nodes";
-               xbc_exit();
+               _xbc_exit(true);
                return -ENOMEM;
        }
        memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX);
@@ -977,7 +981,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
                        *epos = xbc_err_pos;
                if (emsg)
                        *emsg = xbc_err_msg;
-               xbc_exit();
+               _xbc_exit(true);
        } else
                ret = xbc_node_num;
 
index bf70850035c76f468c7c0af023454bf5bc6716e3..404dba36bae380eeadfd881a8b807dc7eab0037f 100644 (file)
@@ -594,13 +594,15 @@ static void test_ip_fast_csum(struct kunit *test)
 
 static void test_csum_ipv6_magic(struct kunit *test)
 {
-#if defined(CONFIG_NET)
        const struct in6_addr *saddr;
        const struct in6_addr *daddr;
        unsigned int len;
        unsigned char proto;
        __wsum csum;
 
+       if (!IS_ENABLED(CONFIG_NET))
+               return;
+
        const int daddr_offset = sizeof(struct in6_addr);
        const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr);
        const int proto_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) +
@@ -618,7 +620,6 @@ static void test_csum_ipv6_magic(struct kunit *test)
                CHECK_EQ(to_sum16(expected_csum_ipv6_magic[i]),
                         csum_ipv6_magic(saddr, daddr, len, proto, csum));
        }
-#endif /* !CONFIG_NET */
 }
 
 static struct kunit_case __refdata checksum_test_cases[] = {
index 276c12140ee26dac37137e400f4c303d34045bb1..c288df9372ede1cbda1371ae92e20a25ae9ebe91 100644 (file)
@@ -134,7 +134,7 @@ static const test_ubsan_fp test_ubsan_array[] = {
 };
 
 /* Excluded because they Oops the module. */
-static const test_ubsan_fp skip_ubsan_array[] = {
+static __used const test_ubsan_fp skip_ubsan_array[] = {
        test_ubsan_divrem_overflow,
 };
 
index af8edadc05d1b87200050bd34ff3d58ec52abd52..1611e73b1121b1b9031356ccac6b765c60327ba1 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1206,6 +1206,22 @@ static long __get_user_pages(struct mm_struct *mm,
 
                /* first iteration or cross vma bound */
                if (!vma || start >= vma->vm_end) {
+                       /*
+                        * MADV_POPULATE_(READ|WRITE) wants to handle VMA
+                        * lookups+error reporting differently.
+                        */
+                       if (gup_flags & FOLL_MADV_POPULATE) {
+                               vma = vma_lookup(mm, start);
+                               if (!vma) {
+                                       ret = -ENOMEM;
+                                       goto out;
+                               }
+                               if (check_vma_flags(vma, gup_flags)) {
+                                       ret = -EINVAL;
+                                       goto out;
+                               }
+                               goto retry;
+                       }
                        vma = gup_vma_lookup(mm, start);
                        if (!vma && in_gate_area(mm, start)) {
                                ret = get_gate_page(mm, start & PAGE_MASK,
@@ -1685,35 +1701,35 @@ long populate_vma_page_range(struct vm_area_struct *vma,
 }
 
 /*
- * faultin_vma_page_range() - populate (prefault) page tables inside the
- *                           given VMA range readable/writable
+ * faultin_page_range() - populate (prefault) page tables inside the
+ *                       given range readable/writable
  *
  * This takes care of mlocking the pages, too, if VM_LOCKED is set.
  *
- * @vma: target vma
+ * @mm: the mm to populate page tables in
  * @start: start address
  * @end: end address
  * @write: whether to prefault readable or writable
  * @locked: whether the mmap_lock is still held
  *
- * Returns either number of processed pages in the vma, or a negative error
- * code on error (see __get_user_pages()).
+ * Returns either number of processed pages in the MM, or a negative error
+ * code on error (see __get_user_pages()). Note that this function reports
+ * errors related to VMAs, such as incompatible mappings, as expected by
+ * MADV_POPULATE_(READ|WRITE).
  *
- * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
- * covered by the VMA. If it's released, *@locked will be set to 0.
+ * The range must be page-aligned.
+ *
+ * mm->mmap_lock must be held. If it's released, *@locked will be set to 0.
  */
-long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
-                           unsigned long end, bool write, int *locked)
+long faultin_page_range(struct mm_struct *mm, unsigned long start,
+                       unsigned long end, bool write, int *locked)
 {
-       struct mm_struct *mm = vma->vm_mm;
        unsigned long nr_pages = (end - start) / PAGE_SIZE;
        int gup_flags;
        long ret;
 
        VM_BUG_ON(!PAGE_ALIGNED(start));
        VM_BUG_ON(!PAGE_ALIGNED(end));
-       VM_BUG_ON_VMA(start < vma->vm_start, vma);
-       VM_BUG_ON_VMA(end > vma->vm_end, vma);
        mmap_assert_locked(mm);
 
        /*
@@ -1725,19 +1741,13 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
         *                a poisoned page.
         * !FOLL_FORCE: Require proper access permissions.
         */
-       gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE;
+       gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE |
+                   FOLL_MADV_POPULATE;
        if (write)
                gup_flags |= FOLL_WRITE;
 
-       /*
-        * We want to report -EINVAL instead of -EFAULT for any permission
-        * problems or incompatible mappings.
-        */
-       if (check_vma_flags(vma, gup_flags))
-               return -EINVAL;
-
-       ret = __get_user_pages(mm, start, nr_pages, gup_flags,
-                              NULL, locked);
+       ret = __get_user_pages_locked(mm, start, nr_pages, NULL, locked,
+                                     gup_flags);
        lru_add_drain();
        return ret;
 }
index 9859aa4f755380a88013c70791e6e6df90ce861f..89f58c7603b255feb3dceaccfee603a503c40e49 100644 (file)
@@ -2259,9 +2259,6 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
                        goto unlock_ptls;
                }
 
-               folio_move_anon_rmap(src_folio, dst_vma);
-               WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
-
                src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd);
                /* Folio got pinned from under us. Put it back and fail the move. */
                if (folio_maybe_dma_pinned(src_folio)) {
@@ -2270,6 +2267,9 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
                        goto unlock_ptls;
                }
 
+               folio_move_anon_rmap(src_folio, dst_vma);
+               WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+
                _dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot);
                /* Follow mremap() behavior and treat the entry dirty after the move */
                _dst_pmd = pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma);
index 23ef240ba48a60a77102f7bf1beb2e76a987486d..31d00eee028f1179b99405f0fdd3461b16e17d2e 100644 (file)
@@ -7044,9 +7044,13 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
                        if (!pte_same(pte, newpte))
                                set_huge_pte_at(mm, address, ptep, newpte, psize);
                } else if (unlikely(is_pte_marker(pte))) {
-                       /* No other markers apply for now. */
-                       WARN_ON_ONCE(!pte_marker_uffd_wp(pte));
-                       if (uffd_wp_resolve)
+                       /*
+                        * Do nothing on a poison marker; page is
+                        * corrupted, permissons do not apply.  Here
+                        * pte_marker_uffd_wp()==true implies !poison
+                        * because they're mutual exclusive.
+                        */
+                       if (pte_marker_uffd_wp(pte) && uffd_wp_resolve)
                                /* Safe to modify directly (non-present->none). */
                                huge_pte_clear(mm, address, ptep, psize);
                } else if (!huge_pte_none(pte)) {
index 7e486f2c502cee245991e2468a0655228a81aef5..07ad2675a88b4798b140d3af3a303a20519cddc3 100644 (file)
@@ -686,9 +686,8 @@ struct anon_vma *folio_anon_vma(struct folio *folio);
 void unmap_mapping_folio(struct folio *folio);
 extern long populate_vma_page_range(struct vm_area_struct *vma,
                unsigned long start, unsigned long end, int *locked);
-extern long faultin_vma_page_range(struct vm_area_struct *vma,
-                                  unsigned long start, unsigned long end,
-                                  bool write, int *locked);
+extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
+               unsigned long end, bool write, int *locked);
 extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
                               unsigned long bytes);
 
@@ -1127,10 +1126,13 @@ enum {
        FOLL_FAST_ONLY = 1 << 20,
        /* allow unlocking the mmap lock */
        FOLL_UNLOCKABLE = 1 << 21,
+       /* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */
+       FOLL_MADV_POPULATE = 1 << 22,
 };
 
 #define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \
-                           FOLL_FAST_ONLY | FOLL_UNLOCKABLE)
+                           FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \
+                           FOLL_MADV_POPULATE)
 
 /*
  * Indicates for which pages that are write-protected in the page table,
index 44a498c94158c882c624eac2e29a5f07d854e322..1a073fcc4c0c021496667619a20f6ee1afaef7c0 100644 (file)
@@ -908,27 +908,14 @@ static long madvise_populate(struct vm_area_struct *vma,
 {
        const bool write = behavior == MADV_POPULATE_WRITE;
        struct mm_struct *mm = vma->vm_mm;
-       unsigned long tmp_end;
        int locked = 1;
        long pages;
 
        *prev = vma;
 
        while (start < end) {
-               /*
-                * We might have temporarily dropped the lock. For example,
-                * our VMA might have been split.
-                */
-               if (!vma || start >= vma->vm_end) {
-                       vma = vma_lookup(mm, start);
-                       if (!vma)
-                               return -ENOMEM;
-               }
-
-               tmp_end = min_t(unsigned long, end, vma->vm_end);
                /* Populate (prefault) page tables readable/writable. */
-               pages = faultin_vma_page_range(vma, start, tmp_end, write,
-                                              &locked);
+               pages = faultin_page_range(mm, start, end, write, &locked);
                if (!locked) {
                        mmap_read_lock(mm);
                        locked = 1;
@@ -949,7 +936,7 @@ static long madvise_populate(struct vm_area_struct *vma,
                                pr_warn_once("%s: unhandled return value: %ld\n",
                                             __func__, pages);
                                fallthrough;
-                       case -ENOMEM:
+                       case -ENOMEM: /* No VMA or out of memory. */
                                return -ENOMEM;
                        }
                }
index 9349948f1abfd120977706bbda23456999f057bc..9e62a00b46ddee5899f85cfc252dabd7c0d04121 100644 (file)
@@ -154,11 +154,23 @@ static int __page_handle_poison(struct page *page)
 {
        int ret;
 
-       zone_pcp_disable(page_zone(page));
+       /*
+        * zone_pcp_disable() can't be used here. It will
+        * hold pcp_batch_high_lock and dissolve_free_huge_page() might hold
+        * cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap
+        * optimization is enabled. This will break current lock dependency
+        * chain and leads to deadlock.
+        * Disabling pcp before dissolving the page was a deterministic
+        * approach because we made sure that those pages cannot end up in any
+        * PCP list. Draining PCP lists expels those pages to the buddy system,
+        * but nothing guarantees that those pages do not get back to a PCP
+        * queue if we need to refill those.
+        */
        ret = dissolve_free_huge_page(page);
-       if (!ret)
+       if (!ret) {
+               drain_all_pages(page_zone(page));
                ret = take_page_off_buddy(page);
-       zone_pcp_enable(page_zone(page));
+       }
 
        return ret;
 }
index d17d1351ec84af6ae4f49dc34d4c27c56bf77468..742f432e5bf06f560abdc675d658401f76df5237 100644 (file)
@@ -118,7 +118,6 @@ static __init void init_page_owner(void)
        register_dummy_stack();
        register_failure_stack();
        register_early_stack();
-       static_branch_enable(&page_owner_inited);
        init_early_allocated_pages();
        /* Initialize dummy and failure stacks and link them to stack_list */
        dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle);
@@ -129,6 +128,7 @@ static __init void init_page_owner(void)
                refcount_set(&failure_stack.stack_record->count, 1);
        dummy_stack.next = &failure_stack;
        stack_list = &dummy_stack;
+       static_branch_enable(&page_owner_inited);
 }
 
 struct page_ext_operations page_owner_ops = {
@@ -196,7 +196,8 @@ static void add_stack_record_to_list(struct stack_record *stack_record,
        spin_unlock_irqrestore(&stack_list_lock, flags);
 }
 
-static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
+static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask,
+                                  int nr_base_pages)
 {
        struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
 
@@ -217,20 +218,74 @@ static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
                        /* Add the new stack_record to our list */
                        add_stack_record_to_list(stack_record, gfp_mask);
        }
-       refcount_inc(&stack_record->count);
+       refcount_add(nr_base_pages, &stack_record->count);
 }
 
-static void dec_stack_record_count(depot_stack_handle_t handle)
+static void dec_stack_record_count(depot_stack_handle_t handle,
+                                  int nr_base_pages)
 {
        struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
 
-       if (stack_record)
-               refcount_dec(&stack_record->count);
+       if (!stack_record)
+               return;
+
+       if (refcount_sub_and_test(nr_base_pages, &stack_record->count))
+               pr_warn("%s: refcount went to 0 for %u handle\n", __func__,
+                       handle);
 }
 
-void __reset_page_owner(struct page *page, unsigned short order)
+static inline void __update_page_owner_handle(struct page_ext *page_ext,
+                                             depot_stack_handle_t handle,
+                                             unsigned short order,
+                                             gfp_t gfp_mask,
+                                             short last_migrate_reason, u64 ts_nsec,
+                                             pid_t pid, pid_t tgid, char *comm)
 {
        int i;
+       struct page_owner *page_owner;
+
+       for (i = 0; i < (1 << order); i++) {
+               page_owner = get_page_owner(page_ext);
+               page_owner->handle = handle;
+               page_owner->order = order;
+               page_owner->gfp_mask = gfp_mask;
+               page_owner->last_migrate_reason = last_migrate_reason;
+               page_owner->pid = pid;
+               page_owner->tgid = tgid;
+               page_owner->ts_nsec = ts_nsec;
+               strscpy(page_owner->comm, comm,
+                       sizeof(page_owner->comm));
+               __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+               __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+               page_ext = page_ext_next(page_ext);
+       }
+}
+
+static inline void __update_page_owner_free_handle(struct page_ext *page_ext,
+                                                  depot_stack_handle_t handle,
+                                                  unsigned short order,
+                                                  pid_t pid, pid_t tgid,
+                                                  u64 free_ts_nsec)
+{
+       int i;
+       struct page_owner *page_owner;
+
+       for (i = 0; i < (1 << order); i++) {
+               page_owner = get_page_owner(page_ext);
+               /* Only __reset_page_owner() wants to clear the bit */
+               if (handle) {
+                       __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+                       page_owner->free_handle = handle;
+               }
+               page_owner->free_ts_nsec = free_ts_nsec;
+               page_owner->free_pid = current->pid;
+               page_owner->free_tgid = current->tgid;
+               page_ext = page_ext_next(page_ext);
+       }
+}
+
+void __reset_page_owner(struct page *page, unsigned short order)
+{
        struct page_ext *page_ext;
        depot_stack_handle_t handle;
        depot_stack_handle_t alloc_handle;
@@ -245,16 +300,10 @@ void __reset_page_owner(struct page *page, unsigned short order)
        alloc_handle = page_owner->handle;
 
        handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
-       for (i = 0; i < (1 << order); i++) {
-               __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
-               page_owner->free_handle = handle;
-               page_owner->free_ts_nsec = free_ts_nsec;
-               page_owner->free_pid = current->pid;
-               page_owner->free_tgid = current->tgid;
-               page_ext = page_ext_next(page_ext);
-               page_owner = get_page_owner(page_ext);
-       }
+       __update_page_owner_free_handle(page_ext, handle, order, current->pid,
+                                       current->tgid, free_ts_nsec);
        page_ext_put(page_ext);
+
        if (alloc_handle != early_handle)
                /*
                 * early_handle is being set as a handle for all those
@@ -263,39 +312,14 @@ void __reset_page_owner(struct page *page, unsigned short order)
                 * the machinery is not ready yet, we cannot decrement
                 * their refcount either.
                 */
-               dec_stack_record_count(alloc_handle);
-}
-
-static inline void __set_page_owner_handle(struct page_ext *page_ext,
-                                       depot_stack_handle_t handle,
-                                       unsigned short order, gfp_t gfp_mask)
-{
-       struct page_owner *page_owner;
-       int i;
-       u64 ts_nsec = local_clock();
-
-       for (i = 0; i < (1 << order); i++) {
-               page_owner = get_page_owner(page_ext);
-               page_owner->handle = handle;
-               page_owner->order = order;
-               page_owner->gfp_mask = gfp_mask;
-               page_owner->last_migrate_reason = -1;
-               page_owner->pid = current->pid;
-               page_owner->tgid = current->tgid;
-               page_owner->ts_nsec = ts_nsec;
-               strscpy(page_owner->comm, current->comm,
-                       sizeof(page_owner->comm));
-               __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
-               __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
-
-               page_ext = page_ext_next(page_ext);
-       }
+               dec_stack_record_count(alloc_handle, 1 << order);
 }
 
 noinline void __set_page_owner(struct page *page, unsigned short order,
                                        gfp_t gfp_mask)
 {
        struct page_ext *page_ext;
+       u64 ts_nsec = local_clock();
        depot_stack_handle_t handle;
 
        handle = save_stack(gfp_mask);
@@ -303,9 +327,11 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
        page_ext = page_ext_get(page);
        if (unlikely(!page_ext))
                return;
-       __set_page_owner_handle(page_ext, handle, order, gfp_mask);
+       __update_page_owner_handle(page_ext, handle, order, gfp_mask, -1,
+                                  current->pid, current->tgid, ts_nsec,
+                                  current->comm);
        page_ext_put(page_ext);
-       inc_stack_record_count(handle, gfp_mask);
+       inc_stack_record_count(handle, gfp_mask, 1 << order);
 }
 
 void __set_page_owner_migrate_reason(struct page *page, int reason)
@@ -340,9 +366,12 @@ void __split_page_owner(struct page *page, int old_order, int new_order)
 
 void __folio_copy_owner(struct folio *newfolio, struct folio *old)
 {
+       int i;
        struct page_ext *old_ext;
        struct page_ext *new_ext;
-       struct page_owner *old_page_owner, *new_page_owner;
+       struct page_owner *old_page_owner;
+       struct page_owner *new_page_owner;
+       depot_stack_handle_t migrate_handle;
 
        old_ext = page_ext_get(&old->page);
        if (unlikely(!old_ext))
@@ -356,30 +385,32 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
 
        old_page_owner = get_page_owner(old_ext);
        new_page_owner = get_page_owner(new_ext);
-       new_page_owner->order = old_page_owner->order;
-       new_page_owner->gfp_mask = old_page_owner->gfp_mask;
-       new_page_owner->last_migrate_reason =
-               old_page_owner->last_migrate_reason;
-       new_page_owner->handle = old_page_owner->handle;
-       new_page_owner->pid = old_page_owner->pid;
-       new_page_owner->tgid = old_page_owner->tgid;
-       new_page_owner->free_pid = old_page_owner->free_pid;
-       new_page_owner->free_tgid = old_page_owner->free_tgid;
-       new_page_owner->ts_nsec = old_page_owner->ts_nsec;
-       new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
-       strcpy(new_page_owner->comm, old_page_owner->comm);
-
+       migrate_handle = new_page_owner->handle;
+       __update_page_owner_handle(new_ext, old_page_owner->handle,
+                                  old_page_owner->order, old_page_owner->gfp_mask,
+                                  old_page_owner->last_migrate_reason,
+                                  old_page_owner->ts_nsec, old_page_owner->pid,
+                                  old_page_owner->tgid, old_page_owner->comm);
+       /*
+        * Do not proactively clear PAGE_EXT_OWNER{_ALLOCATED} bits as the folio
+        * will be freed after migration. Keep them until then as they may be
+        * useful.
+        */
+       __update_page_owner_free_handle(new_ext, 0, old_page_owner->order,
+                                       old_page_owner->free_pid,
+                                       old_page_owner->free_tgid,
+                                       old_page_owner->free_ts_nsec);
        /*
-        * We don't clear the bit on the old folio as it's going to be freed
-        * after migration. Until then, the info can be useful in case of
-        * a bug, and the overall stats will be off a bit only temporarily.
-        * Also, migrate_misplaced_transhuge_page() can still fail the
-        * migration and then we want the old folio to retain the info. But
-        * in that case we also don't need to explicitly clear the info from
-        * the new page, which will be freed.
+        * We linked the original stack to the new folio, we need to do the same
+        * for the new one and the old folio otherwise there will be an imbalance
+        * when subtracting those pages from the stack.
         */
-       __set_bit(PAGE_EXT_OWNER, &new_ext->flags);
-       __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
+       for (i = 0; i < (1 << new_page_owner->order); i++) {
+               old_page_owner->handle = migrate_handle;
+               old_ext = page_ext_next(old_ext);
+               old_page_owner = get_page_owner(old_ext);
+       }
+
        page_ext_put(new_ext);
        page_ext_put(old_ext);
 }
@@ -787,8 +818,9 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
                                goto ext_put_continue;
 
                        /* Found early allocated page */
-                       __set_page_owner_handle(page_ext, early_handle,
-                                               0, 0);
+                       __update_page_owner_handle(page_ext, early_handle, 0, 0,
+                                                  -1, local_clock(), current->pid,
+                                                  current->tgid, current->comm);
                        count++;
 ext_put_continue:
                        page_ext_put(page_ext);
@@ -840,13 +872,11 @@ static void *stack_start(struct seq_file *m, loff_t *ppos)
                 * value of stack_list.
                 */
                stack = smp_load_acquire(&stack_list);
+               m->private = stack;
        } else {
                stack = m->private;
-               stack = stack->next;
        }
 
-       m->private = stack;
-
        return stack;
 }
 
@@ -861,11 +891,11 @@ static void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
        return stack;
 }
 
-static unsigned long page_owner_stack_threshold;
+static unsigned long page_owner_pages_threshold;
 
 static int stack_print(struct seq_file *m, void *v)
 {
-       int i, stack_count;
+       int i, nr_base_pages;
        struct stack *stack = v;
        unsigned long *entries;
        unsigned long nr_entries;
@@ -876,14 +906,14 @@ static int stack_print(struct seq_file *m, void *v)
 
        nr_entries = stack_record->size;
        entries = stack_record->entries;
-       stack_count = refcount_read(&stack_record->count) - 1;
+       nr_base_pages = refcount_read(&stack_record->count) - 1;
 
-       if (stack_count < 1 || stack_count < page_owner_stack_threshold)
+       if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold)
                return 0;
 
        for (i = 0; i < nr_entries; i++)
                seq_printf(m, " %pS\n", (void *)entries[i]);
-       seq_printf(m, "stack_count: %d\n\n", stack_count);
+       seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages);
 
        return 0;
 }
@@ -913,13 +943,13 @@ static const struct file_operations page_owner_stack_operations = {
 
 static int page_owner_threshold_get(void *data, u64 *val)
 {
-       *val = READ_ONCE(page_owner_stack_threshold);
+       *val = READ_ONCE(page_owner_pages_threshold);
        return 0;
 }
 
 static int page_owner_threshold_set(void *data, u64 val)
 {
-       WRITE_ONCE(page_owner_stack_threshold, val);
+       WRITE_ONCE(page_owner_pages_threshold, val);
        return 0;
 }
 
index 0aad0d9a621b80e7a3f758125806bfb64e984c12..94ab99b6b574a461e34bb875fdec497ad24728ce 100644 (file)
@@ -748,12 +748,6 @@ static long shmem_unused_huge_count(struct super_block *sb,
 
 #define shmem_huge SHMEM_HUGE_DENY
 
-bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
-                  struct mm_struct *mm, unsigned long vm_flags)
-{
-       return false;
-}
-
 static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
                struct shrink_control *sc, unsigned long nr_to_split)
 {
index b95c36765d045c0486068362fbd949ab2b1866e8..2243cec18ecc866eb7877ae933828f1eeadc980a 100644 (file)
@@ -3948,7 +3948,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface)
 
        spin_lock_bh(&bat_priv->tt.commit_lock);
 
-       while (true) {
+       while (timeout) {
                table_size = batadv_tt_local_table_transmit_size(bat_priv);
                if (packet_size_max >= table_size)
                        break;
index 00e02138003ecefef75714c950056ced5ccd5fda..efea25eb56ce036364c7325916326b687180bbcf 100644 (file)
@@ -105,8 +105,10 @@ void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
        if (hdev->req_status == HCI_REQ_PEND) {
                hdev->req_result = result;
                hdev->req_status = HCI_REQ_DONE;
-               if (skb)
+               if (skb) {
+                       kfree_skb(hdev->req_skb);
                        hdev->req_skb = skb_get(skb);
+               }
                wake_up_interruptible(&hdev->req_wait_q);
        }
 }
index 4ee1b976678b2525ff135fb947221b93923f2aee..703b84bd48d5befc51d787bcd6c04dcbcff61675 100644 (file)
@@ -1946,10 +1946,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
 
        switch (optname) {
        case HCI_DATA_DIR:
-               if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+               if (err)
                        break;
-               }
 
                if (opt)
                        hci_pi(sk)->cmsg_mask |= HCI_CMSG_DIR;
@@ -1958,10 +1957,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
                break;
 
        case HCI_TIME_STAMP:
-               if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+               if (err)
                        break;
-               }
 
                if (opt)
                        hci_pi(sk)->cmsg_mask |= HCI_CMSG_TSTAMP;
@@ -1979,11 +1977,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
                        uf.event_mask[1] = *((u32 *) f->event_mask + 1);
                }
 
-               len = min_t(unsigned int, len, sizeof(uf));
-               if (copy_from_sockptr(&uf, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&uf, sizeof(uf), optval, len);
+               if (err)
                        break;
-               }
 
                if (!capable(CAP_NET_RAW)) {
                        uf.type_mask &= hci_sec_filter.type_mask;
@@ -2042,10 +2038,9 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
                        goto done;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+               if (err)
                        break;
-               }
 
                hci_pi(sk)->mtu = opt;
                break;
index 8fe02921adf15d4b968be310415bf9383ae3d63d..c5d8799046ccffbf798e6f47ffaef3dddcb364ca 100644 (file)
@@ -2814,8 +2814,8 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
                                if (qos->bcast.in.phy & BT_ISO_PHY_CODED) {
                                        cp->scanning_phys |= LE_SCAN_PHY_CODED;
                                        hci_le_scan_phy_params(phy, type,
-                                                              interval,
-                                                              window);
+                                                              interval * 3,
+                                                              window * 3);
                                        num_phy++;
                                        phy++;
                                }
@@ -2835,7 +2835,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
 
        if (scan_coded(hdev)) {
                cp->scanning_phys |= LE_SCAN_PHY_CODED;
-               hci_le_scan_phy_params(phy, type, interval, window);
+               hci_le_scan_phy_params(phy, type, interval * 3, window * 3);
                num_phy++;
                phy++;
        }
index c8793e57f4b547d5bd465b80575143083b867624..ef0cc80b4c0cc1ff4043d05c05fc0c429a64a6c2 100644 (file)
@@ -1451,8 +1451,8 @@ static bool check_ucast_qos(struct bt_iso_qos *qos)
 
 static bool check_bcast_qos(struct bt_iso_qos *qos)
 {
-       if (qos->bcast.sync_factor == 0x00)
-               return false;
+       if (!qos->bcast.sync_factor)
+               qos->bcast.sync_factor = 0x01;
 
        if (qos->bcast.packing > 0x01)
                return false;
@@ -1475,6 +1475,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
        if (qos->bcast.skip > 0x01f3)
                return false;
 
+       if (!qos->bcast.sync_timeout)
+               qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
        if (qos->bcast.sync_timeout < 0x000a || qos->bcast.sync_timeout > 0x4000)
                return false;
 
@@ -1484,6 +1487,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
        if (qos->bcast.mse > 0x1f)
                return false;
 
+       if (!qos->bcast.timeout)
+               qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
        if (qos->bcast.timeout < 0x000a || qos->bcast.timeout > 0x4000)
                return false;
 
@@ -1494,7 +1500,7 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
                               sockptr_t optval, unsigned int optlen)
 {
        struct sock *sk = sock->sk;
-       int len, err = 0;
+       int err = 0;
        struct bt_iso_qos qos = default_qos;
        u32 opt;
 
@@ -1509,10 +1515,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt)
                        set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -1521,10 +1526,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
                break;
 
        case BT_PKT_STATUS:
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt)
                        set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
@@ -1539,17 +1543,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               len = min_t(unsigned int, sizeof(qos), optlen);
-
-               if (copy_from_sockptr(&qos, optval, len)) {
-                       err = -EFAULT;
-                       break;
-               }
-
-               if (len == sizeof(qos.ucast) && !check_ucast_qos(&qos)) {
-                       err = -EINVAL;
+               err = bt_copy_from_sockptr(&qos, sizeof(qos), optval, optlen);
+               if (err)
                        break;
-               }
 
                iso_pi(sk)->qos = qos;
                iso_pi(sk)->qos_user_set = true;
@@ -1564,18 +1560,16 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
                }
 
                if (optlen > sizeof(iso_pi(sk)->base)) {
-                       err = -EOVERFLOW;
+                       err = -EINVAL;
                        break;
                }
 
-               len = min_t(unsigned int, sizeof(iso_pi(sk)->base), optlen);
-
-               if (copy_from_sockptr(iso_pi(sk)->base, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(iso_pi(sk)->base, optlen, optval,
+                                          optlen);
+               if (err)
                        break;
-               }
 
-               iso_pi(sk)->base_len = len;
+               iso_pi(sk)->base_len = optlen;
 
                break;
 
index 467b242d8be071da16bd48d04e1520ce1e1aa8a6..dc089740879363dd0d6d973dcdb4fc05cfc7070a 100644 (file)
@@ -4054,8 +4054,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
                return -EPROTO;
 
        hci_dev_lock(hdev);
-       if (hci_dev_test_flag(hdev, HCI_MGMT) &&
-           !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
+       if (hci_dev_test_flag(hdev, HCI_MGMT))
                mgmt_device_connected(hdev, hcon, NULL, 0);
        hci_dev_unlock(hdev);
 
index 4287aa6cc988e3ce34849d1f317be8fd8645832c..e7d810b23082f5ffd8ea4b506366b2684f2e1ece 100644 (file)
@@ -727,7 +727,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
        struct sock *sk = sock->sk;
        struct l2cap_chan *chan = l2cap_pi(sk)->chan;
        struct l2cap_options opts;
-       int len, err = 0;
+       int err = 0;
        u32 opt;
 
        BT_DBG("sk %p", sk);
@@ -754,11 +754,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
                opts.max_tx   = chan->max_tx;
                opts.txwin_size = chan->tx_win;
 
-               len = min_t(unsigned int, sizeof(opts), optlen);
-               if (copy_from_sockptr(&opts, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opts, sizeof(opts), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opts.txwin_size > L2CAP_DEFAULT_EXT_WINDOW) {
                        err = -EINVAL;
@@ -801,10 +799,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
                break;
 
        case L2CAP_LM:
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt & L2CAP_LM_FIPS) {
                        err = -EINVAL;
@@ -885,7 +882,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
        struct bt_security sec;
        struct bt_power pwr;
        struct l2cap_conn *conn;
-       int len, err = 0;
+       int err = 0;
        u32 opt;
        u16 mtu;
        u8 mode;
@@ -911,11 +908,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
 
                sec.level = BT_SECURITY_LOW;
 
-               len = min_t(unsigned int, sizeof(sec), optlen);
-               if (copy_from_sockptr(&sec, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (sec.level < BT_SECURITY_LOW ||
                    sec.level > BT_SECURITY_FIPS) {
@@ -960,10 +955,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt) {
                        set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -975,10 +969,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
                break;
 
        case BT_FLUSHABLE:
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt > BT_FLUSHABLE_ON) {
                        err = -EINVAL;
@@ -1010,11 +1003,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
 
                pwr.force_active = BT_POWER_FORCE_ACTIVE_ON;
 
-               len = min_t(unsigned int, sizeof(pwr), optlen);
-               if (copy_from_sockptr(&pwr, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&pwr, sizeof(pwr), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (pwr.force_active)
                        set_bit(FLAG_FORCE_ACTIVE, &chan->flags);
@@ -1023,10 +1014,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
                break;
 
        case BT_CHANNEL_POLICY:
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                err = -EOPNOTSUPP;
                break;
@@ -1055,10 +1045,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&mtu, optval, sizeof(u16))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&mtu, sizeof(mtu), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (chan->mode == L2CAP_MODE_EXT_FLOWCTL &&
                    sk->sk_state == BT_CONNECTED)
@@ -1086,10 +1075,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&mode, optval, sizeof(u8))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&mode, sizeof(mode), optval, optlen);
+               if (err)
                        break;
-               }
 
                BT_DBG("mode %u", mode);
 
index b54e8a530f55a1ff9547a2a5546db34059ebd672..29aa07e9db9d7122bac6ac0c6dfcd76765f11cb8 100644 (file)
@@ -629,7 +629,7 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname,
 
        switch (optname) {
        case RFCOMM_LM:
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+               if (bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen)) {
                        err = -EFAULT;
                        break;
                }
@@ -664,7 +664,6 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
        struct sock *sk = sock->sk;
        struct bt_security sec;
        int err = 0;
-       size_t len;
        u32 opt;
 
        BT_DBG("sk %p", sk);
@@ -686,11 +685,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
 
                sec.level = BT_SECURITY_LOW;
 
-               len = min_t(unsigned int, sizeof(sec), optlen);
-               if (copy_from_sockptr(&sec, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (sec.level > BT_SECURITY_HIGH) {
                        err = -EINVAL;
@@ -706,10 +703,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt)
                        set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
index 43daf965a01e4ac5c9329150080b00dcd63c7e1c..368e026f4d15ca4711737af941ad30c7b48b827f 100644 (file)
@@ -824,7 +824,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
                               sockptr_t optval, unsigned int optlen)
 {
        struct sock *sk = sock->sk;
-       int len, err = 0;
+       int err = 0;
        struct bt_voice voice;
        u32 opt;
        struct bt_codecs *codecs;
@@ -843,10 +843,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt)
                        set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -863,11 +862,10 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
 
                voice.setting = sco_pi(sk)->setting;
 
-               len = min_t(unsigned int, sizeof(voice), optlen);
-               if (copy_from_sockptr(&voice, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&voice, sizeof(voice), optval,
+                                          optlen);
+               if (err)
                        break;
-               }
 
                /* Explicitly check for these values */
                if (voice.setting != BT_VOICE_TRANSPARENT &&
@@ -890,10 +888,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
                break;
 
        case BT_PKT_STATUS:
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt)
                        set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
@@ -934,9 +931,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(buffer, optval, optlen)) {
+               err = bt_copy_from_sockptr(buffer, optlen, optval, optlen);
+               if (err) {
                        hci_dev_put(hdev);
-                       err = -EFAULT;
                        break;
                }
 
index f21097e734827891f87adb9d0a1f7cebf9f15380..ceaa5a89b947fc574ee2a05003db3de7cc9797b1 100644 (file)
@@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
        return netif_receive_skb(skb);
 }
 
-static int br_pass_frame_up(struct sk_buff *skb)
+static int br_pass_frame_up(struct sk_buff *skb, bool promisc)
 {
        struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
        struct net_bridge *br = netdev_priv(brdev);
@@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb)
        br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
                           BR_MCAST_DIR_TX);
 
+       BR_INPUT_SKB_CB(skb)->promisc = promisc;
+
        return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
                       dev_net(indev), NULL, skb, indev, NULL,
                       br_netif_receive_skb);
@@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
        struct net_bridge_mcast *brmctx;
        struct net_bridge_vlan *vlan;
        struct net_bridge *br;
+       bool promisc;
        u16 vid = 0;
        u8 state;
 
@@ -137,7 +140,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
        if (p->flags & BR_LEARNING)
                br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
 
-       local_rcv = !!(br->dev->flags & IFF_PROMISC);
+       promisc = !!(br->dev->flags & IFF_PROMISC);
+       local_rcv = promisc;
+
        if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
                /* by definition the broadcast is also a multicast address */
                if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) {
@@ -200,7 +205,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
                unsigned long now = jiffies;
 
                if (test_bit(BR_FDB_LOCAL, &dst->flags))
-                       return br_pass_frame_up(skb);
+                       return br_pass_frame_up(skb, false);
 
                if (now != dst->used)
                        dst->used = now;
@@ -213,7 +218,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
        }
 
        if (local_rcv)
-               return br_pass_frame_up(skb);
+               return br_pass_frame_up(skb, promisc);
 
 out:
        return 0;
@@ -386,6 +391,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
                                goto forward;
                }
 
+               BR_INPUT_SKB_CB(skb)->promisc = false;
+
                /* The else clause should be hit when nf_hook():
                 *   - returns < 0 (drop/error)
                 *   - returns = 0 (stolen/nf_queue)
index 35e10c5a766d550e0c5cb85cf5a0c4835b52a89d..22e35623c148ac41056d7c24e3996227726ec1a6 100644 (file)
@@ -600,11 +600,17 @@ static unsigned int br_nf_local_in(void *priv,
                                   struct sk_buff *skb,
                                   const struct nf_hook_state *state)
 {
+       bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
        struct nf_conntrack *nfct = skb_nfct(skb);
        const struct nf_ct_hook *ct_hook;
        struct nf_conn *ct;
        int ret;
 
+       if (promisc) {
+               nf_reset_ct(skb);
+               return NF_ACCEPT;
+       }
+
        if (!nfct || skb->pkt_type == PACKET_HOST)
                return NF_ACCEPT;
 
index 86ea5e6689b5ce49a4b71b383893d2ef5b53d110..d4bedc87b1d8f1bcf96c714fc80078227470550a 100644 (file)
@@ -589,6 +589,7 @@ struct br_input_skb_cb {
 #endif
        u8 proxyarp_replied:1;
        u8 src_port_isolated:1;
+       u8 promisc:1;
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
        u8 vlan_filtered:1;
 #endif
index 6f877e31709bad3646ea15bf3a96999ed275bdc1..c3c51b9a68265b443326432274e7fd75675e0e28 100644 (file)
@@ -294,18 +294,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
 static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
                                    const struct nf_hook_state *state)
 {
-       enum ip_conntrack_info ctinfo;
+       bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
+       struct nf_conntrack *nfct = skb_nfct(skb);
        struct nf_conn *ct;
 
-       if (skb->pkt_type == PACKET_HOST)
+       if (promisc) {
+               nf_reset_ct(skb);
+               return NF_ACCEPT;
+       }
+
+       if (!nfct || skb->pkt_type == PACKET_HOST)
                return NF_ACCEPT;
 
        /* nf_conntrack_confirm() cannot handle concurrent clones,
         * this happens for broad/multicast frames with e.g. macvlan on top
         * of the bridge device.
         */
-       ct = nf_ct_get(skb, &ctinfo);
-       if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+       ct = container_of(nfct, struct nf_conn, ct_general);
+       if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
                return NF_ACCEPT;
 
        /* let inet prerouting call conntrack again */
index 984ff8b9d0e1aa5646a7237a8cf0b0a21c2aa559..331848eca7d3109d8043bba1f99e84d8e46d5507 100644 (file)
@@ -3775,6 +3775,10 @@ no_lock_out:
                return rc;
        }
 
+       if (unlikely(READ_ONCE(q->owner) == smp_processor_id())) {
+               kfree_skb_reason(skb, SKB_DROP_REASON_TC_RECLASSIFY_LOOP);
+               return NET_XMIT_DROP;
+       }
        /*
         * Heuristic to force contended enqueues to serialize on a
         * separate lock before trying to get qdisc main lock.
@@ -3814,7 +3818,9 @@ no_lock_out:
                qdisc_run_end(q);
                rc = NET_XMIT_SUCCESS;
        } else {
+               WRITE_ONCE(q->owner, smp_processor_id());
                rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
+               WRITE_ONCE(q->owner, -1);
                if (qdisc_run_begin(q)) {
                        if (unlikely(contended)) {
                                spin_unlock(&q->busylock);
index 48741352a88a72e0232977cc9f2cf172f45df89b..c484b1c0fc00a79a45a1c3e7fde230ce59cb67a3 100644 (file)
@@ -1050,6 +1050,11 @@ next:
                        e++;
                }
        }
+
+       /* Don't let NLM_DONE coalesce into a message, even if it could.
+        * Some user space expects NLM_DONE in a separate recv().
+        */
+       err = skb->len;
 out:
 
        cb->args[1] = e;
index b150c9929b12e86219a55c77da480e0c538b3449..14365b20f1c5c09964dd7024060116737f22cb63 100644 (file)
@@ -966,6 +966,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters == 0)
                return -EINVAL;
+       if ((u64)len < (u64)tmp.size + sizeof(tmp))
+               return -EINVAL;
 
        tmp.name[sizeof(tmp.name)-1] = 0;
 
@@ -1266,6 +1268,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters == 0)
                return -EINVAL;
+       if ((u64)len < (u64)tmp.size + sizeof(tmp))
+               return -EINVAL;
 
        tmp.name[sizeof(tmp.name)-1] = 0;
 
index 487670759578168c5ff53bce6642898fc41936b3..fe89a056eb06c43743b2d7449e59f4e9360ba223 100644 (file)
@@ -1118,6 +1118,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters == 0)
                return -EINVAL;
+       if ((u64)len < (u64)tmp.size + sizeof(tmp))
+               return -EINVAL;
 
        tmp.name[sizeof(tmp.name)-1] = 0;
 
@@ -1504,6 +1506,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters == 0)
                return -EINVAL;
+       if ((u64)len < (u64)tmp.size + sizeof(tmp))
+               return -EINVAL;
 
        tmp.name[sizeof(tmp.name)-1] = 0;
 
index c8f76f56dc1653371ca39663f29cc798b062e60d..d36ace160d426f6224f8e692f3b438ae863bb9b9 100644 (file)
@@ -926,13 +926,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
                peer->rate_last = jiffies;
                ++peer->n_redirects;
-#ifdef CONFIG_IP_ROUTE_VERBOSE
-               if (log_martians &&
+               if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians &&
                    peer->n_redirects == ip_rt_redirect_number)
                        net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
                                             &ip_hdr(skb)->saddr, inet_iif(skb),
                                             &ip_hdr(skb)->daddr, &gw);
-#endif
        }
 out_put_peer:
        inet_putpeer(peer);
index 92db9b474f2bdb0a2efc91ab2be6c83c8a46372d..779aa6ecdd499b6acd3aa8e14d73735f28b94649 100644 (file)
@@ -2091,9 +2091,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
                if (ipv6_addr_equal(&ifp->addr, addr)) {
                        if (!dev || ifp->idev->dev == dev ||
                            !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
-                               result = ifp;
-                               in6_ifa_hold(ifp);
-                               break;
+                               if (in6_ifa_hold_safe(ifp)) {
+                                       result = ifp;
+                                       break;
+                               }
                        }
                }
        }
index 7209419cfb0e9c295a3feb5ecd3f9e1720ca16dc..c1f62352a481454a505dcbfafc637f187abcf4e0 100644 (file)
@@ -1385,7 +1385,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
             struct nl_info *info, struct netlink_ext_ack *extack)
 {
        struct fib6_table *table = rt->fib6_table;
-       struct fib6_node *fn, *pn = NULL;
+       struct fib6_node *fn;
+#ifdef CONFIG_IPV6_SUBTREES
+       struct fib6_node *pn = NULL;
+#endif
        int err = -ENOMEM;
        int allow_create = 1;
        int replace_required = 0;
@@ -1409,9 +1412,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
                goto out;
        }
 
+#ifdef CONFIG_IPV6_SUBTREES
        pn = fn;
 
-#ifdef CONFIG_IPV6_SUBTREES
        if (rt->fib6_src.plen) {
                struct fib6_node *sn;
 
index 636b360311c5365fba2330f6ca2f7f1b6dd1363e..131f7bb2110d3a08244c6da40ff9be45a2be711b 100644 (file)
@@ -1135,6 +1135,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters == 0)
                return -EINVAL;
+       if ((u64)len < (u64)tmp.size + sizeof(tmp))
+               return -EINVAL;
 
        tmp.name[sizeof(tmp.name)-1] = 0;
 
@@ -1513,6 +1515,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters == 0)
                return -EINVAL;
+       if ((u64)len < (u64)tmp.size + sizeof(tmp))
+               return -EINVAL;
 
        tmp.name[sizeof(tmp.name)-1] = 0;
 
index 9505f9d188ff257a8ca35f30ee111c2f19805a5a..6eef15648b7b0853fb249288bf4545dca3a2cf85 100644 (file)
@@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
                proto = veth->h_vlan_encapsulated_proto;
                break;
        case htons(ETH_P_PPP_SES):
-               proto = nf_flow_pppoe_proto(skb);
+               if (!nf_flow_pppoe_proto(skb, &proto))
+                       return NF_ACCEPT;
                break;
        default:
                proto = skb->protocol;
index e45fade764096182443814e8dcd70700e7956742..5383bed3d3e002661f01468e1a8bef8425e229b4 100644 (file)
@@ -157,7 +157,7 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
                tuple->encap[i].proto = skb->protocol;
                break;
        case htons(ETH_P_PPP_SES):
-               phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+               phdr = (struct pppoe_hdr *)skb_network_header(skb);
                tuple->encap[i].id = ntohs(phdr->sid);
                tuple->encap[i].proto = skb->protocol;
                break;
@@ -273,10 +273,11 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
        return NF_STOLEN;
 }
 
-static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
                                       u32 *offset)
 {
        struct vlan_ethhdr *veth;
+       __be16 inner_proto;
 
        switch (skb->protocol) {
        case htons(ETH_P_8021Q):
@@ -287,7 +288,8 @@ static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
                }
                break;
        case htons(ETH_P_PPP_SES):
-               if (nf_flow_pppoe_proto(skb) == proto) {
+               if (nf_flow_pppoe_proto(skb, &inner_proto) &&
+                   inner_proto == proto) {
                        *offset += PPPOE_SES_HLEN;
                        return true;
                }
@@ -316,7 +318,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
                        skb_reset_network_header(skb);
                        break;
                case htons(ETH_P_PPP_SES):
-                       skb->protocol = nf_flow_pppoe_proto(skb);
+                       skb->protocol = __nf_flow_pppoe_proto(skb);
                        skb_pull(skb, PPPOE_SES_HLEN);
                        skb_reset_network_header(skb);
                        break;
index d89d779467197a0846406e0b0ce6938e8a3d404d..167074283ea91dff50a7aa0299a5794bcddeb32a 100644 (file)
@@ -594,6 +594,12 @@ static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
                                  const struct nft_set_iter *iter,
                                  struct nft_elem_priv *elem_priv)
 {
+       struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+       if (!nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
+       nft_set_elem_change_active(ctx->net, set, ext);
        nft_setelem_data_deactivate(ctx->net, set, elem_priv);
 
        return 0;
@@ -617,6 +623,7 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
                if (!nft_set_elem_active(ext, genmask))
                        continue;
 
+               nft_set_elem_change_active(ctx->net, set, ext);
                nft_setelem_data_deactivate(ctx->net, set, catchall->elem);
                break;
        }
@@ -626,6 +633,7 @@ static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
 {
        struct nft_set_iter iter = {
                .genmask        = nft_genmask_next(ctx->net),
+               .type           = NFT_ITER_UPDATE,
                .fn             = nft_mapelem_deactivate,
        };
 
@@ -3060,7 +3068,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family,
 {
        const struct nft_expr_type *type, *candidate = NULL;
 
-       list_for_each_entry(type, &nf_tables_expressions, list) {
+       list_for_each_entry_rcu(type, &nf_tables_expressions, list) {
                if (!nla_strcmp(nla, type->name)) {
                        if (!type->family && !candidate)
                                candidate = type;
@@ -3092,9 +3100,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net,
        if (nla == NULL)
                return ERR_PTR(-EINVAL);
 
+       rcu_read_lock();
        type = __nft_expr_type_get(family, nla);
-       if (type != NULL && try_module_get(type->owner))
+       if (type != NULL && try_module_get(type->owner)) {
+               rcu_read_unlock();
                return type;
+       }
+       rcu_read_unlock();
 
        lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
@@ -3875,6 +3887,9 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
        const struct nft_data *data;
        int err;
 
+       if (!nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
        if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
            *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
                return 0;
@@ -3898,17 +3913,20 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
 
 int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
 {
-       u8 genmask = nft_genmask_next(ctx->net);
+       struct nft_set_iter dummy_iter = {
+               .genmask        = nft_genmask_next(ctx->net),
+       };
        struct nft_set_elem_catchall *catchall;
+
        struct nft_set_ext *ext;
        int ret = 0;
 
        list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
                ext = nft_set_elem_ext(set, catchall->elem);
-               if (!nft_set_elem_active(ext, genmask))
+               if (!nft_set_elem_active(ext, dummy_iter.genmask))
                        continue;
 
-               ret = nft_setelem_validate(ctx, set, NULL, catchall->elem);
+               ret = nft_setelem_validate(ctx, set, &dummy_iter, catchall->elem);
                if (ret < 0)
                        return ret;
        }
@@ -5397,6 +5415,11 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
                                        const struct nft_set_iter *iter,
                                        struct nft_elem_priv *elem_priv)
 {
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+       if (!nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
        return nft_setelem_data_validate(ctx, set, elem_priv);
 }
 
@@ -5441,6 +5464,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
                }
 
                iter.genmask    = nft_genmask_next(ctx->net);
+               iter.type       = NFT_ITER_UPDATE;
                iter.skip       = 0;
                iter.count      = 0;
                iter.err        = 0;
@@ -5488,6 +5512,13 @@ static int nft_mapelem_activate(const struct nft_ctx *ctx,
                                const struct nft_set_iter *iter,
                                struct nft_elem_priv *elem_priv)
 {
+       struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+       /* called from abort path, reverse check to undo changes. */
+       if (nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
+       nft_clear(ctx->net, ext);
        nft_setelem_data_activate(ctx->net, set, elem_priv);
 
        return 0;
@@ -5505,6 +5536,7 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx,
                if (!nft_set_elem_active(ext, genmask))
                        continue;
 
+               nft_clear(ctx->net, ext);
                nft_setelem_data_activate(ctx->net, set, catchall->elem);
                break;
        }
@@ -5514,6 +5546,7 @@ static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
 {
        struct nft_set_iter iter = {
                .genmask        = nft_genmask_next(ctx->net),
+               .type           = NFT_ITER_UPDATE,
                .fn             = nft_mapelem_activate,
        };
 
@@ -5778,6 +5811,9 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
        const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
        struct nft_set_dump_args *args;
 
+       if (!nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
        if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext))
                return 0;
 
@@ -5888,6 +5924,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
        args.skb                = skb;
        args.reset              = dump_ctx->reset;
        args.iter.genmask       = nft_genmask_cur(net);
+       args.iter.type          = NFT_ITER_READ;
        args.iter.skip          = cb->args[0];
        args.iter.count         = 0;
        args.iter.err           = 0;
@@ -6627,7 +6664,7 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
        struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
 
        if (nft_setelem_is_catchall(set, elem_priv)) {
-               nft_set_elem_change_active(net, set, ext);
+               nft_clear(net, ext);
        } else {
                set->ops->activate(net, set, elem_priv);
        }
@@ -7186,6 +7223,16 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
        }
 }
 
+static int nft_setelem_active_next(const struct net *net,
+                                  const struct nft_set *set,
+                                  struct nft_elem_priv *elem_priv)
+{
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+       u8 genmask = nft_genmask_next(net);
+
+       return nft_set_elem_active(ext, genmask);
+}
+
 static void nft_setelem_data_activate(const struct net *net,
                                      const struct nft_set *set,
                                      struct nft_elem_priv *elem_priv)
@@ -7309,8 +7356,12 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
                             const struct nft_set_iter *iter,
                             struct nft_elem_priv *elem_priv)
 {
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
        struct nft_trans *trans;
 
+       if (!nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
        trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
                                    sizeof(struct nft_trans_elem), GFP_ATOMIC);
        if (!trans)
@@ -7372,6 +7423,7 @@ static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
 {
        struct nft_set_iter iter = {
                .genmask        = genmask,
+               .type           = NFT_ITER_UPDATE,
                .fn             = nft_setelem_flush,
        };
 
@@ -7607,7 +7659,7 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family)
 {
        const struct nft_object_type *type;
 
-       list_for_each_entry(type, &nf_tables_objects, list) {
+       list_for_each_entry_rcu(type, &nf_tables_objects, list) {
                if (type->family != NFPROTO_UNSPEC &&
                    type->family != family)
                        continue;
@@ -7623,9 +7675,13 @@ nft_obj_type_get(struct net *net, u32 objtype, u8 family)
 {
        const struct nft_object_type *type;
 
+       rcu_read_lock();
        type = __nft_obj_type_get(objtype, family);
-       if (type != NULL && try_module_get(type->owner))
+       if (type != NULL && try_module_get(type->owner)) {
+               rcu_read_unlock();
                return type;
+       }
+       rcu_read_unlock();
 
        lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
@@ -10598,8 +10654,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
                case NFT_MSG_DESTROYSETELEM:
                        te = (struct nft_trans_elem *)trans->data;
 
-                       nft_setelem_data_activate(net, te->set, te->elem_priv);
-                       nft_setelem_activate(net, te->set, te->elem_priv);
+                       if (!nft_setelem_active_next(net, te->set, te->elem_priv)) {
+                               nft_setelem_data_activate(net, te->set, te->elem_priv);
+                               nft_setelem_activate(net, te->set, te->elem_priv);
+                       }
                        if (!nft_setelem_is_catchall(te->set, te->elem_priv))
                                te->set->ndeact--;
 
@@ -10787,6 +10845,9 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
 {
        const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
 
+       if (!nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
        if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
            *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
                return 0;
@@ -10871,6 +10932,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
                                continue;
 
                        iter.genmask    = nft_genmask_next(ctx->net);
+                       iter.type       = NFT_ITER_UPDATE;
                        iter.skip       = 0;
                        iter.count      = 0;
                        iter.err        = 0;
index a0055f510e31e9b77526a11c66c565b973897706..b314ca728a2912da717995840ef3dc337eace815 100644 (file)
@@ -216,6 +216,7 @@ static int nft_lookup_validate(const struct nft_ctx *ctx,
                return 0;
 
        iter.genmask    = nft_genmask_next(ctx->net);
+       iter.type       = NFT_ITER_UPDATE;
        iter.skip       = 0;
        iter.count      = 0;
        iter.err        = 0;
index 32df7a16835da3e1d850d34a8236e0a45f06f026..1caa04619dc6da37f845acc65c8ca86c173096de 100644 (file)
@@ -172,7 +172,7 @@ static void nft_bitmap_activate(const struct net *net,
        nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
        /* Enter 11 state. */
        priv->bitmap[idx] |= (genmask << off);
-       nft_set_elem_change_active(net, set, &be->ext);
+       nft_clear(net, &be->ext);
 }
 
 static void nft_bitmap_flush(const struct net *net,
@@ -222,8 +222,6 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
        list_for_each_entry_rcu(be, &priv->list, head) {
                if (iter->count < iter->skip)
                        goto cont;
-               if (!nft_set_elem_active(&be->ext, iter->genmask))
-                       goto cont;
 
                iter->err = iter->fn(ctx, set, iter, &be->priv);
 
index 6968a3b342367c6c0cb0df7523fdfd5864038802..daa56dda737ae2e6b4727c2d3930d68e58a33efb 100644 (file)
@@ -199,7 +199,7 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
 {
        struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
 
-       nft_set_elem_change_active(net, set, &he->ext);
+       nft_clear(net, &he->ext);
 }
 
 static void nft_rhash_flush(const struct net *net,
@@ -286,8 +286,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 
                if (iter->count < iter->skip)
                        goto cont;
-               if (!nft_set_elem_active(&he->ext, iter->genmask))
-                       goto cont;
 
                iter->err = iter->fn(ctx, set, iter, &he->priv);
                if (iter->err < 0)
@@ -599,7 +597,7 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set,
 {
        struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
 
-       nft_set_elem_change_active(net, set, &he->ext);
+       nft_clear(net, &he->ext);
 }
 
 static void nft_hash_flush(const struct net *net,
@@ -652,8 +650,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
                hlist_for_each_entry_rcu(he, &priv->table[i], node) {
                        if (iter->count < iter->skip)
                                goto cont;
-                       if (!nft_set_elem_active(&he->ext, iter->genmask))
-                               goto cont;
 
                        iter->err = iter->fn(ctx, set, iter, &he->priv);
                        if (iter->err < 0)
index df8de50902463738642d4d24b59f12b17b5ff726..187138afac45d479f89ea23ec9b09fcd6b6da866 100644 (file)
@@ -1847,7 +1847,7 @@ static void nft_pipapo_activate(const struct net *net,
 {
        struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv);
 
-       nft_set_elem_change_active(net, set, &e->ext);
+       nft_clear(net, &e->ext);
 }
 
 /**
@@ -2077,6 +2077,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
                rules_fx = rules_f0;
 
                nft_pipapo_for_each_field(f, i, m) {
+                       bool last = i == m->field_count - 1;
+
                        if (!pipapo_match_field(f, start, rules_fx,
                                                match_start, match_end))
                                break;
@@ -2089,16 +2091,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
 
                        match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
                        match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
-               }
 
-               if (i == m->field_count) {
-                       priv->dirty = true;
-                       pipapo_drop(m, rulemap);
-                       return;
+                       if (last && f->mt[rulemap[i].to].e == e) {
+                               priv->dirty = true;
+                               pipapo_drop(m, rulemap);
+                               return;
+                       }
                }
 
                first_rule += rules_f0;
        }
+
+       WARN_ON_ONCE(1); /* elem_priv not found */
 }
 
 /**
@@ -2115,13 +2119,15 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
                            struct nft_set_iter *iter)
 {
        struct nft_pipapo *priv = nft_set_priv(set);
-       struct net *net = read_pnet(&set->net);
        const struct nft_pipapo_match *m;
        const struct nft_pipapo_field *f;
        unsigned int i, r;
 
+       WARN_ON_ONCE(iter->type != NFT_ITER_READ &&
+                    iter->type != NFT_ITER_UPDATE);
+
        rcu_read_lock();
-       if (iter->genmask == nft_genmask_cur(net))
+       if (iter->type == NFT_ITER_READ)
                m = rcu_dereference(priv->match);
        else
                m = priv->clone;
@@ -2143,9 +2149,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
 
                e = f->mt[r].e;
 
-               if (!nft_set_elem_active(&e->ext, iter->genmask))
-                       goto cont;
-
                iter->err = iter->fn(ctx, set, iter, &e->priv);
                if (iter->err < 0)
                        goto out;
index 9944fe479e5361dc140f75be8b90bf3c5deb40f6..b7ea21327549b353c087b3e607e722f391ea94c1 100644 (file)
@@ -532,7 +532,7 @@ static void nft_rbtree_activate(const struct net *net,
 {
        struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
 
-       nft_set_elem_change_active(net, set, &rbe->ext);
+       nft_clear(net, &rbe->ext);
 }
 
 static void nft_rbtree_flush(const struct net *net,
@@ -600,8 +600,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 
                if (iter->count < iter->skip)
                        goto cont;
-               if (!nft_set_elem_active(&rbe->ext, iter->genmask))
-                       goto cont;
 
                iter->err = iter->fn(ctx, set, iter, &rbe->priv);
                if (iter->err < 0) {
index 819157bbb5a2c6ef775633931721490b747f2fc8..d5344563e525c9bc436d5ad0b84380f0bcae62a8 100644 (file)
@@ -252,10 +252,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = copy_safe_from_sockptr(&opt, sizeof(opt),
+                                            optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt > LLCP_MAX_RW) {
                        err = -EINVAL;
@@ -274,10 +274,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = copy_safe_from_sockptr(&opt, sizeof(opt),
+                                            optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt > LLCP_MAX_MIUX) {
                        err = -EINVAL;
index 3019a4406ca4f72be806ff922e377ea7609c3934..74b63cdb59923a95dd03a9c2c540af702564873a 100644 (file)
@@ -1380,8 +1380,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
        if (ct_info.timeout[0]) {
                if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
                                      ct_info.timeout))
-                       pr_info_ratelimited("Failed to associated timeout "
-                                           "policy `%s'\n", ct_info.timeout);
+                       OVS_NLERR(log,
+                                 "Failed to associated timeout policy '%s'",
+                                 ct_info.timeout);
                else
                        ct_info.nf_ct_timeout = rcu_dereference(
                                nf_ct_timeout_find(ct_info.ct)->timeout);
index ff5336493777507242320d7e9214c637663f0734..4a2c763e2d116693469e6c8bd9ce0ed8f7f667d9 100644 (file)
@@ -974,6 +974,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
        sch->enqueue = ops->enqueue;
        sch->dequeue = ops->dequeue;
        sch->dev_queue = dev_queue;
+       sch->owner = -1;
        netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL);
        refcount_set(&sch->refcnt, 1);
 
index 5b41e2321209ae0a17ac97d7214eefd252ec0180..9a6ad5974dff5e855cbc0ba2a1f7837733420c5f 100644 (file)
@@ -2663,9 +2663,13 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
                                        WRITE_ONCE(u->oob_skb, NULL);
                                        consume_skb(skb);
                                }
-                       } else if (!(flags & MSG_PEEK)) {
+                       } else if (flags & MSG_PEEK) {
+                               skb = NULL;
+                       } else {
                                skb_unlink(skb, &sk->sk_receive_queue);
-                               consume_skb(skb);
+                               WRITE_ONCE(u->oob_skb, NULL);
+                               if (!WARN_ON_ONCE(skb_unref(skb)))
+                                       kfree_skb(skb);
                                skb = skb_peek(&sk->sk_receive_queue);
                        }
                }
@@ -2739,18 +2743,16 @@ redo:
                last = skb = skb_peek(&sk->sk_receive_queue);
                last_len = last ? last->len : 0;
 
+again:
 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
                if (skb) {
                        skb = manage_oob(skb, sk, flags, copied);
-                       if (!skb) {
+                       if (!skb && copied) {
                                unix_state_unlock(sk);
-                               if (copied)
-                                       break;
-                               goto redo;
+                               break;
                        }
                }
 #endif
-again:
                if (skb == NULL) {
                        if (copied >= target)
                                goto unlock;
index fa39b626523851df29275f1448d30a7390e7e0fb..6433a414acf8624a1d98727f4e309b7c040710b9 100644 (file)
@@ -274,11 +274,22 @@ static void __unix_gc(struct work_struct *work)
         * receive queues.  Other, non candidate sockets _can_ be
         * added to queue, so we must make sure only to touch
         * candidates.
+        *
+        * Embryos, though never candidates themselves, affect which
+        * candidates are reachable by the garbage collector.  Before
+        * being added to a listener's queue, an embryo may already
+        * receive data carrying SCM_RIGHTS, potentially making the
+        * passed socket a candidate that is not yet reachable by the
+        * collector.  It becomes reachable once the embryo is
+        * enqueued.  Therefore, we must ensure that no SCM-laden
+        * embryo appears in a (candidate) listener's queue between
+        * consecutive scan_children() calls.
         */
        list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+               struct sock *sk = &u->sk;
                long total_refs;
 
-               total_refs = file_count(u->sk.sk_socket->file);
+               total_refs = file_count(sk->sk_socket->file);
 
                WARN_ON_ONCE(!u->inflight);
                WARN_ON_ONCE(total_refs < u->inflight);
@@ -286,6 +297,11 @@ static void __unix_gc(struct work_struct *work)
                        list_move_tail(&u->link, &gc_candidates);
                        __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
                        __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+
+                       if (sk->sk_state == TCP_LISTEN) {
+                               unix_state_lock(sk);
+                               unix_state_unlock(sk);
+                       }
                }
        }
 
index 3404d076a8a3e6a9f43dfca301d3e00078afb934..727aa20be4bde8dc63a544a44a5cdeb19cac7dcb 100644 (file)
@@ -1417,6 +1417,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
                struct xsk_queue **q;
                int entries;
 
+               if (optlen < sizeof(entries))
+                       return -EINVAL;
                if (copy_from_sockptr(&entries, optval, sizeof(entries)))
                        return -EFAULT;
 
index c5c2ce113c9232c331c4ebac2ba4384a24424640..d20c47d21ad8352973c93ccefc7b0931e93de545 100644 (file)
@@ -467,6 +467,8 @@ static bool stackleak_gate(void)
                        return false;
                if (STRING_EQUAL(section, ".entry.text"))
                        return false;
+               if (STRING_EQUAL(section, ".head.text"))
+                       return false;
        }
 
        return track_frame_size >= 0;
index b141024830ecc831430c697858ce409a56c711af..ee6ac649df836d695133f93da9cfd0518cc1ae23 100644 (file)
@@ -428,7 +428,7 @@ static int cvt_ump_midi2_to_midi1(struct snd_seq_client *dest,
        midi1->note.group = midi2->note.group;
        midi1->note.status = midi2->note.status;
        midi1->note.channel = midi2->note.channel;
-       switch (midi2->note.status << 4) {
+       switch (midi2->note.status) {
        case UMP_MSG_STATUS_NOTE_ON:
        case UMP_MSG_STATUS_NOTE_OFF:
                midi1->note.note = midi2->note.note;
index cdcb28aa9d7bf028d429aeea0016cec7c6bc0c22..70d80b6af3fe370aa9c6f9fd471676ba9c18da72 100644 (file)
@@ -7467,6 +7467,10 @@ enum {
        ALC285_FIXUP_CS35L56_I2C_2,
        ALC285_FIXUP_CS35L56_I2C_4,
        ALC285_FIXUP_ASUS_GA403U,
+       ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC,
+       ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1,
+       ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC,
+       ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1
 };
 
 /* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -9690,6 +9694,38 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc285_fixup_asus_ga403u,
        },
+       [ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x03a11050 },
+                       { 0x1b, 0x03a11c30 },
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1
+       },
+       [ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc285_fixup_speaker2_to_dac1,
+               .chained = true,
+               .chain_id = ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC,
+       },
+       [ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x03a11050 },
+                       { 0x1b, 0x03a11c30 },
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC285_FIXUP_CS35L56_SPI_2
+       },
+       [ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc285_fixup_speaker2_to_dac1,
+               .chained = true,
+               .chain_id = ALC285_FIXUP_ASUS_GA403U,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -10084,6 +10120,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x103c, 0x8cde, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2),
+       SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
        SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
        SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
@@ -10143,7 +10181,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
-       SND_PCI_QUIRK(0x1043, 0x1b13, "ASUS U41SV/GA403U", ALC285_FIXUP_ASUS_GA403U),
+       SND_PCI_QUIRK(0x1043, 0x1b13, "ASUS U41SV/GA403U", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC),
        SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10151,7 +10189,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
-       SND_PCI_QUIRK(0x1043, 0x1c63, "ASUS GU605M", ALC285_FIXUP_CS35L56_SPI_2),
+       SND_PCI_QUIRK(0x1043, 0x1c63, "ASUS GU605M", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1),
        SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
        SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
        SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
@@ -10228,6 +10266,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x152d, 0x1082, "Quanta NL3", ALC269_FIXUP_LIFEBOOK),
+       SND_PCI_QUIRK(0x152d, 0x1262, "Huawei NBLB-WAX9N", ALC2XX_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1558, 0x0353, "Clevo V35[05]SN[CDE]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x1325, "Clevo N15[01][CW]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -10333,6 +10372,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x222e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460),
        SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460),
+       SND_PCI_QUIRK(0x17aa, 0x2234, "Thinkpad ICE-1", ALC287_FIXUP_TAS2781_I2C),
        SND_PCI_QUIRK(0x17aa, 0x2245, "Thinkpad T470", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x2246, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x2247, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
@@ -10394,8 +10434,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C),
        SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C),
        SND_PCI_QUIRK(0x17aa, 0x38a8, "Y780P AMD VECO dual", ALC287_FIXUP_TAS2781_I2C),
-       SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2),
+       SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
+       SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
        SND_PCI_QUIRK(0x17aa, 0x38b4, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x17aa, 0x38b5, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x17aa, 0x38b6, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10457,6 +10497,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
        SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
        SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
        SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
        SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
index 48dae3339305048fca2262821e5ebfea6bcf237a..75f7674c66ee7ae8d9c407798193f4618b82a6b7 100644 (file)
@@ -514,10 +514,10 @@ static int tas2563_save_calibration(struct tasdevice_priv *tas_priv)
 static void tas2781_apply_calib(struct tasdevice_priv *tas_priv)
 {
        static const unsigned char page_array[CALIB_MAX] = {
-               0x17, 0x18, 0x18, 0x0d, 0x18
+               0x17, 0x18, 0x18, 0x13, 0x18,
        };
        static const unsigned char rgno_array[CALIB_MAX] = {
-               0x74, 0x0c, 0x14, 0x3c, 0x7c
+               0x74, 0x0c, 0x14, 0x70, 0x7c,
        };
        unsigned char *data;
        int i, j, rc;
index 318e2dad27e048c08fea615cef8654aa1fcb7d81..ae57bf69ad4af3aa51fd364e89676469dd87a14d 100644 (file)
@@ -76,6 +76,12 @@ enum {
        DNS
 };
 
+enum {
+       IPV4 = 1,
+       IPV6,
+       IP_TYPE_MAX
+};
+
 static int in_hand_shake;
 
 static char *os_name = "";
@@ -102,6 +108,11 @@ static struct utsname uts_buf;
 
 #define MAX_FILE_NAME 100
 #define ENTRIES_PER_BLOCK 50
+/*
+ * Change this entry if the number of addresses increases in future
+ */
+#define MAX_IP_ENTRIES 64
+#define OUTSTR_BUF_SIZE ((INET6_ADDRSTRLEN + 1) * MAX_IP_ENTRIES)
 
 struct kvp_record {
        char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
@@ -1171,6 +1182,18 @@ static int process_ip_string(FILE *f, char *ip_string, int type)
        return 0;
 }
 
+int ip_version_check(const char *input_addr)
+{
+       struct in6_addr addr;
+
+       if (inet_pton(AF_INET, input_addr, &addr))
+               return IPV4;
+       else if (inet_pton(AF_INET6, input_addr, &addr))
+               return IPV6;
+
+       return -EINVAL;
+}
+
 /*
  * Only IPv4 subnet strings needs to be converted to plen
  * For IPv6 the subnet is already privided in plen format
@@ -1197,14 +1220,75 @@ static int kvp_subnet_to_plen(char *subnet_addr_str)
        return plen;
 }
 
+static int process_dns_gateway_nm(FILE *f, char *ip_string, int type,
+                                 int ip_sec)
+{
+       char addr[INET6_ADDRSTRLEN], *output_str;
+       int ip_offset = 0, error = 0, ip_ver;
+       char *param_name;
+
+       if (type == DNS)
+               param_name = "dns";
+       else if (type == GATEWAY)
+               param_name = "gateway";
+       else
+               return -EINVAL;
+
+       output_str = (char *)calloc(OUTSTR_BUF_SIZE, sizeof(char));
+       if (!output_str)
+               return -ENOMEM;
+
+       while (1) {
+               memset(addr, 0, sizeof(addr));
+
+               if (!parse_ip_val_buffer(ip_string, &ip_offset, addr,
+                                        (MAX_IP_ADDR_SIZE * 2)))
+                       break;
+
+               ip_ver = ip_version_check(addr);
+               if (ip_ver < 0)
+                       continue;
+
+               if ((ip_ver == IPV4 && ip_sec == IPV4) ||
+                   (ip_ver == IPV6 && ip_sec == IPV6)) {
+                       /*
+                        * do a bound check to avoid out-of bound writes
+                        */
+                       if ((OUTSTR_BUF_SIZE - strlen(output_str)) >
+                           (strlen(addr) + 1)) {
+                               strncat(output_str, addr,
+                                       OUTSTR_BUF_SIZE -
+                                       strlen(output_str) - 1);
+                               strncat(output_str, ",",
+                                       OUTSTR_BUF_SIZE -
+                                       strlen(output_str) - 1);
+                       }
+               } else {
+                       continue;
+               }
+       }
+
+       if (strlen(output_str)) {
+               /*
+                * This is to get rid of that extra comma character
+                * in the end of the string
+                */
+               output_str[strlen(output_str) - 1] = '\0';
+               error = fprintf(f, "%s=%s\n", param_name, output_str);
+       }
+
+       free(output_str);
+       return error;
+}
+
 static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
-                               int is_ipv6)
+                               int ip_sec)
 {
        char addr[INET6_ADDRSTRLEN];
        char subnet_addr[INET6_ADDRSTRLEN];
-       int error, i = 0;
+       int error = 0, i = 0;
        int ip_offset = 0, subnet_offset = 0;
-       int plen;
+       int plen, ip_ver;
 
        memset(addr, 0, sizeof(addr));
        memset(subnet_addr, 0, sizeof(subnet_addr));
@@ -1216,10 +1300,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
                                                       subnet_addr,
                                                       (MAX_IP_ADDR_SIZE *
                                                        2))) {
-               if (!is_ipv6)
+               ip_ver = ip_version_check(addr);
+               if (ip_ver < 0)
+                       continue;
+
+               if (ip_ver == IPV4 && ip_sec == IPV4)
                        plen = kvp_subnet_to_plen((char *)subnet_addr);
-               else
+               else if (ip_ver == IPV6 && ip_sec == IPV6)
                        plen = atoi(subnet_addr);
+               else
+                       continue;
 
                if (plen < 0)
                        return plen;
@@ -1233,17 +1323,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
                memset(subnet_addr, 0, sizeof(subnet_addr));
        }
 
-       return 0;
+       return error;
 }
 
 static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
 {
-       int error = 0;
+       int error = 0, ip_ver;
        char if_filename[PATH_MAX];
        char nm_filename[PATH_MAX];
        FILE *ifcfg_file, *nmfile;
        char cmd[PATH_MAX];
-       int is_ipv6 = 0;
        char *mac_addr;
        int str_len;
 
@@ -1421,52 +1510,94 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
        if (error)
                goto setval_error;
 
-       if (new_val->addr_family & ADDR_FAMILY_IPV6) {
-               error = fprintf(nmfile, "\n[ipv6]\n");
-               if (error < 0)
-                       goto setval_error;
-               is_ipv6 = 1;
-       } else {
-               error = fprintf(nmfile, "\n[ipv4]\n");
-               if (error < 0)
-                       goto setval_error;
-       }
-
        /*
         * Now we populate the keyfile format
+        *
+        * The keyfile format expects the IPv6 and IPv4 configuration in
+        * different sections. Therefore we iterate through the list twice,
+        * once to populate the IPv4 section and the next time for IPv6
         */
+       ip_ver = IPV4;
+       do {
+               if (ip_ver == IPV4) {
+                       error = fprintf(nmfile, "\n[ipv4]\n");
+                       if (error < 0)
+                               goto setval_error;
+               } else {
+                       error = fprintf(nmfile, "\n[ipv6]\n");
+                       if (error < 0)
+                               goto setval_error;
+               }
 
-       if (new_val->dhcp_enabled) {
-               error = kvp_write_file(nmfile, "method", "", "auto");
-               if (error < 0)
-                       goto setval_error;
-       } else {
-               error = kvp_write_file(nmfile, "method", "", "manual");
+               /*
+                * Write the configuration for ipaddress, netmask, gateway and
+                * name services
+                */
+               error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
+                                            (char *)new_val->sub_net,
+                                            ip_ver);
                if (error < 0)
                        goto setval_error;
-       }
 
-       /*
-        * Write the configuration for ipaddress, netmask, gateway and
-        * name services
-        */
-       error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
-                                    (char *)new_val->sub_net, is_ipv6);
-       if (error < 0)
-               goto setval_error;
+               /*
+                * As dhcp_enabled is only valid for ipv4, we do not set dhcp
+                * methods for ipv6 based on dhcp_enabled flag.
+                *
+                * For ipv4, set method to manual only when dhcp_enabled is
+                * false and specific ipv4 addresses are configured. If neither
+                * dhcp_enabled is true and no ipv4 addresses are configured,
+                * set method to 'disabled'.
+                *
+                * For ipv6, set method to manual when we configure ipv6
+                * addresses. Otherwise set method to 'auto' so that SLAAC from
+                * RA may be used.
+                */
+               if (ip_ver == IPV4) {
+                       if (new_val->dhcp_enabled) {
+                               error = kvp_write_file(nmfile, "method", "",
+                                                      "auto");
+                               if (error < 0)
+                                       goto setval_error;
+                       } else if (error) {
+                               error = kvp_write_file(nmfile, "method", "",
+                                                      "manual");
+                               if (error < 0)
+                                       goto setval_error;
+                       } else {
+                               error = kvp_write_file(nmfile, "method", "",
+                                                      "disabled");
+                               if (error < 0)
+                                       goto setval_error;
+                       }
+               } else if (ip_ver == IPV6) {
+                       if (error) {
+                               error = kvp_write_file(nmfile, "method", "",
+                                                      "manual");
+                               if (error < 0)
+                                       goto setval_error;
+                       } else {
+                               error = kvp_write_file(nmfile, "method", "",
+                                                      "auto");
+                               if (error < 0)
+                                       goto setval_error;
+                       }
+               }
 
-       /* we do not want ipv4 addresses in ipv6 section and vice versa */
-       if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) {
-               error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
+               error = process_dns_gateway_nm(nmfile,
+                                              (char *)new_val->gate_way,
+                                              GATEWAY, ip_ver);
                if (error < 0)
                        goto setval_error;
-       }
 
-       if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) {
-               error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
+               error = process_dns_gateway_nm(nmfile,
+                                              (char *)new_val->dns_addr, DNS,
+                                              ip_ver);
                if (error < 0)
                        goto setval_error;
-       }
+
+               ip_ver++;
+       } while (ip_ver < IP_TYPE_MAX);
+
        fclose(nmfile);
        fclose(ifcfg_file);
 
index 4b0673bf52c2e615017bf2b94da1f6fc4392e532..07cfad817d53908f2325505d2b9cb644a808a689 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/build_bug.h>
 #include <linux/compiler.h>
 #include <linux/math.h>
+#include <linux/panic.h>
 #include <endian.h>
 #include <byteswap.h>
 
index f3c82ab5b14cd77819030096b81e0b67cba0df1d..7d73da0980473fd3fdbdcd88e9e041077d5a2df3 100644 (file)
@@ -37,4 +37,9 @@ static inline void totalram_pages_add(long count)
 {
 }
 
+static inline int early_pfn_to_nid(unsigned long pfn)
+{
+       return 0;
+}
+
 #endif
diff --git a/tools/include/linux/panic.h b/tools/include/linux/panic.h
new file mode 100644 (file)
index 0000000..9c8f17a
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_PANIC_H
+#define _TOOLS_LINUX_PANIC_H
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static inline void panic(const char *fmt, ...)
+{
+       va_list argp;
+
+       va_start(argp, fmt);
+       vfprintf(stderr, fmt, argp);
+       va_end(argp);
+       exit(-1);
+}
+
+#endif
index 8f08c3fd498d5b81185519728fc1c28a8a0d4d5f..0d3672e5d9ed1553a720f3b0b52ca71f81fbc2d9 100644 (file)
@@ -67,6 +67,10 @@ The column name "all" can be used to enable all disabled-by-default built-in cou
 .PP
 \fB--quiet\fP Do not decode and print the system configuration header information.
 .PP
++\fB--no-msr\fP Disable all the uses of the MSR driver.
++.PP
++\fB--no-perf\fP Disable all the uses of the perf API.
++.PP
 \fB--interval seconds\fP overrides the default 5.0 second measurement interval.
 .PP
 \fB--num_iterations num\fP number of the measurement iterations.
@@ -125,9 +129,17 @@ The system configuration dump (if --quiet is not used) is followed by statistics
 .PP
 \fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
 .PP
-\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
+\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
 .PP
-\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
+\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBGFXAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
+.PP
+\fBSAM%mc6\fP The percentage of time the SA Media is in the "module C6" state, mc6, during the measurement interval. From /sys/class/drm/card0/gt/gt1/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
+.PP
+\fBSAMMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBSAMAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
 .PP
 \fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states.  These numbers are from hardware residency counters.
 .PP
@@ -370,7 +382,7 @@ below the processor's base frequency.
 
 Busy% = MPERF_delta/TSC_delta
 
-Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval
+Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval
 
 Note that these calculations depend on TSC_delta, so they
 are not reliable during intervals when TSC_MHz is not running at the base frequency.
index 7a334377f92b978fa642a0071b19f33d7e6fe74e..98256468e24806acfc0daee374d0cf9877e92131 100644 (file)
@@ -3,7 +3,7 @@
  * turbostat -- show CPU frequency and C-state residency
  * on modern Intel and AMD processors.
  *
- * Copyright (c) 2023 Intel Corporation.
+ * Copyright (c) 2024 Intel Corporation.
  * Len Brown <len.brown@intel.com>
  */
 
@@ -36,6 +36,8 @@
 #include <linux/perf_event.h>
 #include <asm/unistd.h>
 #include <stdbool.h>
+#include <assert.h>
+#include <linux/kernel.h>
 
 #define UNUSED(x) (void)(x)
 
 #define        NAME_BYTES 20
 #define PATH_BYTES 128
 
+#define MAX_NOFILE 0x8000
+
 enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
 enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
 enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
+enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR };
+enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR };
 
 struct msr_counter {
        unsigned int msr_num;
@@ -127,6 +133,9 @@ struct msr_counter bic[] = {
        { 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
        { 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
        { 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "SAM%mc6", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "SAMMHz", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "SAMAMHz", "", 0, 0, 0, NULL, 0 },
 };
 
 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -185,11 +194,14 @@ struct msr_counter bic[] = {
 #define        BIC_IPC         (1ULL << 52)
 #define        BIC_CORE_THROT_CNT      (1ULL << 53)
 #define        BIC_UNCORE_MHZ          (1ULL << 54)
+#define        BIC_SAM_mc6             (1ULL << 55)
+#define        BIC_SAMMHz              (1ULL << 56)
+#define        BIC_SAMACTMHz           (1ULL << 57)
 
 #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
 #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
-#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
+#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
+#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6)
 #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
 
 #define BIC_DISABLED_BY_DEFAULT        (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
@@ -204,10 +216,13 @@ unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC
 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
 #define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
 
+struct amperf_group_fd;
+
 char *proc_stat = "/proc/stat";
 FILE *outf;
 int *fd_percpu;
 int *fd_instr_count_percpu;
+struct amperf_group_fd *fd_amperf_percpu;      /* File descriptors for perf group with APERF and MPERF counters. */
 struct timeval interval_tv = { 5, 0 };
 struct timespec interval_ts = { 5, 0 };
 
@@ -242,11 +257,8 @@ char *output_buffer, *outp;
 unsigned int do_dts;
 unsigned int do_ptm;
 unsigned int do_ipc;
-unsigned long long gfx_cur_rc6_ms;
 unsigned long long cpuidle_cur_cpu_lpi_us;
 unsigned long long cpuidle_cur_sys_lpi_us;
-unsigned int gfx_cur_mhz;
-unsigned int gfx_act_mhz;
 unsigned int tj_max;
 unsigned int tj_max_override;
 double rapl_power_units, rapl_time_units;
@@ -263,6 +275,28 @@ unsigned int has_hwp_epp;  /* IA32_HWP_REQUEST[bits 31:24] */
 unsigned int has_hwp_pkg;      /* IA32_HWP_REQUEST_PKG */
 unsigned int first_counter_read = 1;
 int ignore_stdin;
+bool no_msr;
+bool no_perf;
+enum amperf_source amperf_source;
+
+enum gfx_sysfs_idx {
+       GFX_rc6,
+       GFX_MHz,
+       GFX_ACTMHz,
+       SAM_mc6,
+       SAM_MHz,
+       SAM_ACTMHz,
+       GFX_MAX
+};
+
+struct gfx_sysfs_info {
+       const char *path;
+       FILE *fp;
+       unsigned int val;
+       unsigned long long val_ull;
+};
+
+static struct gfx_sysfs_info gfx_info[GFX_MAX];
 
 int get_msr(int cpu, off_t offset, unsigned long long *msr);
 
@@ -652,6 +686,7 @@ static const struct platform_features icx_features = {
        .bclk_freq = BCLK_100MHZ,
        .supported_cstates = CC1 | CC6 | PC2 | PC6,
        .cst_limit = CST_LIMIT_ICX,
+       .has_msr_core_c1_res = 1,
        .has_irtl_msrs = 1,
        .has_cst_prewake_bit = 1,
        .trl_msrs = TRL_BASE | TRL_CORECOUNT,
@@ -948,6 +983,175 @@ size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affi
 #define MAX_ADDED_THREAD_COUNTERS 24
 #define BITMASK_SIZE 32
 
+/* Indexes used to map data read from perf and MSRs into global variables */
+enum rapl_rci_index {
+       RAPL_RCI_INDEX_ENERGY_PKG = 0,
+       RAPL_RCI_INDEX_ENERGY_CORES = 1,
+       RAPL_RCI_INDEX_DRAM = 2,
+       RAPL_RCI_INDEX_GFX = 3,
+       RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
+       RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
+       RAPL_RCI_INDEX_CORE_ENERGY = 6,
+       NUM_RAPL_COUNTERS,
+};
+
+enum rapl_unit {
+       RAPL_UNIT_INVALID,
+       RAPL_UNIT_JOULES,
+       RAPL_UNIT_WATTS,
+};
+
+struct rapl_counter_info_t {
+       unsigned long long data[NUM_RAPL_COUNTERS];
+       enum rapl_source source[NUM_RAPL_COUNTERS];
+       unsigned long long flags[NUM_RAPL_COUNTERS];
+       double scale[NUM_RAPL_COUNTERS];
+       enum rapl_unit unit[NUM_RAPL_COUNTERS];
+
+       union {
+               /* Active when source == RAPL_SOURCE_MSR */
+               struct {
+                       unsigned long long msr[NUM_RAPL_COUNTERS];
+                       unsigned long long msr_mask[NUM_RAPL_COUNTERS];
+                       int msr_shift[NUM_RAPL_COUNTERS];
+               };
+       };
+
+       int fd_perf;
+};
+
+/* struct rapl_counter_info_t for each RAPL domain */
+struct rapl_counter_info_t *rapl_counter_info_perdomain;
+
+#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1)
+
+struct rapl_counter_arch_info {
+       int feature_mask;       /* Mask for testing if the counter is supported on host */
+       const char *perf_subsys;
+       const char *perf_name;
+       unsigned long long msr;
+       unsigned long long msr_mask;
+       int msr_shift;          /* Positive mean shift right, negative mean shift left */
+       double *platform_rapl_msr_scale;        /* Scale applied to values read by MSR (platform dependent, filled at runtime) */
+       unsigned int rci_index; /* Maps data from perf counters to global variables */
+       unsigned long long bic;
+       double compat_scale;    /* Some counters require constant scaling to be in the same range as other, similar ones */
+       unsigned long long flags;
+};
+
+static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
+       {
+        .feature_mask = RAPL_PKG,
+        .perf_subsys = "power",
+        .perf_name = "energy-pkg",
+        .msr = MSR_PKG_ENERGY_STATUS,
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_energy_units,
+        .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
+        .bic = BIC_PkgWatt | BIC_Pkg_J,
+        .compat_scale = 1.0,
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_AMD_F17H,
+        .perf_subsys = "power",
+        .perf_name = "energy-pkg",
+        .msr = MSR_PKG_ENERGY_STAT,
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_energy_units,
+        .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
+        .bic = BIC_PkgWatt | BIC_Pkg_J,
+        .compat_scale = 1.0,
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_CORE_ENERGY_STATUS,
+        .perf_subsys = "power",
+        .perf_name = "energy-cores",
+        .msr = MSR_PP0_ENERGY_STATUS,
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_energy_units,
+        .rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
+        .bic = BIC_CorWatt | BIC_Cor_J,
+        .compat_scale = 1.0,
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_DRAM,
+        .perf_subsys = "power",
+        .perf_name = "energy-ram",
+        .msr = MSR_DRAM_ENERGY_STATUS,
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_dram_energy_units,
+        .rci_index = RAPL_RCI_INDEX_DRAM,
+        .bic = BIC_RAMWatt | BIC_RAM_J,
+        .compat_scale = 1.0,
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_GFX,
+        .perf_subsys = "power",
+        .perf_name = "energy-gpu",
+        .msr = MSR_PP1_ENERGY_STATUS,
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_energy_units,
+        .rci_index = RAPL_RCI_INDEX_GFX,
+        .bic = BIC_GFXWatt | BIC_GFX_J,
+        .compat_scale = 1.0,
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_PKG_PERF_STATUS,
+        .perf_subsys = NULL,
+        .perf_name = NULL,
+        .msr = MSR_PKG_PERF_STATUS,
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_time_units,
+        .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
+        .bic = BIC_PKG__,
+        .compat_scale = 100.0,
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_DRAM_PERF_STATUS,
+        .perf_subsys = NULL,
+        .perf_name = NULL,
+        .msr = MSR_DRAM_PERF_STATUS,
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_time_units,
+        .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
+        .bic = BIC_RAM__,
+        .compat_scale = 100.0,
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_AMD_F17H,
+        .perf_subsys = NULL,
+        .perf_name = NULL,
+        .msr = MSR_CORE_ENERGY_STAT,
+        .msr_mask = 0xFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_energy_units,
+        .rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
+        .bic = BIC_CorWatt | BIC_Cor_J,
+        .compat_scale = 1.0,
+        .flags = 0,
+         },
+};
+
+struct rapl_counter {
+       unsigned long long raw_value;
+       enum rapl_unit unit;
+       double scale;
+};
+
 struct thread_data {
        struct timeval tv_begin;
        struct timeval tv_end;
@@ -974,7 +1178,7 @@ struct core_data {
        unsigned long long c7;
        unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
        unsigned int core_temp_c;
-       unsigned int core_energy;       /* MSR_CORE_ENERGY_STAT */
+       struct rapl_counter core_energy;        /* MSR_CORE_ENERGY_STAT */
        unsigned int core_id;
        unsigned long long core_throt_cnt;
        unsigned long long counter[MAX_ADDED_COUNTERS];
@@ -989,8 +1193,8 @@ struct pkg_data {
        unsigned long long pc8;
        unsigned long long pc9;
        unsigned long long pc10;
-       unsigned long long cpu_lpi;
-       unsigned long long sys_lpi;
+       long long cpu_lpi;
+       long long sys_lpi;
        unsigned long long pkg_wtd_core_c0;
        unsigned long long pkg_any_core_c0;
        unsigned long long pkg_any_gfxe_c0;
@@ -998,13 +1202,16 @@ struct pkg_data {
        long long gfx_rc6_ms;
        unsigned int gfx_mhz;
        unsigned int gfx_act_mhz;
+       long long sam_mc6_ms;
+       unsigned int sam_mhz;
+       unsigned int sam_act_mhz;
        unsigned int package_id;
-       unsigned long long energy_pkg;  /* MSR_PKG_ENERGY_STATUS */
-       unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
-       unsigned long long energy_cores;        /* MSR_PP0_ENERGY_STATUS */
-       unsigned long long energy_gfx;  /* MSR_PP1_ENERGY_STATUS */
-       unsigned long long rapl_pkg_perf_status;        /* MSR_PKG_PERF_STATUS */
-       unsigned long long rapl_dram_perf_status;       /* MSR_DRAM_PERF_STATUS */
+       struct rapl_counter energy_pkg; /* MSR_PKG_ENERGY_STATUS */
+       struct rapl_counter energy_dram;        /* MSR_DRAM_ENERGY_STATUS */
+       struct rapl_counter energy_cores;       /* MSR_PP0_ENERGY_STATUS */
+       struct rapl_counter energy_gfx; /* MSR_PP1_ENERGY_STATUS */
+       struct rapl_counter rapl_pkg_perf_status;       /* MSR_PKG_PERF_STATUS */
+       struct rapl_counter rapl_dram_perf_status;      /* MSR_DRAM_PERF_STATUS */
        unsigned int pkg_temp_c;
        unsigned int uncore_mhz;
        unsigned long long counter[MAX_ADDED_COUNTERS];
@@ -1150,6 +1357,38 @@ struct sys_counters {
        struct msr_counter *pp;
 } sys;
 
+void free_sys_counters(void)
+{
+       struct msr_counter *p = sys.tp, *pnext = NULL;
+
+       while (p) {
+               pnext = p->next;
+               free(p);
+               p = pnext;
+       }
+
+       p = sys.cp, pnext = NULL;
+       while (p) {
+               pnext = p->next;
+               free(p);
+               p = pnext;
+       }
+
+       p = sys.pp, pnext = NULL;
+       while (p) {
+               pnext = p->next;
+               free(p);
+               p = pnext;
+       }
+
+       sys.added_thread_counters = 0;
+       sys.added_core_counters = 0;
+       sys.added_package_counters = 0;
+       sys.tp = NULL;
+       sys.cp = NULL;
+       sys.pp = NULL;
+}
+
 struct system_summary {
        struct thread_data threads;
        struct core_data cores;
@@ -1280,34 +1519,60 @@ int get_msr_fd(int cpu)
        sprintf(pathname, "/dev/cpu/%d/msr", cpu);
        fd = open(pathname, O_RDONLY);
        if (fd < 0)
-               err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+               err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
+                   "or run with --no-msr, or run as root", pathname);
 
        fd_percpu[cpu] = fd;
 
        return fd;
 }
 
+static void bic_disable_msr_access(void)
+{
+       const unsigned long bic_msrs =
+           BIC_SMI |
+           BIC_CPU_c1 |
+           BIC_CPU_c3 |
+           BIC_CPU_c6 |
+           BIC_CPU_c7 |
+           BIC_Mod_c6 |
+           BIC_CoreTmp |
+           BIC_Totl_c0 |
+           BIC_Any_c0 |
+           BIC_GFX_c0 |
+           BIC_CPUGFX |
+           BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_PkgTmp;
+
+       bic_enabled &= ~bic_msrs;
+
+       free_sys_counters();
+}
+
 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
 {
+       assert(!no_perf);
+
        return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
 }
 
-static int perf_instr_count_open(int cpu_num)
+static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format)
 {
-       struct perf_event_attr pea;
-       int fd;
+       struct perf_event_attr attr;
+       const pid_t pid = -1;
+       const unsigned long flags = 0;
 
-       memset(&pea, 0, sizeof(struct perf_event_attr));
-       pea.type = PERF_TYPE_HARDWARE;
-       pea.size = sizeof(struct perf_event_attr);
-       pea.config = PERF_COUNT_HW_INSTRUCTIONS;
+       assert(!no_perf);
 
-       /* counter for cpu_num, including user + kernel and all processes */
-       fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
-       if (fd == -1) {
-               warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
-               BIC_NOT_PRESENT(BIC_IPC);
-       }
+       memset(&attr, 0, sizeof(struct perf_event_attr));
+
+       attr.type = type;
+       attr.size = sizeof(struct perf_event_attr);
+       attr.config = config;
+       attr.disabled = 0;
+       attr.sample_type = PERF_SAMPLE_IDENTIFIER;
+       attr.read_format = read_format;
+
+       const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
 
        return fd;
 }
@@ -1317,7 +1582,7 @@ int get_instr_count_fd(int cpu)
        if (fd_instr_count_percpu[cpu])
                return fd_instr_count_percpu[cpu];
 
-       fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
+       fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
 
        return fd_instr_count_percpu[cpu];
 }
@@ -1326,6 +1591,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
 {
        ssize_t retval;
 
+       assert(!no_msr);
+
        retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
 
        if (retval != sizeof *msr)
@@ -1334,6 +1601,21 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
        return 0;
 }
 
+int probe_msr(int cpu, off_t offset)
+{
+       ssize_t retval;
+       unsigned long long dummy;
+
+       assert(!no_msr);
+
+       retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);
+
+       if (retval != sizeof(dummy))
+               return 1;
+
+       return 0;
+}
+
 #define MAX_DEFERRED 16
 char *deferred_add_names[MAX_DEFERRED];
 char *deferred_skip_names[MAX_DEFERRED];
@@ -1369,6 +1651,8 @@ void help(void)
                "               Override default 5-second measurement interval\n"
                "  -J, --Joules displays energy in Joules instead of Watts\n"
                "  -l, --list   list column headers only\n"
+               "  -M, --no-msr Disable all uses of the MSR driver\n"
+               "  -P, --no-perf Disable all uses of the perf API\n"
                "  -n, --num_iterations num\n"
                "               number of the measurement iterations\n"
                "  -N, --header_iterations num\n"
@@ -1573,6 +1857,15 @@ void print_header(char *delim)
        if (DO_BIC(BIC_GFXACTMHz))
                outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
 
+       if (DO_BIC(BIC_SAM_mc6))
+               outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : ""));
+
+       if (DO_BIC(BIC_SAMMHz))
+               outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : ""));
+
+       if (DO_BIC(BIC_SAMACTMHz))
+               outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : ""));
+
        if (DO_BIC(BIC_Totl_c0))
                outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
        if (DO_BIC(BIC_Any_c0))
@@ -1671,26 +1964,35 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
                        outp += sprintf(outp, "SMI: %d\n", t->smi_count);
 
                for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
-                       outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
+                       outp +=
+                           sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+                                   t->counter[i], mp->path);
                }
        }
 
-       if (c) {
+       if (c && is_cpu_first_thread_in_core(t, c, p)) {
                outp += sprintf(outp, "core: %d\n", c->core_id);
                outp += sprintf(outp, "c3: %016llX\n", c->c3);
                outp += sprintf(outp, "c6: %016llX\n", c->c6);
                outp += sprintf(outp, "c7: %016llX\n", c->c7);
                outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
                outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
-               outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
+
+               const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale;
+               const double energy_scale = c->core_energy.scale;
+
+               if (c->core_energy.unit == RAPL_UNIT_JOULES)
+                       outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);
 
                for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
-                       outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
+                       outp +=
+                           sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+                                   c->counter[i], mp->path);
                }
                outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
        }
 
-       if (p) {
+       if (p && is_cpu_first_core_in_package(t, c, p)) {
                outp += sprintf(outp, "package: %d\n", p->package_id);
 
                outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
@@ -1710,16 +2012,18 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
                outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
                outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
                outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
-               outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
-               outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
-               outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
-               outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
-               outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
-               outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
+               outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value);
+               outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value);
+               outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value);
+               outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value);
+               outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value);
+               outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value);
                outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
 
                for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
-                       outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
+                       outp +=
+                           sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+                                   p->counter[i], mp->path);
                }
        }
 
@@ -1728,6 +2032,23 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
        return 0;
 }
 
+double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval)
+{
+       assert(desired_unit != RAPL_UNIT_INVALID);
+
+       /*
+        * For now we don't expect anything other than joules,
+        * so just simplify the logic.
+        */
+       assert(c->unit == RAPL_UNIT_JOULES);
+
+       const double scaled = c->raw_value * c->scale;
+
+       if (desired_unit == RAPL_UNIT_WATTS)
+               return scaled / interval;
+       return scaled;
+}
+
 /*
  * column formatting convention & formats
  */
@@ -1921,9 +2242,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
        if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
                outp +=
-                   sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
+                   sprintf(outp, fmt8, (printed++ ? delim : ""),
+                           rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float));
        if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
-               outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
+               outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+                               rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float));
 
        /* print per-package data only for 1st core in package */
        if (!is_cpu_first_core_in_package(t, c, p))
@@ -1951,6 +2274,24 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        if (DO_BIC(BIC_GFXACTMHz))
                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
 
+       /* SAMmc6 */
+       if (DO_BIC(BIC_SAM_mc6)) {
+               if (p->sam_mc6_ms == -1) {      /* detect GFX counter reset */
+                       outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
+               } else {
+                       outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+                                       p->sam_mc6_ms / 10.0 / interval_float);
+               }
+       }
+
+       /* SAMMHz */
+       if (DO_BIC(BIC_SAMMHz))
+               outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz);
+
+       /* SAMACTMHz */
+       if (DO_BIC(BIC_SAMACTMHz))
+               outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz);
+
        /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
        if (DO_BIC(BIC_Totl_c0))
                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
@@ -1976,43 +2317,59 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        if (DO_BIC(BIC_Pkgpc10))
                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
 
-       if (DO_BIC(BIC_CPU_LPI))
-               outp +=
-                   sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
-       if (DO_BIC(BIC_SYS_LPI))
-               outp +=
-                   sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+       if (DO_BIC(BIC_CPU_LPI)) {
+               if (p->cpu_lpi >= 0)
+                       outp +=
+                           sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+                                   100.0 * p->cpu_lpi / 1000000.0 / interval_float);
+               else
+                       outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+       }
+       if (DO_BIC(BIC_SYS_LPI)) {
+               if (p->sys_lpi >= 0)
+                       outp +=
+                           sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+                                   100.0 * p->sys_lpi / 1000000.0 / interval_float);
+               else
+                       outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+       }
 
        if (DO_BIC(BIC_PkgWatt))
                outp +=
-                   sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
-
+                   sprintf(outp, fmt8, (printed++ ? delim : ""),
+                           rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float));
        if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
                outp +=
-                   sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
+                   sprintf(outp, fmt8, (printed++ ? delim : ""),
+                           rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float));
        if (DO_BIC(BIC_GFXWatt))
                outp +=
-                   sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
+                   sprintf(outp, fmt8, (printed++ ? delim : ""),
+                           rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float));
        if (DO_BIC(BIC_RAMWatt))
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""),
-                           p->energy_dram * rapl_dram_energy_units / interval_float);
+                           rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float));
        if (DO_BIC(BIC_Pkg_J))
-               outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
+               outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+                               rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float));
        if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
-               outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
+               outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+                               rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float));
        if (DO_BIC(BIC_GFX_J))
-               outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
+               outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+                               rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float));
        if (DO_BIC(BIC_RAM_J))
-               outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
+               outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+                               rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float));
        if (DO_BIC(BIC_PKG__))
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""),
-                           100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+                           rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float));
        if (DO_BIC(BIC_RAM__))
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""),
-                           100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+                           rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float));
        /* UncMHz */
        if (DO_BIC(BIC_UNCORE_MHZ))
                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
@@ -2121,12 +2478,22 @@ int delta_package(struct pkg_data *new, struct pkg_data *old)
        old->gfx_mhz = new->gfx_mhz;
        old->gfx_act_mhz = new->gfx_act_mhz;
 
-       old->energy_pkg = new->energy_pkg - old->energy_pkg;
-       old->energy_cores = new->energy_cores - old->energy_cores;
-       old->energy_gfx = new->energy_gfx - old->energy_gfx;
-       old->energy_dram = new->energy_dram - old->energy_dram;
-       old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
-       old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
+       /* flag an error when mc6 counter resets/wraps */
+       if (old->sam_mc6_ms > new->sam_mc6_ms)
+               old->sam_mc6_ms = -1;
+       else
+               old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms;
+
+       old->sam_mhz = new->sam_mhz;
+       old->sam_act_mhz = new->sam_act_mhz;
+
+       old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value;
+       old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value;
+       old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value;
+       old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value;
+       old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value;
+       old->rapl_dram_perf_status.raw_value =
+           new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;
 
        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
                if (mp->format == FORMAT_RAW)
@@ -2150,7 +2517,7 @@ void delta_core(struct core_data *new, struct core_data *old)
        old->core_throt_cnt = new->core_throt_cnt;
        old->mc6_us = new->mc6_us - old->mc6_us;
 
-       DELTA_WRAP32(new->core_energy, old->core_energy);
+       DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);
 
        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
                if (mp->format == FORMAT_RAW)
@@ -2277,6 +2644,13 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
        return retval;
 }
 
+void rapl_counter_clear(struct rapl_counter *c)
+{
+       c->raw_value = 0;
+       c->scale = 0.0;
+       c->unit = RAPL_UNIT_INVALID;
+}
+
 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
        int i;
@@ -2304,7 +2678,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        c->c7 = 0;
        c->mc6_us = 0;
        c->core_temp_c = 0;
-       c->core_energy = 0;
+       rapl_counter_clear(&c->core_energy);
        c->core_throt_cnt = 0;
 
        p->pkg_wtd_core_c0 = 0;
@@ -2325,18 +2699,21 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        p->cpu_lpi = 0;
        p->sys_lpi = 0;
 
-       p->energy_pkg = 0;
-       p->energy_dram = 0;
-       p->energy_cores = 0;
-       p->energy_gfx = 0;
-       p->rapl_pkg_perf_status = 0;
-       p->rapl_dram_perf_status = 0;
+       rapl_counter_clear(&p->energy_pkg);
+       rapl_counter_clear(&p->energy_dram);
+       rapl_counter_clear(&p->energy_cores);
+       rapl_counter_clear(&p->energy_gfx);
+       rapl_counter_clear(&p->rapl_pkg_perf_status);
+       rapl_counter_clear(&p->rapl_dram_perf_status);
        p->pkg_temp_c = 0;
 
        p->gfx_rc6_ms = 0;
        p->uncore_mhz = 0;
        p->gfx_mhz = 0;
        p->gfx_act_mhz = 0;
+       p->sam_mc6_ms = 0;
+       p->sam_mhz = 0;
+       p->sam_act_mhz = 0;
        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
                t->counter[i] = 0;
 
@@ -2347,6 +2724,20 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
                p->counter[i] = 0;
 }
 
+void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src)
+{
+       /* Copy unit and scale from src if dst is not initialized */
+       if (dst->unit == RAPL_UNIT_INVALID) {
+               dst->unit = src->unit;
+               dst->scale = src->scale;
+       }
+
+       assert(dst->unit == src->unit);
+       assert(dst->scale == src->scale);
+
+       dst->raw_value += src->raw_value;
+}
+
 int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
        int i;
@@ -2393,7 +2784,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
        average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
 
-       average.cores.core_energy += c->core_energy;
+       rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy);
 
        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
                if (mp->format == FORMAT_RAW)
@@ -2428,25 +2819,29 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        average.packages.cpu_lpi = p->cpu_lpi;
        average.packages.sys_lpi = p->sys_lpi;
 
-       average.packages.energy_pkg += p->energy_pkg;
-       average.packages.energy_dram += p->energy_dram;
-       average.packages.energy_cores += p->energy_cores;
-       average.packages.energy_gfx += p->energy_gfx;
+       rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg);
+       rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram);
+       rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores);
+       rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx);
 
        average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
        average.packages.uncore_mhz = p->uncore_mhz;
        average.packages.gfx_mhz = p->gfx_mhz;
        average.packages.gfx_act_mhz = p->gfx_act_mhz;
+       average.packages.sam_mc6_ms = p->sam_mc6_ms;
+       average.packages.sam_mhz = p->sam_mhz;
+       average.packages.sam_act_mhz = p->sam_act_mhz;
 
        average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
 
-       average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
-       average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
+       rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status);
+       rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status);
 
        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
-               if (mp->format == FORMAT_RAW)
-                       continue;
-               average.packages.counter[i] += p->counter[i];
+               if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0))
+                       average.packages.counter[i] = p->counter[i];
+               else
+                       average.packages.counter[i] += p->counter[i];
        }
        return 0;
 }
@@ -2578,6 +2973,7 @@ unsigned long long snapshot_sysfs_counter(char *path)
 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
 {
        if (mp->msr_num != 0) {
+               assert(!no_msr);
                if (get_msr(cpu, mp->msr_num, counterp))
                        return -1;
        } else {
@@ -2599,7 +2995,7 @@ unsigned long long get_uncore_mhz(int package, int die)
 {
        char path[128];
 
-       sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/current_freq_khz", package,
+       sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package,
                die);
 
        return (snapshot_sysfs_counter(path) / 1000);
@@ -2627,6 +3023,9 @@ int get_epb(int cpu)
        return epb;
 
 msr_fallback:
+       if (no_msr)
+               return -1;
+
        get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
 
        return msr & 0xf;
@@ -2700,187 +3099,495 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt)
        return 0;
 }
 
-/*
- * get_counters(...)
- * migrate to cpu
- * acquire and record local counters for that cpu
- */
-int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+struct amperf_group_fd {
+       int aperf;              /* Also the group descriptor */
+       int mperf;
+};
+
+static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr)
 {
-       int cpu = t->cpu_id;
-       unsigned long long msr;
-       int aperf_mperf_retry_count = 0;
-       struct msr_counter *mp;
-       int i;
+       int fdmt;
+       int bytes_read;
+       char buf[64];
+       int ret = -1;
 
-       if (cpu_migrate(cpu)) {
-               fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
-               return -1;
+       fdmt = open(path, O_RDONLY, 0);
+       if (fdmt == -1) {
+               if (debug)
+                       fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+               ret = -1;
+               goto cleanup_and_exit;
        }
 
-       gettimeofday(&t->tv_begin, (struct timezone *)NULL);
+       bytes_read = read(fdmt, buf, sizeof(buf) - 1);
+       if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) {
+               if (debug)
+                       fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+               ret = -1;
+               goto cleanup_and_exit;
+       }
 
-       if (first_counter_read)
-               get_apic_id(t);
-retry:
-       t->tsc = rdtsc();       /* we are running on local CPU of interest */
+       buf[bytes_read] = '\0';
 
-       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
-           || soft_c1_residency_display(BIC_Avg_MHz)) {
-               unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
+       if (sscanf(buf, parse_format, value_ptr) != 1) {
+               if (debug)
+                       fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+               ret = -1;
+               goto cleanup_and_exit;
+       }
 
-               /*
-                * The TSC, APERF and MPERF must be read together for
-                * APERF/MPERF and MPERF/TSC to give accurate results.
-                *
-                * Unfortunately, APERF and MPERF are read by
-                * individual system call, so delays may occur
-                * between them.  If the time to read them
-                * varies by a large amount, we re-read them.
-                */
+       ret = 0;
 
-               /*
-                * This initial dummy APERF read has been seen to
-                * reduce jitter in the subsequent reads.
-                */
+cleanup_and_exit:
+       close(fdmt);
+       return ret;
+}
 
-               if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
-                       return -3;
+static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format)
+{
+       unsigned int v;
+       int status;
 
-               t->tsc = rdtsc();       /* re-read close to APERF */
+       status = read_perf_counter_info(path, parse_format, &v);
+       if (status)
+               v = -1;
 
-               tsc_before = t->tsc;
+       return v;
+}
 
-               if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
-                       return -3;
+static unsigned int read_msr_type(void)
+{
+       const char *const path = "/sys/bus/event_source/devices/msr/type";
+       const char *const format = "%u";
 
-               tsc_between = rdtsc();
+       return read_perf_counter_info_n(path, format);
+}
 
-               if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
-                       return -4;
+static unsigned int read_aperf_config(void)
+{
+       const char *const path = "/sys/bus/event_source/devices/msr/events/aperf";
+       const char *const format = "event=%x";
 
-               tsc_after = rdtsc();
+       return read_perf_counter_info_n(path, format);
+}
 
-               aperf_time = tsc_between - tsc_before;
-               mperf_time = tsc_after - tsc_between;
+static unsigned int read_mperf_config(void)
+{
+       const char *const path = "/sys/bus/event_source/devices/msr/events/mperf";
+       const char *const format = "event=%x";
 
-               /*
-                * If the system call latency to read APERF and MPERF
-                * differ by more than 2x, then try again.
-                */
-               if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
-                       aperf_mperf_retry_count++;
-                       if (aperf_mperf_retry_count < 5)
-                               goto retry;
-                       else
-                               warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
-               }
-               aperf_mperf_retry_count = 0;
+       return read_perf_counter_info_n(path, format);
+}
 
-               t->aperf = t->aperf * aperf_mperf_multiplier;
-               t->mperf = t->mperf * aperf_mperf_multiplier;
-       }
+static unsigned int read_perf_type(const char *subsys)
+{
+       const char *const path_format = "/sys/bus/event_source/devices/%s/type";
+       const char *const format = "%u";
+       char path[128];
 
-       if (DO_BIC(BIC_IPC))
-               if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
-                       return -4;
+       snprintf(path, sizeof(path), path_format, subsys);
 
-       if (DO_BIC(BIC_IRQ))
-               t->irq_count = irqs_per_cpu[cpu];
-       if (DO_BIC(BIC_SMI)) {
-               if (get_msr(cpu, MSR_SMI_COUNT, &msr))
-                       return -5;
-               t->smi_count = msr & 0xFFFFFFFF;
-       }
-       if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) {
-               if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
-                       return -6;
-       }
+       return read_perf_counter_info_n(path, format);
+}
 
-       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
-               if (get_mp(cpu, mp, &t->counter[i]))
-                       return -10;
-       }
+static unsigned int read_rapl_config(const char *subsys, const char *event_name)
+{
+       const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s";
+       const char *const format = "event=%x";
+       char path[128];
 
-       /* collect core counters only for 1st thread in core */
-       if (!is_cpu_first_thread_in_core(t, c, p))
-               goto done;
+       snprintf(path, sizeof(path), path_format, subsys, event_name);
 
-       if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
-               if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
-                       return -6;
-       }
+       return read_perf_counter_info_n(path, format);
+}
 
-       if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) {
-               if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
-                       return -7;
-       } else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) {
-               if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
-                       return -7;
-       }
+static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name)
+{
+       const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit";
+       const char *const format = "%s";
+       char path[128];
+       char unit_buffer[16];
 
-       if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
-               if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
-                       return -8;
-               else if (t->is_atom) {
-                       /*
-                        * For Atom CPUs that has core cstate deeper than c6,
-                        * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
-                        * Minus CC7 (and deeper cstates) residency to get
-                        * accturate cc6 residency.
-                        */
-                       c->c6 -= c->c7;
-               }
-       }
+       snprintf(path, sizeof(path), path_format, subsys, event_name);
 
-       if (DO_BIC(BIC_Mod_c6))
-               if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
-                       return -8;
+       read_perf_counter_info(path, format, &unit_buffer);
+       if (strcmp("Joules", unit_buffer) == 0)
+               return RAPL_UNIT_JOULES;
 
-       if (DO_BIC(BIC_CoreTmp)) {
-               if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
-                       return -9;
-               c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
-       }
+       return RAPL_UNIT_INVALID;
+}
 
-       if (DO_BIC(BIC_CORE_THROT_CNT))
-               get_core_throt_cnt(cpu, &c->core_throt_cnt);
+static double read_perf_rapl_scale(const char *subsys, const char *event_name)
+{
+       const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale";
+       const char *const format = "%lf";
+       char path[128];
+       double scale;
 
-       if (platform->rapl_msrs & RAPL_AMD_F17H) {
-               if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
-                       return -14;
-               c->core_energy = msr & 0xFFFFFFFF;
-       }
+       snprintf(path, sizeof(path), path_format, subsys, event_name);
 
-       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
-               if (get_mp(cpu, mp, &c->counter[i]))
-                       return -10;
-       }
+       if (read_perf_counter_info(path, format, &scale))
+               return 0.0;
 
-       /* collect package counters only for 1st core in package */
-       if (!is_cpu_first_core_in_package(t, c, p))
-               goto done;
+       return scale;
+}
 
-       if (DO_BIC(BIC_Totl_c0)) {
-               if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
-                       return -10;
-       }
-       if (DO_BIC(BIC_Any_c0)) {
-               if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
-                       return -11;
-       }
-       if (DO_BIC(BIC_GFX_c0)) {
-               if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
-                       return -12;
-       }
-       if (DO_BIC(BIC_CPUGFX)) {
-               if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
-                       return -13;
-       }
-       if (DO_BIC(BIC_Pkgpc3))
-               if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
-                       return -9;
-       if (DO_BIC(BIC_Pkgpc6)) {
+static struct amperf_group_fd open_amperf_fd(int cpu)
+{
+       const unsigned int msr_type = read_msr_type();
+       const unsigned int aperf_config = read_aperf_config();
+       const unsigned int mperf_config = read_mperf_config();
+       struct amperf_group_fd fds = {.aperf = -1, .mperf = -1 };
+
+       fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP);
+       fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP);
+
+       return fds;
+}
+
+static int get_amperf_fd(int cpu)
+{
+       assert(fd_amperf_percpu);
+
+       if (fd_amperf_percpu[cpu].aperf)
+               return fd_amperf_percpu[cpu].aperf;
+
+       fd_amperf_percpu[cpu] = open_amperf_fd(cpu);
+
+       return fd_amperf_percpu[cpu].aperf;
+}
+
+/* Read APERF, MPERF and TSC using the perf API. */
+static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu)
+{
+       union {
+               struct {
+                       unsigned long nr_entries;
+                       unsigned long aperf;
+                       unsigned long mperf;
+               };
+
+               unsigned long as_array[3];
+       } cnt;
+
+       const int fd_amperf = get_amperf_fd(cpu);
+
+       /*
+        * Read the TSC with rdtsc, because we want the absolute value and not
+        * the offset from the start of the counter.
+        */
+       t->tsc = rdtsc();
+
+       const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array));
+
+       if (n != sizeof(cnt.as_array))
+               return -2;
+
+       t->aperf = cnt.aperf * aperf_mperf_multiplier;
+       t->mperf = cnt.mperf * aperf_mperf_multiplier;
+
+       return 0;
+}
+
+/* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */
+static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu)
+{
+       unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
+       int aperf_mperf_retry_count = 0;
+
+       /*
+        * The TSC, APERF and MPERF must be read together for
+        * APERF/MPERF and MPERF/TSC to give accurate results.
+        *
+        * Unfortunately, APERF and MPERF are read by
+        * individual system call, so delays may occur
+        * between them.  If the time to read them
+        * varies by a large amount, we re-read them.
+        */
+
+       /*
+        * This initial dummy APERF read has been seen to
+        * reduce jitter in the subsequent reads.
+        */
+
+       if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+               return -3;
+
+retry:
+       t->tsc = rdtsc();       /* re-read close to APERF */
+
+       tsc_before = t->tsc;
+
+       if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+               return -3;
+
+       tsc_between = rdtsc();
+
+       if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
+               return -4;
+
+       tsc_after = rdtsc();
+
+       aperf_time = tsc_between - tsc_before;
+       mperf_time = tsc_after - tsc_between;
+
+       /*
+        * If the system call latency to read APERF and MPERF
+        * differ by more than 2x, then try again.
+        */
+       if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
+               aperf_mperf_retry_count++;
+               if (aperf_mperf_retry_count < 5)
+                       goto retry;
+               else
+                       warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
+       }
+       aperf_mperf_retry_count = 0;
+
+       t->aperf = t->aperf * aperf_mperf_multiplier;
+       t->mperf = t->mperf * aperf_mperf_multiplier;
+
+       return 0;
+}
+
+size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci)
+{
+       size_t ret = 0;
+
+       for (int i = 0; i < NUM_RAPL_COUNTERS; ++i)
+               if (rci->source[i] == RAPL_SOURCE_PERF)
+                       ++ret;
+
+       return ret;
+}
+
+void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx)
+{
+       rc->raw_value = rci->data[idx];
+       rc->unit = rci->unit[idx];
+       rc->scale = rci->scale[idx];
+}
+
+int get_rapl_counters(int cpu, int domain, struct core_data *c, struct pkg_data *p)
+{
+       unsigned long long perf_data[NUM_RAPL_COUNTERS + 1];
+       struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain];
+
+       if (debug)
+               fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain);
+
+       assert(rapl_counter_info_perdomain);
+
+       /*
+        * If we have any perf counters to read, read them all now, in bulk
+        */
+       if (rci->fd_perf != -1) {
+               size_t num_perf_counters = rapl_counter_info_count_perf(rci);
+               const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
+               const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data));
+
+               if (actual_read_size != expected_read_size)
+                       err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size,
+                           actual_read_size);
+       }
+
+       for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) {
+               switch (rci->source[i]) {
+               case RAPL_SOURCE_NONE:
+                       break;
+
+               case RAPL_SOURCE_PERF:
+                       assert(pi < ARRAY_SIZE(perf_data));
+                       assert(rci->fd_perf != -1);
+
+                       if (debug)
+                               fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n",
+                                       i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]);
+
+                       rci->data[i] = perf_data[pi];
+
+                       ++pi;
+                       break;
+
+               case RAPL_SOURCE_MSR:
+                       if (debug)
+                               fprintf(stderr, "Reading rapl counter via msr at %u\n", i);
+
+                       assert(!no_msr);
+                       if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) {
+                               if (get_msr_sum(cpu, rci->msr[i], &rci->data[i]))
+                                       return -13 - i;
+                       } else {
+                               if (get_msr(cpu, rci->msr[i], &rci->data[i]))
+                                       return -13 - i;
+                       }
+
+                       rci->data[i] &= rci->msr_mask[i];
+                       if (rci->msr_shift[i] >= 0)
+                               rci->data[i] >>= abs(rci->msr_shift[i]);
+                       else
+                               rci->data[i] <<= abs(rci->msr_shift[i]);
+
+                       break;
+               }
+       }
+
+       _Static_assert(NUM_RAPL_COUNTERS == 7);
+       write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG);
+       write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES);
+       write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM);
+       write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX);
+       write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS);
+       write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS);
+       write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY);
+
+       return 0;
+}
+
+/*
+ * get_counters(...)
+ * migrate to cpu
+ * acquire and record local counters for that cpu
+ */
+int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       int cpu = t->cpu_id;
+       unsigned long long msr;
+       struct msr_counter *mp;
+       int i;
+       int status;
+
+       if (cpu_migrate(cpu)) {
+               fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu);
+               return -1;
+       }
+
+       gettimeofday(&t->tv_begin, (struct timezone *)NULL);
+
+       if (first_counter_read)
+               get_apic_id(t);
+
+       t->tsc = rdtsc();       /* we are running on local CPU of interest */
+
+       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
+           || soft_c1_residency_display(BIC_Avg_MHz)) {
+               int status = -1;
+
+               assert(!no_perf || !no_msr);
+
+               switch (amperf_source) {
+               case AMPERF_SOURCE_PERF:
+                       status = read_aperf_mperf_tsc_perf(t, cpu);
+                       break;
+               case AMPERF_SOURCE_MSR:
+                       status = read_aperf_mperf_tsc_msr(t, cpu);
+                       break;
+               }
+
+               if (status != 0)
+                       return status;
+       }
+
+       if (DO_BIC(BIC_IPC))
+               if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
+                       return -4;
+
+       if (DO_BIC(BIC_IRQ))
+               t->irq_count = irqs_per_cpu[cpu];
+       if (DO_BIC(BIC_SMI)) {
+               if (get_msr(cpu, MSR_SMI_COUNT, &msr))
+                       return -5;
+               t->smi_count = msr & 0xFFFFFFFF;
+       }
+       if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) {
+               if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
+                       return -6;
+       }
+
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+               if (get_mp(cpu, mp, &t->counter[i]))
+                       return -10;
+       }
+
+       /* collect core counters only for 1st thread in core */
+       if (!is_cpu_first_thread_in_core(t, c, p))
+               goto done;
+
+       if (platform->has_per_core_rapl) {
+               status = get_rapl_counters(cpu, c->core_id, c, p);
+               if (status != 0)
+                       return status;
+       }
+
+       if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
+               if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
+                       return -6;
+       }
+
+       if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) {
+               if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
+                       return -7;
+       } else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) {
+               if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
+                       return -7;
+       }
+
+       if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
+               if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
+                       return -8;
+               else if (t->is_atom) {
+                       /*
+                        * For Atom CPUs that has core cstate deeper than c6,
+                        * For Atom CPUs that have a core C-state deeper than C6,
+                        * MSR_CORE_C6_RESIDENCY returns the residency of CC6 and deeper.
+                        * Subtract the CC7 (and deeper C-state) residency to get
+                        * an accurate CC6 residency.
+                       c->c6 -= c->c7;
+               }
+       }
+
+       if (DO_BIC(BIC_Mod_c6))
+               if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
+                       return -8;
+
+       if (DO_BIC(BIC_CoreTmp)) {
+               if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
+                       return -9;
+               c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
+       }
+
+       if (DO_BIC(BIC_CORE_THROT_CNT))
+               get_core_throt_cnt(cpu, &c->core_throt_cnt);
+
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+               if (get_mp(cpu, mp, &c->counter[i]))
+                       return -10;
+       }
+
+       /* collect package counters only for 1st core in package */
+       if (!is_cpu_first_core_in_package(t, c, p))
+               goto done;
+
+       if (DO_BIC(BIC_Totl_c0)) {
+               if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
+                       return -10;
+       }
+       if (DO_BIC(BIC_Any_c0)) {
+               if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
+                       return -11;
+       }
+       if (DO_BIC(BIC_GFX_c0)) {
+               if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
+                       return -12;
+       }
+       if (DO_BIC(BIC_CPUGFX)) {
+               if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
+                       return -13;
+       }
+       if (DO_BIC(BIC_Pkgpc3))
+               if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
+                       return -9;
+       if (DO_BIC(BIC_Pkgpc6)) {
                if (platform->has_msr_atom_pkg_c6_residency) {
                        if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
                                return -10;
@@ -2911,59 +3618,39 @@ retry:
        if (DO_BIC(BIC_SYS_LPI))
                p->sys_lpi = cpuidle_cur_sys_lpi_us;
 
-       if (platform->rapl_msrs & RAPL_PKG) {
-               if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
-                       return -13;
-               p->energy_pkg = msr;
-       }
-       if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
-               if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
-                       return -14;
-               p->energy_cores = msr;
-       }
-       if (platform->rapl_msrs & RAPL_DRAM) {
-               if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
-                       return -15;
-               p->energy_dram = msr;
-       }
-       if (platform->rapl_msrs & RAPL_GFX) {
-               if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
-                       return -16;
-               p->energy_gfx = msr;
-       }
-       if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
-               if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
-                       return -16;
-               p->rapl_pkg_perf_status = msr;
-       }
-       if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
-               if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
-                       return -16;
-               p->rapl_dram_perf_status = msr;
-       }
-       if (platform->rapl_msrs & RAPL_AMD_F17H) {
-               if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
-                       return -13;
-               p->energy_pkg = msr;
+       if (!platform->has_per_core_rapl) {
+               status = get_rapl_counters(cpu, p->package_id, c, p);
+               if (status != 0)
+                       return status;
        }
+
        if (DO_BIC(BIC_PkgTmp)) {
                if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
                        return -17;
                p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
        }
 
-       if (DO_BIC(BIC_GFX_rc6))
-               p->gfx_rc6_ms = gfx_cur_rc6_ms;
-
        /* n.b. assume die0 uncore frequency applies to whole package */
        if (DO_BIC(BIC_UNCORE_MHZ))
                p->uncore_mhz = get_uncore_mhz(p->package_id, 0);
 
+       if (DO_BIC(BIC_GFX_rc6))
+               p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull;
+
        if (DO_BIC(BIC_GFXMHz))
-               p->gfx_mhz = gfx_cur_mhz;
+               p->gfx_mhz = gfx_info[GFX_MHz].val;
 
        if (DO_BIC(BIC_GFXACTMHz))
-               p->gfx_act_mhz = gfx_act_mhz;
+               p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val;
+
+       if (DO_BIC(BIC_SAM_mc6))
+               p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull;
+
+       if (DO_BIC(BIC_SAMMHz))
+               p->sam_mhz = gfx_info[SAM_MHz].val;
+
+       if (DO_BIC(BIC_SAMACTMHz))
+               p->sam_act_mhz = gfx_info[SAM_ACTMHz].val;
 
        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
                if (get_mp(cpu, mp, &p->counter[i]))
@@ -3053,7 +3740,7 @@ void probe_cst_limit(void)
        unsigned long long msr;
        int *pkg_cstate_limits;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        switch (platform->cst_limit) {
@@ -3097,7 +3784,7 @@ static void dump_platform_info(void)
        unsigned long long msr;
        unsigned int ratio;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
@@ -3115,7 +3802,7 @@ static void dump_power_ctl(void)
 {
        unsigned long long msr;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
@@ -3321,7 +4008,7 @@ static void dump_cst_cfg(void)
 {
        unsigned long long msr;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
@@ -3393,7 +4080,7 @@ void print_irtl(void)
 {
        unsigned long long msr;
 
-       if (!platform->has_irtl_msrs)
+       if (!platform->has_irtl_msrs || no_msr)
                return;
 
        if (platform->supported_cstates & PC3) {
@@ -3443,12 +4130,64 @@ void free_fd_percpu(void)
 {
        int i;
 
+       if (!fd_percpu)
+               return;
+
        for (i = 0; i < topo.max_cpu_num + 1; ++i) {
                if (fd_percpu[i] != 0)
                        close(fd_percpu[i]);
        }
 
        free(fd_percpu);
+       fd_percpu = NULL;
+}
+
+void free_fd_amperf_percpu(void)
+{
+       int i;
+
+       if (!fd_amperf_percpu)
+               return;
+
+       for (i = 0; i < topo.max_cpu_num + 1; ++i) {
+               if (fd_amperf_percpu[i].mperf != 0)
+                       close(fd_amperf_percpu[i].mperf);
+
+               if (fd_amperf_percpu[i].aperf != 0)
+                       close(fd_amperf_percpu[i].aperf);
+       }
+
+       free(fd_amperf_percpu);
+       fd_amperf_percpu = NULL;
+}
+
+void free_fd_instr_count_percpu(void)
+{
+       if (!fd_instr_count_percpu)
+               return;
+
+       for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
+               if (fd_instr_count_percpu[i] != 0)
+                       close(fd_instr_count_percpu[i]);
+       }
+
+       free(fd_instr_count_percpu);
+       fd_instr_count_percpu = NULL;
+}
+
+void free_fd_rapl_percpu(void)
+{
+       if (!rapl_counter_info_perdomain)
+               return;
+
+       const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+
+       for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+               if (rapl_counter_info_perdomain[domain_id].fd_perf != -1)
+                       close(rapl_counter_info_perdomain[domain_id].fd_perf);
+       }
+
+       free(rapl_counter_info_perdomain);
 }
 
 void free_all_buffers(void)
@@ -3492,6 +4231,9 @@ void free_all_buffers(void)
        outp = NULL;
 
        free_fd_percpu();
+       free_fd_instr_count_percpu();
+       free_fd_amperf_percpu();
+       free_fd_rapl_percpu();
 
        free(irq_column_2_cpu);
        free(irqs_per_cpu);
@@ -3825,11 +4567,17 @@ static void update_effective_set(bool startup)
                err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
 }
 
+void linux_perf_init(void);
+void rapl_perf_init(void);
+
 void re_initialize(void)
 {
        free_all_buffers();
        setup_all_buffers(false);
-       fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
+       linux_perf_init();
+       rapl_perf_init();
+       fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
+               topo.allowed_cpus);
 }
 
 void set_max_cpu_num(void)
@@ -3940,85 +4688,43 @@ int snapshot_proc_interrupts(void)
 }
 
 /*
- * snapshot_gfx_rc6_ms()
+ * snapshot_graphics()
  *
- * record snapshot of
- * /sys/class/drm/card0/power/rc6_residency_ms
+ * record snapshot of specified graphics sysfs knob
  *
  * return 1 if config change requires a restart, else return 0
  */
-int snapshot_gfx_rc6_ms(void)
+int snapshot_graphics(int idx)
 {
        FILE *fp;
        int retval;
 
-       fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
-
-       retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
-       if (retval != 1)
-               err(1, "GFX rc6");
-
-       fclose(fp);
-
-       return 0;
-}
-
-/*
- * snapshot_gfx_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
- * when /sys/class/drm/card0/gt_cur_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_mhz(void)
-{
-       static FILE *fp;
-       int retval;
-
-       if (fp == NULL) {
-               fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
-               if (!fp)
-                       fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
-       } else {
-               rewind(fp);
-               fflush(fp);
-       }
-
-       retval = fscanf(fp, "%d", &gfx_cur_mhz);
-       if (retval != 1)
-               err(1, "GFX MHz");
-
-       return 0;
-}
-
-/*
- * snapshot_gfx_cur_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
- * when /sys/class/drm/card0/gt_act_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_act_mhz(void)
-{
-       static FILE *fp;
-       int retval;
-
-       if (fp == NULL) {
-               fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r");
-               if (!fp)
-                       fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
-       } else {
-               rewind(fp);
-               fflush(fp);
+       switch (idx) {
+       case GFX_rc6:
+       case SAM_mc6:
+               fp = fopen_or_die(gfx_info[idx].path, "r");
+               retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull);
+               if (retval != 1)
+                       err(1, "rc6");
+               fclose(fp);
+               return 0;
+       case GFX_MHz:
+       case GFX_ACTMHz:
+       case SAM_MHz:
+       case SAM_ACTMHz:
+               if (gfx_info[idx].fp == NULL) {
+                       gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r");
+               } else {
+                       rewind(gfx_info[idx].fp);
+                       fflush(gfx_info[idx].fp);
+               }
+               retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val);
+               if (retval != 1)
+                       err(1, "MHz");
+               return 0;
+       default:
+               return -EINVAL;
        }
-
-       retval = fscanf(fp, "%d", &gfx_act_mhz);
-       if (retval != 1)
-               err(1, "GFX ACT MHz");
-
-       return 0;
 }
 
 /*
@@ -4083,13 +4789,22 @@ int snapshot_proc_sysfs_files(void)
                        return 1;
 
        if (DO_BIC(BIC_GFX_rc6))
-               snapshot_gfx_rc6_ms();
+               snapshot_graphics(GFX_rc6);
 
        if (DO_BIC(BIC_GFXMHz))
-               snapshot_gfx_mhz();
+               snapshot_graphics(GFX_MHz);
 
        if (DO_BIC(BIC_GFXACTMHz))
-               snapshot_gfx_act_mhz();
+               snapshot_graphics(GFX_ACTMHz);
+
+       if (DO_BIC(BIC_SAM_mc6))
+               snapshot_graphics(SAM_mc6);
+
+       if (DO_BIC(BIC_SAMMHz))
+               snapshot_graphics(SAM_MHz);
+
+       if (DO_BIC(BIC_SAMACTMHz))
+               snapshot_graphics(SAM_ACTMHz);
 
        if (DO_BIC(BIC_CPU_LPI))
                snapshot_cpu_lpi_us();
@@ -4173,6 +4888,8 @@ int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
        int ret, idx;
        unsigned long long msr_cur, msr_last;
 
+       assert(!no_msr);
+
        if (!per_cpu_msr_sum)
                return 1;
 
@@ -4201,6 +4918,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
        UNUSED(c);
        UNUSED(p);
 
+       assert(!no_msr);
+
        for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
                unsigned long long msr_cur, msr_last;
                off_t offset;
@@ -4280,7 +4999,8 @@ release_msr:
 
 /*
  * set_my_sched_priority(pri)
- * return previous
+ * return previous priority on success
+ * return value < -20 on failure
  */
 int set_my_sched_priority(int priority)
 {
@@ -4290,16 +5010,16 @@ int set_my_sched_priority(int priority)
        errno = 0;
        original_priority = getpriority(PRIO_PROCESS, 0);
        if (errno && (original_priority == -1))
-               err(errno, "getpriority");
+               return -21;
 
        retval = setpriority(PRIO_PROCESS, 0, priority);
        if (retval)
-               errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", progname);
+               return -21;
 
        errno = 0;
        retval = getpriority(PRIO_PROCESS, 0);
        if (retval != priority)
-               err(retval, "getpriority(%d) != setpriority(%d)", retval, priority);
+               return -21;
 
        return original_priority;
 }
@@ -4314,6 +5034,9 @@ void turbostat_loop()
 
        /*
         * elevate own priority for interval mode
+        *
+        * ignore on error - we probably don't have permission to set it, but
+        * it's not a big deal
         */
        set_my_sched_priority(-20);
 
@@ -4399,10 +5122,13 @@ void check_dev_msr()
        struct stat sb;
        char pathname[32];
 
+       if (no_msr)
+               return;
+
        sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
        if (stat(pathname, &sb))
                if (system("/sbin/modprobe msr > /dev/null 2>&1"))
-                       err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
+                       no_msr = 1;
 }
 
 /*
@@ -4414,47 +5140,51 @@ int check_for_cap_sys_rawio(void)
 {
        cap_t caps;
        cap_flag_value_t cap_flag_value;
+       int ret = 0;
 
        caps = cap_get_proc();
        if (caps == NULL)
-               err(-6, "cap_get_proc\n");
+               return 1;
 
-       if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
-               err(-6, "cap_get\n");
+       if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) {
+               ret = 1;
+               goto free_and_exit;
+       }
 
        if (cap_flag_value != CAP_SET) {
-               warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
-               return 1;
+               ret = 1;
+               goto free_and_exit;
        }
 
+free_and_exit:
        if (cap_free(caps) == -1)
                err(-6, "cap_free\n");
 
-       return 0;
+       return ret;
 }
 
-void check_permissions(void)
+void check_msr_permission(void)
 {
-       int do_exit = 0;
+       int failed = 0;
        char pathname[32];
 
+       if (no_msr)
+               return;
+
        /* check for CAP_SYS_RAWIO */
-       do_exit += check_for_cap_sys_rawio();
+       failed += check_for_cap_sys_rawio();
 
        /* test file permissions */
        sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
        if (euidaccess(pathname, R_OK)) {
-               do_exit++;
-               warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
+               failed++;
        }
 
-       /* if all else fails, thell them to be root */
-       if (do_exit)
-               if (getuid() != 0)
-                       warnx("... or simply run as root");
-
-       if (do_exit)
-               exit(-6);
+       if (failed) {
+               warnx("Failed to access %s. Some of the counters may not be available\n"
+                     "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr");
+               no_msr = 1;
+       }
 }
 
 void probe_bclk(void)
@@ -4462,7 +5192,7 @@ void probe_bclk(void)
        unsigned long long msr;
        unsigned int base_ratio;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        if (platform->bclk_freq == BCLK_100MHZ)
@@ -4502,7 +5232,7 @@ static void dump_turbo_ratio_info(void)
        if (!has_turbo)
                return;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        if (platform->trl_msrs & TRL_LIMIT2)
@@ -4567,20 +5297,15 @@ static void dump_sysfs_file(char *path)
 static void probe_intel_uncore_frequency(void)
 {
        int i, j;
-       char path[128];
+       char path[256];
 
        if (!genuine_intel)
                return;
 
-       if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
-               return;
-
-       /* Cluster level sysfs not supported yet. */
-       if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
-               return;
+       if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
+               goto probe_cluster;
 
-       if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
-               BIC_PRESENT(BIC_UNCORE_MHZ);
+       BIC_PRESENT(BIC_UNCORE_MHZ);
 
        if (quiet)
                return;
@@ -4588,40 +5313,178 @@ static void probe_intel_uncore_frequency(void)
        for (i = 0; i < topo.num_packages; ++i) {
                for (j = 0; j < topo.num_die; ++j) {
                        int k, l;
+                       char path_base[128];
 
-                       sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz",
-                               i, j);
+                       sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
+                               j);
+
+                       sprintf(path, "%s/min_freq_khz", path_base);
                        k = read_sysfs_int(path);
-                       sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz",
-                               i, j);
+                       sprintf(path, "%s/max_freq_khz", path_base);
                        l = read_sysfs_int(path);
-                       fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
+                       fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
 
-                       sprintf(path,
-                               "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz",
-                               i, j);
+                       sprintf(path, "%s/initial_min_freq_khz", path_base);
                        k = read_sysfs_int(path);
-                       sprintf(path,
-                               "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz",
-                               i, j);
+                       sprintf(path, "%s/initial_max_freq_khz", path_base);
                        l = read_sysfs_int(path);
-                       fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000);
+                       fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+                       sprintf(path, "%s/current_freq_khz", path_base);
+                       k = read_sysfs_int(path);
+                       fprintf(outf, " %d MHz\n", k / 1000);
                }
        }
+       return;
+
+probe_cluster:
+       if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
+               return;
+
+       if (quiet)
+               return;
+
+       for (i = 0;; ++i) {
+               int k, l;
+               char path_base[128];
+               int package_id, domain_id, cluster_id;
+
+               sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i);
+
+               if (access(path_base, R_OK))
+                       break;
+
+               sprintf(path, "%s/package_id", path_base);
+               package_id = read_sysfs_int(path);
+
+               sprintf(path, "%s/domain_id", path_base);
+               domain_id = read_sysfs_int(path);
+
+               sprintf(path, "%s/fabric_cluster_id", path_base);
+               cluster_id = read_sysfs_int(path);
+
+               sprintf(path, "%s/min_freq_khz", path_base);
+               k = read_sysfs_int(path);
+               sprintf(path, "%s/max_freq_khz", path_base);
+               l = read_sysfs_int(path);
+               fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id,
+                       cluster_id, k / 1000, l / 1000);
+
+               sprintf(path, "%s/initial_min_freq_khz", path_base);
+               k = read_sysfs_int(path);
+               sprintf(path, "%s/initial_max_freq_khz", path_base);
+               l = read_sysfs_int(path);
+               fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+               sprintf(path, "%s/current_freq_khz", path_base);
+               k = read_sysfs_int(path);
+               fprintf(outf, " %d MHz\n", k / 1000);
+       }
 }
 
 static void probe_graphics(void)
 {
+       /* Xe graphics sysfs knobs */
+       if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) {
+               FILE *fp;
+               char buf[8];
+               bool gt0_is_gt;
+               int idx;
+
+               fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r");
+               if (!fp)
+                       goto next;
+
+               if (!fread(buf, sizeof(char), 7, fp)) {
+                       fclose(fp);
+                       goto next;
+               }
+               fclose(fp);
+
+               if (!strncmp(buf, "gt0-rc", strlen("gt0-rc")))
+                       gt0_is_gt = true;
+               else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc")))
+                       gt0_is_gt = false;
+               else
+                       goto next;
+
+               idx = gt0_is_gt ? GFX_rc6 : SAM_mc6;
+               gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms";
+
+               idx = gt0_is_gt ? GFX_MHz : SAM_MHz;
+               if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK))
+                       gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq";
+
+               idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz;
+               if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK))
+                       gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq";
+
+               idx = gt0_is_gt ? SAM_mc6 : GFX_rc6;
+               if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK))
+                       gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms";
+
+               idx = gt0_is_gt ? SAM_MHz : GFX_MHz;
+               if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK))
+                       gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq";
+
+               idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz;
+               if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK))
+                       gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq";
+
+               goto end;
+       }
+
+next:
+       /* New i915 graphics sysfs knobs */
+       if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) {
+               gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms";
+
+               if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK))
+                       gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz";
+
+               if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK))
+                       gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz";
+
+               if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK))
+                       gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms";
+
+               if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK))
+                       gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz";
+
+               if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK))
+                       gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz";
+
+               goto end;
+       }
+
+       /* Fall back to traditional i915 graphics sysfs knobs */
        if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
-               BIC_PRESENT(BIC_GFX_rc6);
+               gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms";
+
+       if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK))
+               gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz";
+       else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+               gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz";
 
-       if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) ||
-           !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
-               BIC_PRESENT(BIC_GFXMHz);
 
-       if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) ||
-           !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+       if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK))
+               gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz";
+       else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+               gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz";
+
+end:
+       if (gfx_info[GFX_rc6].path)
+               BIC_PRESENT(BIC_GFX_rc6);
+       if (gfx_info[GFX_MHz].path)
+               BIC_PRESENT(BIC_GFXMHz);
+       if (gfx_info[GFX_ACTMHz].path)
                BIC_PRESENT(BIC_GFXACTMHz);
+       if (gfx_info[SAM_mc6].path)
+               BIC_PRESENT(BIC_SAM_mc6);
+       if (gfx_info[SAM_MHz].path)
+               BIC_PRESENT(BIC_SAMMHz);
+       if (gfx_info[SAM_ACTMHz].path)
+               BIC_PRESENT(BIC_SAMACTMHz);
 }
 
 static void dump_sysfs_cstate_config(void)
@@ -4783,6 +5646,9 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        UNUSED(c);
        UNUSED(p);
 
+       if (no_msr)
+               return 0;
+
        if (!has_hwp)
                return 0;
 
@@ -4869,6 +5735,9 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
        UNUSED(c);
        UNUSED(p);
 
+       if (no_msr)
+               return 0;
+
        cpu = t->cpu_id;
 
        /* per-package */
@@ -4983,31 +5852,18 @@ void rapl_probe_intel(void)
        unsigned long long msr;
        unsigned int time_unit;
        double tdp;
+       const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt;
+       const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J;
 
-       if (rapl_joules) {
-               if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_Pkg_J);
-               if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_Cor_J);
-               if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_RAM_J);
-               if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_GFX_J);
-       } else {
-               if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_PkgWatt);
-               if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_CorWatt);
-               if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_RAMWatt);
-               if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_GFXWatt);
-       }
+       if (rapl_joules)
+               bic_enabled &= ~bic_watt_bits;
+       else
+               bic_enabled &= ~bic_joules_bits;
 
-       if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS)
-               BIC_PRESENT(BIC_PKG__);
-       if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)
-               BIC_PRESENT(BIC_RAM__);
+       if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS))
+               bic_enabled &= ~BIC_PKG__;
+       if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS))
+               bic_enabled &= ~BIC_RAM__;
 
        /* units on package 0, verify later other packages match */
        if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
@@ -5041,14 +5897,13 @@ void rapl_probe_amd(void)
 {
        unsigned long long msr;
        double tdp;
+       const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt;
+       const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J;
 
-       if (rapl_joules) {
-               BIC_PRESENT(BIC_Pkg_J);
-               BIC_PRESENT(BIC_Cor_J);
-       } else {
-               BIC_PRESENT(BIC_PkgWatt);
-               BIC_PRESENT(BIC_CorWatt);
-       }
+       if (rapl_joules)
+               bic_enabled &= ~bic_watt_bits;
+       else
+               bic_enabled &= ~bic_joules_bits;
 
        if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
                return;
@@ -5202,7 +6057,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
  */
 void probe_rapl(void)
 {
-       if (!platform->rapl_msrs)
+       if (!platform->rapl_msrs || no_msr)
                return;
 
        if (genuine_intel)
@@ -5258,7 +6113,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
        }
 
        /* Temperature Target MSR is Nehalem and newer only */
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                goto guess;
 
        if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
@@ -5305,6 +6160,9 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
        UNUSED(c);
        UNUSED(p);
 
+       if (no_msr)
+               return 0;
+
        if (!(do_dts || do_ptm))
                return 0;
 
@@ -5402,6 +6260,9 @@ void decode_feature_control_msr(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
                fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
                        base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
@@ -5411,6 +6272,9 @@ void decode_misc_enable_msr(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!genuine_intel)
                return;
 
@@ -5428,6 +6292,9 @@ void decode_misc_feature_control(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!platform->has_msr_misc_feature_control)
                return;
 
@@ -5449,6 +6316,9 @@ void decode_misc_pwr_mgmt_msr(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!platform->has_msr_misc_pwr_mgmt)
                return;
 
@@ -5468,6 +6338,9 @@ void decode_c6_demotion_policy_msr(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!platform->has_msr_c6_demotion_policy_config)
                return;
 
@@ -5489,7 +6362,8 @@ void print_dev_latency(void)
 
        fd = open(path, O_RDONLY);
        if (fd < 0) {
-               warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
+               if (debug)
+                       warnx("Read %s failed", path);
                return;
        }
 
@@ -5504,23 +6378,260 @@ void print_dev_latency(void)
        close(fd);
 }
 
+static int has_instr_count_access(void)
+{
+       int fd;
+       int has_access;
+
+       if (no_perf)
+               return 0;
+
+       fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
+       has_access = fd != -1;
+
+       if (fd != -1)
+               close(fd);
+
+       if (!has_access)
+               warnx("Failed to access %s. Some of the counters may not be available\n"
+                     "\tRun as root to enable them or use %s to disable the access explicitly",
+                     "instructions retired perf counter", "--no-perf");
+
+       return has_access;
+}
+
+bool is_aperf_access_required(void)
+{
+       return BIC_IS_ENABLED(BIC_Avg_MHz)
+           || BIC_IS_ENABLED(BIC_Busy)
+           || BIC_IS_ENABLED(BIC_Bzy_MHz)
+           || BIC_IS_ENABLED(BIC_IPC);
+}
+
+int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+                          double *scale_, enum rapl_unit *unit_)
+{
+       if (no_perf)
+               return -1;
+
+       const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name);
+
+       if (scale == 0.0)
+               return -1;
+
+       const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);
+
+       if (unit == RAPL_UNIT_INVALID)
+               return -1;
+
+       const unsigned int rapl_type = read_perf_type(cai->perf_subsys);
+       const unsigned int rapl_energy_pkg_config = read_rapl_config(cai->perf_subsys, cai->perf_name);
+
+       const int fd_counter =
+           open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
+       if (fd_counter == -1)
+               return -1;
+
+       /* If it's the first counter opened, make it a group descriptor */
+       if (rci->fd_perf == -1)
+               rci->fd_perf = fd_counter;
+
+       *scale_ = scale;
+       *unit_ = unit;
+       return fd_counter;
+}
+
+int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+                         double *scale, enum rapl_unit *unit)
+{
+       int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit);
+
+       if (debug)
+               fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
+
+       return ret;
+}
+
 /*
  * Linux-perf manages the HW instructions-retired counter
  * by enabling when requested, and hiding rollover
  */
 void linux_perf_init(void)
 {
-       if (!BIC_IS_ENABLED(BIC_IPC))
-               return;
-
        if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
                return;
 
-       fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
-       if (fd_instr_count_percpu == NULL)
-               err(-1, "calloc fd_instr_count_percpu");
+       if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) {
+               fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+               if (fd_instr_count_percpu == NULL)
+                       err(-1, "calloc fd_instr_count_percpu");
+       }
+
+       const bool aperf_required = is_aperf_access_required();
+
+       if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) {
+               fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu));
+               if (fd_amperf_percpu == NULL)
+                       err(-1, "calloc fd_amperf_percpu");
+       }
+}
+
+void rapl_perf_init(void)
+{
+       const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+       bool *domain_visited = calloc(num_domains, sizeof(bool));
+
+       rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
+       if (rapl_counter_info_perdomain == NULL)
+               err(-1, "calloc rapl_counter_info_percpu");
+
+       /*
+        * Initialize rapl_counter_info_percpu
+        */
+       for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+               struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id];
+
+               rci->fd_perf = -1;
+               for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) {
+                       rci->data[i] = 0;
+                       rci->source[i] = RAPL_SOURCE_NONE;
+               }
+       }
+
+       /*
+        * Open/probe the counters
+        * If can't get it via perf, fallback to MSR
+        */
+       for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) {
+
+               const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i];
+               bool has_counter = 0;
+               double scale;
+               enum rapl_unit unit;
+               int next_domain;
+
+               memset(domain_visited, 0, num_domains * sizeof(*domain_visited));
+
+               for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
+
+                       if (cpu_is_not_allowed(cpu))
+                               continue;
+
+                       /* Skip already seen and handled RAPL domains */
+                       next_domain =
+                           platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id;
+
+                       if (domain_visited[next_domain])
+                               continue;
+
+                       domain_visited[next_domain] = 1;
+
+                       struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];
+
+                       /* Check if the counter is enabled and accessible */
+                       if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) {
+
+                               /* Use perf API for this counter */
+                               if (!no_perf && cai->perf_name
+                                   && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
+                                       rci->source[cai->rci_index] = RAPL_SOURCE_PERF;
+                                       rci->scale[cai->rci_index] = scale * cai->compat_scale;
+                                       rci->unit[cai->rci_index] = unit;
+                                       rci->flags[cai->rci_index] = cai->flags;
+
+                                       /* Use MSR for this counter */
+                               } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
+                                       rci->source[cai->rci_index] = RAPL_SOURCE_MSR;
+                                       rci->msr[cai->rci_index] = cai->msr;
+                                       rci->msr_mask[cai->rci_index] = cai->msr_mask;
+                                       rci->msr_shift[cai->rci_index] = cai->msr_shift;
+                                       rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
+                                       rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
+                                       rci->flags[cai->rci_index] = cai->flags;
+                               }
+                       }
+
+                       if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE)
+                               has_counter = 1;
+               }
+
+               /* If any CPU has access to the counter, make it present */
+               if (has_counter)
+                       BIC_PRESENT(cai->bic);
+       }
+
+       free(domain_visited);
+}
+
+static int has_amperf_access_via_msr(void)
+{
+       if (no_msr)
+               return 0;
+
+       if (probe_msr(base_cpu, MSR_IA32_APERF))
+               return 0;
+
+       if (probe_msr(base_cpu, MSR_IA32_MPERF))
+               return 0;
+
+       return 1;
+}
+
+static int has_amperf_access_via_perf(void)
+{
+       struct amperf_group_fd fds;
+
+       /*
+        * Cache the last result, so we don't warn the user multiple times
+        *
+        * Negative means cached, no access
+        * Zero means not cached
+        * Positive means cached, has access
+        */
+       static int has_access_cached;
+
+       if (no_perf)
+               return 0;
+
+       if (has_access_cached != 0)
+               return has_access_cached > 0;
+
+       fds = open_amperf_fd(base_cpu);
+       has_access_cached = (fds.aperf != -1) && (fds.mperf != -1);
+
+       if (fds.aperf == -1)
+               warnx("Failed to access %s. Some of the counters may not be available\n"
+                     "\tRun as root to enable them or use %s to disable the access explicitly",
+                     "APERF perf counter", "--no-perf");
+       else
+               close(fds.aperf);
+
+       if (fds.mperf == -1)
+               warnx("Failed to access %s. Some of the counters may not be available\n"
+                     "\tRun as root to enable them or use %s to disable the access explicitly",
+                     "MPERF perf counter", "--no-perf");
+       else
+               close(fds.mperf);
+
+       if (has_access_cached == 0)
+               has_access_cached = -1;
+
+       return has_access_cached > 0;
+}
+
+/* Check if we can access APERF and MPERF */
+static int has_amperf_access(void)
+{
+       if (!is_aperf_access_required())
+               return 0;
+
+       if (!no_msr && has_amperf_access_via_msr())
+               return 1;
+
+       if (!no_perf && has_amperf_access_via_perf())
+               return 1;
 
-       BIC_PRESENT(BIC_IPC);
+       return 0;
 }
 
 void probe_cstates(void)
@@ -5563,7 +6674,7 @@ void probe_cstates(void)
        if (platform->has_msr_module_c6_res_ms)
                BIC_PRESENT(BIC_Mod_c6);
 
-       if (platform->has_ext_cst_msrs) {
+       if (platform->has_ext_cst_msrs && !no_msr) {
                BIC_PRESENT(BIC_Totl_c0);
                BIC_PRESENT(BIC_Any_c0);
                BIC_PRESENT(BIC_GFX_c0);
@@ -5623,6 +6734,7 @@ void process_cpuid()
        unsigned int eax, ebx, ecx, edx;
        unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
        unsigned long long ucode_patch = 0;
+       bool ucode_patch_valid = false;
 
        eax = ebx = ecx = edx = 0;
 
@@ -5650,8 +6762,12 @@ void process_cpuid()
        ecx_flags = ecx;
        edx_flags = edx;
 
-       if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
-               warnx("get_msr(UCODE)");
+       if (!no_msr) {
+               if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
+                       warnx("get_msr(UCODE)");
+               else
+                       ucode_patch_valid = true;
+       }
 
        /*
         * check max extended function levels of CPUID.
@@ -5662,9 +6778,12 @@ void process_cpuid()
        __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
 
        if (!quiet) {
-               fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
-                       family, model, stepping, family, model, stepping,
-                       (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+               fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
+                       family, model, stepping, family, model, stepping);
+               if (ucode_patch_valid)
+                       fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+               fputc('\n', outf);
+
                fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
                fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
                        ecx_flags & (1 << 0) ? "SSE3" : "-",
@@ -5700,10 +6819,11 @@ void process_cpuid()
 
        __cpuid(0x6, eax, ebx, ecx, edx);
        has_aperf = ecx & (1 << 0);
-       if (has_aperf) {
+       if (has_aperf && has_amperf_access()) {
                BIC_PRESENT(BIC_Avg_MHz);
                BIC_PRESENT(BIC_Busy);
                BIC_PRESENT(BIC_Bzy_MHz);
+               BIC_PRESENT(BIC_IPC);
        }
        do_dts = eax & (1 << 0);
        if (do_dts)
@@ -5786,6 +6906,15 @@ void process_cpuid()
                base_mhz = max_mhz = bus_mhz = edx = 0;
 
                __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
+
+               bclk = bus_mhz;
+
+               base_hz = base_mhz * 1000000;
+               has_base_hz = 1;
+
+               if (platform->enable_tsc_tweak)
+                       tsc_tweak = base_hz / tsc_hz;
+
                if (!quiet)
                        fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
                                base_mhz, max_mhz, bus_mhz);
@@ -5814,7 +6943,7 @@ void probe_pm_features(void)
 
        probe_thermal();
 
-       if (platform->has_nhm_msrs)
+       if (platform->has_nhm_msrs && !no_msr)
                BIC_PRESENT(BIC_SMI);
 
        if (!quiet)
@@ -6142,6 +7271,7 @@ void topology_update(void)
        topo.allowed_packages = 0;
        for_all_cpus(update_topo, ODD_COUNTERS);
 }
+
 void setup_all_buffers(bool startup)
 {
        topology_probe(startup);
@@ -6169,21 +7299,129 @@ void set_base_cpu(void)
        err(-ENODEV, "No valid cpus found");
 }
 
+static void set_amperf_source(void)
+{
+       amperf_source = AMPERF_SOURCE_PERF;
+
+       const bool aperf_required = is_aperf_access_required();
+
+       if (no_perf || !aperf_required || !has_amperf_access_via_perf())
+               amperf_source = AMPERF_SOURCE_MSR;
+
+       if (quiet || !debug)
+               return;
+
+       fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf");
+}
+
+bool has_added_counters(void)
+{
+       /*
+        * It only makes sense to call this after the command line is parsed,
+        * otherwise sys structure is not populated.
+        */
+
+       return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters;
+}
+
+bool is_msr_access_required(void)
+{
+       if (no_msr)
+               return false;
+
+       if (has_added_counters())
+               return true;
+
+       return BIC_IS_ENABLED(BIC_SMI)
+           || BIC_IS_ENABLED(BIC_CPU_c1)
+           || BIC_IS_ENABLED(BIC_CPU_c3)
+           || BIC_IS_ENABLED(BIC_CPU_c6)
+           || BIC_IS_ENABLED(BIC_CPU_c7)
+           || BIC_IS_ENABLED(BIC_Mod_c6)
+           || BIC_IS_ENABLED(BIC_CoreTmp)
+           || BIC_IS_ENABLED(BIC_Totl_c0)
+           || BIC_IS_ENABLED(BIC_Any_c0)
+           || BIC_IS_ENABLED(BIC_GFX_c0)
+           || BIC_IS_ENABLED(BIC_CPUGFX)
+           || BIC_IS_ENABLED(BIC_Pkgpc3)
+           || BIC_IS_ENABLED(BIC_Pkgpc6)
+           || BIC_IS_ENABLED(BIC_Pkgpc2)
+           || BIC_IS_ENABLED(BIC_Pkgpc7)
+           || BIC_IS_ENABLED(BIC_Pkgpc8)
+           || BIC_IS_ENABLED(BIC_Pkgpc9)
+           || BIC_IS_ENABLED(BIC_Pkgpc10)
+           /* TODO: Multiplex access with perf */
+           || BIC_IS_ENABLED(BIC_CorWatt)
+           || BIC_IS_ENABLED(BIC_Cor_J)
+           || BIC_IS_ENABLED(BIC_PkgWatt)
+           || BIC_IS_ENABLED(BIC_CorWatt)
+           || BIC_IS_ENABLED(BIC_GFXWatt)
+           || BIC_IS_ENABLED(BIC_RAMWatt)
+           || BIC_IS_ENABLED(BIC_Pkg_J)
+           || BIC_IS_ENABLED(BIC_Cor_J)
+           || BIC_IS_ENABLED(BIC_GFX_J)
+           || BIC_IS_ENABLED(BIC_RAM_J)
+           || BIC_IS_ENABLED(BIC_PKG__)
+           || BIC_IS_ENABLED(BIC_RAM__)
+           || BIC_IS_ENABLED(BIC_PkgTmp)
+           || (is_aperf_access_required() && !has_amperf_access_via_perf());
+}
+
+void check_msr_access(void)
+{
+       if (!is_msr_access_required())
+               no_msr = 1;
+
+       check_dev_msr();
+       check_msr_permission();
+
+       if (no_msr)
+               bic_disable_msr_access();
+}
+
+void check_perf_access(void)
+{
+       const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC);
+
+       if (no_perf || !intrcount_required || !has_instr_count_access())
+               bic_enabled &= ~BIC_IPC;
+
+       const bool aperf_required = is_aperf_access_required();
+
+       if (!aperf_required || !has_amperf_access()) {
+               bic_enabled &= ~BIC_Avg_MHz;
+               bic_enabled &= ~BIC_Busy;
+               bic_enabled &= ~BIC_Bzy_MHz;
+               bic_enabled &= ~BIC_IPC;
+       }
+}
+
 void turbostat_init()
 {
        setup_all_buffers(true);
        set_base_cpu();
-       check_dev_msr();
-       check_permissions();
+       check_msr_access();
+       check_perf_access();
        process_cpuid();
        probe_pm_features();
+       set_amperf_source();
        linux_perf_init();
+       rapl_perf_init();
 
        for_all_cpus(get_cpu_type, ODD_COUNTERS);
        for_all_cpus(get_cpu_type, EVEN_COUNTERS);
 
        if (DO_BIC(BIC_IPC))
                (void)get_instr_count_fd(base_cpu);
+
+       /*
+        * If TSC tweak is needed, but couldn't get it,
+        * disable more BICs, since it can't be reported accurately.
+        */
+       if (platform->enable_tsc_tweak && !has_base_hz) {
+               bic_enabled &= ~BIC_Busy;
+               bic_enabled &= ~BIC_Bzy_MHz;
+       }
 }
 
 int fork_it(char **argv)
@@ -6259,7 +7497,7 @@ int get_and_dump_counters(void)
 
 void print_version()
 {
-       fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
+       fprintf(outf, "turbostat version 2024.04.08 - Len Brown <lenb@kernel.org>\n");
 }
 
 #define COMMAND_LINE_SIZE 2048
@@ -6291,6 +7529,9 @@ int add_counter(unsigned int msr_num, char *path, char *name,
 {
        struct msr_counter *msrp;
 
+       if (no_msr && msr_num)
+               errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num);
+
        msrp = calloc(1, sizeof(struct msr_counter));
        if (msrp == NULL) {
                perror("calloc");
@@ -6595,6 +7836,8 @@ void cmdline(int argc, char **argv)
                { "list", no_argument, 0, 'l' },
                { "out", required_argument, 0, 'o' },
                { "quiet", no_argument, 0, 'q' },
+               { "no-msr", no_argument, 0, 'M' },
+               { "no-perf", no_argument, 0, 'P' },
                { "show", required_argument, 0, 's' },
                { "Summary", no_argument, 0, 'S' },
                { "TCC", required_argument, 0, 'T' },
@@ -6604,7 +7847,25 @@ void cmdline(int argc, char **argv)
 
        progname = argv[0];
 
-       while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) {
+       /*
+        * Parse some options early, because they may make other options invalid,
+        * like adding the MSR counter with --add and at the same time using --no-msr.
+        */
+       while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) {
+               switch (opt) {
+               case 'M':
+                       no_msr = 1;
+                       break;
+               case 'P':
+                       no_perf = 1;
+                       break;
+               default:
+                       break;
+               }
+       }
+       optind = 0;
+
+       while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) {
                switch (opt) {
                case 'a':
                        parse_add_command(optarg);
@@ -6662,6 +7923,10 @@ void cmdline(int argc, char **argv)
                case 'q':
                        quiet = 1;
                        break;
+               case 'M':
+               case 'P':
+                       /* Parsed earlier */
+                       break;
                case 'n':
                        num_iterations = strtod(optarg, NULL);
 
@@ -6704,6 +7969,22 @@ void cmdline(int argc, char **argv)
        }
 }
 
+void set_rlimit(void)
+{
+       struct rlimit limit;
+
+       if (getrlimit(RLIMIT_NOFILE, &limit) < 0)
+               err(1, "Failed to get rlimit");
+
+       if (limit.rlim_max < MAX_NOFILE)
+               limit.rlim_max = MAX_NOFILE;
+       if (limit.rlim_cur < MAX_NOFILE)
+               limit.rlim_cur = MAX_NOFILE;
+
+       if (setrlimit(RLIMIT_NOFILE, &limit) < 0)
+               err(1, "Failed to set rlimit");
+}
+
 int main(int argc, char **argv)
 {
        int fd, ret;
@@ -6729,9 +8010,13 @@ skip_cgroup_setting:
 
        probe_sysfs();
 
+       if (!getuid())
+               set_rlimit();
+
        turbostat_init();
 
-       msr_sum_record();
+       if (!no_msr)
+               msr_sum_record();
 
        /* dump counters and exit */
        if (dump_only)
index 908e0d0839369c2e41f090bddc2e9a9b9121b4c9..61c69297e7978fceed700be3ad43a7a870d20de2 100644 (file)
@@ -986,10 +986,12 @@ static void dpa_perf_setup(struct cxl_port *endpoint, struct range *range,
 {
        dpa_perf->qos_class = FAKE_QTG_ID;
        dpa_perf->dpa_range = *range;
-       dpa_perf->coord.read_latency = 500;
-       dpa_perf->coord.write_latency = 500;
-       dpa_perf->coord.read_bandwidth = 1000;
-       dpa_perf->coord.write_bandwidth = 1000;
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               dpa_perf->coord[i].read_latency = 500;
+               dpa_perf->coord[i].write_latency = 500;
+               dpa_perf->coord[i].read_bandwidth = 1000;
+               dpa_perf->coord[i].write_bandwidth = 1000;
+       }
 }
 
 static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
index b1ede624986676a554514105936698fdd2b0a915..b7c8f29c09a978895c1176e1a39aeda8c97e8416 100644 (file)
@@ -18,7 +18,7 @@ echo 'sched:*' > set_event
 
 yield
 
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -lt 3 ]; then
     fail "at least fork, exec and exit events should be recorded"
 fi
@@ -29,7 +29,7 @@ echo 1 > events/sched/enable
 
 yield
 
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -lt 3 ]; then
     fail "at least fork, exec and exit events should be recorded"
 fi
@@ -40,7 +40,7 @@ echo 0 > events/sched/enable
 
 yield
 
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -ne 0 ]; then
     fail "any of scheduler events should not be recorded"
 fi
index 541bf192e30e6bcec377908643d41d1d1dbf765a..14bbab0cce13521abbcae9bbd3772a567239c77f 100644 (file)
@@ -51,6 +51,7 @@
 #include <stdarg.h>
 #include <string.h>
 #include <stdio.h>
+#include <sys/utsname.h>
 #endif
 
 #ifndef ARRAY_SIZE
@@ -79,6 +80,9 @@
 #define KSFT_XPASS 3
 #define KSFT_SKIP  4
 
+#ifndef __noreturn
+#define __noreturn       __attribute__((__noreturn__))
+#endif
 #define __printf(a, b)   __attribute__((format(printf, a, b)))
 
 /* counters */
@@ -288,24 +292,26 @@ void ksft_test_result_code(int exit_code, const char *test_name,
        }
 
        /* Docs seem to call for double space if directive is absent */
-       if (!directive[0] && msg[0])
+       if (!directive[0] && msg)
                directive = " #  ";
 
-       va_start(args, msg);
        printf("%s %u %s%s", tap_code, ksft_test_num(), test_name, directive);
        errno = saved_errno;
-       vprintf(msg, args);
+       if (msg) {
+               va_start(args, msg);
+               vprintf(msg, args);
+               va_end(args);
+       }
        printf("\n");
-       va_end(args);
 }
 
-static inline int ksft_exit_pass(void)
+static inline __noreturn int ksft_exit_pass(void)
 {
        ksft_print_cnts();
        exit(KSFT_PASS);
 }
 
-static inline int ksft_exit_fail(void)
+static inline __noreturn int ksft_exit_fail(void)
 {
        ksft_print_cnts();
        exit(KSFT_FAIL);
@@ -332,7 +338,7 @@ static inline int ksft_exit_fail(void)
                  ksft_cnt.ksft_xfail + \
                  ksft_cnt.ksft_xskip)
 
-static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
 {
        int saved_errno = errno;
        va_list args;
@@ -347,19 +353,19 @@ static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
        exit(KSFT_FAIL);
 }
 
-static inline int ksft_exit_xfail(void)
+static inline __noreturn int ksft_exit_xfail(void)
 {
        ksft_print_cnts();
        exit(KSFT_XFAIL);
 }
 
-static inline int ksft_exit_xpass(void)
+static inline __noreturn int ksft_exit_xpass(void)
 {
        ksft_print_cnts();
        exit(KSFT_XPASS);
 }
 
-static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
 {
        int saved_errno = errno;
        va_list args;
@@ -388,4 +394,21 @@ static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
        exit(KSFT_SKIP);
 }
 
+static inline int ksft_min_kernel_version(unsigned int min_major,
+                                         unsigned int min_minor)
+{
+#ifdef NOLIBC
+       ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n");
+       return 0;
+#else
+       unsigned int major, minor;
+       struct utsname info;
+
+       if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
+               ksft_exit_fail_msg("Can't parse kernel version\n");
+
+       return major > min_major || (major == min_major && minor >= min_minor);
+#endif
+}
+
 #endif /* __KSELFTEST_H */
index 4fd735e48ee7eea99702fcb3e27f539c887b15bc..ba3ddeda24bf527295acaf32159097c03ac52153 100644 (file)
                FIXTURE_DATA(fixture_name) self; \
                pid_t child = 1; \
                int status = 0; \
+               bool jmp = false; \
                memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
                if (setjmp(_metadata->env) == 0) { \
                        /* Use the same _metadata. */ \
                                _metadata->exit_code = KSFT_FAIL; \
                        } \
                } \
+               else \
+                       jmp = true; \
                if (child == 0) { \
-                       if (_metadata->setup_completed && !_metadata->teardown_parent) \
+                       if (_metadata->setup_completed && !_metadata->teardown_parent && !jmp) \
                                fixture_name##_teardown(_metadata, &self, variant->data); \
                        _exit(0); \
                } \
@@ -1202,7 +1205,7 @@ void __run_test(struct __fixture_metadata *f,
                diagnostic = "unknown";
 
        ksft_test_result_code(t->exit_code, test_name,
-                             diagnostic ? "%s" : "", diagnostic);
+                             diagnostic ? "%s" : NULL, diagnostic);
 }
 
 static int test_harness_run(int argc, char **argv)
index 2fb6dd8adba6945d0000c19fe90da1002d8c0dd4..8b984fa042869e595507368541504f0b04d42014 100644 (file)
@@ -86,7 +86,7 @@ static void netstat_read_type(FILE *fnetstat, struct netstat **dest, char *line)
 
        pos = strchr(line, ' ') + 1;
 
-       if (fscanf(fnetstat, type->header_name) == EOF)
+       if (fscanf(fnetstat, "%[^ :]", type->header_name) == EOF)
                test_error("fscanf(%s)", type->header_name);
        if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':')
                test_error("Unexpected netstat format (%c)", tmp);
index 92276f916f2f30d080ba3e1f5521c492192f8e98..e408b9243b2c5a5cf66785518fbfc16f2682b169 100644 (file)
@@ -17,37 +17,37 @@ static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER;
 void __test_msg(const char *buf)
 {
        pthread_mutex_lock(&ksft_print_lock);
-       ksft_print_msg(buf);
+       ksft_print_msg("%s", buf);
        pthread_mutex_unlock(&ksft_print_lock);
 }
 void __test_ok(const char *buf)
 {
        pthread_mutex_lock(&ksft_print_lock);
-       ksft_test_result_pass(buf);
+       ksft_test_result_pass("%s", buf);
        pthread_mutex_unlock(&ksft_print_lock);
 }
 void __test_fail(const char *buf)
 {
        pthread_mutex_lock(&ksft_print_lock);
-       ksft_test_result_fail(buf);
+       ksft_test_result_fail("%s", buf);
        pthread_mutex_unlock(&ksft_print_lock);
 }
 void __test_xfail(const char *buf)
 {
        pthread_mutex_lock(&ksft_print_lock);
-       ksft_test_result_xfail(buf);
+       ksft_test_result_xfail("%s", buf);
        pthread_mutex_unlock(&ksft_print_lock);
 }
 void __test_error(const char *buf)
 {
        pthread_mutex_lock(&ksft_print_lock);
-       ksft_test_result_error(buf);
+       ksft_test_result_error("%s", buf);
        pthread_mutex_unlock(&ksft_print_lock);
 }
 void __test_skip(const char *buf)
 {
        pthread_mutex_lock(&ksft_print_lock);
-       ksft_test_result_skip(buf);
+       ksft_test_result_skip("%s", buf);
        pthread_mutex_unlock(&ksft_print_lock);
 }
 
index 7df8b8700e39e96292f8eafdf105ee0314a65497..a2fe88d35ac06e4f534bd4d452670528d9f77219 100644 (file)
@@ -256,8 +256,6 @@ static int test_wait_fds(int sk[], size_t nr, bool is_writable[],
 
 static void test_client_active_rst(unsigned int port)
 {
-       /* one in queue, another accept()ed */
-       unsigned int wait_for = backlog + 2;
        int i, sk[3], err;
        bool is_writable[ARRAY_SIZE(sk)] = {false};
        unsigned int last = ARRAY_SIZE(sk) - 1;
@@ -275,16 +273,20 @@ static void test_client_active_rst(unsigned int port)
        for (i = 0; i < last; i++) {
                err = _test_connect_socket(sk[i], this_ip_dest, port,
                                               (i == 0) ? TEST_TIMEOUT_SEC : -1);
-
                if (err < 0)
                        test_error("failed to connect()");
        }
 
-       synchronize_threads(); /* 2: connection accept()ed, another queued */
-       err = test_wait_fds(sk, last, is_writable, wait_for, TEST_TIMEOUT_SEC);
+       synchronize_threads(); /* 2: two connections: one accept()ed, another queued */
+       err = test_wait_fds(sk, last, is_writable, last, TEST_TIMEOUT_SEC);
        if (err < 0)
                test_error("test_wait_fds(): %d", err);
 
+       /* async connect() with third sk to get into request_sock_queue */
+       err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
+       if (err < 0)
+               test_error("failed to connect()");
+
        synchronize_threads(); /* 3: close listen socket */
        if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
                test_fail("Failed to send data on connected socket");
@@ -292,13 +294,14 @@ static void test_client_active_rst(unsigned int port)
                test_ok("Verified established tcp connection");
 
        synchronize_threads(); /* 4: finishing up */
-       err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
-       if (err < 0)
-               test_error("failed to connect()");
 
        synchronize_threads(); /* 5: closed active sk */
-       err = test_wait_fds(sk, ARRAY_SIZE(sk), NULL,
-                           wait_for, TEST_TIMEOUT_SEC);
+       /*
+        * Wait for 2 connections: one accepted, another in the accept queue,
+        * the one in request_sock_queue won't get fully established, so
+        * doesn't receive an active RST, see inet_csk_listen_stop().
+        */
+       err = test_wait_fds(sk, last, NULL, last, TEST_TIMEOUT_SEC);
        if (err < 0)
                test_error("select(): %d", err);
 
index 452de131fa3a9c720cd1fc4b9dc24438fd01d15d..517930f9721bd9b062d178def9fb296c17353119 100644 (file)
@@ -21,7 +21,7 @@ static void make_listen(int sk)
 static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info,
                                const char *tst)
 {
-       struct tcp_ao_info_opt tmp;
+       struct tcp_ao_info_opt tmp = {};
        socklen_t len = sizeof(tmp);
 
        if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len))
index 1d975bf52af33908593f61894233f9d8560cb16f..85b3baa3f7f34112ea95239c8819a2b1d834e22a 100644 (file)
@@ -34,7 +34,7 @@
 #endif
 
 #ifndef UDP_MAX_SEGMENTS
-#define UDP_MAX_SEGMENTS       (1 << 6UL)
+#define UDP_MAX_SEGMENTS       (1 << 7UL)
 #endif
 
 #define CONST_MTU_TEST 1500
index d49dd3ffd0d96abeaa38cd92f3040ef747726541..c001dd79179d5d28e51d69cbad4c7e9a6a026053 100644 (file)
@@ -66,7 +66,7 @@ static int check_diff(struct timeval start, struct timeval end)
        diff = end.tv_usec - start.tv_usec;
        diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC;
 
-       if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
+       if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
                printf("Diff too high: %lld..", diff);
                return -1;
        }
@@ -184,80 +184,71 @@ static int check_timer_create(int which)
        return 0;
 }
 
-int remain;
-__thread int got_signal;
+static pthread_t ctd_thread;
+static volatile int ctd_count, ctd_failed;
 
-static void *distribution_thread(void *arg)
+static void ctd_sighandler(int sig)
 {
-       while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
-       return NULL;
+       if (pthread_self() != ctd_thread)
+               ctd_failed = 1;
+       ctd_count--;
 }
 
-static void distribution_handler(int nr)
+static void *ctd_thread_func(void *arg)
 {
-       if (!__atomic_exchange_n(&got_signal, 1, __ATOMIC_RELAXED))
-               __atomic_fetch_sub(&remain, 1, __ATOMIC_RELAXED);
-}
-
-/*
- * Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID
- * timer signals. This primarily tests that the kernel does not favour any one.
- */
-static int check_timer_distribution(void)
-{
-       int err, i;
-       timer_t id;
-       const int nthreads = 10;
-       pthread_t threads[nthreads];
        struct itimerspec val = {
                .it_value.tv_sec = 0,
                .it_value.tv_nsec = 1000 * 1000,
                .it_interval.tv_sec = 0,
                .it_interval.tv_nsec = 1000 * 1000,
        };
+       timer_t id;
 
-       remain = nthreads + 1;  /* worker threads + this thread */
-       signal(SIGALRM, distribution_handler);
-       err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
-       if (err < 0) {
-               ksft_perror("Can't create timer");
-               return -1;
-       }
-       err = timer_settime(id, 0, &val, NULL);
-       if (err < 0) {
-               ksft_perror("Can't set timer");
-               return -1;
-       }
+       /* 1/10 seconds to ensure the leader sleeps */
+       usleep(10000);
 
-       for (i = 0; i < nthreads; i++) {
-               err = pthread_create(&threads[i], NULL, distribution_thread,
-                                    NULL);
-               if (err) {
-                       ksft_print_msg("Can't create thread: %s (%d)\n",
-                                      strerror(errno), errno);
-                       return -1;
-               }
-       }
+       ctd_count = 100;
+       if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
+               return "Can't create timer\n";
+       if (timer_settime(id, 0, &val, NULL))
+               return "Can't set timer\n";
 
-       /* Wait for all threads to receive the signal. */
-       while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
+       while (ctd_count > 0 && !ctd_failed)
+               ;
 
-       for (i = 0; i < nthreads; i++) {
-               err = pthread_join(threads[i], NULL);
-               if (err) {
-                       ksft_print_msg("Can't join thread: %s (%d)\n",
-                                      strerror(errno), errno);
-                       return -1;
-               }
-       }
+       if (timer_delete(id))
+               return "Can't delete timer\n";
 
-       if (timer_delete(id)) {
-               ksft_perror("Can't delete timer");
-               return -1;
-       }
+       return NULL;
+}
+
+/*
+ * Test that only the running thread receives the timer signal.
+ */
+static int check_timer_distribution(void)
+{
+       const char *errmsg;
 
-       ksft_test_result_pass("check_timer_distribution\n");
+       signal(SIGALRM, ctd_sighandler);
+
+       errmsg = "Can't create thread\n";
+       if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
+               goto err;
+
+       errmsg = "Can't join thread\n";
+       if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
+               goto err;
+
+       if (!ctd_failed)
+               ksft_test_result_pass("check signal distribution\n");
+       else if (ksft_min_kernel_version(6, 3))
+               ksft_test_result_fail("check signal distribution\n");
+       else
+               ksft_test_result_skip("check signal distribution (old kernel)\n");
        return 0;
+err:
+       ksft_print_msg("%s", errmsg);
+       return -1;
 }
 
 int main(int argc, char **argv)
index 48b9a803235a80413f0d94d9eb841d9f045779e8..d13ebde203221ae3fa81835fae684c8e180cf111 100644 (file)
@@ -21,9 +21,6 @@
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *   GNU General Public License for more details.
  */
-
-
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
@@ -62,45 +59,47 @@ int clear_time_state(void)
 #define NUM_FREQ_OUTOFRANGE 4
 #define NUM_FREQ_INVALID 2
 
+#define SHIFTED_PPM (1 << 16)
+
 long valid_freq[NUM_FREQ_VALID] = {
-       -499<<16,
-       -450<<16,
-       -400<<16,
-       -350<<16,
-       -300<<16,
-       -250<<16,
-       -200<<16,
-       -150<<16,
-       -100<<16,
-       -75<<16,
-       -50<<16,
-       -25<<16,
-       -10<<16,
-       -5<<16,
-       -1<<16,
+        -499 * SHIFTED_PPM,
+        -450 * SHIFTED_PPM,
+        -400 * SHIFTED_PPM,
+        -350 * SHIFTED_PPM,
+        -300 * SHIFTED_PPM,
+        -250 * SHIFTED_PPM,
+        -200 * SHIFTED_PPM,
+        -150 * SHIFTED_PPM,
+        -100 * SHIFTED_PPM,
+         -75 * SHIFTED_PPM,
+         -50 * SHIFTED_PPM,
+         -25 * SHIFTED_PPM,
+         -10 * SHIFTED_PPM,
+          -5 * SHIFTED_PPM,
+          -1 * SHIFTED_PPM,
        -1000,
-       1<<16,
-       5<<16,
-       10<<16,
-       25<<16,
-       50<<16,
-       75<<16,
-       100<<16,
-       150<<16,
-       200<<16,
-       250<<16,
-       300<<16,
-       350<<16,
-       400<<16,
-       450<<16,
-       499<<16,
+           1 * SHIFTED_PPM,
+           5 * SHIFTED_PPM,
+          10 * SHIFTED_PPM,
+          25 * SHIFTED_PPM,
+          50 * SHIFTED_PPM,
+          75 * SHIFTED_PPM,
+         100 * SHIFTED_PPM,
+         150 * SHIFTED_PPM,
+         200 * SHIFTED_PPM,
+         250 * SHIFTED_PPM,
+         300 * SHIFTED_PPM,
+         350 * SHIFTED_PPM,
+         400 * SHIFTED_PPM,
+         450 * SHIFTED_PPM,
+         499 * SHIFTED_PPM,
 };
 
 long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
-       -1000<<16,
-       -550<<16,
-       550<<16,
-       1000<<16,
+       -1000 * SHIFTED_PPM,
+        -550 * SHIFTED_PPM,
+         550 * SHIFTED_PPM,
+        1000 * SHIFTED_PPM,
 };
 
 #define LONG_MAX (~0UL>>1)
diff --git a/tools/testing/selftests/turbostat/defcolumns.py b/tools/testing/selftests/turbostat/defcolumns.py
new file mode 100755 (executable)
index 0000000..d9b0420
--- /dev/null
@@ -0,0 +1,60 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+from shutil import which
+
+turbostat = which('turbostat')
+if turbostat is None:
+       print('Could not find turbostat binary')
+       exit(1)
+
+timeout = which('timeout')
+if timeout is None:
+       print('Could not find timeout binary')
+       exit(1)
+
+proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True)
+if proc_turbostat.returncode != 0:
+       print(f'turbostat failed with {proc_turbostat.returncode}')
+       exit(1)
+
+#
+# By default --list reports also "usec" and "Time_Of_Day_Seconds" columns
+# which are only visible when running with --debug.
+#
+expected_columns_debug = proc_turbostat.stdout.replace(b',', b'\t').strip()
+expected_columns = expected_columns_debug.replace(b'usec\t', b'').replace(b'Time_Of_Day_Seconds\t', b'').replace(b'X2APIC\t', b'').replace(b'APIC\t', b'')
+
+#
+# Run turbostat with no options for 10 seconds and send SIGINT
+#
+timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '1s']
+turbostat_argv = [turbostat, '-i', '0.250']
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+       print(f'turbostat failed with {proc_turbostat.returncode}')
+       exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns != actual_columns:
+       print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}')
+       exit(1)
+print('OK')
+
+#
+# Same, but with --debug
+#
+turbostat_argv.append('--debug')
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+       print(f'turbostat failed with {proc_turbostat.returncode}')
+       exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns_debug != actual_columns:
+       print(f'turbostat column check failed\n{expected_columns_debug=}\n{actual_columns=}')
+       exit(1)
+print('OK')